net: use indirect call wrappers for skb_copy_datagram_iter()
[linux-2.6-block.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_tc.c
index 4b049921599522e52e9f9075eab7c94f96895d2e..901f88a886c8023e9e6107421c6f366c8f0dde38 100644 (file)
@@ -56,6 +56,7 @@
 #include "en/port.h"
 #include "en/tc_tun.h"
 #include "en/mapping.h"
+#include "en/tc_ct.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "diag/en_tc_tracepoint.h"
@@ -87,6 +88,7 @@ enum {
        MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
        MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
        MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
+       MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
 };
 
 #define MLX5E_TC_MAX_SPLITS 1
@@ -193,6 +195,12 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
+       [ZONE_TO_REG] = zone_to_reg_ct,
+       [CTSTATE_TO_REG] = ctstate_to_reg_ct,
+       [MARK_TO_REG] = mark_to_reg_ct,
+       [LABELS_TO_REG] = labels_to_reg_ct,
+       [FTEID_TO_REG] = fteid_to_reg_ct,
+       [TUPLEID_TO_REG] = tupleid_to_reg_ct,
 };
 
 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
@@ -1143,8 +1151,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_esw_flow_attr *attr)
 {
+       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
        struct mlx5_flow_handle *rule;
 
+       if (flow_flag_test(flow, CT)) {
+               mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+
+               return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr,
+                                              mod_hdr_acts);
+       }
+
        rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
        if (IS_ERR(rule))
                return rule;
@@ -1163,10 +1179,15 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 static void
 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                             struct mlx5e_tc_flow *flow,
-                          struct mlx5_esw_flow_attr *attr)
+                            struct mlx5_esw_flow_attr *attr)
 {
        flow_flag_clear(flow, OFFLOADED);
 
+       if (flow_flag_test(flow, CT)) {
+               mlx5_tc_ct_delete_flow(flow->priv, flow, attr);
+               return;
+       }
+
        if (attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
 
@@ -1938,6 +1959,11 @@ static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
                               enc_opts_id);
 }
 
+u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
+{
+       return flow->tunnel_id;
+}
+
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
                             struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_spec *spec,
@@ -1959,11 +1985,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
        *match_inner = !needs_mapping;
 
        if ((needs_mapping || sets_mapping) &&
-           !mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+           !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
                NL_SET_ERR_MSG(extack,
-                              "Chains on tunnel devices isn't supported without register metadata support");
+                              "Chains on tunnel devices isn't supported without register loopback support");
                netdev_warn(priv->netdev,
-                           "Chains on tunnel devices isn't supported without register metadata support");
+                           "Chains on tunnel devices isn't supported without register loopback support");
                return -EOPNOTSUPP;
        }
 
@@ -2103,6 +2129,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_TCP) |
              BIT(FLOW_DISSECTOR_KEY_IP)  |
+             BIT(FLOW_DISSECTOR_KEY_CT) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
              BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
@@ -2913,7 +2940,9 @@ struct ipv6_hoplimit_word {
        __u8    hop_limit;
 };
 
-static bool is_action_keys_supported(const struct flow_action_entry *act)
+static int is_action_keys_supported(const struct flow_action_entry *act,
+                                   bool ct_flow, bool *modify_ip_header,
+                                   struct netlink_ext_ack *extack)
 {
        u32 mask, offset;
        u8 htype;
@@ -2932,7 +2961,13 @@ static bool is_action_keys_supported(const struct flow_action_entry *act)
                if (offset != offsetof(struct iphdr, ttl) ||
                    ttl_word->protocol ||
                    ttl_word->check) {
-                       return true;
+                       *modify_ip_header = true;
+               }
+
+               if (ct_flow && offset >= offsetof(struct iphdr, saddr)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "can't offload re-write of ipv4 address with action ct");
+                       return -EOPNOTSUPP;
                }
        } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
                struct ipv6_hoplimit_word *hoplimit_word =
@@ -2941,15 +2976,27 @@ static bool is_action_keys_supported(const struct flow_action_entry *act)
                if (offset != offsetof(struct ipv6hdr, payload_len) ||
                    hoplimit_word->payload_len ||
                    hoplimit_word->nexthdr) {
-                       return true;
+                       *modify_ip_header = true;
+               }
+
+               if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "can't offload re-write of ipv6 address with action ct");
+                       return -EOPNOTSUPP;
                }
+       } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
+                              htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "can't offload re-write of transport header ports with action ct");
+               return -EOPNOTSUPP;
        }
-       return false;
+
+       return 0;
 }
 
 static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
                                          struct flow_action *flow_action,
-                                         u32 actions,
+                                         u32 actions, bool ct_flow,
                                          struct netlink_ext_ack *extack)
 {
        const struct flow_action_entry *act;
@@ -2957,7 +3004,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
        void *headers_v;
        u16 ethertype;
        u8 ip_proto;
-       int i;
+       int i, err;
 
        headers_v = get_match_headers_value(actions, spec);
        ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
@@ -2972,10 +3019,10 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
                    act->id != FLOW_ACTION_ADD)
                        continue;
 
-               if (is_action_keys_supported(act)) {
-                       modify_ip_header = true;
-                       break;
-               }
+               err = is_action_keys_supported(act, ct_flow,
+                                              &modify_ip_header, extack);
+               if (err)
+                       return false;
        }
 
        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
@@ -2997,31 +3044,29 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct netlink_ext_ack *extack)
 {
-       struct net_device *filter_dev = parse_attr->filter_dev;
-       bool drop_action, pop_action;
+       bool ct_flow;
        u32 actions;
 
-       if (mlx5e_is_eswitch_flow(flow))
+       ct_flow = flow_flag_test(flow, CT);
+       if (mlx5e_is_eswitch_flow(flow)) {
                actions = flow->esw_attr->action;
-       else
-               actions = flow->nic_attr->action;
-
-       drop_action = actions & MLX5_FLOW_CONTEXT_ACTION_DROP;
-       pop_action = actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
 
-       if (flow_flag_test(flow, EGRESS) && !drop_action) {
-               /* We only support filters on tunnel device, or on vlan
-                * devices if they have pop/drop action
-                */
-               if (!mlx5e_get_tc_tun(filter_dev) ||
-                   (is_vlan_dev(filter_dev) && !pop_action))
-                       return false;
+               if (flow->esw_attr->split_count && ct_flow) {
+                       /* All registers used by ct are cleared when using
+                        * split rules.
+                        */
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Can't offload mirroring with action ct");
+                       return false;
+               }
+       } else {
+               actions = flow->nic_attr->action;
        }
 
        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                return modify_header_match_supported(&parse_attr->spec,
                                                     flow_action, actions,
-                                                    extack);
+                                                    ct_flow, extack);
 
        return true;
 }
@@ -3122,8 +3167,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
        if (!flow_action_has_entries(flow_action))
                return -EINVAL;
 
-       if (!flow_action_hw_stats_types_check(flow_action, extack,
-                                             FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT))
+       if (!flow_action_hw_stats_check(flow_action, extack,
+                                       FLOW_ACTION_HW_STATS_DELAYED_BIT))
                return -EOPNOTSUPP;
 
        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@ -3596,6 +3641,46 @@ static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw,
        return 0;
 }
 
+static int verify_uplink_forwarding(struct mlx5e_priv *priv,
+                                   struct mlx5e_tc_flow *flow,
+                                   struct net_device *out_dev,
+                                   struct netlink_ext_ack *extack)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+       struct mlx5e_rep_priv *rep_priv;
+
+       /* Forwarding non encapsulated traffic between
+        * uplink ports is allowed only if
+        * termination_table_raw_traffic cap is set.
+        *
+        * Input vport was stored esw_attr->in_rep.
+        * In LAG case, *priv* is the private data of
+        * uplink which may be not the input vport.
+        */
+       rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
+
+       if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+             mlx5e_eswitch_uplink_rep(out_dev)))
+               return 0;
+
+       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+                                       termination_table_raw_traffic)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "devices are both uplink, can't offload forwarding");
+               pr_err("devices %s %s are both uplink, can't offload forwarding\n",
+                      priv->netdev->name, out_dev->name);
+               return -EOPNOTSUPP;
+       } else if (out_dev != rep_priv->netdev) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "devices are not the same uplink, can't offload forwarding");
+               pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
+                      priv->netdev->name, out_dev->name);
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
                                struct mlx5e_tc_flow *flow,
@@ -3617,8 +3702,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
        if (!flow_action_has_entries(flow_action))
                return -EINVAL;
 
-       if (!flow_action_hw_stats_types_check(flow_action, extack,
-                                             FLOW_ACTION_HW_STATS_TYPE_DELAYED_BIT))
+       if (!flow_action_hw_stats_check(flow_action, extack,
+                                       FLOW_ACTION_HW_STATS_DELAYED_BIT))
                return -EOPNOTSUPP;
 
        flow_action_for_each(i, act, flow_action) {
@@ -3693,7 +3778,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
                                struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
                                struct net_device *uplink_upper;
-                               struct mlx5e_rep_priv *rep_priv;
 
                                if (is_duplicated_output_device(priv->netdev,
                                                                out_dev,
@@ -3729,21 +3813,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                                return err;
                                }
 
-                               /* Don't allow forwarding between uplink.
-                                *
-                                * Input vport was stored esw_attr->in_rep.
-                                * In LAG case, *priv* is the private data of
-                                * uplink which may be not the input vport.
-                                */
-                               rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
-                               if (mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
-                                   mlx5e_eswitch_uplink_rep(out_dev)) {
-                                       NL_SET_ERR_MSG_MOD(extack,
-                                                          "devices are both uplink, can't offload forwarding");
-                                       pr_err("devices %s %s are both uplink, can't offload forwarding\n",
-                                              priv->netdev->name, out_dev->name);
-                                       return -EOPNOTSUPP;
-                               }
+                               err = verify_uplink_forwarding(priv, flow, out_dev, extack);
+                               if (err)
+                                       return err;
 
                                if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
                                        NL_SET_ERR_MSG_MOD(extack,
@@ -3826,6 +3898,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                        action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        attr->dest_chain = act->chain_index;
                        break;
+               case FLOW_ACTION_CT:
+                       err = mlx5_tc_ct_parse_action(priv, attr, act, extack);
+                       if (err)
+                               return err;
+
+                       flow_flag_set(flow, CT);
+                       break;
                default:
                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
                        return -EOPNOTSUPP;
@@ -4066,6 +4145,10 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
+       err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack);
+       if (err)
+               goto err_free;
+
        err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
        complete_all(&flow->init_done);
        if (err) {
@@ -4350,7 +4433,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
                goto errout;
        }
 
-       if (mlx5e_is_offloaded_flow(flow)) {
+       if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
                counter = mlx5e_tc_get_counter(flow);
                if (!counter)
                        goto errout;
@@ -4441,7 +4524,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
        }
 
-       if (!flow_action_basic_hw_stats_types_check(flow_action, extack))
+       if (!flow_action_basic_hw_stats_check(flow_action, extack))
                return -EOPNOTSUPP;
 
        flow_action_for_each(i, act, flow_action) {
@@ -4465,8 +4548,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
                                struct tc_cls_matchall_offload *ma)
 {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct netlink_ext_ack *extack = ma->common.extack;
 
+       if (!mlx5_esw_qos_enabled(esw)) {
+               NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
+               return -EOPNOTSUPP;
+       }
+
        if (ma->common.prio != 1) {
                NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
                return -EINVAL;
@@ -4622,6 +4711,10 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
        priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
 
+       err = mlx5_tc_ct_init(uplink_priv);
+       if (err)
+               goto err_ct;
+
        mapping = mapping_create(sizeof(struct tunnel_match_key),
                                 TUNNEL_INFO_BITS_MASK, true);
        if (IS_ERR(mapping)) {
@@ -4648,6 +4741,8 @@ err_ht_init:
 err_enc_opts_mapping:
        mapping_destroy(uplink_priv->tunnel_mapping);
 err_tun_mapping:
+       mlx5_tc_ct_clean(uplink_priv);
+err_ct:
        netdev_warn(priv->netdev,
                    "Failed to initialize tc (eswitch), err: %d", err);
        return err;
@@ -4662,6 +4757,8 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
        mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
        mapping_destroy(uplink_priv->tunnel_mapping);
+
+       mlx5_tc_ct_clean(uplink_priv);
 }
 
 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
@@ -4779,7 +4876,9 @@ bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
                             struct mlx5e_tc_update_priv *tc_priv)
 {
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-       u32 chain = 0, reg_c0, reg_c1, tunnel_id;
+       u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
+       struct mlx5_rep_uplink_priv *uplink_priv;
+       struct mlx5e_rep_priv *uplink_rpriv;
        struct tc_skb_ext *tc_skb_ext;
        struct mlx5_eswitch *esw;
        struct mlx5e_priv *priv;
@@ -4813,6 +4912,13 @@ bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
                }
 
                tc_skb_ext->chain = chain;
+
+               tuple_id = reg_c1 & TUPLE_ID_MAX;
+
+               uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+               uplink_priv = &uplink_rpriv->uplink_priv;
+               if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
+                       return false;
        }
 
        tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;