net: use indirect call wrappers for skb_copy_datagram_iter()
[linux-2.6-block.git] / drivers / net / ethernet / mellanox / mlx5 / core / en_tc.c
index cdc63dd598673c0f827d25dad4dca402e9b14c4b..901f88a886c8023e9e6107421c6f366c8f0dde38 100644 (file)
 #include "fs_core.h"
 #include "en/port.h"
 #include "en/tc_tun.h"
+#include "en/mapping.h"
+#include "en/tc_ct.h"
 #include "lib/devcom.h"
 #include "lib/geneve.h"
 #include "diag/en_tc_tracepoint.h"
 
+/* Size in bytes of a single HW modify-header action entry. */
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+
 struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
@@ -84,6 +88,7 @@ enum {
        MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
        MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
        MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
+       MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
 };
 
 #define MLX5E_TC_MAX_SPLITS 1
@@ -134,6 +139,8 @@ struct mlx5e_tc_flow {
        refcount_t              refcnt;
        struct rcu_head         rcu_head;
        struct completion       init_done;
+       int tunnel_id; /* the mapped tunnel id of this flow */
+
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
@@ -144,15 +151,118 @@ struct mlx5e_tc_flow_parse_attr {
        const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
-       int num_mod_hdr_actions;
-       int max_mod_hdr_actions;
-       void *mod_hdr_actions;
+       struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
 };
 
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
 
+/* Composite key describing a flow's tunnel match (outer control/ports/ip,
+ * tunnel key id, outer addresses and the ingress filter device).  Used as
+ * the lookup key when mapping a tunnel match to a unique tunnel id.
+ */
+struct tunnel_match_key {
+       struct flow_dissector_key_control enc_control;
+       struct flow_dissector_key_keyid enc_key_id;
+       struct flow_dissector_key_ports enc_tp;
+       struct flow_dissector_key_ip enc_ip;
+       /* Only one address family is used per flow; selected by
+        * enc_control.addr_type.
+        */
+       union {
+               struct flow_dissector_key_ipv4_addrs enc_ipv4;
+               struct flow_dissector_key_ipv6_addrs enc_ipv6;
+       };
+
+       int filter_ifindex; /* ifindex of the device the filter was added on */
+};
+
+/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
+ * Upper TUNNEL_INFO_BITS for general tunnel info.
+ * Lower ENC_OPTS_BITS bits for enc_opts.
+ */
+#define TUNNEL_INFO_BITS 6
+#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
+#define ENC_OPTS_BITS 2
+#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
+#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
+#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
+
+/* Per-attribute mapping to a metadata register:
+ *  - mfield/moffset/mlen describe the register field written by a
+ *    modify-header set action (see mlx5e_tc_match_to_reg_set());
+ *  - soffset is the byte offset of the register inside fte_match_param,
+ *    used when matching (see mlx5e_tc_match_to_reg_match()).
+ * The *_to_reg_ct initializers presumably come from en/tc_ct.h (included
+ * above) -- confirm there.
+ */
+struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
+       [CHAIN_TO_REG] = {
+               .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
+               .moffset = 0,
+               .mlen = 2,
+       },
+       [TUNNEL_TO_REG] = {
+               .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
+               .moffset = 3,
+               .mlen = 1,
+               .soffset = MLX5_BYTE_OFF(fte_match_param,
+                                        misc_parameters_2.metadata_reg_c_1),
+       },
+       [ZONE_TO_REG] = zone_to_reg_ct,
+       [CTSTATE_TO_REG] = ctstate_to_reg_ct,
+       [MARK_TO_REG] = mark_to_reg_ct,
+       [LABELS_TO_REG] = labels_to_reg_ct,
+       [FTEID_TO_REG] = fteid_to_reg_ct,
+       [TUPLEID_TO_REG] = tupleid_to_reg_ct,
+};
+
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
+
+/* Add a match on metadata register @type to @spec, matching @data under
+ * @mask.  soffset locates the register inside fte_match_param; mlen is the
+ * match width in bytes.
+ */
+void
+mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
+                           enum mlx5e_tc_attr_to_reg type,
+                           u32 data,
+                           u32 mask)
+{
+       int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
+       int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+       void *headers_c = spec->match_criteria;
+       void *headers_v = spec->match_value;
+       void *fmask, *fval;
+
+       fmask = headers_c + soffset;
+       fval = headers_v + soffset;
+
+       /* Shift the low match_len bytes into position for the partial-width
+        * memcpy below.  NOTE(review): assigning a cpu_to_be32() result to a
+        * plain u32 and shifting it is endian-unclean (sparse would warn);
+        * confirm behavior on big-endian hosts.
+        */
+       mask = cpu_to_be32(mask) >> (32 - (match_len * 8));
+       data = cpu_to_be32(data) >> (32 - (match_len * 8));
+
+       memcpy(fmask, &mask, match_len);
+       memcpy(fval, &data, match_len);
+
+       spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+}
+
+/* Append a modify-header "set" action that writes @data into the metadata
+ * register described by mlx5e_tc_attr_to_reg_mappings[@type].  Grows the
+ * @mod_hdr_acts array on demand (FDB namespace).  Returns 0 on success or
+ * a negative errno from alloc_mod_hdr_actions().
+ */
+int
+mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
+                         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
+                         enum mlx5e_tc_attr_to_reg type,
+                         u32 data)
+{
+       int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
+       int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
+       int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
+       char *modact;
+       int err;
+
+       /* Ensure room for one more action entry. */
+       err = alloc_mod_hdr_actions(mdev, MLX5_FLOW_NAMESPACE_FDB,
+                                   mod_hdr_acts);
+       if (err)
+               return err;
+
+       /* Point at the next free action slot. */
+       modact = mod_hdr_acts->actions +
+                (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
+
+       /* Firmware has 5bit length field and 0 means 32bits */
+       if (mlen == 4)
+               mlen = 0;
+
+       MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
+       MLX5_SET(set_action_in, modact, field, mfield);
+       MLX5_SET(set_action_in, modact, offset, moffset * 8);
+       MLX5_SET(set_action_in, modact, length, mlen * 8);
+       MLX5_SET(set_action_in, modact, data, data);
+       mod_hdr_acts->num_actions++;
+
+       return 0;
+}
+
 struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;
 
@@ -210,8 +320,6 @@ struct mlx5e_mod_hdr_entry {
        int compl_result;
 };
 
-#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
-
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);
 
@@ -361,10 +469,10 @@ static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
        struct mod_hdr_key key;
        u32 hash_key;
 
-       num_actions  = parse_attr->num_mod_hdr_actions;
+       num_actions  = parse_attr->mod_hdr_acts.num_actions;
        actions_size = MLX5_MH_ACT_SZ * num_actions;
 
-       key.actions = parse_attr->mod_hdr_actions;
+       key.actions = parse_attr->mod_hdr_acts.actions;
        key.num_actions = num_actions;
 
        hash_key = hash_mod_hdr_info(&key);
@@ -954,7 +1062,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                flow_act.modify_hdr = attr->modify_hdr;
-               kfree(parse_attr->mod_hdr_actions);
+               dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                if (err)
                        return err;
        }
@@ -1043,8 +1151,16 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_esw_flow_attr *attr)
 {
+       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
        struct mlx5_flow_handle *rule;
 
+       if (flow_flag_test(flow, CT)) {
+               mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+
+               return mlx5_tc_ct_flow_offload(flow->priv, flow, spec, attr,
+                                              mod_hdr_acts);
+       }
+
        rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
        if (IS_ERR(rule))
                return rule;
@@ -1063,10 +1179,15 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
 static void
 mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                             struct mlx5e_tc_flow *flow,
-                          struct mlx5_esw_flow_attr *attr)
+                            struct mlx5_esw_flow_attr *attr)
 {
        flow_flag_clear(flow, OFFLOADED);
 
+       if (flow_flag_test(flow, CT)) {
+               mlx5_tc_ct_delete_flow(flow->priv, flow, attr);
+               return;
+       }
+
        if (attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
 
@@ -1224,7 +1345,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
-               kfree(parse_attr->mod_hdr_actions);
+               dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                if (err)
                        return err;
        }
@@ -1274,6 +1395,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        int out_index;
 
+       mlx5e_put_flow_tunnel_id(flow);
+
        if (flow_flag_test(flow, NOT_READY)) {
                remove_unready_flow(flow);
                kvfree(attr->parse_attr);
@@ -1662,150 +1785,272 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
        }
 }
 
+/* Return true if the classifier rule contains a goto-chain action.
+ * NOTE(review): declared int but returns true/false; bool would be the
+ * clearer return type.
+ */
+static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
+{
+       struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+       struct flow_action *flow_action = &rule->action;
+       const struct flow_action_entry *act;
+       int i;
 
-static int parse_tunnel_attr(struct mlx5e_priv *priv,
-                            struct mlx5_flow_spec *spec,
-                            struct flow_cls_offload *f,
-                            struct net_device *filter_dev, u8 *match_level)
+       flow_action_for_each(i, act, flow_action) {
+               switch (act->id) {
+               case FLOW_ACTION_GOTO:
+                       return true;
+               default:
+                       continue;
+               }
+       }
+
+       return false;
+}
+
+/* Classify the geneve tunnel-option masks in @opts:
+ *  - all masks zero            => *dont_care stays true, return 0;
+ *  - all masks fully set       => *dont_care = false, return 0;
+ *  - any partially-set mask    => -EOPNOTSUPP (partial option match is
+ *                                 not offloadable in chain > 0).
+ * @opts is the mask side of the enc_opts dissector key; geneve option
+ * length is in 4-byte units, hence the * 4.
+ */
+static int
+enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
+                                   struct flow_dissector_key_enc_opts *opts,
+                                   struct netlink_ext_ack *extack,
+                                   bool *dont_care)
+{
+       struct geneve_opt *opt;
+       int off = 0;
+
+       *dont_care = true;
+
+       while (opts->len > off) {
+               opt = (struct geneve_opt *)&opts->data[off];
+
+               /* Any non-zero mask bit means options are being matched. */
+               if (!(*dont_care) || opt->opt_class || opt->type ||
+                   memchr_inv(opt->opt_data, 0, opt->length * 4)) {
+                       *dont_care = false;
+
+                       /* Only a full (all-ones) mask is supported. */
+                       if (opt->opt_class != U16_MAX ||
+                           opt->type != U8_MAX ||
+                           memchr_inv(opt->opt_data, 0xFF,
+                                      opt->length * 4)) {
+                               NL_SET_ERR_MSG(extack,
+                                              "Partial match of tunnel options in chain > 0 isn't supported");
+                               netdev_warn(priv->netdev,
+                                           "Partial match of tunnel options in chain > 0 isn't supported");
+                               return -EOPNOTSUPP;
+                       }
+               }
+
+               off += sizeof(struct geneve_opt) + opt->length * 4;
+       }
+
+       return 0;
+}
+
+/* Copy the dissector key @diss_key of @rule into @dst (sizeof(*dst) bytes).
+ * Arguments are captured in locals to avoid multiple evaluation.
+ */
+#define COPY_DISSECTOR(rule, diss_key, dst)\
+({ \
+       struct flow_rule *__rule = (rule);\
+       typeof(dst) __dst = dst;\
+\
+       memcpy(__dst,\
+              skb_flow_dissector_target(__rule->match.dissector,\
+                                        diss_key,\
+                                        __rule->match.key),\
+              sizeof(*__dst));\
+})
+
+/* Allocate a unique tunnel id for this flow's tunnel match and an optional
+ * second id for its enc_opts.  The combined value packs the tunnel mapping
+ * above ENC_OPTS_BITS and the enc_opts mapping in the low bits (0 when the
+ * options are don't-care).  Depending on the chain, either emit a
+ * modify-header action writing the value to the tunnel register (chain 0)
+ * or match the register against it (chain > 0).  Stores the value in
+ * flow->tunnel_id; released by mlx5e_put_flow_tunnel_id().
+ */
+static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
+                                   struct mlx5e_tc_flow *flow,
+                                   struct flow_cls_offload *f,
+                                   struct net_device *filter_dev)
 {
-       struct netlink_ext_ack *extack = f->common.extack;
-       void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-                                      outer_headers);
-       void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-                                      outer_headers);
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+       struct netlink_ext_ack *extack = f->common.extack;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
+       struct flow_match_enc_opts enc_opts_match;
+       struct mlx5_rep_uplink_priv *uplink_priv;
+       struct mlx5e_rep_priv *uplink_rpriv;
+       struct tunnel_match_key tunnel_key;
+       bool enc_opts_is_dont_care = true;
+       u32 tun_id, enc_opts_id = 0;
+       struct mlx5_eswitch *esw;
+       u32 value, mask;
        int err;
 
-       err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
-                                headers_c, headers_v, match_level);
-       if (err) {
-               NL_SET_ERR_MSG_MOD(extack,
-                                  "failed to parse tunnel attributes");
+       esw = priv->mdev->priv.eswitch;
+       uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+       uplink_priv = &uplink_rpriv->uplink_priv;
+
+       /* Build the mapping key from the rule's enc_* dissector keys. */
+       memset(&tunnel_key, 0, sizeof(tunnel_key));
+       COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
+                      &tunnel_key.enc_control);
+       if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+               COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+                              &tunnel_key.enc_ipv4);
+       else
+               COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+                              &tunnel_key.enc_ipv6);
+       COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
+       COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
+                      &tunnel_key.enc_tp);
+       COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
+                      &tunnel_key.enc_key_id);
+       tunnel_key.filter_ifindex = filter_dev->ifindex;
+
+       err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
+       if (err)
                return err;
-       }
 
-       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
-               struct flow_match_control match;
-               u16 addr_type;
+       /* Tunnel options get their own mapping unless fully don't-care. */
+       flow_rule_match_enc_opts(rule, &enc_opts_match);
+       err = enc_opts_is_dont_care_or_full_match(priv,
+                                                 enc_opts_match.mask,
+                                                 extack,
+                                                 &enc_opts_is_dont_care);
+       if (err)
+               goto err_enc_opts;
 
-               flow_rule_match_enc_control(rule, &match);
-               addr_type = match.key->addr_type;
+       if (!enc_opts_is_dont_care) {
+               err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
+                                 enc_opts_match.key, &enc_opts_id);
+               if (err)
+                       goto err_enc_opts;
+       }
 
-               /* For tunnel addr_type used same key id`s as for non-tunnel */
-               if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
-                       struct flow_match_ipv4_addrs match;
+       /* Ignore the enc_opts part of the mask when no options are matched. */
+       value = tun_id << ENC_OPTS_BITS | enc_opts_id;
+       mask = enc_opts_id ? TUNNEL_ID_MASK :
+                            (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
 
-                       flow_rule_match_enc_ipv4_addrs(rule, &match);
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-                                src_ipv4_src_ipv6.ipv4_layout.ipv4,
-                                ntohl(match.mask->src));
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                                src_ipv4_src_ipv6.ipv4_layout.ipv4,
-                                ntohl(match.key->src));
+       if (attr->chain) {
+               mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
+                                           TUNNEL_TO_REG, value, mask);
+       } else {
+               mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
+               err = mlx5e_tc_match_to_reg_set(priv->mdev,
+                                               mod_hdr_acts,
+                                               TUNNEL_TO_REG, value);
+               if (err)
+                       goto err_set;
 
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-                                dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-                                ntohl(match.mask->dst));
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-                                dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
-                                ntohl(match.key->dst));
-
-                       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
-                                        ethertype);
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-                                ETH_P_IP);
-               } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
-                       struct flow_match_ipv6_addrs match;
-
-                       flow_rule_match_enc_ipv6_addrs(rule, &match);
-                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-                                           src_ipv4_src_ipv6.ipv6_layout.ipv6),
-                              &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
-                                                                  ipv6));
-                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-                                           src_ipv4_src_ipv6.ipv6_layout.ipv6),
-                              &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
-                                                                 ipv6));
-
-                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-                                           dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-                              &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
-                                                                  ipv6));
-                       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-                                           dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-                              &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
-                                                                 ipv6));
-
-                       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
-                                        ethertype);
-                       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-                                ETH_P_IPV6);
-               }
+               attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        }
 
-       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
-               struct flow_match_ip match;
+       flow->tunnel_id = value;
+       return 0;
 
-               flow_rule_match_enc_ip(rule, &match);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
-                        match.mask->tos & 0x3);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
-                        match.key->tos & 0x3);
+err_set:
+       if (enc_opts_id)
+               mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
+                              enc_opts_id);
+err_enc_opts:
+       mapping_remove(uplink_priv->tunnel_mapping, tun_id);
+       return err;
+}
 
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
-                        match.mask->tos >> 2);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
-                        match.key->tos  >> 2);
+/* Release the tunnel id (and enc_opts id, if any) allocated by
+ * mlx5e_get_flow_tunnel_id().  A zero tunnel_id means both components are
+ * zero and nothing is removed, so this is safe on flows without a mapping.
+ */
+static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
+{
+       u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
+       u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
+       struct mlx5_rep_uplink_priv *uplink_priv;
+       struct mlx5e_rep_priv *uplink_rpriv;
+       struct mlx5_eswitch *esw;
 
-               MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
-                        match.mask->ttl);
-               MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
-                        match.key->ttl);
+       esw = flow->priv->mdev->priv.eswitch;
+       uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+       uplink_priv = &uplink_rpriv->uplink_priv;
+
+       if (tun_id)
+               mapping_remove(uplink_priv->tunnel_mapping, tun_id);
+       if (enc_opts_id)
+               mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
+                              enc_opts_id);
+}
 
-               if (match.mask->ttl &&
-                   !MLX5_CAP_ESW_FLOWTABLE_FDB
-                       (priv->mdev,
-                        ft_field_support.outer_ipv4_ttl)) {
+/* Accessor for the flow's mapped tunnel id (0 if none was allocated). */
+u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
+{
+       return flow->tunnel_id;
+}
+
+/* Parse tunnel matches for an eswitch flow.  On chain 0 the tunnel headers
+ * are parsed directly and a DECAP action is set (caller matches on inner
+ * headers); on chain > 0 the flow instead relies on the tunnel id mapping
+ * register, which requires reg_c1 loopback support.  *match_inner tells
+ * the caller whether header pointers should switch to the inner headers.
+ */
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
+                            struct mlx5e_tc_flow *flow,
+                            struct mlx5_flow_spec *spec,
+                            struct flow_cls_offload *f,
+                            struct net_device *filter_dev,
+                            u8 *match_level,
+                            bool *match_inner)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct netlink_ext_ack *extack = f->common.extack;
+       bool needs_mapping, sets_mapping;
+       int err;
+
+       if (!mlx5e_is_eswitch_flow(flow))
+               return -EOPNOTSUPP;
+
+       /* chain > 0 reads the mapping; chain 0 with a goto action writes it */
+       needs_mapping = !!flow->esw_attr->chain;
+       sets_mapping = !flow->esw_attr->chain && flow_has_tc_fwd_action(f);
+       *match_inner = !needs_mapping;
+
+       if ((needs_mapping || sets_mapping) &&
+           !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
+               NL_SET_ERR_MSG(extack,
+                              "Chains on tunnel devices isn't supported without register loopback support");
+               netdev_warn(priv->netdev,
+                           "Chains on tunnel devices isn't supported without register loopback support");
+               return -EOPNOTSUPP;
+       }
+
+       if (!flow->esw_attr->chain) {
+               err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
+                                        match_level);
+               if (err) {
                        NL_SET_ERR_MSG_MOD(extack,
-                                          "Matching on TTL is not supported");
-                       return -EOPNOTSUPP;
+                                          "Failed to parse tunnel attributes");
+                       netdev_warn(priv->netdev,
+                                   "Failed to parse tunnel attributes");
+                       return err;
                }
 
+               flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
        }
 
-       /* Enforce DMAC when offloading incoming tunneled flows.
-        * Flow counters require a match on the DMAC.
-        */
-       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
-       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
-       ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-                                    dmac_47_16), priv->netdev->dev_addr);
+       if (!needs_mapping && !sets_mapping)
+               return 0;
 
-       /* let software handle IP fragments */
-       MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
-       MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+       return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
+}
 
-       return 0;
+/* Helpers returning pointers to the inner/outer header regions of the FTE
+ * match criteria/value.  The flags-based variants below pick the inner
+ * headers when the flow decapsulates (MLX5_FLOW_CONTEXT_ACTION_DECAP).
+ */
+static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
+{
+       return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                           inner_headers);
 }
 
-static void *get_match_headers_criteria(u32 flags,
-                                       struct mlx5_flow_spec *spec)
+static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
 {
-       return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
-               MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-                            inner_headers) :
-               MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-                            outer_headers);
+       return MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                           inner_headers);
+}
+
+static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
+{
+       return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+                           outer_headers);
+}
+
+static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
+{
+       return MLX5_ADDR_OF(fte_match_param, spec->match_value,
+                           outer_headers);
 }
 
 static void *get_match_headers_value(u32 flags,
                                     struct mlx5_flow_spec *spec)
 {
        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
-               MLX5_ADDR_OF(fte_match_param, spec->match_value,
-                            inner_headers) :
-               MLX5_ADDR_OF(fte_match_param, spec->match_value,
-                            outer_headers);
+               get_match_inner_headers_value(spec) :
+               get_match_outer_headers_value(spec);
+}
+
+static void *get_match_headers_criteria(u32 flags,
+                                       struct mlx5_flow_spec *spec)
+{
+       return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
+               get_match_inner_headers_criteria(spec) :
+               get_match_outer_headers_criteria(spec);
 }
 
 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
@@ -1843,6 +2088,7 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
 }
 
 static int __parse_cls_flower(struct mlx5e_priv *priv,
+                             struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec,
                              struct flow_cls_offload *f,
                              struct net_device *filter_dev,
@@ -1883,6 +2129,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_TCP) |
              BIT(FLOW_DISSECTOR_KEY_IP)  |
+             BIT(FLOW_DISSECTOR_KEY_CT) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
              BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
@@ -1892,18 +2139,22 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
        }
 
        if (mlx5e_get_tc_tun(filter_dev)) {
-               if (parse_tunnel_attr(priv, spec, f, filter_dev,
-                                     outer_match_level))
-                       return -EOPNOTSUPP;
+               bool match_inner = false;
 
-               /* At this point, header pointers should point to the inner
-                * headers, outer header were already set by parse_tunnel_attr
-                */
-               match_level = inner_match_level;
-               headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
-                                                      spec);
-               headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
-                                                   spec);
+               err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
+                                       outer_match_level, &match_inner);
+               if (err)
+                       return err;
+
+               if (match_inner) {
+                       /* header pointers should point to the inner headers
+                        * if the packet was decapsulated already.
+                        * outer headers are set by parse_tunnel_attr.
+                        */
+                       match_level = inner_match_level;
+                       headers_c = get_match_inner_headers_criteria(spec);
+                       headers_v = get_match_inner_headers_value(spec);
+               }
        }
 
        err = mlx5e_flower_parse_meta(filter_dev, f);
@@ -2220,8 +2471,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
        inner_match_level = MLX5_MATCH_NONE;
        outer_match_level = MLX5_MATCH_NONE;
 
-       err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
-                                &outer_match_level);
+       err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
+                                &inner_match_level, &outer_match_level);
        non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
                                 outer_match_level : inner_match_level;
 
@@ -2381,25 +2632,26 @@ static struct mlx5_fields fields[] = {
        OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
 };
 
-/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at
- * max from the SW pedit action. On success, attr->num_mod_hdr_actions
- * says how many HW actions were actually parsed.
- */
-static int offload_pedit_fields(struct pedit_headers_action *hdrs,
+static int offload_pedit_fields(struct mlx5e_priv *priv,
+                               int namespace,
+                               struct pedit_headers_action *hdrs,
                                struct mlx5e_tc_flow_parse_attr *parse_attr,
                                u32 *action_flags,
                                struct netlink_ext_ack *extack)
 {
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
-       int i, action_size, nactions, max_actions, first, last, next_z;
+       int i, action_size, first, last, next_z;
        void *headers_c, *headers_v, *action, *vals_p;
        u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
+       struct mlx5e_tc_mod_hdr_acts *mod_acts;
        struct mlx5_fields *f;
        unsigned long mask;
        __be32 mask_be32;
        __be16 mask_be16;
+       int err;
        u8 cmd;
 
+       mod_acts = &parse_attr->mod_hdr_acts;
        headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
        headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
 
@@ -2409,11 +2661,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
        add_vals = &hdrs[1].vals;
 
        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
-       action = parse_attr->mod_hdr_actions +
-                parse_attr->num_mod_hdr_actions * action_size;
-
-       max_actions = parse_attr->max_mod_hdr_actions;
-       nactions = parse_attr->num_mod_hdr_actions;
 
        for (i = 0; i < ARRAY_SIZE(fields); i++) {
                bool skip;
@@ -2439,13 +2686,6 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                        return -EOPNOTSUPP;
                }
 
-               if (nactions == max_actions) {
-                       NL_SET_ERR_MSG_MOD(extack,
-                                          "too many pedit actions, can't offload");
-                       printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
-                       return -EOPNOTSUPP;
-               }
-
                skip = false;
                if (s_mask) {
                        void *match_mask = headers_c + f->match_offset;
@@ -2492,6 +2732,18 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                        return -EOPNOTSUPP;
                }
 
+               err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
+               if (err) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "too many pedit actions, can't offload");
+                       mlx5_core_warn(priv->mdev,
+                                      "mlx5: parsed %d pedit actions, can't do more\n",
+                                      mod_acts->num_actions);
+                       return err;
+               }
+
+               action = mod_acts->actions +
+                        (mod_acts->num_actions * action_size);
                MLX5_SET(set_action_in, action, action_type, cmd);
                MLX5_SET(set_action_in, action, field, f->field);
 
@@ -2514,11 +2766,9 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs,
                else if (f->field_bsize == 8)
                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
 
-               action += action_size;
-               nactions++;
+               ++mod_acts->num_actions;
        }
 
-       parse_attr->num_mod_hdr_actions = nactions;
        return 0;
 }
 
@@ -2531,29 +2781,48 @@ static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
                return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
 }
 
-static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
-                                struct pedit_headers_action *hdrs,
-                                int namespace,
-                                struct mlx5e_tc_flow_parse_attr *parse_attr)
+int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
+                         int namespace,
+                         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
 {
-       int nkeys, action_size, max_actions;
+       int action_size, new_num_actions, max_hw_actions;
+       size_t new_sz, old_sz;
+       void *ret;
 
-       nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
-               hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
-       action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+       if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
+               return 0;
 
-       max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
-       /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */
-       max_actions = min(max_actions, nkeys * 16);
+       action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
 
-       parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
-       if (!parse_attr->mod_hdr_actions)
+       max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
+                                                               namespace);
+       new_num_actions = min(max_hw_actions,
+                             mod_hdr_acts->actions ?
+                             mod_hdr_acts->max_actions * 2 : 1);
+       if (mod_hdr_acts->max_actions == new_num_actions)
+               return -ENOSPC;
+
+       new_sz = action_size * new_num_actions;
+       old_sz = mod_hdr_acts->max_actions * action_size;
+       ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
+       if (!ret)
                return -ENOMEM;
 
-       parse_attr->max_mod_hdr_actions = max_actions;
+       memset(ret + old_sz, 0, new_sz - old_sz);
+       mod_hdr_acts->actions = ret;
+       mod_hdr_acts->max_actions = new_num_actions;
+
        return 0;
 }
 
+void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
+{
+       kfree(mod_hdr_acts->actions);
+       mod_hdr_acts->actions = NULL;
+       mod_hdr_acts->num_actions = 0;
+       mod_hdr_acts->max_actions = 0;
+}
+
 static const struct pedit_headers zero_masks = {};
 
 static int parse_tc_pedit_action(struct mlx5e_priv *priv,
@@ -2605,13 +2874,8 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
        int err;
        u8 cmd;
 
-       if (!parse_attr->mod_hdr_actions) {
-               err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
-               if (err)
-                       goto out_err;
-       }
-
-       err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
+       err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
+                                  action_flags, extack);
        if (err < 0)
                goto out_dealloc_parsed_actions;
 
@@ -2631,8 +2895,7 @@ static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
        return 0;
 
 out_dealloc_parsed_actions:
-       kfree(parse_attr->mod_hdr_actions);
-out_err:
+       dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
        return err;
 }
 
@@ -2677,7 +2940,9 @@ struct ipv6_hoplimit_word {
        __u8    hop_limit;
 };
 
-static bool is_action_keys_supported(const struct flow_action_entry *act)
+static int is_action_keys_supported(const struct flow_action_entry *act,
+                                   bool ct_flow, bool *modify_ip_header,
+                                   struct netlink_ext_ack *extack)
 {
        u32 mask, offset;
        u8 htype;
@@ -2696,7 +2961,13 @@ static bool is_action_keys_supported(const struct flow_action_entry *act)
                if (offset != offsetof(struct iphdr, ttl) ||
                    ttl_word->protocol ||
                    ttl_word->check) {
-                       return true;
+                       *modify_ip_header = true;
+               }
+
+               if (ct_flow && offset >= offsetof(struct iphdr, saddr)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "can't offload re-write of ipv4 address with action ct");
+                       return -EOPNOTSUPP;
                }
        } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
                struct ipv6_hoplimit_word *hoplimit_word =
@@ -2705,15 +2976,27 @@ static bool is_action_keys_supported(const struct flow_action_entry *act)
                if (offset != offsetof(struct ipv6hdr, payload_len) ||
                    hoplimit_word->payload_len ||
                    hoplimit_word->nexthdr) {
-                       return true;
+                       *modify_ip_header = true;
                }
+
+               if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr)) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "can't offload re-write of ipv6 address with action ct");
+                       return -EOPNOTSUPP;
+               }
+       } else if (ct_flow && (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
+                              htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "can't offload re-write of transport header ports with action ct");
+               return -EOPNOTSUPP;
        }
-       return false;
+
+       return 0;
 }
 
 static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
                                          struct flow_action *flow_action,
-                                         u32 actions,
+                                         u32 actions, bool ct_flow,
                                          struct netlink_ext_ack *extack)
 {
        const struct flow_action_entry *act;
@@ -2721,7 +3004,7 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
        void *headers_v;
        u16 ethertype;
        u8 ip_proto;
-       int i;
+       int i, err;
 
        headers_v = get_match_headers_value(actions, spec);
        ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
@@ -2736,10 +3019,10 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
                    act->id != FLOW_ACTION_ADD)
                        continue;
 
-               if (is_action_keys_supported(act)) {
-                       modify_ip_header = true;
-                       break;
-               }
+               err = is_action_keys_supported(act, ct_flow,
+                                              &modify_ip_header, extack);
+               if (err)
+                       return false;
        }
 
        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
@@ -2761,23 +3044,29 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct netlink_ext_ack *extack)
 {
+       bool ct_flow;
        u32 actions;
 
-       if (mlx5e_is_eswitch_flow(flow))
+       ct_flow = flow_flag_test(flow, CT);
+       if (mlx5e_is_eswitch_flow(flow)) {
                actions = flow->esw_attr->action;
-       else
-               actions = flow->nic_attr->action;
 
-       if (flow_flag_test(flow, EGRESS) &&
-           !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
-             (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
-             (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
-               return false;
+               if (flow->esw_attr->split_count && ct_flow) {
+                       /* All registers used by ct are cleared when using
+                        * split rules.
+                        */
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Can't offload mirroring with action ct");
+                       return false;
+               }
+       } else {
+               actions = flow->nic_attr->action;
+       }
 
        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                return modify_header_match_supported(&parse_attr->spec,
                                                     flow_action, actions,
-                                                    extack);
+                                                    ct_flow, extack);
 
        return true;
 }
@@ -2878,8 +3167,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
        if (!flow_action_has_entries(flow_action))
                return -EINVAL;
 
-       if (!flow_action_hw_stats_types_check(flow_action, extack,
-                                             FLOW_ACTION_HW_STATS_TYPE_DELAYED))
+       if (!flow_action_hw_stats_check(flow_action, extack,
+                                       FLOW_ACTION_HW_STATS_DELAYED_BIT))
                return -EOPNOTSUPP;
 
        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
@@ -2968,9 +3257,9 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
                /* in case all pedit actions are skipped, remove the MOD_HDR
                 * flag.
                 */
-               if (parse_attr->num_mod_hdr_actions == 0) {
+               if (parse_attr->mod_hdr_acts.num_actions == 0) {
                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-                       kfree(parse_attr->mod_hdr_actions);
+                       dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                }
        }
 
@@ -3313,6 +3602,85 @@ static bool is_duplicated_output_device(struct net_device *dev,
        return false;
 }
 
+static int mlx5_validate_goto_chain(struct mlx5_eswitch *esw,
+                                   struct mlx5e_tc_flow *flow,
+                                   const struct flow_action_entry *act,
+                                   u32 actions,
+                                   struct netlink_ext_ack *extack)
+{
+       u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+       bool ft_flow = mlx5e_is_ft_flow(flow);
+       u32 dest_chain = act->chain_index;
+
+       if (ft_flow) {
+               NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
+               return -EOPNOTSUPP;
+       }
+
+       if (!mlx5_esw_chains_backwards_supported(esw) &&
+           dest_chain <= attr->chain) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Goto lower numbered chain isn't supported");
+               return -EOPNOTSUPP;
+       }
+       if (dest_chain > max_chain) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Requested destination chain is out of supported range");
+               return -EOPNOTSUPP;
+       }
+
+       if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
+                      MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
+           !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_and_fwd_to_table)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Goto chain is not allowed if action has reformat or decap");
+               return -EOPNOTSUPP;
+       }
+
+       return 0;
+}
+
+static int verify_uplink_forwarding(struct mlx5e_priv *priv,
+                                   struct mlx5e_tc_flow *flow,
+                                   struct net_device *out_dev,
+                                   struct netlink_ext_ack *extack)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+       struct mlx5e_rep_priv *rep_priv;
+
+       /* Forwarding non encapsulated traffic between
+        * uplink ports is allowed only if
+        * termination_table_raw_traffic cap is set.
+        *
+        * Input vport was stored esw_attr->in_rep.
+        * In LAG case, *priv* is the private data of
+        * uplink which may be not the input vport.
+        */
+       rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
+
+       if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
+             mlx5e_eswitch_uplink_rep(out_dev)))
+               return 0;
+
+       if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
+                                       termination_table_raw_traffic)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "devices are both uplink, can't offload forwarding");
+               pr_err("devices %s %s are both uplink, can't offload forwarding\n",
+                      priv->netdev->name, out_dev->name);
+               return -EOPNOTSUPP;
+       } else if (out_dev != rep_priv->netdev) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "devices are not the same uplink, can't offload forwarding");
+               pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
+                      priv->netdev->name, out_dev->name);
+               return -EOPNOTSUPP;
+       }
+       return 0;
+}
+
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
                                struct mlx5e_tc_flow *flow,
@@ -3327,15 +3695,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
        int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
        bool ft_flow = mlx5e_is_ft_flow(flow);
        const struct flow_action_entry *act;
+       bool encap = false, decap = false;
+       u32 action = attr->action;
        int err, i, if_count = 0;
-       bool encap = false;
-       u32 action = 0;
 
        if (!flow_action_has_entries(flow_action))
                return -EINVAL;
 
-       if (!flow_action_hw_stats_types_check(flow_action, extack,
-                                             FLOW_ACTION_HW_STATS_TYPE_DELAYED))
+       if (!flow_action_hw_stats_check(flow_action, extack,
+                                       FLOW_ACTION_HW_STATS_DELAYED_BIT))
                return -EOPNOTSUPP;
 
        flow_action_for_each(i, act, flow_action) {
@@ -3410,7 +3778,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
                                struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
                                struct net_device *uplink_upper;
-                               struct mlx5e_rep_priv *rep_priv;
 
                                if (is_duplicated_output_device(priv->netdev,
                                                                out_dev,
@@ -3446,21 +3813,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                                return err;
                                }
 
-                               /* Don't allow forwarding between uplink.
-                                *
-                                * Input vport was stored esw_attr->in_rep.
-                                * In LAG case, *priv* is the private data of
-                                * uplink which may be not the input vport.
-                                */
-                               rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
-                               if (mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
-                                   mlx5e_eswitch_uplink_rep(out_dev)) {
-                                       NL_SET_ERR_MSG_MOD(extack,
-                                                          "devices are both uplink, can't offload forwarding");
-                                       pr_err("devices %s %s are both uplink, can't offload forwarding\n",
-                                              priv->netdev->name, out_dev->name);
-                                       return -EOPNOTSUPP;
-                               }
+                               err = verify_uplink_forwarding(priv, flow, out_dev, extack);
+                               if (err)
+                                       return err;
 
                                if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
                                        NL_SET_ERR_MSG_MOD(extack,
@@ -3532,31 +3887,24 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                        attr->split_count = attr->out_count;
                        break;
                case FLOW_ACTION_TUNNEL_DECAP:
-                       action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+                       decap = true;
                        break;
-               case FLOW_ACTION_GOTO: {
-                       u32 dest_chain = act->chain_index;
-                       u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
+               case FLOW_ACTION_GOTO:
+                       err = mlx5_validate_goto_chain(esw, flow, act, action,
+                                                      extack);
+                       if (err)
+                               return err;
 
-                       if (ft_flow) {
-                               NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
-                               return -EOPNOTSUPP;
-                       }
-                       if (!mlx5_esw_chains_backwards_supported(esw) &&
-                           dest_chain <= attr->chain) {
-                               NL_SET_ERR_MSG_MOD(extack,
-                                                  "Goto earlier chain isn't supported");
-                               return -EOPNOTSUPP;
-                       }
-                       if (dest_chain > max_chain) {
-                               NL_SET_ERR_MSG_MOD(extack,
-                                                  "Requested destination chain is out of supported range");
-                               return -EOPNOTSUPP;
-                       }
                        action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
-                       attr->dest_chain = dest_chain;
+                       attr->dest_chain = act->chain_index;
+                       break;
+               case FLOW_ACTION_CT:
+                       err = mlx5_tc_ct_parse_action(priv, attr, act, extack);
+                       if (err)
+                               return err;
+
+                       flow_flag_set(flow, CT);
                        break;
-                       }
                default:
                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
                        return -EOPNOTSUPP;
@@ -3585,9 +3933,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                 * flag. we might have set split_count either by pedit or
                 * pop/push. if there is no pop/push either, reset it too.
                 */
-               if (parse_attr->num_mod_hdr_actions == 0) {
+               if (parse_attr->mod_hdr_acts.num_actions == 0) {
                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-                       kfree(parse_attr->mod_hdr_actions);
+                       dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
                        if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
                              (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
                                attr->split_count = 0;
@@ -3599,6 +3947,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
 
        if (attr->dest_chain) {
+               if (decap) {
+                       /* It can be supported if we'll create a mapping for
+                        * the tunnel device only (without tunnel), and set
+                        * this tunnel id with this decap flow.
+                        *
+                        * On restore (miss), we'll just set this saved tunnel
+                        * device.
+                        */
+
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "Decap with goto isn't supported");
+                       netdev_warn(priv->netdev,
+                                   "Decap with goto isn't supported\n");
+                       return -EOPNOTSUPP;
+               }
+
                if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Mirroring goto chain rules isn't supported");
@@ -3781,6 +4145,10 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
        if (err)
                goto err_free;
 
+       err = mlx5_tc_ct_parse_match(priv, &parse_attr->spec, f, extack);
+       if (err)
+               goto err_free;
+
        err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
        complete_all(&flow->init_done);
        if (err) {
@@ -4065,7 +4433,7 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
                goto errout;
        }
 
-       if (mlx5e_is_offloaded_flow(flow)) {
+       if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
                counter = mlx5e_tc_get_counter(flow);
                if (!counter)
                        goto errout;
@@ -4156,7 +4524,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
                return -EOPNOTSUPP;
        }
 
-       if (!flow_action_basic_hw_stats_types_check(flow_action, extack))
+       if (!flow_action_basic_hw_stats_check(flow_action, extack))
                return -EOPNOTSUPP;
 
        flow_action_for_each(i, act, flow_action) {
@@ -4180,8 +4548,14 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
                                struct tc_cls_matchall_offload *ma)
 {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct netlink_ext_ack *extack = ma->common.extack;
 
+       if (!mlx5_esw_qos_enabled(esw)) {
+               NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
+               return -EOPNOTSUPP;
+       }
+
        if (ma->common.prio != 1) {
                NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
                return -EINVAL;
@@ -4328,12 +4702,63 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 
 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 {
-       return rhashtable_init(tc_ht, &tc_ht_params);
+       const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts);
+       struct mlx5_rep_uplink_priv *uplink_priv;
+       struct mlx5e_rep_priv *priv;
+       struct mapping_ctx *mapping;
+       int err;
+
+       uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
+       priv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+
+       err = mlx5_tc_ct_init(uplink_priv);
+       if (err)
+               goto err_ct;
+
+       mapping = mapping_create(sizeof(struct tunnel_match_key),
+                                TUNNEL_INFO_BITS_MASK, true);
+       if (IS_ERR(mapping)) {
+               err = PTR_ERR(mapping);
+               goto err_tun_mapping;
+       }
+       uplink_priv->tunnel_mapping = mapping;
+
+       mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
+       if (IS_ERR(mapping)) {
+               err = PTR_ERR(mapping);
+               goto err_enc_opts_mapping;
+       }
+       uplink_priv->tunnel_enc_opts_mapping = mapping;
+
+       err = rhashtable_init(tc_ht, &tc_ht_params);
+       if (err)
+               goto err_ht_init;
+
+       return err;
+
+err_ht_init:
+       mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+err_enc_opts_mapping:
+       mapping_destroy(uplink_priv->tunnel_mapping);
+err_tun_mapping:
+       mlx5_tc_ct_clean(uplink_priv);
+err_ct:
+       netdev_warn(priv->netdev,
+                   "Failed to initialize tc (eswitch), err: %d", err);
+       return err;
 }
 
 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
 {
+       struct mlx5_rep_uplink_priv *uplink_priv;
+
        rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
+
+       uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
+       mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
+       mapping_destroy(uplink_priv->tunnel_mapping);
+
+       mlx5_tc_ct_clean(uplink_priv);
 }
 
 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
@@ -4365,3 +4790,147 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
        }
        mutex_unlock(&rpriv->unready_flows_lock);
 }
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+                                struct mlx5e_tc_update_priv *tc_priv,
+                                u32 tunnel_id)
+{
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct flow_dissector_key_enc_opts enc_opts = {};
+       struct mlx5_rep_uplink_priv *uplink_priv;
+       struct mlx5e_rep_priv *uplink_rpriv;
+       struct metadata_dst *tun_dst;
+       struct tunnel_match_key key;
+       u32 tun_id, enc_opts_id;
+       struct net_device *dev;
+       int err;
+
+       enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+       tun_id = tunnel_id >> ENC_OPTS_BITS;
+
+       if (!tun_id)
+               return true;
+
+       uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+       uplink_priv = &uplink_rpriv->uplink_priv;
+
+       err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+       if (err) {
+               WARN_ON_ONCE(true);
+               netdev_dbg(priv->netdev,
+                          "Couldn't find tunnel for tun_id: %d, err: %d\n",
+                          tun_id, err);
+               return false;
+       }
+
+       if (enc_opts_id) {
+               err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+                                  enc_opts_id, &enc_opts);
+               if (err) {
+                       netdev_dbg(priv->netdev,
+                                  "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
+                                  enc_opts_id, err);
+                       return false;
+               }
+       }
+
+       tun_dst = tun_rx_dst(enc_opts.len);
+       if (!tun_dst) {
+               WARN_ON_ONCE(true);
+               return false;
+       }
+
+       ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+                          key.enc_ipv4.src, key.enc_ipv4.dst,
+                          key.enc_ip.tos, key.enc_ip.ttl,
+                          0, /* label */
+                          key.enc_tp.src, key.enc_tp.dst,
+                          key32_to_tunnel_id(key.enc_key_id.keyid),
+                          TUNNEL_KEY);
+
+       if (enc_opts.len)
+               ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data,
+                                       enc_opts.len, enc_opts.dst_opt_type);
+
+       skb_dst_set(skb, (struct dst_entry *)tun_dst);
+       dev = dev_get_by_index(&init_net, key.filter_ifindex);
+       if (!dev) {
+               netdev_dbg(priv->netdev,
+                          "Couldn't find tunnel device with ifindex: %d\n",
+                          key.filter_ifindex);
+               return false;
+       }
+
+       /* Set tun_dev so we do dev_put() after datapath */
+       tc_priv->tun_dev = dev;
+
+       skb->dev = dev;
+
+       return true;
+}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
+                            struct sk_buff *skb,
+                            struct mlx5e_tc_update_priv *tc_priv)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+       u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
+       struct mlx5_rep_uplink_priv *uplink_priv;
+       struct mlx5e_rep_priv *uplink_rpriv;
+       struct tc_skb_ext *tc_skb_ext;
+       struct mlx5_eswitch *esw;
+       struct mlx5e_priv *priv;
+       int tunnel_moffset;
+       int err;
+
+       reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+       if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+               reg_c0 = 0;
+       reg_c1 = be32_to_cpu(cqe->imm_inval_pkey);
+
+       if (!reg_c0)
+               return true;
+
+       priv = netdev_priv(skb->dev);
+       esw = priv->mdev->priv.eswitch;
+
+       err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
+       if (err) {
+               netdev_dbg(priv->netdev,
+                          "Couldn't find chain for chain tag: %d, err: %d\n",
+                          reg_c0, err);
+               return false;
+       }
+
+       if (chain) {
+               tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+               if (!tc_skb_ext) {
+                       WARN_ON(1);
+                       return false;
+               }
+
+               tc_skb_ext->chain = chain;
+
+               tuple_id = reg_c1 & TUPLE_ID_MAX;
+
+               uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+               uplink_priv = &uplink_rpriv->uplink_priv;
+               if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
+                       return false;
+       }
+
+       tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
+       tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+       return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+       return true;
+}
+
+void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+       if (tc_priv->tun_dev)
+               dev_put(tc_priv->tun_dev);
+}