sfc: parse mangle actions (NAT) in conntrack entries
authorEdward Cree <ecree.xilinx@gmail.com>
Tue, 10 Oct 2023 21:51:59 +0000 (22:51 +0100)
committerDavid S. Miller <davem@davemloft.net>
Sun, 15 Oct 2023 13:25:03 +0000 (14:25 +0100)
The MAE can edit either address, L4 port, or both, for either source
 or destination.  These can't be mixed; i.e. it can edit source addr
 and source port, but not (say) source addr and dest port.

Reviewed-by: Pieter Jansen van Vuuren <pieter.jansen-van-vuuren@amd.com>
Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/sfc/tc_conntrack.c

index 44bb57670340dae06cad31fde1deaa60a1ae6e90..d90206f27161e4e3cf7d3bae93cb648b14f9584a 100644 (file)
@@ -276,10 +276,84 @@ static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
        return 0;
 }
 
+/**
+ * struct efx_tc_ct_mangler_state - tracks which fields have been pedited
+ *
+ * @ipv4: IP source or destination addr has been set
+ * @tcpudp: TCP/UDP source or destination port has been set
+ */
+struct efx_tc_ct_mangler_state {
+       u8 ipv4:1;
+       u8 tcpudp:1;
+};
+
+static int efx_tc_ct_mangle(struct efx_nic *efx, struct efx_tc_ct_entry *conn,
+                           const struct flow_action_entry *fa,
+                           struct efx_tc_ct_mangler_state *mung)
+{
+       /* Is this the first mangle we've processed for this rule? */
+       bool first = !(mung->ipv4 || mung->tcpudp);
+       bool dnat = false;
+
+       switch (fa->mangle.htype) {
+       case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+               switch (fa->mangle.offset) {
+               case offsetof(struct iphdr, daddr):
+                       dnat = true;
+                       fallthrough;
+               case offsetof(struct iphdr, saddr):
+                       if (fa->mangle.mask)
+                               return -EOPNOTSUPP;
+                       conn->nat_ip = htonl(fa->mangle.val);
+                       mung->ipv4 = 1;
+                       break;
+               default:
+                       return -EOPNOTSUPP;
+               }
+               break;
+       case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+       case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+               /* Both struct tcphdr and struct udphdr start with
+                *      __be16 source;
+                *      __be16 dest;
+                * so we can use the same code for both.
+                */
+               switch (fa->mangle.offset) {
+               case offsetof(struct tcphdr, dest):
+                       BUILD_BUG_ON(offsetof(struct tcphdr, dest) !=
+                                    offsetof(struct udphdr, dest));
+                       dnat = true;
+                       fallthrough;
+               case offsetof(struct tcphdr, source):
+                       BUILD_BUG_ON(offsetof(struct tcphdr, source) !=
+                                    offsetof(struct udphdr, source));
+                       if (~fa->mangle.mask != 0xffff)
+                               return -EOPNOTSUPP;
+                       conn->l4_natport = htons(fa->mangle.val);
+                       mung->tcpudp = 1;
+                       break;
+               default:
+                       return -EOPNOTSUPP;
+               }
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+       /* first mangle tells us whether this is SNAT or DNAT;
+        * subsequent mangles must match that
+        */
+       if (first)
+               conn->dnat = dnat;
+       else if (conn->dnat != dnat)
+               return -EOPNOTSUPP;
+       return 0;
+}
+
 static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
                             struct flow_cls_offload *tc)
 {
        struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
+       struct efx_tc_ct_mangler_state mung = {};
        struct efx_tc_ct_entry *conn, *old;
        struct efx_nic *efx = ct_zone->efx;
        const struct flow_action_entry *fa;
@@ -326,6 +400,17 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
                                goto release;
                        }
                        break;
+               case FLOW_ACTION_MANGLE:
+                       if (conn->eth_proto != htons(ETH_P_IP)) {
+                               netif_dbg(efx, drv, efx->net_dev,
+                                         "NAT only supported for IPv4\n");
+                               rc = -EOPNOTSUPP;
+                               goto release;
+                       }
+                       rc = efx_tc_ct_mangle(efx, conn, fa, &mung);
+                       if (rc)
+                               goto release;
+                       break;
                default:
                        netif_dbg(efx, drv, efx->net_dev,
                                  "Unhandled action %u for conntrack\n", fa->id);
@@ -335,8 +420,10 @@ static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
        }
 
        /* fill in defaults for unmangled values */
-       conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
-       conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
+       if (!mung.ipv4)
+               conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
+       if (!mung.tcpudp)
+               conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
 
        cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
        if (IS_ERR(cnt)) {