Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
author David S. Miller <davem@davemloft.net>
Mon, 11 Apr 2022 10:47:58 +0000 (11:47 +0100)
committer David S. Miller <davem@davemloft.net>
Mon, 11 Apr 2022 10:47:58 +0000 (11:47 +0100)
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Replace unnecessary list_for_each_entry_continue() in nf_tables,
   from Jakob Koschel.

2) Add struct nf_conntrack_net_ecache to conntrack event cache and
   use it, from Florian Westphal.

3) Refactor ctnetlink_dump_list(), also from Florian.

4) Hold a module reference for each cttimeout object from its addition
   until its release, from Florian.

5) Consolidate nf_log MAC printer, from Phil Sutter.

6) Add basic logging support for unknown ethertype, from Phil Sutter.

7) Consolidate check for sysctl nf_log_all_netns toggle, also from Phil.

8) Replace hardcoded value in nft_bitwise, from Jeremy Sowden.

9) Rename BASIC-like goto tags in nft_bitwise to more meaningful names,
   also from Jeremy.

10) Add nft_fib support for reverse path filtering with policy-based routing
    on iif. Extend selftests to cover this new use case, from Florian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
include/net/netfilter/nf_conntrack.h
net/ipv4/netfilter/nft_fib_ipv4.c
net/ipv6/netfilter/nft_fib_ipv6.c
net/netfilter/nf_conntrack_ecache.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_log_syslog.c
net/netfilter/nf_tables_api.c
net/netfilter/nfnetlink_cttimeout.c
net/netfilter/nft_bitwise.c
net/netfilter/nft_fib.c
tools/testing/selftests/netfilter/nft_fib.sh

index b08b70989d2cf2f327de443f3abf95daa15ee957..69e6c6a218be82732643b681d5e8bcd81237d980 100644 (file)
@@ -43,6 +43,11 @@ union nf_conntrack_expect_proto {
        /* insert expect proto private data here */
 };
 
+struct nf_conntrack_net_ecache { /* event-cache state, embedded in struct nf_conntrack_net as 'ecache' */
+       struct delayed_work dwork;      /* initialised with ecache_work() as its handler */
+       struct netns_ct *ct_net;        /* set to &net->ct by nf_conntrack_ecache_pernet_init() */
+};
+
 struct nf_conntrack_net {
        /* only used when new connection is allocated: */
        atomic_t count;
@@ -58,8 +63,7 @@ struct nf_conntrack_net {
        struct ctl_table_header *sysctl_header;
 #endif
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
-       struct delayed_work ecache_dwork;
-       struct netns_ct *ct_net;
+       struct nf_conntrack_net_ecache ecache;
 #endif
 };
 
index 4151eb1262ddcd8e751ee530383920825eaf862f..b75cac69bd7e6b95eafc2b07aab571b93a8321fc 100644 (file)
@@ -112,6 +112,10 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
                fl4.daddr = iph->daddr;
                fl4.saddr = get_saddr(iph->saddr);
        } else {
+               if (nft_hook(pkt) == NF_INET_FORWARD &&
+                   priv->flags & NFTA_FIB_F_IIF)
+                       fl4.flowi4_iif = nft_out(pkt)->ifindex;
+
                fl4.daddr = iph->saddr;
                fl4.saddr = get_saddr(iph->daddr);
        }
index b3f163b40c2b2fc3c3549c895e3fa05cc95efe8a..8970d0b4faeb422fbcf335d9b75fa932dd87bd85 100644 (file)
@@ -30,6 +30,10 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
                fl6->daddr = iph->daddr;
                fl6->saddr = iph->saddr;
        } else {
+               if (nft_hook(pkt) == NF_INET_FORWARD &&
+                   priv->flags & NFTA_FIB_F_IIF)
+                       fl6->flowi6_iif = nft_out(pkt)->ifindex;
+
                fl6->daddr = iph->saddr;
                fl6->saddr = iph->daddr;
        }
index 07e65b4e92f86b944bb5c1a2b6ab676658120da5..0cb2da0a759a68f335db0273a464ad5d40e8b807 100644 (file)
@@ -96,8 +96,8 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
 
 static void ecache_work(struct work_struct *work)
 {
-       struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache_dwork.work);
-       struct netns_ct *ctnet = cnet->ct_net;
+       struct nf_conntrack_net *cnet = container_of(work, struct nf_conntrack_net, ecache.dwork.work);
+       struct netns_ct *ctnet = cnet->ecache.ct_net;
        int cpu, delay = -1;
        struct ct_pcpu *pcpu;
 
@@ -127,7 +127,7 @@ static void ecache_work(struct work_struct *work)
 
        ctnet->ecache_dwork_pending = delay > 0;
        if (delay >= 0)
-               schedule_delayed_work(&cnet->ecache_dwork, delay);
+               schedule_delayed_work(&cnet->ecache.dwork, delay);
 }
 
 static int __nf_conntrack_eventmask_report(struct nf_conntrack_ecache *e,
@@ -293,12 +293,12 @@ void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state)
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
        if (state == NFCT_ECACHE_DESTROY_FAIL &&
-           !delayed_work_pending(&cnet->ecache_dwork)) {
-               schedule_delayed_work(&cnet->ecache_dwork, HZ);
+           !delayed_work_pending(&cnet->ecache.dwork)) {
+               schedule_delayed_work(&cnet->ecache.dwork, HZ);
                net->ct.ecache_dwork_pending = true;
        } else if (state == NFCT_ECACHE_DESTROY_SENT) {
                net->ct.ecache_dwork_pending = false;
-               mod_delayed_work(system_wq, &cnet->ecache_dwork, 0);
+               mod_delayed_work(system_wq, &cnet->ecache.dwork, 0);
        }
 }
 
@@ -310,8 +310,9 @@ void nf_conntrack_ecache_pernet_init(struct net *net)
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
        net->ct.sysctl_events = nf_ct_events;
-       cnet->ct_net = &net->ct;
-       INIT_DELAYED_WORK(&cnet->ecache_dwork, ecache_work);
+
+       cnet->ecache.ct_net = &net->ct;
+       INIT_DELAYED_WORK(&cnet->ecache.dwork, ecache_work);
 
        BUILD_BUG_ON(__IPCT_MAX >= 16); /* e->ctmask is u16 */
 }
@@ -320,5 +321,5 @@ void nf_conntrack_ecache_pernet_fini(struct net *net)
 {
        struct nf_conntrack_net *cnet = nf_ct_pernet(net);
 
-       cancel_delayed_work_sync(&cnet->ecache_dwork);
+       cancel_delayed_work_sync(&cnet->ecache.dwork);
 }
index 1ea2ad732d578dcab975938770d852fec183120b..924d766e6c53ea62898004ce48d223cc5e08a64c 100644 (file)
@@ -1708,6 +1708,47 @@ static int ctnetlink_done_list(struct netlink_callback *cb)
        return 0;
 }
 
+static int ctnetlink_dump_one_entry(struct sk_buff *skb,
+                                   struct netlink_callback *cb,
+                                   struct nf_conn *ct,
+                                   bool dying)
+{ /* Dump one list entry; returns 0 when nothing was dumped for it, else ctnetlink_fill_info()'s result. */
+       struct ctnetlink_list_dump_ctx *ctx = (void *)cb->ctx;
+       struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+       u8 l3proto = nfmsg->nfgen_family;
+       int res;
+
+       if (l3proto && nf_ct_l3num(ct) != l3proto) /* a non-zero requested family filters out other families */
+               return 0;
+
+       if (ctx->last) { /* a resume point is saved: skip entries until it is reached */
+               if (ct != ctx->last)
+                       return 0;
+
+               ctx->last = NULL; /* resume point reached; this entry is dumped */
+       }
+
+       /* We can't dump extension info for the unconfirmed
+        * list because unconfirmed conntracks can have
+        * ct->ext reallocated (and thus freed).
+        *
+        * In the dying list case ct->ext can't be free'd
+        * until after we drop pcpu->lock.
+        */
+       res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
+                                 cb->nlh->nlmsg_seq,
+                                 NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+                                 ct, dying, 0);
+       if (res < 0) { /* fill failed: try to pin ct so it can serve as the resume point */
+               if (!refcount_inc_not_zero(&ct->ct_general.use)) /* refcount already zero: report 0 so the caller moves on */
+                       return 0;
+
+               ctx->last = ct; /* remembered with the extra reference taken above */
+       }
+
+       return res; /* a negative value makes ctnetlink_dump_list() stop and save its position */
+}
+
 static int
 ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying)
 {
@@ -1715,12 +1756,9 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
        struct nf_conn *ct, *last;
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
-       struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
-       u_int8_t l3proto = nfmsg->nfgen_family;
-       int res;
-       int cpu;
        struct hlist_nulls_head *list;
        struct net *net = sock_net(skb->sk);
+       int res, cpu;
 
        if (ctx->done)
                return 0;
@@ -1739,30 +1777,10 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
 restart:
                hlist_nulls_for_each_entry(h, n, list, hnnode) {
                        ct = nf_ct_tuplehash_to_ctrack(h);
-                       if (l3proto && nf_ct_l3num(ct) != l3proto)
-                               continue;
-                       if (ctx->last) {
-                               if (ct != last)
-                                       continue;
-                               ctx->last = NULL;
-                       }
 
-                       /* We can't dump extension info for the unconfirmed
-                        * list because unconfirmed conntracks can have
-                        * ct->ext reallocated (and thus freed).
-                        *
-                        * In the dying list case ct->ext can't be free'd
-                        * until after we drop pcpu->lock.
-                        */
-                       res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
-                                                 cb->nlh->nlmsg_seq,
-                                                 NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-                                                 ct, dying, 0);
+                       res = ctnetlink_dump_one_entry(skb, cb, ct, dying);
                        if (res < 0) {
-                               if (!refcount_inc_not_zero(&ct->ct_general.use))
-                                       continue;
                                ctx->cpu = cpu;
-                               ctx->last = ct;
                                spin_unlock_bh(&pcpu->lock);
                                goto out;
                        }
index 13234641cdb347e9daa0eac171b4c9d37f1880d0..77bcb10fc586a1379838d3c2d3a34f967a1c81ff 100644 (file)
@@ -40,6 +40,12 @@ struct arppayload {
        unsigned char ip_dst[4];
 };
 
+/* Guard against containers flooding syslog: allow init_net always, other netns only if sysctl_nf_log_all_netns is set. */
+static bool nf_log_allowed(const struct net *net)
+{
+       return net_eq(net, &init_net) || sysctl_nf_log_all_netns;
+}
+
 static void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
 {
        u16 vid;
@@ -133,8 +139,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
 {
        struct nf_log_buf *m;
 
-       /* FIXME: Disabled from containers until syslog ns is supported */
-       if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+       if (!nf_log_allowed(net))
                return;
 
        m = nf_log_buf_open();
@@ -766,9 +771,9 @@ dump_ipv6_packet(struct net *net, struct nf_log_buf *m,
                nf_log_buf_add(m, "MARK=0x%x ", skb->mark);
 }
 
-static void dump_ipv4_mac_header(struct nf_log_buf *m,
-                                const struct nf_loginfo *info,
-                                const struct sk_buff *skb)
+static void dump_mac_header(struct nf_log_buf *m,
+                           const struct nf_loginfo *info,
+                           const struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
        unsigned int logflags = 0;
@@ -798,9 +803,26 @@ fallback:
                const unsigned char *p = skb_mac_header(skb);
                unsigned int i;
 
-               nf_log_buf_add(m, "%02x", *p++);
-               for (i = 1; i < dev->hard_header_len; i++, p++)
-                       nf_log_buf_add(m, ":%02x", *p);
+               if (dev->type == ARPHRD_SIT) {
+                       p -= ETH_HLEN;
+
+                       if (p < skb->head)
+                               p = NULL;
+               }
+
+               if (p) {
+                       nf_log_buf_add(m, "%02x", *p++);
+                       for (i = 1; i < dev->hard_header_len; i++)
+                               nf_log_buf_add(m, ":%02x", *p++);
+               }
+
+               if (dev->type == ARPHRD_SIT) {
+                       const struct iphdr *iph =
+                               (struct iphdr *)skb_mac_header(skb);
+
+                       nf_log_buf_add(m, " TUNNEL=%pI4->%pI4", &iph->saddr,
+                                      &iph->daddr);
+               }
        }
        nf_log_buf_add(m, " ");
 }
@@ -814,8 +836,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
 {
        struct nf_log_buf *m;
 
-       /* FIXME: Disabled from containers until syslog ns is supported */
-       if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+       if (!nf_log_allowed(net))
                return;
 
        m = nf_log_buf_open();
@@ -827,7 +848,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
                                  out, loginfo, prefix);
 
        if (in)
-               dump_ipv4_mac_header(m, loginfo, skb);
+               dump_mac_header(m, loginfo, skb);
 
        dump_ipv4_packet(net, m, loginfo, skb, 0);
 
@@ -841,64 +862,6 @@ static struct nf_logger nf_ip_logger __read_mostly = {
        .me             = THIS_MODULE,
 };
 
-static void dump_ipv6_mac_header(struct nf_log_buf *m,
-                                const struct nf_loginfo *info,
-                                const struct sk_buff *skb)
-{
-       struct net_device *dev = skb->dev;
-       unsigned int logflags = 0;
-
-       if (info->type == NF_LOG_TYPE_LOG)
-               logflags = info->u.log.logflags;
-
-       if (!(logflags & NF_LOG_MACDECODE))
-               goto fallback;
-
-       switch (dev->type) {
-       case ARPHRD_ETHER:
-               nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
-                              eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
-               nf_log_dump_vlan(m, skb);
-               nf_log_buf_add(m, "MACPROTO=%04x ",
-                              ntohs(eth_hdr(skb)->h_proto));
-               return;
-       default:
-               break;
-       }
-
-fallback:
-       nf_log_buf_add(m, "MAC=");
-       if (dev->hard_header_len &&
-           skb->mac_header != skb->network_header) {
-               const unsigned char *p = skb_mac_header(skb);
-               unsigned int len = dev->hard_header_len;
-               unsigned int i;
-
-               if (dev->type == ARPHRD_SIT) {
-                       p -= ETH_HLEN;
-
-                       if (p < skb->head)
-                               p = NULL;
-               }
-
-               if (p) {
-                       nf_log_buf_add(m, "%02x", *p++);
-                       for (i = 1; i < len; i++)
-                               nf_log_buf_add(m, ":%02x", *p++);
-               }
-               nf_log_buf_add(m, " ");
-
-               if (dev->type == ARPHRD_SIT) {
-                       const struct iphdr *iph =
-                               (struct iphdr *)skb_mac_header(skb);
-                       nf_log_buf_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr,
-                                      &iph->daddr);
-               }
-       } else {
-               nf_log_buf_add(m, " ");
-       }
-}
-
 static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
                              unsigned int hooknum, const struct sk_buff *skb,
                              const struct net_device *in,
@@ -908,8 +871,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
 {
        struct nf_log_buf *m;
 
-       /* FIXME: Disabled from containers until syslog ns is supported */
-       if (!net_eq(net, &init_net) && !sysctl_nf_log_all_netns)
+       if (!nf_log_allowed(net))
                return;
 
        m = nf_log_buf_open();
@@ -921,7 +883,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
                                  loginfo, prefix);
 
        if (in)
-               dump_ipv6_mac_header(m, loginfo, skb);
+               dump_mac_header(m, loginfo, skb);
 
        dump_ipv6_packet(net, m, loginfo, skb, skb_network_offset(skb), 1);
 
@@ -935,6 +897,32 @@ static struct nf_logger nf_ip6_logger __read_mostly = {
        .me             = THIS_MODULE,
 };
 
+static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
+                                 unsigned int hooknum,
+                                 const struct sk_buff *skb,
+                                 const struct net_device *in,
+                                 const struct net_device *out,
+                                 const struct nf_loginfo *loginfo,
+                                 const char *prefix)
+{ /* Fallback used by nf_log_netdev_packet()'s default case: logs the common packet info and the MAC header only. */
+       struct nf_log_buf *m;
+
+       if (!nf_log_allowed(net)) /* logging not permitted for this netns */
+               return;
+
+       m = nf_log_buf_open();
+
+       if (!loginfo) /* caller supplied no log settings: use default_loginfo */
+               loginfo = &default_loginfo;
+
+       nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
+                                 prefix);
+
+       dump_mac_header(m, loginfo, skb); /* NOTE(review): the IPv4/IPv6 loggers call this only when 'in' is set; here it is unconditional and dump_mac_header() reads skb->dev - confirm that is always valid on this path */
+
+       nf_log_buf_close(m);
+}
+
 static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
                                 unsigned int hooknum,
                                 const struct sk_buff *skb,
@@ -954,6 +942,10 @@ static void nf_log_netdev_packet(struct net *net, u_int8_t pf,
        case htons(ETH_P_RARP):
                nf_log_arp_packet(net, pf, hooknum, skb, in, out, loginfo, prefix);
                break;
+       default:
+               nf_log_unknown_packet(net, pf, hooknum, skb,
+                                     in, out, loginfo, prefix);
+               break;
        }
 }
 
index 128ee3b300d610d0993886b69f2b1971ddd4a92a..217006faebded20ed0f13a57191909825ec940d8 100644 (file)
@@ -8367,10 +8367,8 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
        if (chain->blob_next || !nft_is_active_next(net, chain))
                return 0;
 
-       rule = list_entry(&chain->rules, struct nft_rule, list);
-
        data_size = 0;
-       list_for_each_entry_continue(rule, &chain->rules, list) {
+       list_for_each_entry(rule, &chain->rules, list) {
                if (nft_is_active_next(net, rule)) {
                        data_size += sizeof(*prule) + rule->dlen;
                        if (data_size > INT_MAX)
@@ -8387,7 +8385,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
        data_boundary = data + data_size;
        size = 0;
 
-       list_for_each_entry_continue(rule, &chain->rules, list) {
+       list_for_each_entry(rule, &chain->rules, list) {
                if (!nft_is_active_next(net, rule))
                        continue;
 
index b0d8888a539bd4a7f700835a62cc218215e5732f..eea486f3297107675b84e93615410569e868768d 100644 (file)
@@ -158,6 +158,7 @@ static int cttimeout_new_timeout(struct sk_buff *skb,
        timeout->timeout.l3num = l3num;
        timeout->timeout.l4proto = l4proto;
        refcount_set(&timeout->refcnt, 1);
+       __module_get(THIS_MODULE);
        list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list);
 
        return 0;
@@ -506,13 +507,8 @@ static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net,
                if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
                        continue;
 
-               if (!try_module_get(THIS_MODULE))
+               if (!refcount_inc_not_zero(&timeout->refcnt))
                        goto err;
-
-               if (!refcount_inc_not_zero(&timeout->refcnt)) {
-                       module_put(THIS_MODULE);
-                       goto err;
-               }
                matching = timeout;
                break;
        }
@@ -525,10 +521,10 @@ static void ctnl_timeout_put(struct nf_ct_timeout *t)
        struct ctnl_timeout *timeout =
                container_of(t, struct ctnl_timeout, timeout);
 
-       if (refcount_dec_and_test(&timeout->refcnt))
+       if (refcount_dec_and_test(&timeout->refcnt)) {
                kfree_rcu(timeout, rcu_head);
-
-       module_put(THIS_MODULE);
+               module_put(THIS_MODULE);
+       }
 }
 
 static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = {
index f590ee1c8a1be46e59c20ca3ab0d9faa23b7dc5b..83590afe3768e1f13a5868b6210fb0916ecb25da 100644 (file)
@@ -30,7 +30,7 @@ static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
 {
        unsigned int i;
 
-       for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
+       for (i = 0; i < DIV_ROUND_UP(priv->len, sizeof(u32)); i++)
                dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i];
 }
 
@@ -109,22 +109,23 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
                return err;
        if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) {
                err = -EINVAL;
-               goto err1;
+               goto err_mask_release;
        }
 
        err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor,
                            tb[NFTA_BITWISE_XOR]);
        if (err < 0)
-               goto err1;
+               goto err_mask_release;
        if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) {
                err = -EINVAL;
-               goto err2;
+               goto err_xor_release;
        }
 
        return 0;
-err2:
+
+err_xor_release:
        nft_data_release(&priv->xor, xor.type);
-err1:
+err_mask_release:
        nft_data_release(&priv->mask, mask.type);
        return err;
 }
index f198f2d9ef90941211db0d23ea783495a48ec2f1..1f12d7ade606c89496de381ba43b01addedac2f9 100644 (file)
@@ -35,6 +35,10 @@ int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
        case NFT_FIB_RESULT_OIF:
        case NFT_FIB_RESULT_OIFNAME:
                hooks = (1 << NF_INET_PRE_ROUTING);
+               if (priv->flags & NFTA_FIB_F_IIF) {
+                       hooks |= (1 << NF_INET_LOCAL_IN) |
+                                (1 << NF_INET_FORWARD);
+               }
                break;
        case NFT_FIB_RESULT_ADDRTYPE:
                if (priv->flags & NFTA_FIB_F_IIF)
index 695a1958723f561a14b97adea40568f6caa863b7..fd76b69635a44c878b44810972b29df7d4644d89 100755 (executable)
@@ -66,6 +66,20 @@ table inet filter {
 EOF
 }
 
+load_pbr_ruleset() { # load the forward-hook fib test ruleset into the given network namespace
+       local netns=$1 # $1: name of the namespace to run nft in
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+       chain forward {
+               type filter hook forward priority raw;
+               fib saddr . iif oif gt 0 accept
+               log drop
+       }
+}
+EOF
+}
+
 load_ruleset_count() {
        local netns=$1
 
@@ -219,4 +233,40 @@ sleep 2
 ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
 check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
 
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+       echo "SKIP: Could not load fib forward ruleset"
+       exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+       ip -net ${nsrouter} nft list ruleset
+       echo "FAIL: fib mismatch in pbr setup"
+       exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
 exit 0