Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
author David S. Miller <davem@davemloft.net>
Mon, 18 Oct 2021 13:05:25 +0000 (14:05 +0100)
committer David S. Miller <davem@davemloft.net>
Mon, 18 Oct 2021 13:05:25 +0000 (14:05 +0100)
Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next:

1) Add new run_estimation toggle to IPVS to stop the estimation_timer
   logic, from Dust Li.

2) Relax superfluous dynset check on NFT_SET_TIMEOUT.

3) Add egress hook, from Lukas Wunner.

4) Nowadays, almost all hook functions in x_table land just call the hook
   evaluation loop. Remove remaining hook wrappers from iptables and IPVS.
   From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
40 files changed:
Documentation/networking/ipvs-sysctl.rst
drivers/net/ifb.c
include/linux/netdevice.h
include/linux/netfilter_arp/arp_tables.h
include/linux/netfilter_bridge/ebtables.h
include/linux/netfilter_ingress.h [deleted file]
include/linux/netfilter_ipv4/ip_tables.h
include/linux/netfilter_ipv6/ip6_tables.h
include/linux/netfilter_netdev.h [new file with mode: 0644]
include/linux/skbuff.h
include/net/ip_vs.h
include/uapi/linux/netfilter.h
net/bridge/netfilter/ebtable_broute.c
net/bridge/netfilter/ebtable_filter.c
net/bridge/netfilter/ebtable_nat.c
net/bridge/netfilter/ebtables.c
net/core/dev.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/arptable_filter.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/iptable_filter.c
net/ipv4/netfilter/iptable_mangle.c
net/ipv4/netfilter/iptable_nat.c
net/ipv4/netfilter/iptable_raw.c
net/ipv4/netfilter/iptable_security.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/netfilter/ip6table_filter.c
net/ipv6/netfilter/ip6table_mangle.c
net/ipv6/netfilter/ip6table_nat.c
net/ipv6/netfilter/ip6table_raw.c
net/ipv6/netfilter/ip6table_security.c
net/netfilter/Kconfig
net/netfilter/core.c
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_est.c
net/netfilter/nfnetlink_hook.c
net/netfilter/nft_chain_filter.c
net/netfilter/nft_dynset.c
net/packet/af_packet.c

index 2afccc63856ee07357d6d800d9f200b3f6549d94..95ef56d62077f76f4909383d740946e8becec801 100644 (file)
@@ -300,3 +300,14 @@ sync_version - INTEGER
 
        Kernels with this sync_version entry are able to receive messages
        of both version 1 and version 2 of the synchronisation protocol.
+
+run_estimation - BOOLEAN
+       0 - disabled
+       not 0 - enabled (default)
+
+       If disabled, the estimation will be stopped, and you won't see
+       any updates to the speed estimation data.
+
+       You can always re-enable estimation by setting this value to 1.
+       But be careful, the first estimation after re-enabling is not
+       accurate.
index e9258a9f3702ca8b02f68745cbf6c9d5bda43062..2c319dd27f29740c2605dc2d93bb24170f289f4d 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/moduleparam.h>
+#include <linux/netfilter_netdev.h>
 #include <net/pkt_sched.h>
 #include <net/net_namespace.h>
 
@@ -75,8 +76,10 @@ static void ifb_ri_tasklet(struct tasklet_struct *t)
        }
 
        while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
+               /* Skip tc and netfilter to prevent redirection loop. */
                skb->redirected = 0;
                skb->tc_skip_classify = 1;
+               nf_skip_egress(skb, true);
 
                u64_stats_update_begin(&txp->tsync);
                txp->tx_packets++;
index f9cd6fea213f33dd64aebaa14dfef6da2d98b6b1..3ec42495a43a56dbd51fecd166d572a9e586e3e4 100644 (file)
@@ -1861,6 +1861,7 @@ enum netdev_ml_priv_type {
  *     @xps_maps:      XXX: need comments on this one
  *     @miniq_egress:          clsact qdisc specific data for
  *                             egress processing
+ *     @nf_hooks_egress:       netfilter hooks executed for egress packets
  *     @qdisc_hash:            qdisc hash table
  *     @watchdog_timeo:        Represents the timeout that is used by
  *                             the watchdog (see dev_watchdog())
@@ -2160,6 +2161,9 @@ struct net_device {
 #ifdef CONFIG_NET_CLS_ACT
        struct mini_Qdisc __rcu *miniq_egress;
 #endif
+#ifdef CONFIG_NETFILTER_EGRESS
+       struct nf_hook_entries __rcu *nf_hooks_egress;
+#endif
 
 #ifdef CONFIG_NET_SCHED
        DECLARE_HASHTABLE       (qdisc_hash, 4);
index 4f9a4b3c589261397225b436a4f1abcbc0b294e2..a40aaf645fa4798b4d517f515d77a6d3d312ef52 100644 (file)
@@ -54,9 +54,8 @@ int arpt_register_table(struct net *net, const struct xt_table *table,
                        const struct nf_hook_ops *ops);
 void arpt_unregister_table(struct net *net, const char *name);
 void arpt_unregister_table_pre_exit(struct net *net, const char *name);
-extern unsigned int arpt_do_table(struct sk_buff *skb,
-                                 const struct nf_hook_state *state,
-                                 struct xt_table *table);
+extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb,
+                                 const struct nf_hook_state *state);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
index 10a01978bc0d3837e1bc9609caac7e3af30ff16a..a13296d6c7ceb2386e3870f6c1671686d990970a 100644 (file)
@@ -112,9 +112,8 @@ extern int ebt_register_table(struct net *net,
                              const struct nf_hook_ops *ops);
 extern void ebt_unregister_table(struct net *net, const char *tablename);
 void ebt_unregister_table_pre_exit(struct net *net, const char *tablename);
-extern unsigned int ebt_do_table(struct sk_buff *skb,
-                                const struct nf_hook_state *state,
-                                struct ebt_table *table);
+extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+                                const struct nf_hook_state *state);
 
 /* True if the hook mask denotes that the rule is in a base chain,
  * used in the check() functions */
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
deleted file mode 100644 (file)
index a13774b..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NETFILTER_INGRESS_H_
-#define _NETFILTER_INGRESS_H_
-
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-
-#ifdef CONFIG_NETFILTER_INGRESS
-static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
-{
-#ifdef CONFIG_JUMP_LABEL
-       if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
-               return false;
-#endif
-       return rcu_access_pointer(skb->dev->nf_hooks_ingress);
-}
-
-/* caller must hold rcu_read_lock */
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
-       struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
-       struct nf_hook_state state;
-       int ret;
-
-       /* Must recheck the ingress hook head, in the event it became NULL
-        * after the check in nf_hook_ingress_active evaluated to true.
-        */
-       if (unlikely(!e))
-               return 0;
-
-       nf_hook_state_init(&state, NF_NETDEV_INGRESS,
-                          NFPROTO_NETDEV, skb->dev, NULL, NULL,
-                          dev_net(skb->dev), NULL);
-       ret = nf_hook_slow(skb, &state, e, 0);
-       if (ret == 0)
-               return -1;
-
-       return ret;
-}
-
-static inline void nf_hook_ingress_init(struct net_device *dev)
-{
-       RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
-}
-#else /* CONFIG_NETFILTER_INGRESS */
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
-{
-       return 0;
-}
-
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
-       return 0;
-}
-
-static inline void nf_hook_ingress_init(struct net_device *dev) {}
-#endif /* CONFIG_NETFILTER_INGRESS */
-#endif /* _NETFILTER_INGRESS_H_ */
index 8d09bfe850dc3741e3ce418ea6964ef99282e886..132b0e4a6d4df6359df1059aec617e2596916e11 100644 (file)
@@ -63,9 +63,9 @@ struct ipt_error {
 }
 
 extern void *ipt_alloc_initial_table(const struct xt_table *);
-extern unsigned int ipt_do_table(struct sk_buff *skb,
-                                const struct nf_hook_state *state,
-                                struct xt_table *table);
+extern unsigned int ipt_do_table(void *priv,
+                                struct sk_buff *skb,
+                                const struct nf_hook_state *state);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
index 79e73fd7d965c2aad652e0aa60ff3af701807f87..8b8885a73c764994e8a3c68d63bcc12ef20a287b 100644 (file)
@@ -29,9 +29,8 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
                        const struct nf_hook_ops *ops);
 void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
 void ip6t_unregister_table_exit(struct net *net, const char *name);
-extern unsigned int ip6t_do_table(struct sk_buff *skb,
-                                 const struct nf_hook_state *state,
-                                 struct xt_table *table);
+extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb,
+                                 const struct nf_hook_state *state);
 
 #ifdef CONFIG_NETFILTER_XTABLES_COMPAT
 #include <net/compat.h>
diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h
new file mode 100644 (file)
index 0000000..b71b57a
--- /dev/null
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NETFILTER_NETDEV_H_
+#define _NETFILTER_NETDEV_H_
+
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
+{
+#ifdef CONFIG_JUMP_LABEL
+       if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+               return false;
+#endif
+       return rcu_access_pointer(skb->dev->nf_hooks_ingress);
+}
+
+/* caller must hold rcu_read_lock */
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+       struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+       struct nf_hook_state state;
+       int ret;
+
+       /* Must recheck the ingress hook head, in the event it became NULL
+        * after the check in nf_hook_ingress_active evaluated to true.
+        */
+       if (unlikely(!e))
+               return 0;
+
+       nf_hook_state_init(&state, NF_NETDEV_INGRESS,
+                          NFPROTO_NETDEV, skb->dev, NULL, NULL,
+                          dev_net(skb->dev), NULL);
+       ret = nf_hook_slow(skb, &state, e, 0);
+       if (ret == 0)
+               return -1;
+
+       return ret;
+}
+
+#else /* CONFIG_NETFILTER_INGRESS */
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+       return 0;
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+       return 0;
+}
+#endif /* CONFIG_NETFILTER_INGRESS */
+
+#ifdef CONFIG_NETFILTER_EGRESS
+static inline bool nf_hook_egress_active(void)
+{
+#ifdef CONFIG_JUMP_LABEL
+       if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS]))
+               return false;
+#endif
+       return true;
+}
+
+/**
+ * nf_hook_egress - classify packets before transmission
+ * @skb: packet to be classified
+ * @rc: result code which shall be returned by __dev_queue_xmit() on failure
+ * @dev: netdev whose egress hooks shall be applied to @skb
+ *
+ * Returns @skb on success or %NULL if the packet was consumed or filtered.
+ * Caller must hold rcu_read_lock.
+ *
+ * On ingress, packets are classified first by tc, then by netfilter.
+ * On egress, the order is reversed for symmetry.  Conceptually, tc and
+ * netfilter can be thought of as layers, with netfilter layered above tc:
+ * When tc redirects a packet to another interface, netfilter is not applied
+ * because the packet is on the tc layer.
+ *
+ * The nf_skip_egress flag controls whether netfilter is applied on egress.
+ * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the
+ * packet passes through tc and netfilter.  Because __dev_queue_xmit() may be
+ * called recursively by tunnel drivers such as vxlan, the flag is reverted to
+ * false after sch_handle_egress().  This ensures that netfilter is applied
+ * both on the overlay and underlying network.
+ */
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+                                            struct net_device *dev)
+{
+       struct nf_hook_entries *e;
+       struct nf_hook_state state;
+       int ret;
+
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+       if (skb->nf_skip_egress)
+               return skb;
+#endif
+
+       e = rcu_dereference(dev->nf_hooks_egress);
+       if (!e)
+               return skb;
+
+       nf_hook_state_init(&state, NF_NETDEV_EGRESS,
+                          NFPROTO_NETDEV, dev, NULL, NULL,
+                          dev_net(dev), NULL);
+       ret = nf_hook_slow(skb, &state, e, 0);
+
+       if (ret == 1) {
+               return skb;
+       } else if (ret < 0) {
+               *rc = NET_XMIT_DROP;
+               return NULL;
+       } else { /* ret == 0 */
+               *rc = NET_XMIT_SUCCESS;
+               return NULL;
+       }
+}
+#else /* CONFIG_NETFILTER_EGRESS */
+static inline bool nf_hook_egress_active(void)
+{
+       return false;
+}
+
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+                                            struct net_device *dev)
+{
+       return skb;
+}
+#endif /* CONFIG_NETFILTER_EGRESS */
+
+static inline void nf_skip_egress(struct sk_buff *skb, bool skip)
+{
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+       skb->nf_skip_egress = skip;
+#endif
+}
+
+static inline void nf_hook_netdev_init(struct net_device *dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+       RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+       RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
+#endif
+}
+
+#endif /* _NETFILTER_NETDEV_H_ */
index 841e2f0f5240ba9e210bb9a3fc1cbedc2162b2a8..cb96f1e6460c5fced5553d15a1786f894c353c9b 100644 (file)
@@ -652,6 +652,7 @@ typedef unsigned char *sk_buff_data_t;
  *     @tc_at_ingress: used within tc_classify to distinguish in/egress
  *     @redirected: packet was redirected by packet classifier
  *     @from_ingress: packet was redirected from the ingress path
+ *     @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
  *     @peeked: this packet has been seen already, so stats have been
  *             done for it, don't do them again
  *     @nf_trace: netfilter packet trace flag
@@ -868,6 +869,9 @@ struct sk_buff {
 #ifdef CONFIG_NET_REDIRECT
        __u8                    from_ingress:1;
 #endif
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+       __u8                    nf_skip_egress:1;
+#endif
 #ifdef CONFIG_TLS_DEVICE
        __u8                    decrypted:1;
 #endif
index 7cb5a1aace40db0e08d6f29e58cc3371f318aef2..ff1804a0c4692e6e8e15bcb2ccd77b6a027f05f2 100644 (file)
@@ -931,6 +931,7 @@ struct netns_ipvs {
        int                     sysctl_conn_reuse_mode;
        int                     sysctl_schedule_icmp;
        int                     sysctl_ignore_tunneled;
+       int                     sysctl_run_estimation;
 
        /* ip_vs_lblc */
        int                     sysctl_lblc_expiration;
@@ -1071,6 +1072,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
        return ipvs->sysctl_cache_bypass;
 }
 
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+       return ipvs->sysctl_run_estimation;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1163,6 +1169,11 @@ static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
        return 0;
 }
 
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+       return 1;
+}
+
 #endif
 
 /* IPVS core functions
index ef9a44286e23d5efab2a5add0b1178422c16e20b..53411ccc69db08009608de551a11a1b23100db9c 100644 (file)
@@ -51,6 +51,7 @@ enum nf_inet_hooks {
 
 enum nf_dev_hooks {
        NF_NETDEV_INGRESS,
+       NF_NETDEV_EGRESS,
        NF_NETDEV_NUMHOOKS
 };
 
index a7af4eaff17d30303d51e951efdf5f7db7320546..1a11064f9990719588c44d80a93c3269f4582c00 100644 (file)
@@ -66,7 +66,7 @@ static unsigned int ebt_broute(void *priv, struct sk_buff *skb,
                           NFPROTO_BRIDGE, s->in, NULL, NULL,
                           s->net, NULL);
 
-       ret = ebt_do_table(skb, &state, priv);
+       ret = ebt_do_table(priv, skb, &state);
        if (ret != NF_DROP)
                return ret;
 
index c0b121df4a9af5965aef107d172706313ab3da1c..cb949436bc0e34c2a721d5ca423c8db07e4bac2b 100644 (file)
@@ -58,28 +58,21 @@ static const struct ebt_table frame_filter = {
        .me             = THIS_MODULE,
 };
 
-static unsigned int
-ebt_filter_hook(void *priv, struct sk_buff *skb,
-               const struct nf_hook_state *state)
-{
-       return ebt_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops ebt_ops_filter[] = {
        {
-               .hook           = ebt_filter_hook,
+               .hook           = ebt_do_table,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_LOCAL_IN,
                .priority       = NF_BR_PRI_FILTER_BRIDGED,
        },
        {
-               .hook           = ebt_filter_hook,
+               .hook           = ebt_do_table,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_FORWARD,
                .priority       = NF_BR_PRI_FILTER_BRIDGED,
        },
        {
-               .hook           = ebt_filter_hook,
+               .hook           = ebt_do_table,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_LOCAL_OUT,
                .priority       = NF_BR_PRI_FILTER_OTHER,
index 4078151c224fb757894357434eb1aee1866fc5d3..5ee0531ae50610e456b07f051cf7769bb5cb004e 100644 (file)
@@ -58,27 +58,21 @@ static const struct ebt_table frame_nat = {
        .me             = THIS_MODULE,
 };
 
-static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb,
-                                const struct nf_hook_state *state)
-{
-       return ebt_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops ebt_ops_nat[] = {
        {
-               .hook           = ebt_nat_hook,
+               .hook           = ebt_do_table,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_LOCAL_OUT,
                .priority       = NF_BR_PRI_NAT_DST_OTHER,
        },
        {
-               .hook           = ebt_nat_hook,
+               .hook           = ebt_do_table,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_POST_ROUTING,
                .priority       = NF_BR_PRI_NAT_SRC,
        },
        {
-               .hook           = ebt_nat_hook,
+               .hook           = ebt_do_table,
                .pf             = NFPROTO_BRIDGE,
                .hooknum        = NF_BR_PRE_ROUTING,
                .priority       = NF_BR_PRI_NAT_DST_BRIDGED,
index 83d1798dfbb444ebe0f4ab0b0262dc7b2052ebb0..4a1508a1c56655d23640e1ef0831f77ab76d08a9 100644 (file)
@@ -189,10 +189,10 @@ ebt_get_target_c(const struct ebt_entry *e)
 }
 
 /* Do some firewalling */
-unsigned int ebt_do_table(struct sk_buff *skb,
-                         const struct nf_hook_state *state,
-                         struct ebt_table *table)
+unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+                         const struct nf_hook_state *state)
 {
+       struct ebt_table *table = priv;
        unsigned int hook = state->hook;
        int i, nentries;
        struct ebt_entry *point;
index eb61a8821b3a59d0b069d67ed240d7c5deecedd5..26cdf971ca958e68f91273e5abafc2b67f219add 100644 (file)
 #include <linux/if_macvlan.h>
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
-#include <linux/netfilter_ingress.h>
+#include <linux/netfilter_netdev.h>
 #include <linux/crash_dump.h>
 #include <linux/sctp.h>
 #include <net/udp_tunnel.h>
@@ -3926,6 +3926,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
 static struct sk_buff *
 sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
 {
+#ifdef CONFIG_NET_CLS_ACT
        struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
        struct tcf_result cl_res;
 
@@ -3961,6 +3962,7 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
        default:
                break;
        }
+#endif /* CONFIG_NET_CLS_ACT */
 
        return skb;
 }
@@ -4154,13 +4156,20 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
        qdisc_pkt_len_init(skb);
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_at_ingress = 0;
-# ifdef CONFIG_NET_EGRESS
+#endif
+#ifdef CONFIG_NET_EGRESS
        if (static_branch_unlikely(&egress_needed_key)) {
+               if (nf_hook_egress_active()) {
+                       skb = nf_hook_egress(skb, &rc, dev);
+                       if (!skb)
+                               goto out;
+               }
+               nf_skip_egress(skb, true);
                skb = sch_handle_egress(skb, &rc, dev);
                if (!skb)
                        goto out;
+               nf_skip_egress(skb, false);
        }
-# endif
 #endif
        /* If device/qdisc don't need skb->dst, release it right now while
         * its hot in this cpu cache.
@@ -5302,6 +5311,7 @@ skip_taps:
        if (static_branch_unlikely(&ingress_needed_key)) {
                bool another = false;
 
+               nf_skip_egress(skb, true);
                skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
                                         &another);
                if (another)
@@ -5309,6 +5319,7 @@ skip_taps:
                if (!skb)
                        goto out;
 
+               nf_skip_egress(skb, false);
                if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
                        goto out;
        }
@@ -10870,7 +10881,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        if (!dev->ethtool_ops)
                dev->ethtool_ops = &default_ethtool_ops;
 
-       nf_hook_ingress_init(dev);
+       nf_hook_netdev_init(dev);
 
        return dev;
 
index c53f14b943560766d38dfffbd7b5d529e95902fd..ffc0cab7cf189f92e0c735007972b90fef117754 100644 (file)
@@ -179,10 +179,11 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
        return (void *)entry + entry->next_offset;
 }
 
-unsigned int arpt_do_table(struct sk_buff *skb,
-                          const struct nf_hook_state *state,
-                          struct xt_table *table)
+unsigned int arpt_do_table(void *priv,
+                          struct sk_buff *skb,
+                          const struct nf_hook_state *state)
 {
+       const struct xt_table *table = priv;
        unsigned int hook = state->hook;
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        unsigned int verdict = NF_DROP;
index 3de78416ec76292aff24d0b4be4edcd507f98ca3..78cd5ee24448f624526ea6595e4ad8252b927425 100644 (file)
@@ -26,14 +26,6 @@ static const struct xt_table packet_filter = {
        .priority       = NF_IP_PRI_FILTER,
 };
 
-/* The work comes in here from netfilter.c */
-static unsigned int
-arptable_filter_hook(void *priv, struct sk_buff *skb,
-                    const struct nf_hook_state *state)
-{
-       return arpt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *arpfilter_ops __read_mostly;
 
 static int arptable_filter_table_init(struct net *net)
@@ -72,7 +64,7 @@ static int __init arptable_filter_init(void)
        if (ret < 0)
                return ret;
 
-       arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
+       arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table);
        if (IS_ERR(arpfilter_ops)) {
                xt_unregister_template(&packet_filter);
                return PTR_ERR(arpfilter_ops);
index 13acb687c19ab7c8e969f6077a937f320d43c12f..2ed7c58b471aca605445003a49ee77e1f6842ade 100644 (file)
@@ -222,10 +222,11 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ipt_do_table(struct sk_buff *skb,
-            const struct nf_hook_state *state,
-            struct xt_table *table)
+ipt_do_table(void *priv,
+            struct sk_buff *skb,
+            const struct nf_hook_state *state)
 {
+       const struct xt_table *table = priv;
        unsigned int hook = state->hook;
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        const struct iphdr *ip;
index 0eb0e2ab9bfc4f9cef3ee2ac25a38c1301f38df9..b9062f4552ace52cf23f7b0543b69ba166e2eecd 100644 (file)
@@ -28,13 +28,6 @@ static const struct xt_table packet_filter = {
        .priority       = NF_IP_PRI_FILTER,
 };
 
-static unsigned int
-iptable_filter_hook(void *priv, struct sk_buff *skb,
-                   const struct nf_hook_state *state)
-{
-       return ipt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
@@ -90,7 +83,7 @@ static int __init iptable_filter_init(void)
        if (ret < 0)
                return ret;
 
-       filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
+       filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
        if (IS_ERR(filter_ops)) {
                xt_unregister_template(&packet_filter);
                return PTR_ERR(filter_ops);
index 40417a3f930b2f38c870b63b1f477da2a7fdb70d..3abb430af9e6f12be81eeed7d02cdb15b9077279 100644 (file)
@@ -34,7 +34,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
 {
        unsigned int ret;
        const struct iphdr *iph;
@@ -50,7 +50,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pri
        daddr = iph->daddr;
        tos = iph->tos;
 
-       ret = ipt_do_table(skb, state, priv);
+       ret = ipt_do_table(priv, skb, state);
        /* Reroute for ANY change. */
        if (ret != NF_DROP && ret != NF_STOLEN) {
                iph = ip_hdr(skb);
@@ -75,8 +75,8 @@ iptable_mangle_hook(void *priv,
                     const struct nf_hook_state *state)
 {
        if (state->hook == NF_INET_LOCAL_OUT)
-               return ipt_mangle_out(skb, state, priv);
-       return ipt_do_table(skb, state, priv);
+               return ipt_mangle_out(priv, skb, state);
+       return ipt_do_table(priv, skb, state);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
index 45d7e072e6a549df1c26d60d9c4cf6afb3c2f1bb..56f6ecc43451ecac0c831ee559b1f68a41f77978 100644 (file)
@@ -29,34 +29,27 @@ static const struct xt_table nf_nat_ipv4_table = {
        .af             = NFPROTO_IPV4,
 };
 
-static unsigned int iptable_nat_do_chain(void *priv,
-                                        struct sk_buff *skb,
-                                        const struct nf_hook_state *state)
-{
-       return ipt_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
        {
-               .hook           = iptable_nat_do_chain,
+               .hook           = ipt_do_table,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_PRE_ROUTING,
                .priority       = NF_IP_PRI_NAT_DST,
        },
        {
-               .hook           = iptable_nat_do_chain,
+               .hook           = ipt_do_table,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_POST_ROUTING,
                .priority       = NF_IP_PRI_NAT_SRC,
        },
        {
-               .hook           = iptable_nat_do_chain,
+               .hook           = ipt_do_table,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP_PRI_NAT_DST,
        },
        {
-               .hook           = iptable_nat_do_chain,
+               .hook           = ipt_do_table,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_NAT_SRC,
index 8265c676570533cf3f752562dbdd54bfb0508688..ca5e5b21587cda49e27abb66115856caebcaecf5 100644 (file)
@@ -32,14 +32,6 @@ static const struct xt_table packet_raw_before_defrag = {
        .priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-iptable_raw_hook(void *priv, struct sk_buff *skb,
-                const struct nf_hook_state *state)
-{
-       return ipt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int iptable_raw_table_init(struct net *net)
@@ -90,7 +82,7 @@ static int __init iptable_raw_init(void)
        if (ret < 0)
                return ret;
 
-       rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
+       rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table);
        if (IS_ERR(rawtable_ops)) {
                xt_unregister_template(table);
                return PTR_ERR(rawtable_ops);
index f519162a2fa512b2fdd0d70ae881b039c5dbbd63..d885443cb267984eb475475b44fbc09c1b5468f8 100644 (file)
@@ -33,13 +33,6 @@ static const struct xt_table security_table = {
        .priority       = NF_IP_PRI_SECURITY,
 };
 
-static unsigned int
-iptable_security_hook(void *priv, struct sk_buff *skb,
-                     const struct nf_hook_state *state)
-{
-       return ipt_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int iptable_security_table_init(struct net *net)
@@ -78,7 +71,7 @@ static int __init iptable_security_init(void)
        if (ret < 0)
                return ret;
 
-       sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
+       sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table);
        if (IS_ERR(sectbl_ops)) {
                xt_unregister_template(&security_table);
                return PTR_ERR(sectbl_ops);
index a579ea14a69b67cdd641e1bf1dc943b8b8468007..2d816277f2c5a8036ab279a5fcb5096f4109fc2c 100644 (file)
@@ -247,10 +247,10 @@ ip6t_next_entry(const struct ip6t_entry *entry)
 
 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
 unsigned int
-ip6t_do_table(struct sk_buff *skb,
-             const struct nf_hook_state *state,
-             struct xt_table *table)
+ip6t_do_table(void *priv, struct sk_buff *skb,
+             const struct nf_hook_state *state)
 {
+       const struct xt_table *table = priv;
        unsigned int hook = state->hook;
        static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        /* Initializing verdict to NF_DROP keeps gcc happy. */
index 727ee80970124dfd485f0b046572dab9e0677c5c..df785ebda0ca43ebdfe364da3a7ca051ce46aa79 100644 (file)
@@ -27,14 +27,6 @@ static const struct xt_table packet_filter = {
        .priority       = NF_IP6_PRI_FILTER,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_filter_hook(void *priv, struct sk_buff *skb,
-                    const struct nf_hook_state *state)
-{
-       return ip6t_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *filter_ops __read_mostly;
 
 /* Default to forward because I got too much mail already. */
@@ -90,7 +82,7 @@ static int __init ip6table_filter_init(void)
        if (ret < 0)
                return ret;
 
-       filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
+       filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
        if (IS_ERR(filter_ops)) {
                xt_unregister_template(&packet_filter);
                return PTR_ERR(filter_ops);
index 9b518ce37d6ae6f43ec7e6267a12824c14716277..a88b2ce4a3cb8d1e4cf0776122611c98f0879bf5 100644 (file)
@@ -29,7 +29,7 @@ static const struct xt_table packet_mangler = {
 };
 
 static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
 {
        unsigned int ret;
        struct in6_addr saddr, daddr;
@@ -46,7 +46,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *pr
        /* flowlabel and prio (includes version, which shouldn't change either */
        flowlabel = *((u_int32_t *)ipv6_hdr(skb));
 
-       ret = ip6t_do_table(skb, state, priv);
+       ret = ip6t_do_table(priv, skb, state);
 
        if (ret != NF_DROP && ret != NF_STOLEN &&
            (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -68,8 +68,8 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
                     const struct nf_hook_state *state)
 {
        if (state->hook == NF_INET_LOCAL_OUT)
-               return ip6t_mangle_out(skb, state, priv);
-       return ip6t_do_table(skb, state, priv);
+               return ip6t_mangle_out(priv, skb, state);
+       return ip6t_do_table(priv, skb, state);
 }
 
 static struct nf_hook_ops *mangle_ops __read_mostly;
index 921c1723a01e4a24bd21ada3fa2fb0b16fff8b31..bf3cb3a13600cd418b6c9066c9e4d667854d21c9 100644 (file)
@@ -31,34 +31,27 @@ static const struct xt_table nf_nat_ipv6_table = {
        .af             = NFPROTO_IPV6,
 };
 
-static unsigned int ip6table_nat_do_chain(void *priv,
-                                         struct sk_buff *skb,
-                                         const struct nf_hook_state *state)
-{
-       return ip6t_do_table(skb, state, priv);
-}
-
 static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
        {
-               .hook           = ip6table_nat_do_chain,
+               .hook           = ip6t_do_table,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_PRE_ROUTING,
                .priority       = NF_IP6_PRI_NAT_DST,
        },
        {
-               .hook           = ip6table_nat_do_chain,
+               .hook           = ip6t_do_table,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_POST_ROUTING,
                .priority       = NF_IP6_PRI_NAT_SRC,
        },
        {
-               .hook           = ip6table_nat_do_chain,
+               .hook           = ip6t_do_table,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP6_PRI_NAT_DST,
        },
        {
-               .hook           = ip6table_nat_do_chain,
+               .hook           = ip6t_do_table,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP6_PRI_NAT_SRC,
index 4f2a04af71d320655ba517bdf62b82eebc7cd34d..08861d5d1f4db36b90cac3097d16393c04a5e4f7 100644 (file)
@@ -31,14 +31,6 @@ static const struct xt_table packet_raw_before_defrag = {
        .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
 };
 
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_raw_hook(void *priv, struct sk_buff *skb,
-                 const struct nf_hook_state *state)
-{
-       return ip6t_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *rawtable_ops __read_mostly;
 
 static int ip6table_raw_table_init(struct net *net)
@@ -88,7 +80,7 @@ static int __init ip6table_raw_init(void)
                return ret;
 
        /* Register hooks */
-       rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
+       rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
        if (IS_ERR(rawtable_ops)) {
                xt_unregister_template(table);
                return PTR_ERR(rawtable_ops);
index 931674034d8be5f3807da3c337976ab3407658f8..4df14a9bae782d808147ee10af6299235ce227a7 100644 (file)
@@ -32,13 +32,6 @@ static const struct xt_table security_table = {
        .priority       = NF_IP6_PRI_SECURITY,
 };
 
-static unsigned int
-ip6table_security_hook(void *priv, struct sk_buff *skb,
-                      const struct nf_hook_state *state)
-{
-       return ip6t_do_table(skb, state, priv);
-}
-
 static struct nf_hook_ops *sectbl_ops __read_mostly;
 
 static int ip6table_security_table_init(struct net *net)
@@ -77,7 +70,7 @@ static int __init ip6table_security_init(void)
        if (ret < 0)
                return ret;
 
-       sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
+       sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
        if (IS_ERR(sectbl_ops)) {
                xt_unregister_template(&security_table);
                return PTR_ERR(sectbl_ops);
index 54395266339d7352ef3fe370d5b057580c0906a6..49c9fae9c62c690959dfe2f7b6a51950d9b87098 100644 (file)
@@ -10,6 +10,17 @@ config NETFILTER_INGRESS
          This allows you to classify packets from ingress using the Netfilter
          infrastructure.
 
+config NETFILTER_EGRESS
+       bool "Netfilter egress support"
+       default y
+       select NET_EGRESS
+       help
+         This allows you to classify packets before transmission using the
+         Netfilter infrastructure.
+
+config NETFILTER_SKIP_EGRESS
+       def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
+
 config NETFILTER_NETLINK
        tristate
 
index 63d032191e6269acff9e48b51b6c0e09a411ceb1..6dec9cd395f1570c845346b6956ba2586a4235d6 100644 (file)
@@ -316,6 +316,12 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
                if (dev && dev_net(dev) == net)
                        return &dev->nf_hooks_ingress;
        }
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+       if (hooknum == NF_NETDEV_EGRESS) {
+               if (dev && dev_net(dev) == net)
+                       return &dev->nf_hooks_egress;
+       }
 #endif
        WARN_ON_ONCE(1);
        return NULL;
@@ -335,7 +341,8 @@ static int nf_ingress_check(struct net *net, const struct nf_hook_ops *reg,
        return 0;
 }
 
-static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
+static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg,
+                                                 int pf)
 {
        if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) ||
            (pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS))
@@ -344,6 +351,12 @@ static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
        return false;
 }
 
+static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg,
+                                                int pf)
+{      /* True only for an NFPROTO_NETDEV registration on NF_NETDEV_EGRESS. */
+       return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS;
+}
+
 static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
 {
 #ifdef CONFIG_JUMP_LABEL
@@ -383,9 +396,18 @@ static int __nf_register_net_hook(struct net *net, int pf,
 
        switch (pf) {
        case NFPROTO_NETDEV:
-               err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
-               if (err < 0)
-                       return err;
+#ifndef CONFIG_NETFILTER_INGRESS
+               if (reg->hooknum == NF_NETDEV_INGRESS)
+                       return -EOPNOTSUPP;
+#endif
+#ifndef CONFIG_NETFILTER_EGRESS
+               if (reg->hooknum == NF_NETDEV_EGRESS)
+                       return -EOPNOTSUPP;
+#endif
+               if ((reg->hooknum != NF_NETDEV_INGRESS &&
+                    reg->hooknum != NF_NETDEV_EGRESS) ||
+                   !reg->dev || dev_net(reg->dev) != net)
+                       return -EINVAL;
                break;
        case NFPROTO_INET:
                if (reg->hooknum != NF_INET_INGRESS)
@@ -417,6 +439,10 @@ static int __nf_register_net_hook(struct net *net, int pf,
 #ifdef CONFIG_NETFILTER_INGRESS
        if (nf_ingress_hook(reg, pf))
                net_inc_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+       if (nf_egress_hook(reg, pf))
+               net_inc_egress_queue();
 #endif
        nf_static_key_inc(reg, pf);
 
@@ -474,6 +500,10 @@ static void __nf_unregister_net_hook(struct net *net, int pf,
 #ifdef CONFIG_NETFILTER_INGRESS
                if (nf_ingress_hook(reg, pf))
                        net_dec_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+               if (nf_egress_hook(reg, pf))
+                       net_dec_egress_queue();
 #endif
                nf_static_key_dec(reg, pf);
        } else {
index 128690c512dff1189c766904e64cfc12faed5e90..e93c937a8bf026248001200ee674e04214c56468 100644 (file)
@@ -1330,12 +1330,15 @@ drop:
  *     Check if outgoing packet belongs to the established ip_vs_conn.
  */
 static unsigned int
-ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
 {
+       struct netns_ipvs *ipvs = net_ipvs(state->net);
+       unsigned int hooknum = state->hook;
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_proto_data *pd;
        struct ip_vs_conn *cp;
+       int af = state->pf;
        struct sock *sk;
 
        EnterFunction(11);
@@ -1468,56 +1471,6 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
        return NF_ACCEPT;
 }
 
-/*
- *     It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
- *     used only for VS/NAT.
- *     Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_reply4(void *priv, struct sk_buff *skb,
-            const struct nf_hook_state *state)
-{
-       return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-/*
- *     It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
- *     Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_local_reply4(void *priv, struct sk_buff *skb,
-                  const struct nf_hook_state *state)
-{
-       return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-
-/*
- *     It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
- *     used only for VS/NAT.
- *     Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_reply6(void *priv, struct sk_buff *skb,
-            const struct nf_hook_state *state)
-{
-       return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-/*
- *     It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
- *     Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_local_reply6(void *priv, struct sk_buff *skb,
-                  const struct nf_hook_state *state)
-{
-       return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-#endif
-
 static unsigned int
 ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
                      struct ip_vs_proto_data *pd,
@@ -1957,8 +1910,10 @@ out:
  *     and send it on its way...
  */
 static unsigned int
-ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
 {
+       struct netns_ipvs *ipvs = net_ipvs(state->net);
+       unsigned int hooknum = state->hook;
        struct ip_vs_iphdr iph;
        struct ip_vs_protocol *pp;
        struct ip_vs_proto_data *pd;
@@ -1966,6 +1921,7 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
        int ret, pkts;
        int conn_reuse_mode;
        struct sock *sk;
+       int af = state->pf;
 
        /* Already marked as IPVS request or reply? */
        if (skb->ipvs_property)
@@ -2137,55 +2093,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
        return ret;
 }
 
-/*
- *     AF_INET handler in NF_INET_LOCAL_IN chain
- *     Schedule and forward packets from remote clients
- */
-static unsigned int
-ip_vs_remote_request4(void *priv, struct sk_buff *skb,
-                     const struct nf_hook_state *state)
-{
-       return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-/*
- *     AF_INET handler in NF_INET_LOCAL_OUT chain
- *     Schedule and forward packets from local clients
- */
-static unsigned int
-ip_vs_local_request4(void *priv, struct sk_buff *skb,
-                    const struct nf_hook_state *state)
-{
-       return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-
-/*
- *     AF_INET6 handler in NF_INET_LOCAL_IN chain
- *     Schedule and forward packets from remote clients
- */
-static unsigned int
-ip_vs_remote_request6(void *priv, struct sk_buff *skb,
-                     const struct nf_hook_state *state)
-{
-       return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-/*
- *     AF_INET6 handler in NF_INET_LOCAL_OUT chain
- *     Schedule and forward packets from local clients
- */
-static unsigned int
-ip_vs_local_request6(void *priv, struct sk_buff *skb,
-                    const struct nf_hook_state *state)
-{
-       return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-#endif
-
-
 /*
  *     It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
  *      related packets destined for 0.0.0.0/0.
@@ -2199,45 +2106,36 @@ static unsigned int
 ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
                   const struct nf_hook_state *state)
 {
-       int r;
        struct netns_ipvs *ipvs = net_ipvs(state->net);
-
-       if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
-               return NF_ACCEPT;
+       int r;
 
        /* ipvs enabled in this netns ? */
        if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
                return NF_ACCEPT;
 
-       return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
-}
-
+       if (state->pf == NFPROTO_IPV4) {
+               if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
+                       return NF_ACCEPT;
 #ifdef CONFIG_IP_VS_IPV6
-static unsigned int
-ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
-                     const struct nf_hook_state *state)
-{
-       int r;
-       struct netns_ipvs *ipvs = net_ipvs(state->net);
-       struct ip_vs_iphdr iphdr;
+       } else {
+               struct ip_vs_iphdr iphdr;
 
-       ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
-       if (iphdr.protocol != IPPROTO_ICMPV6)
-               return NF_ACCEPT;
+               ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
 
-       /* ipvs enabled in this netns ? */
-       if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
-               return NF_ACCEPT;
+               if (iphdr.protocol != IPPROTO_ICMPV6)
+                       return NF_ACCEPT;
 
-       return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
-}
+               return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
 #endif
+       }
 
+       return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
+}
 
 static const struct nf_hook_ops ip_vs_ops4[] = {
        /* After packet filtering, change source only for VS/NAT */
        {
-               .hook           = ip_vs_reply4,
+               .hook           = ip_vs_out_hook,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_NAT_SRC - 2,
@@ -2246,21 +2144,21 @@ static const struct nf_hook_ops ip_vs_ops4[] = {
         * or VS/NAT(change destination), so that filtering rules can be
         * applied to IPVS. */
        {
-               .hook           = ip_vs_remote_request4,
+               .hook           = ip_vs_in_hook,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_NAT_SRC - 1,
        },
        /* Before ip_vs_in, change source only for VS/NAT */
        {
-               .hook           = ip_vs_local_reply4,
+               .hook           = ip_vs_out_hook,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP_PRI_NAT_DST + 1,
        },
        /* After mangle, schedule and forward local requests */
        {
-               .hook           = ip_vs_local_request4,
+               .hook           = ip_vs_in_hook,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP_PRI_NAT_DST + 2,
@@ -2275,7 +2173,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = {
        },
        /* After packet filtering, change source only for VS/NAT */
        {
-               .hook           = ip_vs_reply4,
+               .hook           = ip_vs_out_hook,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 100,
@@ -2286,7 +2184,7 @@ static const struct nf_hook_ops ip_vs_ops4[] = {
 static const struct nf_hook_ops ip_vs_ops6[] = {
        /* After packet filtering, change source only for VS/NAT */
        {
-               .hook           = ip_vs_reply6,
+               .hook           = ip_vs_out_hook,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP6_PRI_NAT_SRC - 2,
@@ -2295,21 +2193,21 @@ static const struct nf_hook_ops ip_vs_ops6[] = {
         * or VS/NAT(change destination), so that filtering rules can be
         * applied to IPVS. */
        {
-               .hook           = ip_vs_remote_request6,
+               .hook           = ip_vs_in_hook,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP6_PRI_NAT_SRC - 1,
        },
        /* Before ip_vs_in, change source only for VS/NAT */
        {
-               .hook           = ip_vs_local_reply6,
+               .hook           = ip_vs_out_hook,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP6_PRI_NAT_DST + 1,
        },
        /* After mangle, schedule and forward local requests */
        {
-               .hook           = ip_vs_local_request6,
+               .hook           = ip_vs_in_hook,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_LOCAL_OUT,
                .priority       = NF_IP6_PRI_NAT_DST + 2,
@@ -2317,14 +2215,14 @@ static const struct nf_hook_ops ip_vs_ops6[] = {
        /* After packet filtering (but before ip_vs_out_icmp), catch icmp
         * destined for 0.0.0.0/0, which is for incoming IPVS connections */
        {
-               .hook           = ip_vs_forward_icmp_v6,
+               .hook           = ip_vs_forward_icmp,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 99,
        },
        /* After packet filtering, change source only for VS/NAT */
        {
-               .hook           = ip_vs_reply6,
+               .hook           = ip_vs_out_hook,
                .pf             = NFPROTO_IPV6,
                .hooknum        = NF_INET_FORWARD,
                .priority       = 100,
index c25097092a060bf579edec4ac9638edb4f6fd569..cbea5a68afb5d688089d6d1940197fee3171119e 100644 (file)
@@ -2017,6 +2017,12 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "run_estimation",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
 #ifdef CONFIG_IP_VS_DEBUG
        {
                .procname       = "debug_level",
@@ -4090,6 +4096,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
        tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
        tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
        tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+       ipvs->sysctl_run_estimation = 1;
+       tbl[idx++].data = &ipvs->sysctl_run_estimation;
 
        ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
        if (ipvs->sysctl_hdr == NULL) {
index 05b8112ffb378f670056bdb2d7fc0e01fd5c5c94..9a1a7af6a186a6e8fe299b42497cdafed62752c2 100644 (file)
@@ -100,6 +100,9 @@ static void estimation_timer(struct timer_list *t)
        u64 rate;
        struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
 
+       if (!sysctl_run_estimation(ipvs))
+               goto skip;
+
        spin_lock(&ipvs->est_lock);
        list_for_each_entry(e, &ipvs->est_list, list) {
                s = container_of(e, struct ip_vs_stats, est);
@@ -131,6 +134,8 @@ static void estimation_timer(struct timer_list *t)
                spin_unlock(&s->lock);
        }
        spin_unlock(&ipvs->est_lock);
+
+skip:
        mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }
 
index f554e2ea32eed6347bf6cd066cc26430e2efde10..d5c719c9e36ca8228ac1526df00006a798260765 100644 (file)
@@ -185,7 +185,7 @@ static const struct nf_hook_entries *
 nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
 {
        const struct nf_hook_entries *hook_head = NULL;
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
        struct net_device *netdev;
 #endif
 
@@ -221,9 +221,9 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
                hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
                break;
 #endif
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
        case NFPROTO_NETDEV:
-               if (hook != NF_NETDEV_INGRESS)
+               if (hook >= NF_NETDEV_NUMHOOKS)
                        return ERR_PTR(-EOPNOTSUPP);
 
                if (!dev)
@@ -233,7 +233,15 @@ nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *de
                if (!netdev)
                        return ERR_PTR(-ENODEV);
 
-               return rcu_dereference(netdev->nf_hooks_ingress);
+#ifdef CONFIG_NETFILTER_INGRESS
+               if (hook == NF_NETDEV_INGRESS)
+                       return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+               if (hook == NF_NETDEV_EGRESS)
+                       return rcu_dereference(netdev->nf_hooks_egress);
+#endif
+               fallthrough;
 #endif
        default:
                return ERR_PTR(-EPROTONOSUPPORT);
index 5b02408a920bf83a0bdcf2a0ab1d09ede0791fa6..680fe557686e42d3421a445b6c5472bd4056a65a 100644 (file)
@@ -310,9 +310,11 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
        .name           = "filter",
        .type           = NFT_CHAIN_T_DEFAULT,
        .family         = NFPROTO_NETDEV,
-       .hook_mask      = (1 << NF_NETDEV_INGRESS),
+       .hook_mask      = (1 << NF_NETDEV_INGRESS) |
+                         (1 << NF_NETDEV_EGRESS),
        .hooks          = {
                [NF_NETDEV_INGRESS]     = nft_do_chain_netdev,
+               [NF_NETDEV_EGRESS]      = nft_do_chain_netdev,
        },
 };
 
index 6ba3256fa844989461b28af793e7465a40353f17..87f3af4645d9c75697b95adc7ad0a2c2fd01d593 100644 (file)
@@ -198,17 +198,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
                return -EBUSY;
 
        priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
-       switch (priv->op) {
-       case NFT_DYNSET_OP_ADD:
-       case NFT_DYNSET_OP_DELETE:
-               break;
-       case NFT_DYNSET_OP_UPDATE:
-               if (!(set->flags & NFT_SET_TIMEOUT))
-                       return -EOPNOTSUPP;
-               break;
-       default:
+       if (priv->op > NFT_DYNSET_OP_DELETE)
                return -EOPNOTSUPP;
-       }
 
        timeout = 0;
        if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
index 2a2bc64f75cfd86af81aed0cc9841503775f96d4..46943a18a10d5413db57955dbd24302af7ef1d97 100644 (file)
@@ -91,6 +91,7 @@
 #endif
 #include <linux/bpf.h>
 #include <net/compat.h>
+#include <linux/netfilter_netdev.h>
 
 #include "internal.h"
 
@@ -241,8 +242,42 @@ struct packet_skb_cb {
 static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
 static void __fanout_link(struct sock *sk, struct packet_sock *po);
 
+#ifdef CONFIG_NETFILTER_EGRESS
+static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb)
+{      /* Pass each skb of the list to nf_hook_egress(); return the kept ones. */
+       struct sk_buff *next, *head = NULL, *tail;      /* tail is assigned whenever head is */
+       int rc; /* filled in through &rc by nf_hook_egress(); never read here */
+
+       rcu_read_lock();
+       for (; skb != NULL; skb = next) {
+               next = skb->next;       /* read first; the call below presumably resets skb->next */
+               skb_mark_not_on_list(skb);
+
+               if (!nf_hook_egress(skb, &rc, skb->dev))
+                       continue;       /* falsy result: skb is left off the output list */
+
+               if (!head)
+                       head = skb;     /* first kept skb starts the output list */
+               else
+                       tail->next = skb;       /* append behind the previously kept skb */
+
+               tail = skb;
+       }
+       rcu_read_unlock();
+
+       return head;    /* NULL when no skb was kept */
+}
+#endif
+
 static int packet_direct_xmit(struct sk_buff *skb)
 {
+#ifdef CONFIG_NETFILTER_EGRESS
+       if (nf_hook_egress_active()) {
+               skb = nf_hook_direct_egress(skb);       /* result replaces the list to send */
+               if (!skb)
+                       return NET_XMIT_DROP;   /* nothing left to transmit */
+       }
+#endif
        return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
 }