Kernels with this sync_version entry are able to receive messages
of both version 1 and version 2 of the synchronisation protocol.
+
+run_estimation - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ If disabled, the estimation will be stop, and you can't see
+ any update on speed estimation data.
+
+ You can always re-enable estimation by setting this value to 1.
+ But be careful, the first estimation after re-enable is not
+ accurate.
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
+#include <linux/netfilter_netdev.h>
#include <net/pkt_sched.h>
#include <net/net_namespace.h>
}
while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
+ /* Skip tc and netfilter to prevent redirection loop. */
skb->redirected = 0;
skb->tc_skip_classify = 1;
+ nf_skip_egress(skb, true);
u64_stats_update_begin(&txp->tsync);
txp->tx_packets++;
* @xps_maps: XXX: need comments on this one
* @miniq_egress: clsact qdisc specific data for
* egress processing
+ * @nf_hooks_egress: netfilter hooks executed for egress packets
* @qdisc_hash: qdisc hash table
* @watchdog_timeo: Represents the timeout that is used by
* the watchdog (see dev_watchdog())
#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc __rcu *miniq_egress;
#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ struct nf_hook_entries __rcu *nf_hooks_egress;
+#endif
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);
const struct nf_hook_ops *ops);
void arpt_unregister_table(struct net *net, const char *name);
void arpt_unregister_table_pre_exit(struct net *net, const char *name);
-extern unsigned int arpt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table);
+extern unsigned int arpt_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
const struct nf_hook_ops *ops);
extern void ebt_unregister_table(struct net *net, const char *tablename);
void ebt_unregister_table_pre_exit(struct net *net, const char *tablename);
-extern unsigned int ebt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct ebt_table *table);
+extern unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
/* True if the hook mask denotes that the rule is in a base chain,
* used in the check() functions */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NETFILTER_INGRESS_H_
-#define _NETFILTER_INGRESS_H_
-
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-
-#ifdef CONFIG_NETFILTER_INGRESS
-static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
-{
-#ifdef CONFIG_JUMP_LABEL
- if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
- return false;
-#endif
- return rcu_access_pointer(skb->dev->nf_hooks_ingress);
-}
-
-/* caller must hold rcu_read_lock */
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
- struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
- struct nf_hook_state state;
- int ret;
-
- /* Must recheck the ingress hook head, in the event it became NULL
- * after the check in nf_hook_ingress_active evaluated to true.
- */
- if (unlikely(!e))
- return 0;
-
- nf_hook_state_init(&state, NF_NETDEV_INGRESS,
- NFPROTO_NETDEV, skb->dev, NULL, NULL,
- dev_net(skb->dev), NULL);
- ret = nf_hook_slow(skb, &state, e, 0);
- if (ret == 0)
- return -1;
-
- return ret;
-}
-
-static inline void nf_hook_ingress_init(struct net_device *dev)
-{
- RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
-}
-#else /* CONFIG_NETFILTER_INGRESS */
-static inline int nf_hook_ingress_active(struct sk_buff *skb)
-{
- return 0;
-}
-
-static inline int nf_hook_ingress(struct sk_buff *skb)
-{
- return 0;
-}
-
-static inline void nf_hook_ingress_init(struct net_device *dev) {}
-#endif /* CONFIG_NETFILTER_INGRESS */
-#endif /* _NETFILTER_INGRESS_H_ */
}
extern void *ipt_alloc_initial_table(const struct xt_table *);
-extern unsigned int ipt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table);
+extern unsigned int ipt_do_table(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
const struct nf_hook_ops *ops);
void ip6t_unregister_table_pre_exit(struct net *net, const char *name);
void ip6t_unregister_table_exit(struct net *net, const char *name);
-extern unsigned int ip6t_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table);
+extern unsigned int ip6t_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
#include <net/compat.h>
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NETFILTER_NETDEV_H_
+#define _NETFILTER_NETDEV_H_
+
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline bool nf_hook_ingress_active(const struct sk_buff *skb)
+{
+#ifdef CONFIG_JUMP_LABEL
+ if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
+ return false;
+#endif
+ return rcu_access_pointer(skb->dev->nf_hooks_ingress);
+}
+
+/* caller must hold rcu_read_lock */
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+ struct nf_hook_entries *e = rcu_dereference(skb->dev->nf_hooks_ingress);
+ struct nf_hook_state state;
+ int ret;
+
+ /* Must recheck the ingress hook head, in the event it became NULL
+ * after the check in nf_hook_ingress_active evaluated to true.
+ */
+ if (unlikely(!e))
+ return 0;
+
+ nf_hook_state_init(&state, NF_NETDEV_INGRESS,
+ NFPROTO_NETDEV, skb->dev, NULL, NULL,
+ dev_net(skb->dev), NULL);
+ ret = nf_hook_slow(skb, &state, e, 0);
+ if (ret == 0)
+ return -1;
+
+ return ret;
+}
+
+#else /* CONFIG_NETFILTER_INGRESS */
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+ return 0;
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+ return 0;
+}
+#endif /* CONFIG_NETFILTER_INGRESS */
+
+#ifdef CONFIG_NETFILTER_EGRESS
+static inline bool nf_hook_egress_active(void)
+{
+#ifdef CONFIG_JUMP_LABEL
+ if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_EGRESS]))
+ return false;
+#endif
+ return true;
+}
+
+/**
+ * nf_hook_egress - classify packets before transmission
+ * @skb: packet to be classified
+ * @rc: result code which shall be returned by __dev_queue_xmit() on failure
+ * @dev: netdev whose egress hooks shall be applied to @skb
+ *
+ * Returns @skb on success or %NULL if the packet was consumed or filtered.
+ * Caller must hold rcu_read_lock.
+ *
+ * On ingress, packets are classified first by tc, then by netfilter.
+ * On egress, the order is reversed for symmetry. Conceptually, tc and
+ * netfilter can be thought of as layers, with netfilter layered above tc:
+ * When tc redirects a packet to another interface, netfilter is not applied
+ * because the packet is on the tc layer.
+ *
+ * The nf_skip_egress flag controls whether netfilter is applied on egress.
+ * It is updated by __netif_receive_skb_core() and __dev_queue_xmit() when the
+ * packet passes through tc and netfilter. Because __dev_queue_xmit() may be
+ * called recursively by tunnel drivers such as vxlan, the flag is reverted to
+ * false after sch_handle_egress(). This ensures that netfilter is applied
+ * both on the overlay and underlying network.
+ */
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+ struct net_device *dev)
+{
+ struct nf_hook_entries *e;
+ struct nf_hook_state state;
+ int ret;
+
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+ if (skb->nf_skip_egress)
+ return skb;
+#endif
+
+ e = rcu_dereference(dev->nf_hooks_egress);
+ if (!e)
+ return skb;
+
+ nf_hook_state_init(&state, NF_NETDEV_EGRESS,
+ NFPROTO_NETDEV, dev, NULL, NULL,
+ dev_net(dev), NULL);
+ ret = nf_hook_slow(skb, &state, e, 0);
+
+ if (ret == 1) {
+ return skb;
+ } else if (ret < 0) {
+ *rc = NET_XMIT_DROP;
+ return NULL;
+ } else { /* ret == 0 */
+ *rc = NET_XMIT_SUCCESS;
+ return NULL;
+ }
+}
+#else /* CONFIG_NETFILTER_EGRESS */
+static inline bool nf_hook_egress_active(void)
+{
+ return false;
+}
+
+static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc,
+ struct net_device *dev)
+{
+ return skb;
+}
+#endif /* CONFIG_NETFILTER_EGRESS */
+
+static inline void nf_skip_egress(struct sk_buff *skb, bool skip)
+{
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+ skb->nf_skip_egress = skip;
+#endif
+}
+
+static inline void nf_hook_netdev_init(struct net_device *dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+ RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ RCU_INIT_POINTER(dev->nf_hooks_egress, NULL);
+#endif
+}
+
+#endif /* _NETFILTER_NETDEV_H_ */
* @tc_at_ingress: used within tc_classify to distinguish in/egress
* @redirected: packet was redirected by packet classifier
* @from_ingress: packet was redirected from the ingress path
+ * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h
* @peeked: this packet has been seen already, so stats have been
* done for it, don't do them again
* @nf_trace: netfilter packet trace flag
#ifdef CONFIG_NET_REDIRECT
__u8 from_ingress:1;
#endif
+#ifdef CONFIG_NETFILTER_SKIP_EGRESS
+ __u8 nf_skip_egress:1;
+#endif
#ifdef CONFIG_TLS_DEVICE
__u8 decrypted:1;
#endif
int sysctl_conn_reuse_mode;
int sysctl_schedule_icmp;
int sysctl_ignore_tunneled;
+ int sysctl_run_estimation;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
return ipvs->sysctl_cache_bypass;
}
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_run_estimation;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
return 0;
}
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+ return 1;
+}
+
#endif
/* IPVS core functions
enum nf_dev_hooks {
NF_NETDEV_INGRESS,
+ NF_NETDEV_EGRESS,
NF_NETDEV_NUMHOOKS
};
NFPROTO_BRIDGE, s->in, NULL, NULL,
s->net, NULL);
- ret = ebt_do_table(skb, &state, priv);
+ ret = ebt_do_table(priv, skb, &state);
if (ret != NF_DROP)
return ret;
.me = THIS_MODULE,
};
-static unsigned int
-ebt_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ebt_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops ebt_ops_filter[] = {
{
- .hook = ebt_filter_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
- .hook = ebt_filter_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
- .hook = ebt_filter_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_FILTER_OTHER,
.me = THIS_MODULE,
};
-static unsigned int ebt_nat_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ebt_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops ebt_ops_nat[] = {
{
- .hook = ebt_nat_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_NAT_DST_OTHER,
},
{
- .hook = ebt_nat_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_NAT_SRC,
},
{
- .hook = ebt_nat_hook,
+ .hook = ebt_do_table,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_NAT_DST_BRIDGED,
}
/* Do some firewalling */
-unsigned int ebt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct ebt_table *table)
+unsigned int ebt_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ struct ebt_table *table = priv;
unsigned int hook = state->hook;
int i, nentries;
struct ebt_entry *point;
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
-#include <linux/netfilter_ingress.h>
+#include <linux/netfilter_netdev.h>
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
static struct sk_buff *
sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
+#ifdef CONFIG_NET_CLS_ACT
struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
struct tcf_result cl_res;
default:
break;
}
+#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
qdisc_pkt_len_init(skb);
#ifdef CONFIG_NET_CLS_ACT
skb->tc_at_ingress = 0;
-# ifdef CONFIG_NET_EGRESS
+#endif
+#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
+ if (nf_hook_egress_active()) {
+ skb = nf_hook_egress(skb, &rc, dev);
+ if (!skb)
+ goto out;
+ }
+ nf_skip_egress(skb, true);
skb = sch_handle_egress(skb, &rc, dev);
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
}
-# endif
#endif
/* If device/qdisc don't need skb->dst, release it right now while
* its hot in this cpu cache.
if (static_branch_unlikely(&ingress_needed_key)) {
bool another = false;
+ nf_skip_egress(skb, true);
skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev,
&another);
if (another)
if (!skb)
goto out;
+ nf_skip_egress(skb, false);
if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
goto out;
}
if (!dev->ethtool_ops)
dev->ethtool_ops = &default_ethtool_ops;
- nf_hook_ingress_init(dev);
+ nf_hook_netdev_init(dev);
return dev;
return (void *)entry + entry->next_offset;
}
-unsigned int arpt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table)
+unsigned int arpt_do_table(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ const struct xt_table *table = priv;
unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
unsigned int verdict = NF_DROP;
.priority = NF_IP_PRI_FILTER,
};
-/* The work comes in here from netfilter.c */
-static unsigned int
-arptable_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return arpt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *arpfilter_ops __read_mostly;
static int arptable_filter_table_init(struct net *net)
if (ret < 0)
return ret;
- arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook);
+ arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arpt_do_table);
if (IS_ERR(arpfilter_ops)) {
xt_unregister_template(&packet_filter);
return PTR_ERR(arpfilter_ops);
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ipt_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table)
+ipt_do_table(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ const struct xt_table *table = priv;
unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
.priority = NF_IP_PRI_FILTER,
};
-static unsigned int
-iptable_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
if (ret < 0)
return ret;
- filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook);
+ filter_ops = xt_hook_ops_alloc(&packet_filter, ipt_do_table);
if (IS_ERR(filter_ops)) {
xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
};
static unsigned int
-ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ipt_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
const struct iphdr *iph;
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, state, priv);
+ ret = ipt_do_table(priv, skb, state);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
const struct nf_hook_state *state)
{
if (state->hook == NF_INET_LOCAL_OUT)
- return ipt_mangle_out(skb, state, priv);
- return ipt_do_table(skb, state, priv);
+ return ipt_mangle_out(priv, skb, state);
+ return ipt_do_table(priv, skb, state);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
.af = NFPROTO_IPV4,
};
-static unsigned int iptable_nat_do_chain(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
},
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
},
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
},
{
- .hook = iptable_nat_do_chain,
+ .hook = ipt_do_table,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
.priority = NF_IP_PRI_RAW_BEFORE_DEFRAG,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-iptable_raw_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *rawtable_ops __read_mostly;
static int iptable_raw_table_init(struct net *net)
if (ret < 0)
return ret;
- rawtable_ops = xt_hook_ops_alloc(table, iptable_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ipt_do_table);
if (IS_ERR(rawtable_ops)) {
xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
.priority = NF_IP_PRI_SECURITY,
};
-static unsigned int
-iptable_security_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ipt_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *sectbl_ops __read_mostly;
static int iptable_security_table_init(struct net *net)
if (ret < 0)
return ret;
- sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook);
+ sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table);
if (IS_ERR(sectbl_ops)) {
xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ip6t_do_table(struct sk_buff *skb,
- const struct nf_hook_state *state,
- struct xt_table *table)
+ip6t_do_table(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
{
+ const struct xt_table *table = priv;
unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
/* Initializing verdict to NF_DROP keeps gcc happy. */
.priority = NF_IP6_PRI_FILTER,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_filter_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
if (ret < 0)
return ret;
- filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
+ filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table);
if (IS_ERR(filter_ops)) {
xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
};
static unsigned int
-ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state, void *priv)
+ip6t_mangle_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
unsigned int ret;
struct in6_addr saddr, daddr;
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(skb, state, priv);
+ ret = ip6t_do_table(priv, skb, state);
if (ret != NF_DROP && ret != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
const struct nf_hook_state *state)
{
if (state->hook == NF_INET_LOCAL_OUT)
- return ip6t_mangle_out(skb, state, priv);
- return ip6t_do_table(skb, state, priv);
+ return ip6t_mangle_out(priv, skb, state);
+ return ip6t_do_table(priv, skb, state);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
.af = NFPROTO_IPV6,
};
-static unsigned int ip6table_nat_do_chain(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
{
- .hook = ip6table_nat_do_chain,
+ .hook = ip6t_do_table,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ip6table_raw_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *rawtable_ops __read_mostly;
static int ip6table_raw_table_init(struct net *net)
return ret;
/* Register hooks */
- rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ip6t_do_table);
if (IS_ERR(rawtable_ops)) {
xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
.priority = NF_IP6_PRI_SECURITY,
};
-static unsigned int
-ip6table_security_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip6t_do_table(skb, state, priv);
-}
-
static struct nf_hook_ops *sectbl_ops __read_mostly;
static int ip6table_security_table_init(struct net *net)
if (ret < 0)
return ret;
- sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
+ sectbl_ops = xt_hook_ops_alloc(&security_table, ip6t_do_table);
if (IS_ERR(sectbl_ops)) {
xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
This allows you to classify packets from ingress using the Netfilter
infrastructure.
+config NETFILTER_EGRESS
+ bool "Netfilter egress support"
+ default y
+ select NET_EGRESS
+ help
+ This allows you to classify packets before transmission using the
+ Netfilter infrastructure.
+
+config NETFILTER_SKIP_EGRESS
+ def_bool NETFILTER_EGRESS && (NET_CLS_ACT || IFB)
+
config NETFILTER_NETLINK
tristate
if (dev && dev_net(dev) == net)
return &dev->nf_hooks_ingress;
}
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hooknum == NF_NETDEV_EGRESS) {
+ if (dev && dev_net(dev) == net)
+ return &dev->nf_hooks_egress;
+ }
#endif
WARN_ON_ONCE(1);
return NULL;
return 0;
}
-static inline bool nf_ingress_hook(const struct nf_hook_ops *reg, int pf)
+static inline bool __maybe_unused nf_ingress_hook(const struct nf_hook_ops *reg,
+ int pf)
{
if ((pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) ||
(pf == NFPROTO_INET && reg->hooknum == NF_INET_INGRESS))
return false;
}
+static inline bool __maybe_unused nf_egress_hook(const struct nf_hook_ops *reg,
+ int pf)
+{
+ return pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_EGRESS;
+}
+
static void nf_static_key_inc(const struct nf_hook_ops *reg, int pf)
{
#ifdef CONFIG_JUMP_LABEL
switch (pf) {
case NFPROTO_NETDEV:
- err = nf_ingress_check(net, reg, NF_NETDEV_INGRESS);
- if (err < 0)
- return err;
+#ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+#endif
+#ifndef CONFIG_NETFILTER_EGRESS
+ if (reg->hooknum == NF_NETDEV_EGRESS)
+ return -EOPNOTSUPP;
+#endif
+ if ((reg->hooknum != NF_NETDEV_INGRESS &&
+ reg->hooknum != NF_NETDEV_EGRESS) ||
+ !reg->dev || dev_net(reg->dev) != net)
+ return -EINVAL;
break;
case NFPROTO_INET:
if (reg->hooknum != NF_INET_INGRESS)
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_inc_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_inc_egress_queue();
#endif
nf_static_key_inc(reg, pf);
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_ingress_hook(reg, pf))
net_dec_ingress_queue();
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_egress_hook(reg, pf))
+ net_dec_egress_queue();
#endif
nf_static_key_dec(reg, pf);
} else {
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
-ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ unsigned int hooknum = state->hook;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ int af = state->pf;
struct sock *sk;
EnterFunction(11);
return NF_ACCEPT;
}
-/*
- * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
- * used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_reply4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-/*
- * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_local_reply4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-
-/*
- * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chain,
- * used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_reply6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-/*
- * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
- * Check if packet is reply for established ip_vs_conn.
- */
-static unsigned int
-ip_vs_local_reply6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-#endif
-
static unsigned int
ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
struct ip_vs_proto_data *pd,
* and send it on its way...
*/
static unsigned int
-ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *state)
{
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
+ unsigned int hooknum = state->hook;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
int ret, pkts;
int conn_reuse_mode;
struct sock *sk;
+ int af = state->pf;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
return ret;
}
-/*
- * AF_INET handler in NF_INET_LOCAL_IN chain
- * Schedule and forward packets from remote clients
- */
-static unsigned int
-ip_vs_remote_request4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-/*
- * AF_INET handler in NF_INET_LOCAL_OUT chain
- * Schedule and forward packets from local clients
- */
-static unsigned int
-ip_vs_local_request4(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
-}
-
-#ifdef CONFIG_IP_VS_IPV6
-
-/*
- * AF_INET6 handler in NF_INET_LOCAL_IN chain
- * Schedule and forward packets from remote clients
- */
-static unsigned int
-ip_vs_remote_request6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-/*
- * AF_INET6 handler in NF_INET_LOCAL_OUT chain
- * Schedule and forward packets from local clients
- */
-static unsigned int
-ip_vs_local_request6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
-}
-
-#endif
-
-
/*
* It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
* related packets destined for 0.0.0.0/0.
ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- int r;
struct netns_ipvs *ipvs = net_ipvs(state->net);
-
- if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
- return NF_ACCEPT;
+ int r;
/* ipvs enabled in this netns ? */
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
-}
-
+ if (state->pf == NFPROTO_IPV4) {
+ if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
+ return NF_ACCEPT;
#ifdef CONFIG_IP_VS_IPV6
-static unsigned int
-ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- int r;
- struct netns_ipvs *ipvs = net_ipvs(state->net);
- struct ip_vs_iphdr iphdr;
+ } else {
+ struct ip_vs_iphdr iphdr;
- ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
- if (iphdr.protocol != IPPROTO_ICMPV6)
- return NF_ACCEPT;
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
- /* ipvs enabled in this netns ? */
- if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
- return NF_ACCEPT;
+ if (iphdr.protocol != IPPROTO_ICMPV6)
+ return NF_ACCEPT;
- return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
-}
+ return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
#endif
+ }
+ return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
+}
static const struct nf_hook_ops ip_vs_ops4[] = {
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply4,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 2,
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
- .hook = ip_vs_remote_request4,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 1,
},
/* Before ip_vs_in, change source only for VS/NAT */
{
- .hook = ip_vs_local_reply4,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 1,
},
/* After mangle, schedule and forward local requests */
{
- .hook = ip_vs_local_request4,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 2,
},
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply4,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 100,
static const struct nf_hook_ops ip_vs_ops6[] = {
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply6,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 2,
* or VS/NAT(change destination), so that filtering rules can be
* applied to IPVS. */
{
- .hook = ip_vs_remote_request6,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 1,
},
/* Before ip_vs_in, change source only for VS/NAT */
{
- .hook = ip_vs_local_reply6,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
},
/* After mangle, schedule and forward local requests */
{
- .hook = ip_vs_local_request6,
+ .hook = ip_vs_in_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 2,
/* After packet filtering (but before ip_vs_out_icmp), catch icmp
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
- .hook = ip_vs_forward_icmp_v6,
+ .hook = ip_vs_forward_icmp,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 99,
},
/* After packet filtering, change source only for VS/NAT */
{
- .hook = ip_vs_reply6,
+ .hook = ip_vs_out_hook,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "run_estimation",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+ ipvs->sysctl_run_estimation = 1;
+ tbl[idx++].data = &ipvs->sysctl_run_estimation;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
u64 rate;
struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
+ if (!sysctl_run_estimation(ipvs))
+ goto skip;
+
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
+
+skip:
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev)
{
const struct nf_hook_entries *hook_head = NULL;
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
struct net_device *netdev;
#endif
hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
break;
#endif
-#ifdef CONFIG_NETFILTER_INGRESS
+#if defined(CONFIG_NETFILTER_INGRESS) || defined(CONFIG_NETFILTER_EGRESS)
case NFPROTO_NETDEV:
- if (hook != NF_NETDEV_INGRESS)
+ if (hook >= NF_NETDEV_NUMHOOKS)
return ERR_PTR(-EOPNOTSUPP);
if (!dev)
if (!netdev)
return ERR_PTR(-ENODEV);
- return rcu_dereference(netdev->nf_hooks_ingress);
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (hook == NF_NETDEV_INGRESS)
+ return rcu_dereference(netdev->nf_hooks_ingress);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (hook == NF_NETDEV_EGRESS)
+ return rcu_dereference(netdev->nf_hooks_egress);
+#endif
+ fallthrough;
#endif
default:
return ERR_PTR(-EPROTONOSUPPORT);
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
.family = NFPROTO_NETDEV,
- .hook_mask = (1 << NF_NETDEV_INGRESS),
+ .hook_mask = (1 << NF_NETDEV_INGRESS) |
+ (1 << NF_NETDEV_EGRESS),
.hooks = {
[NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ [NF_NETDEV_EGRESS] = nft_do_chain_netdev,
},
};
return -EBUSY;
priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
- switch (priv->op) {
- case NFT_DYNSET_OP_ADD:
- case NFT_DYNSET_OP_DELETE:
- break;
- case NFT_DYNSET_OP_UPDATE:
- if (!(set->flags & NFT_SET_TIMEOUT))
- return -EOPNOTSUPP;
- break;
- default:
+ if (priv->op > NFT_DYNSET_OP_DELETE)
return -EOPNOTSUPP;
- }
timeout = 0;
if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
#endif
#include <linux/bpf.h>
#include <net/compat.h>
+#include <linux/netfilter_netdev.h>
#include "internal.h"
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
+#ifdef CONFIG_NETFILTER_EGRESS
+static noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb)
+{
+ struct sk_buff *next, *head = NULL, *tail;
+ int rc;
+
+ rcu_read_lock();
+ for (; skb != NULL; skb = next) {
+ next = skb->next;
+ skb_mark_not_on_list(skb);
+
+ if (!nf_hook_egress(skb, &rc, skb->dev))
+ continue;
+
+ if (!head)
+ head = skb;
+ else
+ tail->next = skb;
+
+ tail = skb;
+ }
+ rcu_read_unlock();
+
+ return head;
+}
+#endif
+
static int packet_direct_xmit(struct sk_buff *skb)
{
+#ifdef CONFIG_NETFILTER_EGRESS
+ if (nf_hook_egress_active()) {
+ skb = nf_hook_direct_egress(skb);
+ if (!skb)
+ return NET_XMIT_DROP;
+ }
+#endif
return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
}