Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
author David S. Miller <davem@davemloft.net>
Mon, 2 Mar 2015 19:55:05 +0000 (14:55 -0500)
committer David S. Miller <davem@davemloft.net>
Mon, 2 Mar 2015 19:55:05 +0000 (14:55 -0500)
Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

A small batch with accumulated updates in nf-next, mostly IPVS updates,
they are:

1) Add 64-bit stats counters to IPVS, from Julian Anastasov.

2) Move NETFILTER_XT_MATCH_ADDRTYPE out of NETFILTER_ADVANCED as docker
seems to require this, from Anton Blanchard.

3) Use boolean instead of numeric value in set_match_v*(), from
coccinelle via Fengguang Wu.

4) Allow rescheduling of new connections in IPVS when port reuse is
detected, from Marcelo Ricardo Leitner.

5) Add missing bits to support arptables extensions from nft_compat,
from Arturo Borrero.

Patrick is preparing a large batch to enhance the set infrastructure,
named expressions among other things, that should follow up soon after
this batch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Documentation/networking/ipvs-sysctl.txt
include/net/ip_vs.h
include/uapi/linux/ip_vs.h
net/netfilter/Kconfig
net/netfilter/ipvs/ip_vs_core.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/ipvs/ip_vs_est.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nft_compat.c
net/netfilter/xt_set.c

index 7a3c047295914cbc8c4273506a9b6d35246a1750..3ba709531adba970595251fa73d6d471ed14c5c1 100644 (file)
@@ -22,6 +22,27 @@ backup_only - BOOLEAN
        If set, disable the director function while the server is
        in backup mode to avoid packet loops for DR/TUN methods.
 
+conn_reuse_mode - INTEGER
+       1 - default
+
+       Controls how ipvs will deal with connections that are detected
+       port reuse. It is a bitmap, with the values being:
+
+       0: disable any special handling on port reuse. The new
+       connection will be delivered to the same real server that was
+       servicing the previous connection. This will effectively
+       disable expire_nodest_conn.
+
+       bit 1: enable rescheduling of new connections when it is safe.
+       That is, whenever expire_nodest_conn and for TCP sockets, when
+       the connection is in TIME_WAIT state (which is only possible if
+       you use NAT mode).
+
+       bit 2: it is bit 1 plus, for TCP connections, when connections
+       are in FIN_WAIT state, as this is the last state seen by load
+       balancer in Direct Routing mode. This bit helps on adding new
+       real servers to a very busy cluster.
+
 conntrack - BOOLEAN
        0 - disabled (default)
        not 0 - enabled
index 615b20b585452111a25085890d8fa875657dbe76..20fd23398537c393968d57e94b0027f1e6bca03b 100644 (file)
@@ -365,15 +365,15 @@ struct ip_vs_seq {
 
 /* counters per cpu */
 struct ip_vs_counters {
-       __u32           conns;          /* connections scheduled */
-       __u32           inpkts;         /* incoming packets */
-       __u32           outpkts;        /* outgoing packets */
+       __u64           conns;          /* connections scheduled */
+       __u64           inpkts;         /* incoming packets */
+       __u64           outpkts;        /* outgoing packets */
        __u64           inbytes;        /* incoming bytes */
        __u64           outbytes;       /* outgoing bytes */
 };
 /* Stats per cpu */
 struct ip_vs_cpu_stats {
-       struct ip_vs_counters   ustats;
+       struct ip_vs_counters   cnt;
        struct u64_stats_sync   syncp;
 };
 
@@ -383,23 +383,40 @@ struct ip_vs_estimator {
 
        u64                     last_inbytes;
        u64                     last_outbytes;
-       u32                     last_conns;
-       u32                     last_inpkts;
-       u32                     last_outpkts;
-
-       u32                     cps;
-       u32                     inpps;
-       u32                     outpps;
-       u32                     inbps;
-       u32                     outbps;
+       u64                     last_conns;
+       u64                     last_inpkts;
+       u64                     last_outpkts;
+
+       u64                     cps;
+       u64                     inpps;
+       u64                     outpps;
+       u64                     inbps;
+       u64                     outbps;
+};
+
+/*
+ * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user
+ */
+struct ip_vs_kstats {
+       u64                     conns;          /* connections scheduled */
+       u64                     inpkts;         /* incoming packets */
+       u64                     outpkts;        /* outgoing packets */
+       u64                     inbytes;        /* incoming bytes */
+       u64                     outbytes;       /* outgoing bytes */
+
+       u64                     cps;            /* current connection rate */
+       u64                     inpps;          /* current in packet rate */
+       u64                     outpps;         /* current out packet rate */
+       u64                     inbps;          /* current in byte rate */
+       u64                     outbps;         /* current out byte rate */
 };
 
 struct ip_vs_stats {
-       struct ip_vs_stats_user ustats;         /* statistics */
+       struct ip_vs_kstats     kstats;         /* kernel statistics */
        struct ip_vs_estimator  est;            /* estimator */
        struct ip_vs_cpu_stats __percpu *cpustats;      /* per cpu counters */
        spinlock_t              lock;           /* spin lock */
-       struct ip_vs_stats_user ustats0;        /* reset values */
+       struct ip_vs_kstats     kstats0;        /* reset values */
 };
 
 struct dst_entry;
@@ -924,6 +941,7 @@ struct netns_ipvs {
        int                     sysctl_nat_icmp_send;
        int                     sysctl_pmtu_disc;
        int                     sysctl_backup_only;
+       int                     sysctl_conn_reuse_mode;
 
        /* ip_vs_lblc */
        int                     sysctl_lblc_expiration;
@@ -1042,6 +1060,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
               ipvs->sysctl_backup_only;
 }
 
+static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
+{
+       return ipvs->sysctl_conn_reuse_mode;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1109,6 +1132,11 @@ static inline int sysctl_backup_only(struct netns_ipvs *ipvs)
        return 0;
 }
 
+static inline int sysctl_conn_reuse_mode(struct netns_ipvs *ipvs)
+{
+       return 1;
+}
+
 #endif
 
 /* IPVS core functions
@@ -1388,8 +1416,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_zero_estimator(struct ip_vs_stats *stats);
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
-                         struct ip_vs_stats *stats);
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
 
 /* Various IPVS packet transmitters (from ip_vs_xmit.c) */
 int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
index cabe95d5b4613c78af31f16be630ef03146a7d0b..3199243f20282050fa992de45f9f627a5a72199e 100644 (file)
@@ -358,6 +358,8 @@ enum {
 
        IPVS_SVC_ATTR_PE_NAME,          /* name of ct retriever */
 
+       IPVS_SVC_ATTR_STATS64,          /* nested attribute for service stats */
+
        __IPVS_SVC_ATTR_MAX,
 };
 
@@ -387,6 +389,8 @@ enum {
 
        IPVS_DEST_ATTR_ADDR_FAMILY,     /* Address family of address */
 
+       IPVS_DEST_ATTR_STATS64,         /* nested attribute for dest stats */
+
        __IPVS_DEST_ATTR_MAX,
 };
 
@@ -410,7 +414,8 @@ enum {
 /*
  * Attributes used to describe service or destination entry statistics
  *
- * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
+ * Used inside nested attributes IPVS_SVC_ATTR_STATS, IPVS_DEST_ATTR_STATS,
+ * IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64.
  */
 enum {
        IPVS_STATS_ATTR_UNSPEC = 0,
index b02660fa9eb00cd28aeb01f10b76f6604eae8096..c68c3b441381671737078fbd71513ed2f3150a16 100644 (file)
@@ -951,7 +951,7 @@ comment "Xtables matches"
 
 config NETFILTER_XT_MATCH_ADDRTYPE
        tristate '"addrtype" address type match support'
-       depends on NETFILTER_ADVANCED
+       default m if NETFILTER_ADVANCED=n
        ---help---
          This option allows you to match what routing thinks of an address,
          eg. UNICAST, LOCAL, BROADCAST, ...
index b87ca32efa0b4e6edc7f251c2c32c4ba3b55659c..04dbd9c7213fe86e1993e009372b6838127e4688 100644 (file)
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                struct ip_vs_service *svc;
 
                s = this_cpu_ptr(dest->stats.cpustats);
-               s->ustats.inpkts++;
                u64_stats_update_begin(&s->syncp);
-               s->ustats.inbytes += skb->len;
+               s->cnt.inpkts++;
+               s->cnt.inbytes += skb->len;
                u64_stats_update_end(&s->syncp);
 
                rcu_read_lock();
                svc = rcu_dereference(dest->svc);
                s = this_cpu_ptr(svc->stats.cpustats);
-               s->ustats.inpkts++;
                u64_stats_update_begin(&s->syncp);
-               s->ustats.inbytes += skb->len;
+               s->cnt.inpkts++;
+               s->cnt.inbytes += skb->len;
                u64_stats_update_end(&s->syncp);
                rcu_read_unlock();
 
                s = this_cpu_ptr(ipvs->tot_stats.cpustats);
-               s->ustats.inpkts++;
                u64_stats_update_begin(&s->syncp);
-               s->ustats.inbytes += skb->len;
+               s->cnt.inpkts++;
+               s->cnt.inbytes += skb->len;
                u64_stats_update_end(&s->syncp);
        }
 }
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
                struct ip_vs_service *svc;
 
                s = this_cpu_ptr(dest->stats.cpustats);
-               s->ustats.outpkts++;
                u64_stats_update_begin(&s->syncp);
-               s->ustats.outbytes += skb->len;
+               s->cnt.outpkts++;
+               s->cnt.outbytes += skb->len;
                u64_stats_update_end(&s->syncp);
 
                rcu_read_lock();
                svc = rcu_dereference(dest->svc);
                s = this_cpu_ptr(svc->stats.cpustats);
-               s->ustats.outpkts++;
                u64_stats_update_begin(&s->syncp);
-               s->ustats.outbytes += skb->len;
+               s->cnt.outpkts++;
+               s->cnt.outbytes += skb->len;
                u64_stats_update_end(&s->syncp);
                rcu_read_unlock();
 
                s = this_cpu_ptr(ipvs->tot_stats.cpustats);
-               s->ustats.outpkts++;
                u64_stats_update_begin(&s->syncp);
-               s->ustats.outbytes += skb->len;
+               s->cnt.outpkts++;
+               s->cnt.outbytes += skb->len;
                u64_stats_update_end(&s->syncp);
        }
 }
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
        struct ip_vs_cpu_stats *s;
 
        s = this_cpu_ptr(cp->dest->stats.cpustats);
-       s->ustats.conns++;
+       u64_stats_update_begin(&s->syncp);
+       s->cnt.conns++;
+       u64_stats_update_end(&s->syncp);
 
        s = this_cpu_ptr(svc->stats.cpustats);
-       s->ustats.conns++;
+       u64_stats_update_begin(&s->syncp);
+       s->cnt.conns++;
+       u64_stats_update_end(&s->syncp);
 
        s = this_cpu_ptr(ipvs->tot_stats.cpustats);
-       s->ustats.conns++;
+       u64_stats_update_begin(&s->syncp);
+       s->cnt.conns++;
+       u64_stats_update_end(&s->syncp);
 }
 
 
@@ -1046,6 +1052,26 @@ static inline bool is_new_conn(const struct sk_buff *skb,
        }
 }
 
+static inline bool is_new_conn_expected(const struct ip_vs_conn *cp,
+                                       int conn_reuse_mode)
+{
+       /* Controlled (FTP DATA or persistence)? */
+       if (cp->control)
+               return false;
+
+       switch (cp->protocol) {
+       case IPPROTO_TCP:
+               return (cp->state == IP_VS_TCP_S_TIME_WAIT) ||
+                       ((conn_reuse_mode & 2) &&
+                        (cp->state == IP_VS_TCP_S_FIN_WAIT) &&
+                        (cp->flags & IP_VS_CONN_F_NOOUTPUT));
+       case IPPROTO_SCTP:
+               return cp->state == IP_VS_SCTP_S_CLOSED;
+       default:
+               return false;
+       }
+}
+
 /* Handle response packets: rewrite addresses and send away...
  */
 static unsigned int
@@ -1585,6 +1611,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
        struct ip_vs_conn *cp;
        int ret, pkts;
        struct netns_ipvs *ipvs;
+       int conn_reuse_mode;
 
        /* Already marked as IPVS request or reply? */
        if (skb->ipvs_property)
@@ -1653,10 +1680,14 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
         */
        cp = pp->conn_in_get(af, skb, &iph, 0);
 
-       if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp && cp->dest &&
-           unlikely(!atomic_read(&cp->dest->weight)) && !iph.fragoffs &&
-           is_new_conn(skb, &iph)) {
-               ip_vs_conn_expire_now(cp);
+       conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
+       if (conn_reuse_mode && !iph.fragoffs &&
+           is_new_conn(skb, &iph) && cp &&
+           ((unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
+             unlikely(!atomic_read(&cp->dest->weight))) ||
+            unlikely(is_new_conn_expected(cp, conn_reuse_mode)))) {
+               if (!atomic_read(&cp->n_control))
+                       ip_vs_conn_expire_now(cp);
                __ip_vs_conn_put(cp);
                cp = NULL;
        }
index e55759056361c47ed1fcfa5c656541ba39bfd260..76cc9ffd87fa80c594c685fb81f105ee7ca90ae4 100644 (file)
@@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
 }
 
 static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
 {
-#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
 
        spin_lock_bh(&src->lock);
 
@@ -746,6 +746,21 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
        spin_unlock_bh(&src->lock);
 }
 
+static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+       dst->conns = (u32)src->conns;
+       dst->inpkts = (u32)src->inpkts;
+       dst->outpkts = (u32)src->outpkts;
+       dst->inbytes = src->inbytes;
+       dst->outbytes = src->outbytes;
+       dst->cps = (u32)src->cps;
+       dst->inpps = (u32)src->inpps;
+       dst->outpps = (u32)src->outpps;
+       dst->inbps = (u32)src->inbps;
+       dst->outbps = (u32)src->outbps;
+}
+
 static void
 ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
@@ -753,7 +768,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
 
        /* get current counters as zero point, rates are zeroed */
 
-#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
 
        IP_VS_ZERO_STATS_COUNTER(conns);
        IP_VS_ZERO_STATS_COUNTER(inpkts);
@@ -1808,6 +1823,12 @@ static struct ctl_table vs_vars[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
+       {
+               .procname       = "conn_reuse_mode",
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec,
+       },
 #ifdef CONFIG_IP_VS_DEBUG
        {
                .procname       = "debug_level",
@@ -2044,7 +2065,7 @@ static const struct file_operations ip_vs_info_fops = {
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
        struct net *net = seq_file_single_net(seq);
-       struct ip_vs_stats_user show;
+       struct ip_vs_kstats show;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
@@ -2053,17 +2074,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
                   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
        ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
-       seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
-                  show.inpkts, show.outpkts,
-                  (unsigned long long) show.inbytes,
-                  (unsigned long long) show.outbytes);
-
-/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+       seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+                  (unsigned long long)show.conns,
+                  (unsigned long long)show.inpkts,
+                  (unsigned long long)show.outpkts,
+                  (unsigned long long)show.inbytes,
+                  (unsigned long long)show.outbytes);
+
+/*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
        seq_puts(seq,
-                  " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-       seq_printf(seq, "%8X %8X %8X %16X %16X\n",
-                       show.cps, show.inpps, show.outpps,
-                       show.inbps, show.outbps);
+                " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+       seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+                  (unsigned long long)show.cps,
+                  (unsigned long long)show.inpps,
+                  (unsigned long long)show.outpps,
+                  (unsigned long long)show.inbps,
+                  (unsigned long long)show.outbps);
 
        return 0;
 }
@@ -2086,7 +2112,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
        struct net *net = seq_file_single_net(seq);
        struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
        struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
-       struct ip_vs_stats_user rates;
+       struct ip_vs_kstats kstats;
        int i;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2098,41 +2124,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
        for_each_possible_cpu(i) {
                struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
                unsigned int start;
-               __u64 inbytes, outbytes;
+               u64 conns, inpkts, outpkts, inbytes, outbytes;
 
                do {
                        start = u64_stats_fetch_begin_irq(&u->syncp);
-                       inbytes = u->ustats.inbytes;
-                       outbytes = u->ustats.outbytes;
+                       conns = u->cnt.conns;
+                       inpkts = u->cnt.inpkts;
+                       outpkts = u->cnt.outpkts;
+                       inbytes = u->cnt.inbytes;
+                       outbytes = u->cnt.outbytes;
                } while (u64_stats_fetch_retry_irq(&u->syncp, start));
 
-               seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
-                          i, u->ustats.conns, u->ustats.inpkts,
-                          u->ustats.outpkts, (__u64)inbytes,
-                          (__u64)outbytes);
+               seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+                          i, (u64)conns, (u64)inpkts,
+                          (u64)outpkts, (u64)inbytes,
+                          (u64)outbytes);
        }
 
-       spin_lock_bh(&tot_stats->lock);
-
-       seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
-                  tot_stats->ustats.conns, tot_stats->ustats.inpkts,
-                  tot_stats->ustats.outpkts,
-                  (unsigned long long) tot_stats->ustats.inbytes,
-                  (unsigned long long) tot_stats->ustats.outbytes);
-
-       ip_vs_read_estimator(&rates, tot_stats);
+       ip_vs_copy_stats(&kstats, tot_stats);
 
-       spin_unlock_bh(&tot_stats->lock);
+       seq_printf(seq, "  ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+                  (unsigned long long)kstats.conns,
+                  (unsigned long long)kstats.inpkts,
+                  (unsigned long long)kstats.outpkts,
+                  (unsigned long long)kstats.inbytes,
+                  (unsigned long long)kstats.outbytes);
 
-/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+/*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
        seq_puts(seq,
-                  "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-       seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
-                       rates.cps,
-                       rates.inpps,
-                       rates.outpps,
-                       rates.inbps,
-                       rates.outbps);
+                "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+       seq_printf(seq, "    %8LX %8LX %8LX %16LX %16LX\n",
+                  kstats.cps,
+                  kstats.inpps,
+                  kstats.outpps,
+                  kstats.inbps,
+                  kstats.outbps);
 
        return 0;
 }
@@ -2400,6 +2426,7 @@ static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
        struct ip_vs_scheduler *sched;
+       struct ip_vs_kstats kstats;
 
        sched = rcu_dereference_protected(src->scheduler, 1);
        dst->protocol = src->protocol;
@@ -2411,7 +2438,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
        dst->timeout = src->timeout / HZ;
        dst->netmask = src->netmask;
        dst->num_dests = src->num_dests;
-       ip_vs_copy_stats(&dst->stats, &src->stats);
+       ip_vs_copy_stats(&kstats, &src->stats);
+       ip_vs_export_stats_user(&dst->stats, &kstats);
 }
 
 static inline int
@@ -2485,6 +2513,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                int count = 0;
                struct ip_vs_dest *dest;
                struct ip_vs_dest_entry entry;
+               struct ip_vs_kstats kstats;
 
                memset(&entry, 0, sizeof(entry));
                list_for_each_entry(dest, &svc->destinations, n_list) {
@@ -2506,7 +2535,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
                        entry.activeconns = atomic_read(&dest->activeconns);
                        entry.inactconns = atomic_read(&dest->inactconns);
                        entry.persistconns = atomic_read(&dest->persistconns);
-                       ip_vs_copy_stats(&entry.stats, &dest->stats);
+                       ip_vs_copy_stats(&kstats, &dest->stats);
+                       ip_vs_export_stats_user(&entry.stats, &kstats);
                        if (copy_to_user(&uptr->entrytable[count],
                                         &entry, sizeof(entry))) {
                                ret = -EFAULT;
@@ -2798,25 +2828,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 };
 
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
-                                struct ip_vs_stats *stats)
+                                struct ip_vs_kstats *kstats)
 {
-       struct ip_vs_stats_user ustats;
        struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
        if (!nl_stats)
                return -EMSGSIZE;
 
-       ip_vs_copy_stats(&ustats, stats);
-
-       if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
-           nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
-           nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
-           nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+       if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+           nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+               goto nla_put_failure;
+       nla_nest_end(skb, nl_stats);
+
+       return 0;
+
+nla_put_failure:
+       nla_nest_cancel(skb, nl_stats);
+       return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+                                  struct ip_vs_kstats *kstats)
+{
+       struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
+       if (!nl_stats)
+               return -EMSGSIZE;
+
+       if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
+           nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
                goto nla_put_failure;
        nla_nest_end(skb, nl_stats);
 
@@ -2835,6 +2891,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
        struct nlattr *nl_service;
        struct ip_vs_flags flags = { .flags = svc->flags,
                                     .mask = ~0 };
+       struct ip_vs_kstats kstats;
 
        nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
        if (!nl_service)
@@ -2860,7 +2917,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
            nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
            nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
                goto nla_put_failure;
-       if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+       ip_vs_copy_stats(&kstats, &svc->stats);
+       if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+               goto nla_put_failure;
+       if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
                goto nla_put_failure;
 
        nla_nest_end(skb, nl_service);
@@ -3032,6 +3092,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
 {
        struct nlattr *nl_dest;
+       struct ip_vs_kstats kstats;
 
        nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
        if (!nl_dest)
@@ -3054,7 +3115,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
                        atomic_read(&dest->persistconns)) ||
            nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
                goto nla_put_failure;
-       if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+       ip_vs_copy_stats(&kstats, &dest->stats);
+       if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+               goto nla_put_failure;
+       if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
                goto nla_put_failure;
 
        nla_nest_end(skb, nl_dest);
@@ -3732,6 +3796,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
        ipvs->sysctl_pmtu_disc = 1;
        tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
        tbl[idx++].data = &ipvs->sysctl_backup_only;
+       ipvs->sysctl_conn_reuse_mode = 1;
+       tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
 
 
        ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
index 1425e9a924c4f64429637bc49cbde204b0bb1921..ef0eb0a8d552944c6149559848e73c9fdd821700 100644 (file)
 
   NOTES.
 
-  * The stored value for average bps is scaled by 2^5, so that maximal
-    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+  * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
 
-  * A lot code is taken from net/sched/estimator.c
+  * Netlink users can see 64-bit values but sockopt users are restricted
+    to 32-bit values for conns, packets, bps, cps and pps.
+
+  * A lot of code is taken from net/core/gen_estimator.c
  */
 
 
 /*
  * Make a summary from each cpu
  */
-static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
                                 struct ip_vs_cpu_stats __percpu *stats)
 {
        int i;
@@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
        for_each_possible_cpu(i) {
                struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
                unsigned int start;
-               __u64 inbytes, outbytes;
+               u64 conns, inpkts, outpkts, inbytes, outbytes;
+
                if (add) {
-                       sum->conns += s->ustats.conns;
-                       sum->inpkts += s->ustats.inpkts;
-                       sum->outpkts += s->ustats.outpkts;
                        do {
                                start = u64_stats_fetch_begin(&s->syncp);
-                               inbytes = s->ustats.inbytes;
-                               outbytes = s->ustats.outbytes;
+                               conns = s->cnt.conns;
+                               inpkts = s->cnt.inpkts;
+                               outpkts = s->cnt.outpkts;
+                               inbytes = s->cnt.inbytes;
+                               outbytes = s->cnt.outbytes;
                        } while (u64_stats_fetch_retry(&s->syncp, start));
+                       sum->conns += conns;
+                       sum->inpkts += inpkts;
+                       sum->outpkts += outpkts;
                        sum->inbytes += inbytes;
                        sum->outbytes += outbytes;
                } else {
                        add = true;
-                       sum->conns = s->ustats.conns;
-                       sum->inpkts = s->ustats.inpkts;
-                       sum->outpkts = s->ustats.outpkts;
                        do {
                                start = u64_stats_fetch_begin(&s->syncp);
-                               sum->inbytes = s->ustats.inbytes;
-                               sum->outbytes = s->ustats.outbytes;
+                               sum->conns = s->cnt.conns;
+                               sum->inpkts = s->cnt.inpkts;
+                               sum->outpkts = s->cnt.outpkts;
+                               sum->inbytes = s->cnt.inbytes;
+                               sum->outbytes = s->cnt.outbytes;
                        } while (u64_stats_fetch_retry(&s->syncp, start));
                }
        }
@@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
 {
        struct ip_vs_estimator *e;
        struct ip_vs_stats *s;
-       u32 n_conns;
-       u32 n_inpkts, n_outpkts;
-       u64 n_inbytes, n_outbytes;
-       u32 rate;
+       u64 rate;
        struct net *net = (struct net *)arg;
        struct netns_ipvs *ipvs;
 
@@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
                s = container_of(e, struct ip_vs_stats, est);
 
                spin_lock(&s->lock);
-               ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
-               n_conns = s->ustats.conns;
-               n_inpkts = s->ustats.inpkts;
-               n_outpkts = s->ustats.outpkts;
-               n_inbytes = s->ustats.inbytes;
-               n_outbytes = s->ustats.outbytes;
+               ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
 
                /* scaled by 2^10, but divided 2 seconds */
-               rate = (n_conns - e->last_conns) << 9;
-               e->last_conns = n_conns;
-               e->cps += ((long)rate - (long)e->cps) >> 2;
-
-               rate = (n_inpkts - e->last_inpkts) << 9;
-               e->last_inpkts = n_inpkts;
-               e->inpps += ((long)rate - (long)e->inpps) >> 2;
-
-               rate = (n_outpkts - e->last_outpkts) << 9;
-               e->last_outpkts = n_outpkts;
-               e->outpps += ((long)rate - (long)e->outpps) >> 2;
-
-               rate = (n_inbytes - e->last_inbytes) << 4;
-               e->last_inbytes = n_inbytes;
-               e->inbps += ((long)rate - (long)e->inbps) >> 2;
-
-               rate = (n_outbytes - e->last_outbytes) << 4;
-               e->last_outbytes = n_outbytes;
-               e->outbps += ((long)rate - (long)e->outbps) >> 2;
+               rate = (s->kstats.conns - e->last_conns) << 9;
+               e->last_conns = s->kstats.conns;
+               e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+               rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+               e->last_inpkts = s->kstats.inpkts;
+               e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+               rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+               e->last_outpkts = s->kstats.outpkts;
+               e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+               /* scaled by 2^5, but divided by 2 seconds */
+               rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+               e->last_inbytes = s->kstats.inbytes;
+               e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+               rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+               e->last_outbytes = s->kstats.outbytes;
+               e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
                spin_unlock(&s->lock);
        }
        spin_unlock(&ipvs->est_lock);
@@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 {
        struct ip_vs_estimator *est = &stats->est;
-       struct ip_vs_stats_user *u = &stats->ustats;
+       struct ip_vs_kstats *k = &stats->kstats;
 
        /* reset counters, caller must hold the stats->lock lock */
-       est->last_inbytes = u->inbytes;
-       est->last_outbytes = u->outbytes;
-       est->last_conns = u->conns;
-       est->last_inpkts = u->inpkts;
-       est->last_outpkts = u->outpkts;
+       est->last_inbytes = k->inbytes;
+       est->last_outbytes = k->outbytes;
+       est->last_conns = k->conns;
+       est->last_inpkts = k->inpkts;
+       est->last_outpkts = k->outpkts;
        est->cps = 0;
        est->inpps = 0;
        est->outpps = 0;
@@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 }
 
 /* Get decoded rates */
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
-                         struct ip_vs_stats *stats)
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
 {
        struct ip_vs_estimator *e = &stats->est;
 
index c47ffd7a0a709cb73834c84652f251960f25db79..f96229cdb6e184543b6b958575c08c5a3c1b4b72 100644 (file)
@@ -845,10 +845,27 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
        struct ip_vs_conn *cp;
        struct netns_ipvs *ipvs = net_ipvs(net);
 
-       if (!(flags & IP_VS_CONN_F_TEMPLATE))
+       if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
                cp = ip_vs_conn_in_get(param);
-       else
+               if (cp && ((cp->dport != dport) ||
+                          !ip_vs_addr_equal(cp->daf, &cp->daddr, daddr))) {
+                       if (!(flags & IP_VS_CONN_F_INACTIVE)) {
+                               ip_vs_conn_expire_now(cp);
+                               __ip_vs_conn_put(cp);
+                               cp = NULL;
+                       } else {
+                               /* This is the expiration message for the
+                                * connection that was already replaced, so we
+                                * just ignore it.
+                                */
+                               __ip_vs_conn_put(cp);
+                               kfree(param->pe_data);
+                               return;
+                       }
+               }
+       } else {
                cp = ip_vs_ct_in_get(param);
+       }
 
        if (cp) {
                /* Free pe_data */
index c598f74063a19ebd51ea786530c0669d6f92b8c3..a990df2f3f7100d1e47acfb188e6ba6cd907d833 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
 #include <linux/netfilter_bridge/ebtables.h>
+#include <linux/netfilter_arp/arp_tables.h>
 #include <net/netfilter/nf_tables.h>
 
 static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -42,6 +43,7 @@ union nft_entry {
        struct ipt_entry e4;
        struct ip6t_entry e6;
        struct ebt_entry ebt;
+       struct arpt_entry arp;
 };
 
 static inline void
@@ -140,6 +142,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
                entry->ebt.ethproto = proto;
                entry->ebt.invflags = inv ? EBT_IPROTO : 0;
                break;
+       case NFPROTO_ARP:
+               break;
        }
        par->entryinfo  = entry;
        par->target     = target;
@@ -351,6 +355,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
                entry->ebt.ethproto = proto;
                entry->ebt.invflags = inv ? EBT_IPROTO : 0;
                break;
+       case NFPROTO_ARP:
+               break;
        }
        par->entryinfo  = entry;
        par->match      = match;
@@ -537,6 +543,9 @@ nfnl_compat_get(struct sock *nfnl, struct sk_buff *skb,
        case NFPROTO_BRIDGE:
                fmt = "ebt_%s";
                break;
+       case NFPROTO_ARP:
+               fmt = "arpt_%s";
+               break;
        default:
                pr_err("nft_compat: unsupported protocol %d\n",
                        nfmsg->nfgen_family);
index 0d47afea968240623ae4486d56c9a87fe2b7b121..89045982ec9468e01c81f6d86f50d508981e4593 100644 (file)
@@ -193,7 +193,7 @@ set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
                return ret;
 
        if (!match_counter0(opt.ext.packets, &info->packets))
-               return 0;
+               return false;
        return match_counter0(opt.ext.bytes, &info->bytes);
 }
 
@@ -239,7 +239,7 @@ set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
                return ret;
 
        if (!match_counter(opt.ext.packets, &info->packets))
-               return 0;
+               return false;
        return match_counter(opt.ext.bytes, &info->bytes);
 }