Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
author David S. Miller <davem@davemloft.net>
Tue, 14 Jul 2015 00:28:09 +0000 (17:28 -0700)
committer David S. Miller <davem@davemloft.net>
Tue, 14 Jul 2015 00:28:09 +0000 (17:28 -0700)
Conflicts:
net/bridge/br_mdb.c

Minor conflict in br_mdb.c: in 'net' we added a memset of the
on-stack 'ip' variable, whereas in 'net-next' we assign a new
member 'vid'.

Signed-off-by: David S. Miller <davem@davemloft.net>
105 files changed:
Documentation/networking/ip-sysctl.txt
drivers/net/dsa/Kconfig
drivers/net/dsa/mv88e6352.c
drivers/net/dsa/mv88e6xxx.c
drivers/net/dsa/mv88e6xxx.h
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/l2t.c
drivers/net/ethernet/chelsio/cxgb4/l2t.h
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.h
drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
drivers/net/ethernet/ec_bhf.c
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/emulex/benet/be_ethtool.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/hisilicon/hip04_eth.c
drivers/net/ethernet/hisilicon/hip04_mdio.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/neterion/s2io.c
drivers/net/ethernet/neterion/s2io.h
drivers/net/ethernet/rocker/rocker.c
drivers/net/ethernet/rocker/rocker.h
drivers/net/ethernet/ti/netcp_core.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ifb.c
drivers/net/phy/marvell.c
drivers/net/phy/phy.c
drivers/net/xen-netback/common.h
include/linux/ipv6.h
include/linux/phy.h
include/net/act_api.h
include/net/inet_hashtables.h
include/net/inet_timewait_sock.h
include/net/netns/ipv6.h
include/net/sch_generic.h
include/net/tc_act/tc_gact.h
include/net/tc_act/tc_mirred.h
include/net/tcp.h
include/net/timewait_sock.h
include/uapi/linux/if_bridge.h
kernel/bpf/core.c
lib/test_bpf.c
net/bridge/br_mdb.c
net/bridge/br_multicast.c
net/bridge/br_private.h
net/core/dev.c
net/core/timestamping.c
net/ipv4/inet_hashtables.c
net/ipv4/inet_timewait_sock.c
net/ipv4/ip_fragment.c
net/ipv4/ping.c
net/ipv4/route.c
net/ipv4/tcp_bic.c
net/ipv4/tcp_cdg.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_highspeed.c
net/ipv4/tcp_htcp.c
net/ipv4/tcp_hybla.c
net/ipv4/tcp_illinois.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_scalable.c
net/ipv4/tcp_timer.c
net/ipv4/tcp_vegas.c
net/ipv4/tcp_veno.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/datagram.c
net/ipv6/exthdrs.c
net/ipv6/inet6_hashtables.c
net/ipv6/raw.c
net/ipv6/sysctl_net_ipv6.c
net/ipv6/tcp_ipv6.c
net/netfilter/xt_TPROXY.c
net/sched/act_api.c
net/sched/act_bpf.c
net/sched/act_connmark.c
net/sched/act_csum.c
net/sched/act_gact.c
net/sched/act_ipt.c
net/sched/act_mirred.c
net/sched/act_nat.c
net/sched/act_pedit.c
net/sched/act_simple.c
net/sched/act_skbedit.c
net/sched/act_vlan.c
samples/bpf/bpf_helpers.h
samples/bpf/tracex1_kern.c
samples/bpf/tracex2_kern.c
samples/bpf/tracex3_kern.c
samples/bpf/tracex4_kern.c
samples/bpf/tracex5_kern.c

index 5fae7704daab292cf900158666c2d4bb80dd2424..f63aeefd2c240f114e6c3090cacc2963c03c652c 100644 (file)
@@ -1435,6 +1435,11 @@ mtu - INTEGER
        Default Maximum Transfer Unit
        Default: 1280 (IPv6 required minimum)
 
+ip_nonlocal_bind - BOOLEAN
+       If set, allows processes to bind() to non-local IPv6 addresses,
+       which can be quite useful - but may break some applications.
+       Default: 0
+
 router_probe_interval - INTEGER
        Minimum interval (in seconds) between Router Probing described
        in RFC4191.
index 7ad0a4d8e475f519b0f1a6618091c3c93cb60427..4c483d937481777025e4ab7e8d23ce75316d6f17 100644 (file)
@@ -46,13 +46,13 @@ config NET_DSA_MV88E6171
          ethernet switches chips.
 
 config NET_DSA_MV88E6352
-       tristate "Marvell 88E6172/88E6176/88E6352 ethernet switch chip support"
+       tristate "Marvell 88E6172/6176/6320/6321/6352 ethernet switch chip support"
        depends on NET_DSA
        select NET_DSA_MV88E6XXX
        select NET_DSA_TAG_EDSA
        ---help---
-         This enables support for the Marvell 88E6172, 88E6176 and 88E6352
-         ethernet switch chips.
+         This enables support for the Marvell 88E6172, 88E6176, 88E6320,
+         88E6321 and 88E6352 ethernet switch chips.
 
 config NET_DSA_BCM_SF2
        tristate "Broadcom Starfighter 2 Ethernet switch support"
index 632815c10a401f7bd873e077a262528b73ceed7d..cfece5ae9d5fd148d1cb2297db489b6c36a785ec 100644 (file)
@@ -36,6 +36,18 @@ static char *mv88e6352_probe(struct device *host_dev, int sw_addr)
                        return "Marvell 88E6172";
                if ((ret & 0xfff0) == PORT_SWITCH_ID_6176)
                        return "Marvell 88E6176";
+               if (ret == PORT_SWITCH_ID_6320_A1)
+                       return "Marvell 88E6320 (A1)";
+               if (ret == PORT_SWITCH_ID_6320_A2)
+                       return "Marvell 88e6320 (A2)";
+               if ((ret & 0xfff0) == PORT_SWITCH_ID_6320)
+                       return "Marvell 88E6320";
+               if (ret == PORT_SWITCH_ID_6321_A1)
+                       return "Marvell 88E6321 (A1)";
+               if (ret == PORT_SWITCH_ID_6321_A2)
+                       return "Marvell 88e6321 (A2)";
+               if ((ret & 0xfff0) == PORT_SWITCH_ID_6321)
+                       return "Marvell 88E6321";
                if (ret == PORT_SWITCH_ID_6352_A0)
                        return "Marvell 88E6352 (A0)";
                if (ret == PORT_SWITCH_ID_6352_A1)
@@ -84,11 +96,12 @@ static int mv88e6352_setup_global(struct dsa_switch *ds)
 
 static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp)
 {
+       int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
        int ret;
 
        *temp = 0;
 
-       ret = mv88e6xxx_phy_page_read(ds, 0, 6, 27);
+       ret = mv88e6xxx_phy_page_read(ds, phy, 6, 27);
        if (ret < 0)
                return ret;
 
@@ -99,11 +112,12 @@ static int mv88e6352_get_temp(struct dsa_switch *ds, int *temp)
 
 static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp)
 {
+       int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
        int ret;
 
        *temp = 0;
 
-       ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26);
+       ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26);
        if (ret < 0)
                return ret;
 
@@ -114,23 +128,25 @@ static int mv88e6352_get_temp_limit(struct dsa_switch *ds, int *temp)
 
 static int mv88e6352_set_temp_limit(struct dsa_switch *ds, int temp)
 {
+       int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
        int ret;
 
-       ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26);
+       ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26);
        if (ret < 0)
                return ret;
        temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f);
-       return mv88e6xxx_phy_page_write(ds, 0, 6, 26,
+       return mv88e6xxx_phy_page_write(ds, phy, 6, 26,
                                        (ret & 0xe0ff) | (temp << 8));
 }
 
 static int mv88e6352_get_temp_alarm(struct dsa_switch *ds, bool *alarm)
 {
+       int phy = mv88e6xxx_6320_family(ds) ? 3 : 0;
        int ret;
 
        *alarm = false;
 
-       ret = mv88e6xxx_phy_page_read(ds, 0, 6, 26);
+       ret = mv88e6xxx_phy_page_read(ds, phy, 6, 26);
        if (ret < 0)
                return ret;
 
@@ -394,5 +410,8 @@ struct dsa_switch_driver mv88e6352_switch_driver = {
        .fdb_getnext            = mv88e6xxx_port_fdb_getnext,
 };
 
-MODULE_ALIAS("platform:mv88e6352");
 MODULE_ALIAS("platform:mv88e6172");
+MODULE_ALIAS("platform:mv88e6176");
+MODULE_ALIAS("platform:mv88e6320");
+MODULE_ALIAS("platform:mv88e6321");
+MODULE_ALIAS("platform:mv88e6352");
index fd8547c2b79d46786b10807a0c62f338b6a60e27..f394e4d4d9e00b967854a0108cb1d9d9aab0ce0c 100644 (file)
@@ -517,6 +517,18 @@ static bool mv88e6xxx_6185_family(struct dsa_switch *ds)
        return false;
 }
 
+bool mv88e6xxx_6320_family(struct dsa_switch *ds)
+{
+       struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
+
+       switch (ps->id) {
+       case PORT_SWITCH_ID_6320:
+       case PORT_SWITCH_ID_6321:
+               return true;
+       }
+       return false;
+}
+
 static bool mv88e6xxx_6351_family(struct dsa_switch *ds)
 {
        struct mv88e6xxx_priv_state *ps = ds_to_priv(ds);
@@ -565,7 +577,7 @@ static int _mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port)
 {
        int ret;
 
-       if (mv88e6xxx_6352_family(ds))
+       if (mv88e6xxx_6320_family(ds) || mv88e6xxx_6352_family(ds))
                port = (port + 1) << 5;
 
        /* Snapshot the hardware statistics counters for this port. */
@@ -1377,7 +1389,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
            mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
            mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) ||
-           mv88e6xxx_6065_family(ds)) {
+           mv88e6xxx_6065_family(ds) || mv88e6xxx_6320_family(ds)) {
                /* MAC Forcing register: don't force link, speed,
                 * duplex or flow control state to any particular
                 * values on physical ports, but force the CPU port
@@ -1423,7 +1435,7 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
            mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
            mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) ||
-           mv88e6xxx_6185_family(ds))
+           mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds))
                reg = PORT_CONTROL_IGMP_MLD_SNOOP |
                PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP |
                PORT_CONTROL_STATE_FORWARDING;
@@ -1431,7 +1443,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
                if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds))
                        reg |= PORT_CONTROL_DSA_TAG;
                if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
-                   mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) {
+                   mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
+                   mv88e6xxx_6320_family(ds)) {
                        if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA)
                                reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA;
                        else
@@ -1441,14 +1454,15 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
                if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
                    mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
                    mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) ||
-                   mv88e6xxx_6185_family(ds)) {
+                   mv88e6xxx_6185_family(ds) || mv88e6xxx_6320_family(ds)) {
                        if (ds->dst->tag_protocol == DSA_TAG_PROTO_EDSA)
                                reg |= PORT_CONTROL_EGRESS_ADD_TAG;
                }
        }
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
            mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
-           mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds)) {
+           mv88e6xxx_6095_family(ds) || mv88e6xxx_6065_family(ds) ||
+           mv88e6xxx_6320_family(ds)) {
                if (ds->dsa_port_mask & (1 << port))
                        reg |= PORT_CONTROL_FRAME_MODE_DSA;
                if (port == dsa_upstream_port(ds))
@@ -1473,11 +1487,11 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
        reg = 0;
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
            mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
-           mv88e6xxx_6095_family(ds))
+           mv88e6xxx_6095_family(ds) || mv88e6xxx_6320_family(ds))
                reg = PORT_CONTROL_2_MAP_DA;
 
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
-           mv88e6xxx_6165_family(ds))
+           mv88e6xxx_6165_family(ds) || mv88e6xxx_6320_family(ds))
                reg |= PORT_CONTROL_2_JUMBO_10240;
 
        if (mv88e6xxx_6095_family(ds) || mv88e6xxx_6185_family(ds)) {
@@ -1514,7 +1528,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
                goto abort;
 
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
-           mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) {
+           mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
+           mv88e6xxx_6320_family(ds)) {
                /* Do not limit the period of time that this port can
                 * be paused for by the remote end or the period of
                 * time that this port can pause the remote end.
@@ -1564,7 +1579,8 @@ static int mv88e6xxx_setup_port(struct dsa_switch *ds, int port)
 
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
            mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
-           mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) {
+           mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) ||
+           mv88e6xxx_6320_family(ds)) {
                /* Rate Control: disable ingress rate limiting. */
                ret = _mv88e6xxx_reg_write(ds, REG_PORT(port),
                                           PORT_RATE_CONTROL, 0x0001);
@@ -1976,7 +1992,8 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
                          (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT));
 
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
-           mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds)) {
+           mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
+           mv88e6xxx_6320_family(ds)) {
                /* Send all frames with destination addresses matching
                 * 01:80:c2:00:00:2x to the CPU port.
                 */
@@ -1995,7 +2012,8 @@ int mv88e6xxx_setup_global(struct dsa_switch *ds)
 
        if (mv88e6xxx_6352_family(ds) || mv88e6xxx_6351_family(ds) ||
            mv88e6xxx_6165_family(ds) || mv88e6xxx_6097_family(ds) ||
-           mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds)) {
+           mv88e6xxx_6185_family(ds) || mv88e6xxx_6095_family(ds) ||
+           mv88e6xxx_6320_family(ds)) {
                /* Disable ingress rate limiting by resetting all
                 * ingress rate limit registers to their initial
                 * state.
index a650b2656de9f0c4b8181bc3d3b3a4c533b4aa7a..64786cb89a93bdba111181082e8aef600525d8da 100644 (file)
 #define PORT_SWITCH_ID_6182    0x1a60
 #define PORT_SWITCH_ID_6185    0x1a70
 #define PORT_SWITCH_ID_6240    0x2400
-#define PORT_SWITCH_ID_6320    0x1250
+#define PORT_SWITCH_ID_6320    0x1150
+#define PORT_SWITCH_ID_6320_A1 0x1151
+#define PORT_SWITCH_ID_6320_A2 0x1152
+#define PORT_SWITCH_ID_6321    0x3100
+#define PORT_SWITCH_ID_6321_A1 0x3101
+#define PORT_SWITCH_ID_6321_A2 0x3102
 #define PORT_SWITCH_ID_6350    0x3710
 #define PORT_SWITCH_ID_6351    0x3750
 #define PORT_SWITCH_ID_6352    0x3520
@@ -410,6 +415,7 @@ int mv88e6xxx_port_fdb_getnext(struct dsa_switch *ds, int port,
 int mv88e6xxx_phy_page_read(struct dsa_switch *ds, int port, int page, int reg);
 int mv88e6xxx_phy_page_write(struct dsa_switch *ds, int port, int page,
                             int reg, int val);
+bool mv88e6xxx_6320_family(struct dsa_switch *ds);
 extern struct dsa_switch_driver mv88e6131_switch_driver;
 extern struct dsa_switch_driver mv88e6123_61_65_switch_driver;
 extern struct dsa_switch_driver mv88e6352_switch_driver;
index caeb39561567237261ac0d50befebad666cfbeb3..a4e3f8655cb8cfd58b363a8d153b4d7c5c4db686 100644 (file)
@@ -2741,8 +2741,7 @@ static const struct macb_config emac_config = {
 
 
 static const struct macb_config zynqmp_config = {
-       .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE |
-               MACB_CAPS_JUMBO,
+       .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO,
        .dma_burst_length = 16,
        .clk_init = macb_clk_init,
        .init = macb_init,
@@ -2750,8 +2749,7 @@ static const struct macb_config zynqmp_config = {
 };
 
 static const struct macb_config zynq_config = {
-       .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE |
-               MACB_CAPS_NO_GIGABIT_HALF,
+       .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF,
        .dma_burst_length = 16,
        .clk_init = macb_clk_init,
        .init = macb_init,
index a11485fbb33f2b7bcd6c973324ea41601dbaf575..b135d05c9984c9427cde89076820e8d79206955a 100644 (file)
@@ -151,6 +151,45 @@ static int cim_la_show_3in1(struct seq_file *seq, void *v, int idx)
        return 0;
 }
 
+static int cim_la_show_t6(struct seq_file *seq, void *v, int idx)
+{
+       if (v == SEQ_START_TOKEN) {
+               seq_puts(seq, "Status   Inst    Data      PC     LS0Stat  "
+                        "LS0Addr  LS0Data  LS1Stat  LS1Addr  LS1Data\n");
+       } else {
+               const u32 *p = v;
+
+               seq_printf(seq, "  %02x   %04x%04x %04x%04x %04x%04x %08x %08x %08x %08x %08x %08x\n",
+                          (p[9] >> 16) & 0xff,       /* Status */
+                          p[9] & 0xffff, p[8] >> 16, /* Inst */
+                          p[8] & 0xffff, p[7] >> 16, /* Data */
+                          p[7] & 0xffff, p[6] >> 16, /* PC */
+                          p[2], p[1], p[0],      /* LS0 Stat, Addr and Data */
+                          p[5], p[4], p[3]);     /* LS1 Stat, Addr and Data */
+       }
+       return 0;
+}
+
+static int cim_la_show_pc_t6(struct seq_file *seq, void *v, int idx)
+{
+       if (v == SEQ_START_TOKEN) {
+               seq_puts(seq, "Status   Inst    Data      PC\n");
+       } else {
+               const u32 *p = v;
+
+               seq_printf(seq, "  %02x   %08x %08x %08x\n",
+                          p[3] & 0xff, p[2], p[1], p[0]);
+               seq_printf(seq, "  %02x   %02x%06x %02x%06x %02x%06x\n",
+                          (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8,
+                          p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8);
+               seq_printf(seq, "  %02x   %04x%04x %04x%04x %04x%04x\n",
+                          (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16,
+                          p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff,
+                          p[6] >> 16);
+       }
+       return 0;
+}
+
 static int cim_la_open(struct inode *inode, struct file *file)
 {
        int ret;
@@ -162,9 +201,18 @@ static int cim_la_open(struct inode *inode, struct file *file)
        if (ret)
                return ret;
 
-       p = seq_open_tab(file, adap->params.cim_la_size / 8, 8 * sizeof(u32), 1,
-                        cfg & UPDBGLACAPTPCONLY_F ?
-                        cim_la_show_3in1 : cim_la_show);
+       if (is_t6(adap->params.chip)) {
+               /* +1 to account for integer division of CIMLA_SIZE/10 */
+               p = seq_open_tab(file, (adap->params.cim_la_size / 10) + 1,
+                                10 * sizeof(u32), 1,
+                                cfg & UPDBGLACAPTPCONLY_F ?
+                                       cim_la_show_pc_t6 : cim_la_show_t6);
+       } else {
+               p = seq_open_tab(file, adap->params.cim_la_size / 8,
+                                8 * sizeof(u32), 1,
+                                cfg & UPDBGLACAPTPCONLY_F ? cim_la_show_3in1 :
+                                                            cim_la_show);
+       }
        if (!p)
                return -ENOMEM;
 
index 351f3b1bf80025167c9afcc226252ec923a639b1..d582e175dfb61827be5304636df03f7859cd5dbb 100644 (file)
@@ -4757,7 +4757,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
         */
        cfg_queues(adapter);
 
-       adapter->l2t = t4_init_l2t();
+       adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end);
        if (!adapter->l2t) {
                /* We tolerate a lack of L2T, giving up some functionality */
                dev_warn(&pdev->dev, "could not allocate L2T, continuing\n");
index 252efc29321f4e7c57a53e631b520ea8820f6900..ac27898c6ab0b249ad6ced9308b55cd40f883ece 100644 (file)
 #define VLAN_NONE 0xfff
 
 /* identifies sync vs async L2T_WRITE_REQs */
-#define F_SYNC_WR    (1 << 12)
-
-enum {
-       L2T_STATE_VALID,      /* entry is up to date */
-       L2T_STATE_STALE,      /* entry may be used but needs revalidation */
-       L2T_STATE_RESOLVING,  /* entry needs address resolution */
-       L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
-
-       /* when state is one of the below the entry is not hashed */
-       L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
-       L2T_STATE_UNUSED      /* entry not in use */
-};
+#define SYNC_WR_S    12
+#define SYNC_WR_V(x) ((x) << SYNC_WR_S)
+#define SYNC_WR_F    SYNC_WR_V(1)
 
 struct l2t_data {
+       unsigned int l2t_start;     /* start index of our piece of the L2T */
+       unsigned int l2t_size;      /* number of entries in l2tab */
        rwlock_t lock;
        atomic_t nfree;             /* number of free entries */
        struct l2t_entry *rover;    /* starting point for next allocation */
-       struct l2t_entry l2tab[L2T_SIZE];
+       struct l2t_entry l2tab[0];  /* MUST BE LAST */
 };
 
 static inline unsigned int vlan_prio(const struct l2t_entry *e)
@@ -85,29 +78,36 @@ static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e)
 /*
  * To avoid having to check address families we do not allow v4 and v6
  * neighbors to be on the same hash chain.  We keep v4 entries in the first
- * half of available hash buckets and v6 in the second.
+ * half of available hash buckets and v6 in the second.  We need at least two
+ * entries in our L2T for this scheme to work.
  */
 enum {
-       L2T_SZ_HALF = L2T_SIZE / 2,
-       L2T_HASH_MASK = L2T_SZ_HALF - 1
+       L2T_MIN_HASH_BUCKETS = 2,
 };
 
-static inline unsigned int arp_hash(const u32 *key, int ifindex)
+static inline unsigned int arp_hash(struct l2t_data *d, const u32 *key,
+                                   int ifindex)
 {
-       return jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK;
+       unsigned int l2t_size_half = d->l2t_size / 2;
+
+       return jhash_2words(*key, ifindex, 0) % l2t_size_half;
 }
 
-static inline unsigned int ipv6_hash(const u32 *key, int ifindex)
+static inline unsigned int ipv6_hash(struct l2t_data *d, const u32 *key,
+                                    int ifindex)
 {
+       unsigned int l2t_size_half = d->l2t_size / 2;
        u32 xor = key[0] ^ key[1] ^ key[2] ^ key[3];
 
-       return L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK);
+       return (l2t_size_half +
+               (jhash_2words(xor, ifindex, 0) % l2t_size_half));
 }
 
-static unsigned int addr_hash(const u32 *addr, int addr_len, int ifindex)
+static unsigned int addr_hash(struct l2t_data *d, const u32 *addr,
+                             int addr_len, int ifindex)
 {
-       return addr_len == 4 ? arp_hash(addr, ifindex) :
-                              ipv6_hash(addr, ifindex);
+       return addr_len == 4 ? arp_hash(d, addr, ifindex) :
+                              ipv6_hash(d, addr, ifindex);
 }
 
 /*
@@ -139,6 +139,8 @@ static void neigh_replace(struct l2t_entry *e, struct neighbour *n)
  */
 static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync)
 {
+       struct l2t_data *d = adap->l2t;
+       unsigned int l2t_idx = e->idx + d->l2t_start;
        struct sk_buff *skb;
        struct cpl_l2t_write_req *req;
 
@@ -150,10 +152,10 @@ static int write_l2e(struct adapter *adap, struct l2t_entry *e, int sync)
        INIT_TP_WR(req, 0);
 
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ,
-                                       e->idx | (sync ? F_SYNC_WR : 0) |
+                                       l2t_idx | (sync ? SYNC_WR_F : 0) |
                                        TID_QID_V(adap->sge.fw_evtq.abs_id)));
        req->params = htons(L2T_W_PORT_V(e->lport) | L2T_W_NOREPLY_V(!sync));
-       req->l2t_idx = htons(e->idx);
+       req->l2t_idx = htons(l2t_idx);
        req->vlan = htons(e->vlan);
        if (e->neigh && !(e->neigh->dev->flags & IFF_LOOPBACK))
                memcpy(e->dmac, e->neigh->ha, sizeof(e->dmac));
@@ -190,18 +192,19 @@ static void send_pending(struct adapter *adap, struct l2t_entry *e)
  */
 void do_l2t_write_rpl(struct adapter *adap, const struct cpl_l2t_write_rpl *rpl)
 {
+       struct l2t_data *d = adap->l2t;
        unsigned int tid = GET_TID(rpl);
-       unsigned int idx = tid & (L2T_SIZE - 1);
+       unsigned int l2t_idx = tid % L2T_SIZE;
 
        if (unlikely(rpl->status != CPL_ERR_NONE)) {
                dev_err(adap->pdev_dev,
                        "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
-                       rpl->status, idx);
+                       rpl->status, l2t_idx);
                return;
        }
 
-       if (tid & F_SYNC_WR) {
-               struct l2t_entry *e = &adap->l2t->l2tab[idx];
+       if (tid & SYNC_WR_F) {
+               struct l2t_entry *e = &d->l2tab[l2t_idx - d->l2t_start];
 
                spin_lock(&e->lock);
                if (e->state != L2T_STATE_SWITCHING) {
@@ -276,7 +279,7 @@ static struct l2t_entry *alloc_l2e(struct l2t_data *d)
                return NULL;
 
        /* there's definitely a free entry */
-       for (e = d->rover, end = &d->l2tab[L2T_SIZE]; e != end; ++e)
+       for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e)
                if (atomic_read(&e->refcnt) == 0)
                        goto found;
 
@@ -368,7 +371,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh,
        int addr_len = neigh->tbl->key_len;
        u32 *addr = (u32 *)neigh->primary_key;
        int ifidx = neigh->dev->ifindex;
-       int hash = addr_hash(addr, addr_len, ifidx);
+       int hash = addr_hash(d, addr, addr_len, ifidx);
 
        if (neigh->dev->flags & IFF_LOOPBACK)
                lport = netdev2pinfo(physdev)->tx_chan + 4;
@@ -481,7 +484,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh)
        int addr_len = neigh->tbl->key_len;
        u32 *addr = (u32 *) neigh->primary_key;
        int ifidx = neigh->dev->ifindex;
-       int hash = addr_hash(addr, addr_len, ifidx);
+       int hash = addr_hash(d, addr, addr_len, ifidx);
 
        read_lock_bh(&d->lock);
        for (e = d->l2tab[hash].first; e; e = e->next)
@@ -554,20 +557,30 @@ int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan,
        return write_l2e(adap, e, 0);
 }
 
-struct l2t_data *t4_init_l2t(void)
+struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end)
 {
+       unsigned int l2t_size;
        int i;
        struct l2t_data *d;
 
-       d = t4_alloc_mem(sizeof(*d));
+       if (l2t_start >= l2t_end || l2t_end >= L2T_SIZE)
+               return NULL;
+       l2t_size = l2t_end - l2t_start + 1;
+       if (l2t_size < L2T_MIN_HASH_BUCKETS)
+               return NULL;
+
+       d = t4_alloc_mem(sizeof(*d) + l2t_size * sizeof(struct l2t_entry));
        if (!d)
                return NULL;
 
+       d->l2t_start = l2t_start;
+       d->l2t_size = l2t_size;
+
        d->rover = d->l2tab;
-       atomic_set(&d->nfree, L2T_SIZE);
+       atomic_set(&d->nfree, l2t_size);
        rwlock_init(&d->lock);
 
-       for (i = 0; i < L2T_SIZE; ++i) {
+       for (i = 0; i < d->l2t_size; ++i) {
                d->l2tab[i].idx = i;
                d->l2tab[i].state = L2T_STATE_UNUSED;
                spin_lock_init(&d->l2tab[i].lock);
@@ -578,9 +591,9 @@ struct l2t_data *t4_init_l2t(void)
 
 static inline void *l2t_get_idx(struct seq_file *seq, loff_t pos)
 {
-       struct l2t_entry *l2tab = seq->private;
+       struct l2t_data *d = seq->private;
 
-       return pos >= L2T_SIZE ? NULL : &l2tab[pos];
+       return pos >= d->l2t_size ? NULL : &d->l2tab[pos];
 }
 
 static void *l2t_seq_start(struct seq_file *seq, loff_t *pos)
@@ -620,6 +633,7 @@ static int l2t_seq_show(struct seq_file *seq, void *v)
                         "Ethernet address  VLAN/P LP State Users Port\n");
        else {
                char ip[60];
+               struct l2t_data *d = seq->private;
                struct l2t_entry *e = v;
 
                spin_lock_bh(&e->lock);
@@ -628,7 +642,7 @@ static int l2t_seq_show(struct seq_file *seq, void *v)
                else
                        sprintf(ip, e->v6 ? "%pI6c" : "%pI4", e->addr);
                seq_printf(seq, "%4u %-25s %17pM %4d %u %2u   %c   %5u %s\n",
-                          e->idx, ip, e->dmac,
+                          e->idx + d->l2t_start, ip, e->dmac,
                           e->vlan & VLAN_VID_MASK, vlan_prio(e), e->lport,
                           l2e_state(e), atomic_read(&e->refcnt),
                           e->neigh ? e->neigh->dev->name : "");
@@ -652,7 +666,7 @@ static int l2t_seq_open(struct inode *inode, struct file *file)
                struct adapter *adap = inode->i_private;
                struct seq_file *seq = file->private_data;
 
-               seq->private = adap->l2t->l2tab;
+               seq->private = adap->l2t;
        }
        return rc;
 }
index a30126ce90cbabeaf50d7ed3fc5f1531c3831db9..b38dc526aad563a3b27b6b79e2e8d7c093b84ff0 100644 (file)
 #include <linux/if_ether.h>
 #include <linux/atomic.h>
 
+enum { L2T_SIZE = 4096 };     /* # of L2T entries */
+
+enum {
+       L2T_STATE_VALID,      /* entry is up to date */
+       L2T_STATE_STALE,      /* entry may be used but needs revalidation */
+       L2T_STATE_RESOLVING,  /* entry needs address resolution */
+       L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
+       L2T_STATE_NOARP,      /* Netdev down or removed*/
+
+       /* when state is one of the below the entry is not hashed */
+       L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
+       L2T_STATE_UNUSED      /* entry not in use */
+};
+
 struct adapter;
 struct l2t_data;
 struct neighbour;
@@ -56,7 +70,7 @@ struct cpl_l2t_write_rpl;
  */
 struct l2t_entry {
        u16 state;                  /* entry state */
-       u16 idx;                    /* entry index */
+       u16 idx;                    /* entry index within in-memory table */
        u32 addr[4];                /* next hop IP or IPv6 address */
        int ifindex;                /* neighbor's net_device's ifindex */
        struct neighbour *neigh;    /* associated neighbour */
@@ -104,7 +118,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh);
 struct l2t_entry *t4_l2t_alloc_switching(struct l2t_data *d);
 int t4_l2t_set_switching(struct adapter *adap, struct l2t_entry *e, u16 vlan,
                         u8 port, u8 *eth_addr);
-struct l2t_data *t4_init_l2t(void);
+struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end);
 void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl);
 
 extern const struct file_operations t4_l2t_fops;
index 2b52aae7ec86d38b9e658e6e92d1f82ae3c6c879..1e6597dc873652ea01c375f696fb02cc33bf6db0 100644 (file)
@@ -1345,9 +1345,9 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x5a80, 0x5a9c,
                0x5b94, 0x5bfc,
                0x5c10, 0x5ec0,
-               0x5ec8, 0x5ec8,
+               0x5ec8, 0x5ecc,
                0x6000, 0x6040,
-               0x6058, 0x6154,
+               0x6058, 0x615c,
                0x7700, 0x7798,
                0x77c0, 0x7880,
                0x78cc, 0x78fc,
@@ -1371,20 +1371,22 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x9f00, 0x9f6c,
                0x9f80, 0xa020,
                0xd004, 0xd03c,
+               0xd100, 0xd118,
+               0xd200, 0xd31c,
                0xdfc0, 0xdfe0,
                0xe000, 0xf008,
                0x11000, 0x11014,
                0x11048, 0x11110,
                0x11118, 0x1117c,
-               0x11190, 0x11260,
+               0x11190, 0x11264,
                0x11300, 0x1130c,
-               0x12000, 0x1205c,
+               0x12000, 0x1206c,
                0x19040, 0x1906c,
                0x19078, 0x19080,
                0x1908c, 0x19124,
                0x19150, 0x191b0,
                0x191d0, 0x191e8,
-               0x19238, 0x192b8,
+               0x19238, 0x192bc,
                0x193f8, 0x19474,
                0x19490, 0x194cc,
                0x194f0, 0x194f8,
@@ -1466,7 +1468,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x30200, 0x30318,
                0x30400, 0x3052c,
                0x30540, 0x3061c,
-               0x30800, 0x3088c,
+               0x30800, 0x30890,
                0x308c0, 0x30908,
                0x30910, 0x309b8,
                0x30a00, 0x30a04,
@@ -1544,7 +1546,7 @@ void t4_get_regs(struct adapter *adap, void *buf, size_t buf_size)
                0x34200, 0x34318,
                0x34400, 0x3452c,
                0x34540, 0x3461c,
-               0x34800, 0x3488c,
+               0x34800, 0x34890,
                0x348c0, 0x34908,
                0x34910, 0x349b8,
                0x34a00, 0x34a04,
@@ -3924,43 +3926,25 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
  */
 void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st)
 {
-       /* T6 and later has 2 channels */
-       if (adap->params.arch.nchan == NCHAN) {
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->mac_in_errs, 12, TP_MIB_MAC_IN_ERR_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tnl_cong_drops, 8,
-                                TP_MIB_TNL_CNG_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tnl_tx_drops, 4,
-                                TP_MIB_TNL_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->ofld_vlan_drops, 4,
-                                TP_MIB_OFD_VLN_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tcp6_in_errs, 4,
-                                TP_MIB_TCP_V6IN_ERR_0_A);
-       } else {
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->mac_in_errs, 2, TP_MIB_MAC_IN_ERR_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->hdr_in_errs, 2, TP_MIB_HDR_IN_ERR_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tcp_in_errs, 2, TP_MIB_TCP_IN_ERR_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tnl_cong_drops, 2,
-                                TP_MIB_TNL_CNG_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->ofld_chan_drops, 2,
-                                TP_MIB_OFD_CHN_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tnl_tx_drops, 2, TP_MIB_TNL_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->ofld_vlan_drops, 2,
-                                TP_MIB_OFD_VLN_DROP_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
-                                st->tcp6_in_errs, 2, TP_MIB_TCP_V6IN_ERR_0_A);
-       }
+       int nchan = adap->params.arch.nchan;
+
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
+                        st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A);
+
        t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
                         &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A);
 }
@@ -3974,16 +3958,13 @@ void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st)
  */
 void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st)
 {
-       /* T6 and later has 2 channels */
-       if (adap->params.arch.nchan == NCHAN) {
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
-                                8, TP_MIB_CPL_IN_REQ_0_A);
-       } else {
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
-                                2, TP_MIB_CPL_IN_REQ_0_A);
-               t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp,
-                                2, TP_MIB_CPL_OUT_RSP_0_A);
-       }
+       int nchan = adap->params.arch.nchan;
+
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
+                        nchan, TP_MIB_CPL_IN_REQ_0_A);
+       t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp,
+                        nchan, TP_MIB_CPL_OUT_RSP_0_A);
+
 }
 
 /**
index c8488f430d197337d7fb81d62689e2c12c583292..640369df8b3a3cb155eb35c9d7a08009d0716da2 100644 (file)
@@ -47,7 +47,6 @@ enum {
        TCB_SIZE       = 128,   /* TCB size */
        NMTUS          = 16,    /* size of MTU table */
        NCCTRL_WIN     = 32,    /* # of congestion control windows */
-       L2T_SIZE       = 4096,  /* # of L2T entries */
        PM_NSTATS      = 5,     /* # of PM stats */
        MBOX_LEN       = 64,    /* mailbox size in bytes */
        TRACE_LEN      = 112,   /* length of trace data and mask */
index d7ca106927b0d93ee68480c2e45c29879fe9e510..8353a6cbfcc21edd2dde363fafd06b202611cae4 100644 (file)
@@ -142,6 +142,8 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x5013), /* T580-chr */
        CH_PCI_ID_TABLE_FENTRY(0x5014), /* T580-so */
        CH_PCI_ID_TABLE_FENTRY(0x5015), /* T502-bt */
+       CH_PCI_ID_TABLE_FENTRY(0x5016), /* T580-OCP-SO */
+       CH_PCI_ID_TABLE_FENTRY(0x5017), /* T520-OCP-SO */
        CH_PCI_ID_TABLE_FENTRY(0x5080), /* Custom T540-cr */
        CH_PCI_ID_TABLE_FENTRY(0x5081), /* Custom T540-LL-cr */
        CH_PCI_ID_TABLE_FENTRY(0x5082), /* Custom T504-cr */
@@ -155,6 +157,22 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
        CH_PCI_ID_TABLE_FENTRY(0x5090), /* Custom T540-CR */
        CH_PCI_ID_TABLE_FENTRY(0x5091), /* Custom T522-CR */
        CH_PCI_ID_TABLE_FENTRY(0x5092), /* Custom T520-CR */
+
+       /* T6 adapters:
+        */
+       CH_PCI_ID_TABLE_FENTRY(0x6001),
+       CH_PCI_ID_TABLE_FENTRY(0x6002),
+       CH_PCI_ID_TABLE_FENTRY(0x6003),
+       CH_PCI_ID_TABLE_FENTRY(0x6004),
+       CH_PCI_ID_TABLE_FENTRY(0x6005),
+       CH_PCI_ID_TABLE_FENTRY(0x6006),
+       CH_PCI_ID_TABLE_FENTRY(0x6007),
+       CH_PCI_ID_TABLE_FENTRY(0x6009),
+       CH_PCI_ID_TABLE_FENTRY(0x600d),
+       CH_PCI_ID_TABLE_FENTRY(0x6010),
+       CH_PCI_ID_TABLE_FENTRY(0x6011),
+       CH_PCI_ID_TABLE_FENTRY(0x6014),
+       CH_PCI_ID_TABLE_FENTRY(0x6015),
 CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
 
 #endif /* __T4_PCI_ID_TBL_H__ */
index ad53e5ad2acd05afa1b94c09f7e2c1be8f4599a6..1d5e77a566e16acedc805a12b5742fdbb29891c5 100644 (file)
@@ -1898,7 +1898,10 @@ static int napi_rx_handler(struct napi_struct *napi, int budget)
                rspq->unhandled_irqs++;
 
        val = CIDXINC_V(work_done) | SEINTARM_V(intr_params);
-       if (is_t4(rspq->adapter->params.chip)) {
+       /* If we don't have access to the new User GTS (T5+), use the old
+        * doorbell mechanism; otherwise use the new BAR2 mechanism.
+        */
+       if (unlikely(!rspq->bar2_addr)) {
                t4_write_reg(rspq->adapter,
                             T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
                             val | INGRESSQID_V((u32)rspq->cntxt_id));
@@ -1998,10 +2001,13 @@ static unsigned int process_intrq(struct adapter *adapter)
        }
 
        val = CIDXINC_V(work_done) | SEINTARM_V(intrq->intr_params);
-       if (is_t4(adapter->params.chip))
+       /* If we don't have access to the new User GTS (T5+), use the old
+        * doorbell mechanism; otherwise use the new BAR2 mechanism.
+        */
+       if (unlikely(!intrq->bar2_addr)) {
                t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
                             val | INGRESSQID_V(intrq->cntxt_id));
-       else {
+       } else {
                writel(val | INGRESSQID_V(intrq->bar2_qid),
                       intrq->bar2_addr + SGE_UDB_GTS);
                wmb();
index d1017509b08ac1e171a12a89770373a5057c5d64..f7b42483921c5847a883a286d28f7700e10b1d28 100644 (file)
@@ -604,19 +604,7 @@ static struct pci_driver pci_driver = {
        .probe          = ec_bhf_probe,
        .remove         = ec_bhf_remove,
 };
-
-static int __init ec_bhf_init(void)
-{
-       return pci_register_driver(&pci_driver);
-}
-
-static void __exit ec_bhf_exit(void)
-{
-       pci_unregister_driver(&pci_driver);
-}
-
-module_init(ec_bhf_init);
-module_exit(ec_bhf_exit);
+module_pci_driver(pci_driver);
 
 module_param(polling_frequency, long, S_IRUGO);
 MODULE_PARM_DESC(polling_frequency, "Polling timer frequency in ns");
index 8d12b41b3b1990af468da5a38c4759fc005ba3d3..cb5777bb74292a951602cd1d963e38bc472e46bf 100644 (file)
@@ -37,7 +37,7 @@
 #include "be_hw.h"
 #include "be_roce.h"
 
-#define DRV_VER                        "10.6.0.2"
+#define DRV_VER                        "10.6.0.3"
 #define DRV_NAME               "be2net"
 #define BE_NAME                        "Emulex BladeEngine2"
 #define BE3_NAME               "Emulex BladeEngine3"
index 9eac3227d2cabc15c2d21a4baafafc3761372560..ecad46f796539f9b5a4dcc331939fee6da3bb1d3 100644 (file)
@@ -88,19 +88,21 @@ static inline void *embedded_payload(struct be_mcc_wrb *wrb)
        return wrb->payload.embedded_payload;
 }
 
-static void be_mcc_notify(struct be_adapter *adapter)
+static int be_mcc_notify(struct be_adapter *adapter)
 {
        struct be_queue_info *mccq = &adapter->mcc_obj.q;
        u32 val = 0;
 
        if (be_check_error(adapter, BE_ERROR_ANY))
-               return;
+               return -EIO;
 
        val |= mccq->id & DB_MCCQ_RING_ID_MASK;
        val |= 1 << DB_MCCQ_NUM_POSTED_SHIFT;
 
        wmb();
        iowrite32(val, adapter->db + DB_MCCQ_OFFSET);
+
+       return 0;
 }
 
 /* To check if valid bit is set, check the entire word as we don't know
@@ -170,6 +172,12 @@ static void be_async_cmd_process(struct be_adapter *adapter,
                return;
        }
 
+       if (opcode == OPCODE_LOWLEVEL_SET_LOOPBACK_MODE &&
+           subsystem == CMD_SUBSYSTEM_LOWLEVEL) {
+               complete(&adapter->et_cmd_compl);
+               return;
+       }
+
        if ((opcode == OPCODE_COMMON_WRITE_FLASHROM ||
             opcode == OPCODE_COMMON_WRITE_OBJECT) &&
            subsystem == CMD_SUBSYSTEM_COMMON) {
@@ -541,7 +549,9 @@ static int be_mcc_notify_wait(struct be_adapter *adapter)
 
        resp = be_decode_resp_hdr(wrb->tag0, wrb->tag1);
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto out;
 
        status = be_mcc_wait_compl(adapter);
        if (status == -EIO)
@@ -1547,7 +1557,10 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd)
        else
                hdr->version = 2;
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto err;
+
        adapter->stats_cmd_sent = true;
 
 err:
@@ -1583,7 +1596,10 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
        req->cmd_params.params.pport_num = cpu_to_le16(adapter->hba_port_num);
        req->cmd_params.params.reset_stats = 0;
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto err;
+
        adapter->stats_cmd_sent = true;
 
 err:
@@ -1687,8 +1703,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter)
                               OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES,
                               sizeof(*req), wrb, NULL);
 
-       be_mcc_notify(adapter);
-
+       status = be_mcc_notify(adapter);
 err:
        spin_unlock_bh(&adapter->mcc_lock);
        return status;
@@ -1860,7 +1875,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter,
                                cpu_to_le32(set_eqd[i].delay_multiplier);
        }
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
 err:
        spin_unlock_bh(&adapter->mcc_lock);
        return status;
@@ -1953,7 +1968,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value)
                        memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN);
        }
 
-       status = be_mcc_notify_wait(adapter);
+       status = be_mcc_notify(adapter);
 err:
        spin_unlock_bh(&adapter->mcc_lock);
        return status;
@@ -2320,7 +2335,10 @@ int lancer_cmd_write_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
        req->addr_high = cpu_to_le32(upper_32_bits(cmd->dma +
                                sizeof(struct lancer_cmd_req_write_object)));
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto err_unlock;
+
        spin_unlock_bh(&adapter->mcc_lock);
 
        if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
@@ -2491,7 +2509,10 @@ int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_dma_mem *cmd,
        req->params.op_code = cpu_to_le32(flash_opcode);
        req->params.data_buf_size = cpu_to_le32(buf_size);
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto err_unlock;
+
        spin_unlock_bh(&adapter->mcc_lock);
 
        if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
@@ -2585,7 +2606,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
        wrb = wrb_from_mccq(adapter);
        if (!wrb) {
                status = -EBUSY;
-               goto err;
+               goto err_unlock;
        }
 
        req = embedded_payload(wrb);
@@ -2599,8 +2620,19 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num,
        req->loopback_type = loopback_type;
        req->loopback_state = enable;
 
-       status = be_mcc_notify_wait(adapter);
-err:
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto err_unlock;
+
+       spin_unlock_bh(&adapter->mcc_lock);
+
+       if (!wait_for_completion_timeout(&adapter->et_cmd_compl,
+                                        msecs_to_jiffies(SET_LB_MODE_TIMEOUT)))
+               status = -ETIMEDOUT;
+
+       return status;
+
+err_unlock:
        spin_unlock_bh(&adapter->mcc_lock);
        return status;
 }
@@ -2636,7 +2668,9 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num,
        req->num_pkts = cpu_to_le32(num_pkts);
        req->loopback_type = cpu_to_le32(loopback_type);
 
-       be_mcc_notify(adapter);
+       status = be_mcc_notify(adapter);
+       if (status)
+               goto err;
 
        spin_unlock_bh(&adapter->mcc_lock);
 
index 2716e6f30d9a0949633b40dc9864196c7465fa3a..a4479f7488d3cb663091283da9781b2ff99b7505 100644 (file)
@@ -1495,6 +1495,8 @@ struct be_cmd_resp_acpi_wol_magic_config_v1 {
 #define BE_PME_D3COLD_CAP              0x80
 
 /********************** LoopBack test *********************/
+#define SET_LB_MODE_TIMEOUT            12000
+
 struct be_cmd_req_loopback_test {
        struct be_cmd_req_hdr hdr;
        u32 loopback_type;
@@ -1758,6 +1760,7 @@ struct be_cmd_req_set_mac_list {
 /*********************** HSW Config ***********************/
 #define PORT_FWD_TYPE_VEPA             0x3
 #define PORT_FWD_TYPE_VEB              0x2
+#define PORT_FWD_TYPE_PASSTHRU         0x1
 
 #define ENABLE_MAC_SPOOFCHK            0x2
 #define DISABLE_MAC_SPOOFCHK           0x3
index b2476dbfd103120affb5e216a31d304dda570a67..d20ff054c1f78fd8515b7c858391591ced1272a9 100644 (file)
@@ -847,10 +847,21 @@ err:
 static u64 be_loopback_test(struct be_adapter *adapter, u8 loopback_type,
                            u64 *status)
 {
-       be_cmd_set_loopback(adapter, adapter->hba_port_num, loopback_type, 1);
+       int ret;
+
+       ret = be_cmd_set_loopback(adapter, adapter->hba_port_num,
+                                 loopback_type, 1);
+       if (ret)
+               return ret;
+
        *status = be_cmd_loopback_test(adapter, adapter->hba_port_num,
                                       loopback_type, 1500, 2, 0xabc);
-       be_cmd_set_loopback(adapter, adapter->hba_port_num, BE_NO_LOOPBACK, 1);
+
+       ret = be_cmd_set_loopback(adapter, adapter->hba_port_num,
+                                 BE_NO_LOOPBACK, 1);
+       if (ret)
+               return ret;
+
        return *status;
 }
 
index 6f642426308c67399eac3abdb20ae6160ce41d2a..c996dd76f5461253dc841ebf2fe539f813954b2b 100644 (file)
@@ -1254,7 +1254,7 @@ static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
        if (is_udp_pkt((*skb))) {
                struct udphdr *udp = udp_hdr((*skb));
 
-               switch (udp->dest) {
+               switch (ntohs(udp->dest)) {
                case DHCP_CLIENT_PORT:
                        os2bmc = is_dhcp_client_filt_enabled(adapter);
                        goto done;
@@ -3529,15 +3529,15 @@ err:
 
 static int be_setup_wol(struct be_adapter *adapter, bool enable)
 {
+       struct device *dev = &adapter->pdev->dev;
        struct be_dma_mem cmd;
-       int status = 0;
        u8 mac[ETH_ALEN];
+       int status;
 
        eth_zero_addr(mac);
 
        cmd.size = sizeof(struct be_cmd_req_acpi_wol_magic_config);
-       cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
-                                    GFP_KERNEL);
+       cmd.va = dma_zalloc_coherent(dev, cmd.size, &cmd.dma, GFP_KERNEL);
        if (!cmd.va)
                return -ENOMEM;
 
@@ -3546,24 +3546,18 @@ static int be_setup_wol(struct be_adapter *adapter, bool enable)
                                                PCICFG_PM_CONTROL_OFFSET,
                                                PCICFG_PM_CONTROL_MASK);
                if (status) {
-                       dev_err(&adapter->pdev->dev,
-                               "Could not enable Wake-on-lan\n");
-                       dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
-                                         cmd.dma);
-                       return status;
+                       dev_err(dev, "Could not enable Wake-on-lan\n");
+                       goto err;
                }
-               status = be_cmd_enable_magic_wol(adapter,
-                                                adapter->netdev->dev_addr,
-                                                &cmd);
-               pci_enable_wake(adapter->pdev, PCI_D3hot, 1);
-               pci_enable_wake(adapter->pdev, PCI_D3cold, 1);
        } else {
-               status = be_cmd_enable_magic_wol(adapter, mac, &cmd);
-               pci_enable_wake(adapter->pdev, PCI_D3hot, 0);
-               pci_enable_wake(adapter->pdev, PCI_D3cold, 0);
+               ether_addr_copy(mac, adapter->netdev->dev_addr);
        }
 
-       dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
+       status = be_cmd_enable_magic_wol(adapter, mac, &cmd);
+       pci_enable_wake(adapter->pdev, PCI_D3hot, enable);
+       pci_enable_wake(adapter->pdev, PCI_D3cold, enable);
+err:
+       dma_free_coherent(dev, cmd.size, cmd.va, cmd.dma);
        return status;
 }
 
@@ -4924,7 +4918,7 @@ static bool be_check_ufi_compatibility(struct be_adapter *adapter,
 {
        if (!fhdr) {
                dev_err(&adapter->pdev->dev, "Invalid FW UFI file");
-               return -1;
+               return false;
        }
 
        /* First letter of the build version is used to identify
@@ -5079,9 +5073,6 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
        int status = 0;
        u8 hsw_mode;
 
-       if (!sriov_enabled(adapter))
-               return 0;
-
        /* BE and Lancer chips support VEB mode only */
        if (BEx_chip(adapter) || lancer_chip(adapter)) {
                hsw_mode = PORT_FWD_TYPE_VEB;
@@ -5091,6 +5082,9 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
                                               NULL);
                if (status)
                        return 0;
+
+               if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
+                       return 0;
        }
 
        return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
@@ -5813,7 +5807,6 @@ static int be_pci_resume(struct pci_dev *pdev)
        if (status)
                return status;
 
-       pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
 
        status = be_resume(adapter);
@@ -5893,7 +5886,6 @@ static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
                return PCI_ERS_RESULT_DISCONNECT;
 
        pci_set_master(pdev);
-       pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
 
        /* Check if card is ok and fw is ready */
index d49bee38cd319a0a8c7afd2cad7f1cb1ac7f2ed3..cc2d8b4b18e3e2a99ef303b76809545496089787 100644 (file)
@@ -965,7 +965,6 @@ static struct platform_driver hip04_mac_driver = {
        .remove = hip04_remove,
        .driver = {
                .name           = DRV_NAME,
-               .owner          = THIS_MODULE,
                .of_match_table = hip04_mac_match,
        },
 };
index b3bac25db99cf59ed1cdd2e990962cf129297846..fca0a5be1f0f732cd340dd056e6be86b2fb0a925 100644 (file)
@@ -174,7 +174,6 @@ static struct platform_driver hip04_mdio_driver = {
        .remove = hip04_mdio_remove,
        .driver = {
                .name = "hip04-mdio",
-               .owner = THIS_MODULE,
                .of_match_table = hip04_mdio_match,
        },
 };
index 7a4f20bb7fcb4c2640ad8111f5a98ff95088075c..12c65e1ad6a93c54d94c8add0adbc9685f9ef417 100644 (file)
@@ -917,7 +917,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                        if (dev->features & NETIF_F_RXHASH)
                                skb_set_hash(gro_skb,
                                             be32_to_cpu(cqe->immed_rss_invalid),
-                                            PKT_HASH_TYPE_L3);
+                                            (ip_summed == CHECKSUM_UNNECESSARY) ?
+                                               PKT_HASH_TYPE_L4 :
+                                               PKT_HASH_TYPE_L3);
 
                        skb_record_rx_queue(gro_skb, cq->ring);
                        skb_mark_napi_id(gro_skb, &cq->napi);
@@ -963,7 +965,9 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
                if (dev->features & NETIF_F_RXHASH)
                        skb_set_hash(skb,
                                     be32_to_cpu(cqe->immed_rss_invalid),
-                                    PKT_HASH_TYPE_L3);
+                                    (ip_summed == CHECKSUM_UNNECESSARY) ?
+                                       PKT_HASH_TYPE_L4 :
+                                       PKT_HASH_TYPE_L3);
 
                if ((be32_to_cpu(cqe->vlan_my_qpn) &
                    MLX4_CQE_VLAN_PRESENT_MASK) &&
index 12fbfcb44d8acdedf08fef880a12e53145f8836e..d76f4257e305bccfa5ed12e0834a3f7f17d03c5d 100644 (file)
@@ -2907,6 +2907,8 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
 {
        u64 dev_flags = dev->flags;
        int err = 0;
+       int fw_enabled_sriov_vfs = min(pci_sriov_get_totalvfs(pdev),
+                                       MLX4_MAX_NUM_VF);
 
        if (reset_flow) {
                dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs),
@@ -2932,6 +2934,12 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev,
        }
 
        if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
+               if (total_vfs > fw_enabled_sriov_vfs) {
+                       mlx4_err(dev, "requested vfs (%d) > available vfs (%d). Continuing without SR_IOV\n",
+                                total_vfs, fw_enabled_sriov_vfs);
+                       err = -ENOMEM;
+                       goto disable_sriov;
+               }
                mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
                err = pci_enable_sriov(pdev, total_vfs);
        }
@@ -3413,20 +3421,20 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data,
                        goto err_disable_pdev;
                }
        }
-       if (total_vfs >= MLX4_MAX_NUM_VF) {
+       if (total_vfs > MLX4_MAX_NUM_VF) {
                dev_err(&pdev->dev,
-                       "Requested more VF's (%d) than allowed (%d)\n",
-                       total_vfs, MLX4_MAX_NUM_VF - 1);
+                       "Requested more VF's (%d) than allowed by hw (%d)\n",
+                       total_vfs, MLX4_MAX_NUM_VF);
                err = -EINVAL;
                goto err_disable_pdev;
        }
 
        for (i = 0; i < MLX4_MAX_PORTS; i++) {
-               if (nvfs[i] + nvfs[2] >= MLX4_MAX_NUM_VF_P_PORT) {
+               if (nvfs[i] + nvfs[2] > MLX4_MAX_NUM_VF_P_PORT) {
                        dev_err(&pdev->dev,
-                               "Requested more VF's (%d) for port (%d) than allowed (%d)\n",
+                               "Requested more VF's (%d) for port (%d) than allowed by driver (%d)\n",
                                nvfs[i] + nvfs[2], i + 1,
-                               MLX4_MAX_NUM_VF_P_PORT - 1);
+                               MLX4_MAX_NUM_VF_P_PORT);
                        err = -EINVAL;
                        goto err_disable_pdev;
                }
index c28111749e1f9ba95c8b49231da95f97278f31e8..2d1b9427407982b43673e96a085c7a9ff69e0a53 100644 (file)
@@ -8226,31 +8226,7 @@ static void s2io_rem_nic(struct pci_dev *pdev)
        pci_disable_device(pdev);
 }
 
-/**
- * s2io_starter - Entry point for the driver
- * Description: This function is the entry point for the driver. It verifies
- * the module loadable parameters and initializes PCI configuration space.
- */
-
-static int __init s2io_starter(void)
-{
-       return pci_register_driver(&s2io_driver);
-}
-
-/**
- * s2io_closer - Cleanup routine for the driver
- * Description: This function is the cleanup routine for the driver. It
- * unregisters the driver.
- */
-
-static __exit void s2io_closer(void)
-{
-       pci_unregister_driver(&s2io_driver);
-       DBG_PRINT(INIT_DBG, "cleanup done\n");
-}
-
-module_init(s2io_starter);
-module_exit(s2io_closer);
+module_pci_driver(s2io_driver);
 
 static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
                                struct tcphdr **tcp, struct RxD_t *rxdp,
index d89b6ed82c51ac37d18dda4bfdd9937a0605c7f7..6c5997dc8afc064076e1a00ff726b3d87d3515c9 100644 (file)
@@ -1085,8 +1085,6 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp);
 static void tx_intr_handler(struct fifo_info *fifo_data);
 static void s2io_handle_errors(void * dev_id);
 
-static int s2io_starter(void);
-static void s2io_closer(void);
 static void s2io_tx_watchdog(struct net_device *dev);
 static void s2io_set_multicast(struct net_device *dev);
 static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp);
index 2d8578cade03790782af7e97a1f59f9848301ae6..c0051673c9fad60d7cc5e53c821e4ea46e17de4a 100644 (file)
@@ -1817,6 +1817,30 @@ rocker_cmd_set_port_settings_macaddr_prep(const struct rocker_port *rocker_port,
        return 0;
 }
 
+static int
+rocker_cmd_set_port_settings_mtu_prep(const struct rocker_port *rocker_port,
+                                     struct rocker_desc_info *desc_info,
+                                     void *priv)
+{
+       int mtu = *(int *)priv;
+       struct rocker_tlv *cmd_info;
+
+       if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_TYPE,
+                              ROCKER_TLV_CMD_TYPE_SET_PORT_SETTINGS))
+               return -EMSGSIZE;
+       cmd_info = rocker_tlv_nest_start(desc_info, ROCKER_TLV_CMD_INFO);
+       if (!cmd_info)
+               return -EMSGSIZE;
+       if (rocker_tlv_put_u32(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_PPORT,
+                              rocker_port->pport))
+               return -EMSGSIZE;
+       if (rocker_tlv_put_u16(desc_info, ROCKER_TLV_CMD_PORT_SETTINGS_MTU,
+                              mtu))
+               return -EMSGSIZE;
+       rocker_tlv_nest_end(desc_info, cmd_info);
+       return 0;
+}
+
 static int
 rocker_cmd_set_port_learning_prep(const struct rocker_port *rocker_port,
                                  struct rocker_desc_info *desc_info,
@@ -1874,6 +1898,14 @@ static int rocker_cmd_set_port_settings_macaddr(struct rocker_port *rocker_port,
                               macaddr, NULL, NULL);
 }
 
+static int rocker_cmd_set_port_settings_mtu(struct rocker_port *rocker_port,
+                                           int mtu)
+{
+       return rocker_cmd_exec(rocker_port, SWITCHDEV_TRANS_NONE, 0,
+                              rocker_cmd_set_port_settings_mtu_prep,
+                              &mtu, NULL, NULL);
+}
+
 static int rocker_port_set_learning(struct rocker_port *rocker_port,
                                    enum switchdev_trans trans)
 {
@@ -4152,6 +4184,34 @@ static int rocker_port_set_mac_address(struct net_device *dev, void *p)
        return 0;
 }
 
+static int rocker_port_change_mtu(struct net_device *dev, int new_mtu)
+{
+       struct rocker_port *rocker_port = netdev_priv(dev);
+       int running = netif_running(dev);
+       int err;
+
+#define ROCKER_PORT_MIN_MTU    68
+#define ROCKER_PORT_MAX_MTU    9000
+
+       if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU)
+               return -EINVAL;
+
+       if (running)
+               rocker_port_stop(dev);
+
+       netdev_info(dev, "MTU change from %d to %d\n", dev->mtu, new_mtu);
+       dev->mtu = new_mtu;
+
+       err = rocker_cmd_set_port_settings_mtu(rocker_port, new_mtu);
+       if (err)
+               return err;
+
+       if (running)
+               err = rocker_port_open(dev);
+
+       return err;
+}
+
 static int rocker_port_get_phys_port_name(struct net_device *dev,
                                          char *buf, size_t len)
 {
@@ -4172,6 +4232,7 @@ static const struct net_device_ops rocker_port_netdev_ops = {
        .ndo_stop                       = rocker_port_stop,
        .ndo_start_xmit                 = rocker_port_xmit,
        .ndo_set_mac_address            = rocker_port_set_mac_address,
+       .ndo_change_mtu                 = rocker_port_change_mtu,
        .ndo_bridge_getlink             = switchdev_port_bridge_getlink,
        .ndo_bridge_setlink             = switchdev_port_bridge_setlink,
        .ndo_bridge_dellink             = switchdev_port_bridge_dellink,
index c61fbf968036a3fe4a57f8afbef704bcffa37dc7..08b2c3d961887c65966013d6c077980d37955c32 100644 (file)
@@ -159,6 +159,7 @@ enum {
        ROCKER_TLV_CMD_PORT_SETTINGS_MODE,              /* u8 */
        ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING,          /* u8 */
        ROCKER_TLV_CMD_PORT_SETTINGS_PHYS_NAME,         /* binary */
+       ROCKER_TLV_CMD_PORT_SETTINGS_MTU,               /* u16 */
 
        __ROCKER_TLV_CMD_PORT_SETTINGS_MAX,
        ROCKER_TLV_CMD_PORT_SETTINGS_MAX =
index 5ec4ed3f6c8def7a6a6cf527cd9ac7ca73c09844..3e47202b901025e82d0ad0777703da181ab7ec88 100644 (file)
@@ -2142,7 +2142,6 @@ MODULE_DEVICE_TABLE(of, of_match);
 static struct platform_driver netcp_driver = {
        .driver = {
                .name           = "netcp-1.0",
-               .owner          = THIS_MODULE,
                .of_match_table = of_match,
        },
        .probe = netcp_probe,
index dd4544085db321d2f9020d97ebbbb9a8887ed4b9..26cd14ccf4d565324317f19fd86960242e34c7fb 100644 (file)
@@ -589,6 +589,7 @@ struct nvsp_message {
 
 
 #define NETVSC_MTU 65536
+#define NETVSC_MTU_MIN 68
 
 #define NETVSC_RECEIVE_BUFFER_SIZE             (1024*1024*16)  /* 16MB */
 #define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY      (1024*1024*15)  /* 15MB */
index 358475ed9b5964c53f038c61f7fb8a3996c2a5ab..b855ba9a507d5b0eeda334b0eb3e254ee5ba26f8 100644 (file)
@@ -743,8 +743,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu)
        if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
                limit = NETVSC_MTU - ETH_HLEN;
 
-       /* Hyper-V hosts don't support MTU < ETH_DATA_LEN (1500) */
-       if (mtu < ETH_DATA_LEN || mtu > limit)
+       if (mtu < NETVSC_MTU_MIN || mtu > limit)
                return -EINVAL;
 
        nvdev->start_remove = true;
index 236aeb76ef224ba5eaf9e0994b6e9363434047ac..2e40417a8087812b27bd5ffadbd24d98999980fd 100644 (file)
@@ -1054,7 +1054,7 @@ int rndis_filter_device_add(struct hv_device *dev,
        ret = rndis_filter_query_device(rndis_device,
                                        RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
                                        &mtu, &size);
-       if (ret == 0 && size == sizeof(u32))
+       if (ret == 0 && size == sizeof(u32) && mtu < net_device->ndev->mtu)
                net_device->ndev->mtu = mtu;
 
        /* Get the mac address */
index 94570aace2414a5984d989045c599ed141582eb5..cc56fac3c3f83ef51e4f42f67f9e1c4e549f7725 100644 (file)
 #include <net/net_namespace.h>
 
 #define TX_Q_LIMIT    32
-struct ifb_private {
+struct ifb_q_private {
+       struct net_device       *dev;
        struct tasklet_struct   ifb_tasklet;
-       int     tasklet_pending;
-
-       struct u64_stats_sync   rsync;
+       int                     tasklet_pending;
+       int                     txqnum;
        struct sk_buff_head     rq;
-       u64 rx_packets;
-       u64 rx_bytes;
+       u64                     rx_packets;
+       u64                     rx_bytes;
+       struct u64_stats_sync   rsync;
 
        struct u64_stats_sync   tsync;
+       u64                     tx_packets;
+       u64                     tx_bytes;
        struct sk_buff_head     tq;
-       u64 tx_packets;
-       u64 tx_bytes;
-};
+} ____cacheline_aligned_in_smp;
 
-static int numifbs = 2;
+struct ifb_dev_private {
+       struct ifb_q_private *tx_private;
+};
 
-static void ri_tasklet(unsigned long dev);
 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
 static int ifb_open(struct net_device *dev);
 static int ifb_close(struct net_device *dev);
 
-static void ri_tasklet(unsigned long dev)
+static void ifb_ri_tasklet(unsigned long _txp)
 {
-       struct net_device *_dev = (struct net_device *)dev;
-       struct ifb_private *dp = netdev_priv(_dev);
+       struct ifb_q_private *txp = (struct ifb_q_private *)_txp;
        struct netdev_queue *txq;
        struct sk_buff *skb;
 
-       txq = netdev_get_tx_queue(_dev, 0);
-       if ((skb = skb_peek(&dp->tq)) == NULL) {
-               if (__netif_tx_trylock(txq)) {
-                       skb_queue_splice_tail_init(&dp->rq, &dp->tq);
-                       __netif_tx_unlock(txq);
-               } else {
-                       /* reschedule */
+       txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
+       skb = skb_peek(&txp->tq);
+       if (!skb) {
+               if (!__netif_tx_trylock(txq))
                        goto resched;
-               }
+               skb_queue_splice_tail_init(&txp->rq, &txp->tq);
+               __netif_tx_unlock(txq);
        }
 
-       while ((skb = __skb_dequeue(&dp->tq)) != NULL) {
+       while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
                u32 from = G_TC_FROM(skb->tc_verd);
 
                skb->tc_verd = 0;
                skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
 
-               u64_stats_update_begin(&dp->tsync);
-               dp->tx_packets++;
-               dp->tx_bytes += skb->len;
-               u64_stats_update_end(&dp->tsync);
+               u64_stats_update_begin(&txp->tsync);
+               txp->tx_packets++;
+               txp->tx_bytes += skb->len;
+               u64_stats_update_end(&txp->tsync);
 
                rcu_read_lock();
-               skb->dev = dev_get_by_index_rcu(dev_net(_dev), skb->skb_iif);
+               skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
                if (!skb->dev) {
                        rcu_read_unlock();
                        dev_kfree_skb(skb);
-                       _dev->stats.tx_dropped++;
-                       if (skb_queue_len(&dp->tq) != 0)
+                       txp->dev->stats.tx_dropped++;
+                       if (skb_queue_len(&txp->tq) != 0)
                                goto resched;
                        break;
                }
                rcu_read_unlock();
-               skb->skb_iif = _dev->ifindex;
+               skb->skb_iif = txp->dev->ifindex;
 
                if (from & AT_EGRESS) {
                        dev_queue_xmit(skb);
@@ -112,10 +111,11 @@ static void ri_tasklet(unsigned long dev)
        }
 
        if (__netif_tx_trylock(txq)) {
-               if ((skb = skb_peek(&dp->rq)) == NULL) {
-                       dp->tasklet_pending = 0;
-                       if (netif_queue_stopped(_dev))
-                               netif_wake_queue(_dev);
+               skb = skb_peek(&txp->rq);
+               if (!skb) {
+                       txp->tasklet_pending = 0;
+                       if (netif_tx_queue_stopped(txq))
+                               netif_tx_wake_queue(txq);
                } else {
                        __netif_tx_unlock(txq);
                        goto resched;
@@ -123,8 +123,8 @@ static void ri_tasklet(unsigned long dev)
                __netif_tx_unlock(txq);
        } else {
 resched:
-               dp->tasklet_pending = 1;
-               tasklet_schedule(&dp->ifb_tasklet);
+               txp->tasklet_pending = 1;
+               tasklet_schedule(&txp->ifb_tasklet);
        }
 
 }
@@ -132,29 +132,58 @@ resched:
+/* Fill @stats with the rx/tx packet and byte counters summed over the
+ * device's per-queue ifb_q_private entries, plus the device-wide
+ * rx_dropped/tx_dropped counters from dev->stats. Returns @stats.
+ */
 static struct rtnl_link_stats64 *ifb_stats64(struct net_device *dev,
                                              struct rtnl_link_stats64 *stats)
 {
-       struct ifb_private *dp = netdev_priv(dev);
+       struct ifb_dev_private *dp = netdev_priv(dev);
+       struct ifb_q_private *txp = dp->tx_private;
        unsigned int start;
-
-       do {
-               start = u64_stats_fetch_begin_irq(&dp->rsync);
-               stats->rx_packets = dp->rx_packets;
-               stats->rx_bytes = dp->rx_bytes;
-       } while (u64_stats_fetch_retry_irq(&dp->rsync, start));
-
-       do {
-               start = u64_stats_fetch_begin_irq(&dp->tsync);
-
-               stats->tx_packets = dp->tx_packets;
-               stats->tx_bytes = dp->tx_bytes;
-
-       } while (u64_stats_fetch_retry_irq(&dp->tsync, start));
-
+       u64 packets, bytes;
+       int i;
+
+       /* Snapshot each queue's counters inside its fetch_begin/fetch_retry
+        * loop, then add the snapshot to the running totals.
+        * NOTE(review): totals are accumulated with +=, so this assumes the
+        * caller hands in a zeroed @stats - confirm.
+        */
+       for (i = 0; i < dev->num_tx_queues; i++,txp++) {
+               do {
+                       start = u64_stats_fetch_begin_irq(&txp->rsync);
+                       packets = txp->rx_packets;
+                       bytes = txp->rx_bytes;
+               } while (u64_stats_fetch_retry_irq(&txp->rsync, start));
+               stats->rx_packets += packets;
+               stats->rx_bytes += bytes;
+
+               do {
+                       start = u64_stats_fetch_begin_irq(&txp->tsync);
+                       packets = txp->tx_packets;
+                       bytes = txp->tx_bytes;
+               } while (u64_stats_fetch_retry_irq(&txp->tsync, start));
+               stats->tx_packets += packets;
+               stats->tx_bytes += bytes;
+       }
        stats->rx_dropped = dev->stats.rx_dropped;
        stats->tx_dropped = dev->stats.tx_dropped;
 
        return stats;
 }
 
+/* ndo_init: allocate one zeroed ifb_q_private per TX queue and set each
+ * one up (owning device, queue number, rq/tq lists, stats sync objects,
+ * tasklet), starting the matching TX queue along the way.
+ * Returns 0, or -ENOMEM when the per-queue array cannot be allocated.
+ */
+static int ifb_dev_init(struct net_device *dev)
+{
+       struct ifb_dev_private *priv = netdev_priv(dev);
+       struct ifb_q_private *queues;
+       int q;
+
+       queues = kcalloc(dev->num_tx_queues, sizeof(*queues), GFP_KERNEL);
+       if (!queues)
+               return -ENOMEM;
+       priv->tx_private = queues;
+
+       for (q = 0; q < dev->num_tx_queues; q++) {
+               struct ifb_q_private *txp = &queues[q];
+
+               txp->dev = dev;
+               txp->txqnum = q;
+               u64_stats_init(&txp->rsync);
+               u64_stats_init(&txp->tsync);
+               __skb_queue_head_init(&txp->rq);
+               __skb_queue_head_init(&txp->tq);
+               /* The tasklet gets its own queue state as argument. */
+               tasklet_init(&txp->ifb_tasklet, ifb_ri_tasklet,
+                            (unsigned long)txp);
+               netif_tx_start_queue(netdev_get_tx_queue(dev, q));
+       }
+
+       return 0;
+}
 
 static const struct net_device_ops ifb_netdev_ops = {
        .ndo_open       = ifb_open,
@@ -162,6 +191,7 @@ static const struct net_device_ops ifb_netdev_ops = {
        .ndo_get_stats64 = ifb_stats64,
        .ndo_start_xmit = ifb_xmit,
        .ndo_validate_addr = eth_validate_addr,
+       .ndo_init       = ifb_dev_init,
 };
 
 #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST | \
@@ -169,10 +199,24 @@ static const struct net_device_ops ifb_netdev_ops = {
                      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX         | \
                      NETIF_F_HW_VLAN_STAG_TX)
 
+/* Device destructor: for every TX queue, kill its tasklet and purge its
+ * rq and tq lists, then free the per-queue array and the net_device.
+ */
+static void ifb_dev_free(struct net_device *dev)
+{
+       struct ifb_dev_private *priv = netdev_priv(dev);
+       struct ifb_q_private *queues = priv->tx_private;
+       int q;
+
+       for (q = 0; q < dev->num_tx_queues; q++) {
+               struct ifb_q_private *txp = &queues[q];
+
+               /* Kill the tasklet before dropping what it would process. */
+               tasklet_kill(&txp->ifb_tasklet);
+               __skb_queue_purge(&txp->rq);
+               __skb_queue_purge(&txp->tq);
+       }
+
+       kfree(queues);
+       free_netdev(dev);
+}
+
 static void ifb_setup(struct net_device *dev)
 {
        /* Initialize the device structure. */
-       dev->destructor = free_netdev;
        dev->netdev_ops = &ifb_netdev_ops;
 
        /* Fill in device structure with ethernet-generic values. */
@@ -188,17 +232,19 @@ static void ifb_setup(struct net_device *dev)
        dev->priv_flags &= ~IFF_TX_SKB_SHARING;
        netif_keep_dst(dev);
        eth_hw_addr_random(dev);
+       dev->destructor = ifb_dev_free;
 }
 
 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-       struct ifb_private *dp = netdev_priv(dev);
+       struct ifb_dev_private *dp = netdev_priv(dev);
        u32 from = G_TC_FROM(skb->tc_verd);
+       struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
 
-       u64_stats_update_begin(&dp->rsync);
-       dp->rx_packets++;
-       dp->rx_bytes += skb->len;
-       u64_stats_update_end(&dp->rsync);
+       u64_stats_update_begin(&txp->rsync);
+       txp->rx_packets++;
+       txp->rx_bytes += skb->len;
+       u64_stats_update_end(&txp->rsync);
 
        if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->skb_iif) {
                dev_kfree_skb(skb);
@@ -206,14 +252,13 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_OK;
        }
 
-       if (skb_queue_len(&dp->rq) >= dev->tx_queue_len) {
-               netif_stop_queue(dev);
-       }
+       if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
+               netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
 
-       __skb_queue_tail(&dp->rq, skb);
-       if (!dp->tasklet_pending) {
-               dp->tasklet_pending = 1;
-               tasklet_schedule(&dp->ifb_tasklet);
+       __skb_queue_tail(&txp->rq, skb);
+       if (!txp->tasklet_pending) {
+               txp->tasklet_pending = 1;
+               tasklet_schedule(&txp->ifb_tasklet);
        }
 
        return NETDEV_TX_OK;
@@ -221,24 +266,13 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
 
 static int ifb_close(struct net_device *dev)
 {
-       struct ifb_private *dp = netdev_priv(dev);
-
-       tasklet_kill(&dp->ifb_tasklet);
-       netif_stop_queue(dev);
-       __skb_queue_purge(&dp->rq);
-       __skb_queue_purge(&dp->tq);
+       netif_tx_stop_all_queues(dev);
        return 0;
 }
 
 static int ifb_open(struct net_device *dev)
 {
-       struct ifb_private *dp = netdev_priv(dev);
-
-       tasklet_init(&dp->ifb_tasklet, ri_tasklet, (unsigned long)dev);
-       __skb_queue_head_init(&dp->rq);
-       __skb_queue_head_init(&dp->tq);
-       netif_start_queue(dev);
-
+       netif_tx_start_all_queues(dev);
        return 0;
 }
 
@@ -255,31 +289,30 @@ static int ifb_validate(struct nlattr *tb[], struct nlattr *data[])
 
 static struct rtnl_link_ops ifb_link_ops __read_mostly = {
        .kind           = "ifb",
-       .priv_size      = sizeof(struct ifb_private),
+       .priv_size      = sizeof(struct ifb_dev_private),
        .setup          = ifb_setup,
        .validate       = ifb_validate,
 };
 
-/* Number of ifb devices to be set up by this module. */
+/* Number of ifb devices to be set up by this module.
+ * Note that these legacy devices have one queue.
+ * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
+ */
+static int numifbs = 2;
 module_param(numifbs, int, 0);
 MODULE_PARM_DESC(numifbs, "Number of ifb devices");
 
 static int __init ifb_init_one(int index)
 {
        struct net_device *dev_ifb;
-       struct ifb_private *dp;
        int err;
 
-       dev_ifb = alloc_netdev(sizeof(struct ifb_private), "ifb%d",
+       dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
                               NET_NAME_UNKNOWN, ifb_setup);
 
        if (!dev_ifb)
                return -ENOMEM;
 
-       dp = netdev_priv(dev_ifb);
-       u64_stats_init(&dp->rsync);
-       u64_stats_init(&dp->tsync);
-
        dev_ifb->rtnl_link_ops = &ifb_link_ops;
        err = register_netdevice(dev_ifb);
        if (err < 0)
index f721444c2b0a9413dd0bac8c7e61099cf6d3789b..3320a179ee360c6b3e8d90b6355f5cd18c8b4630 100644 (file)
@@ -48,6 +48,8 @@
 #define MII_M1011_IMASK_CLEAR          0x0000
 
 #define MII_M1011_PHY_SCR              0x10
+#define MII_M1011_PHY_SCR_MDI          0x0000
+#define MII_M1011_PHY_SCR_MDI_X                0x0020
 #define MII_M1011_PHY_SCR_AUTO_CROSS   0x0060
 
 #define MII_M1145_PHY_EXT_SR           0x1b
@@ -159,6 +161,43 @@ static int marvell_config_intr(struct phy_device *phydev)
        return err;
 }
 
+/* Program the crossover selection in MII_M1011_PHY_SCR from @polarity.
+ *
+ * The bits covered by MII_M1011_PHY_SCR_AUTO_CROSS are cleared and then
+ * set to the MDI, MDI_X or AUTO_CROSS pattern; any @polarity other than
+ * ETH_TP_MDI / ETH_TP_MDI_X selects AUTO_CROSS. The register is written
+ * only when the resulting value differs from what was read.
+ *
+ * Returns 0 on success or when nothing had to be written, the negative
+ * value from phy_read() on read failure, or the result of phy_write().
+ */
+static int marvell_set_polarity(struct phy_device *phydev, int polarity)
+{
+       int cur;
+       int next;
+
+       cur = phy_read(phydev, MII_M1011_PHY_SCR);
+       if (cur < 0)
+               return cur;
+
+       next = cur & ~MII_M1011_PHY_SCR_AUTO_CROSS;
+       if (polarity == ETH_TP_MDI)
+               next |= MII_M1011_PHY_SCR_MDI;
+       else if (polarity == ETH_TP_MDI_X)
+               next |= MII_M1011_PHY_SCR_MDI_X;
+       else
+               next |= MII_M1011_PHY_SCR_AUTO_CROSS;
+
+       /* Nothing to do when the register already holds the wanted bits. */
+       if (next == cur)
+               return 0;
+
+       return phy_write(phydev, MII_M1011_PHY_SCR, next);
+}
+
 static int marvell_config_aneg(struct phy_device *phydev)
 {
        int err;
@@ -191,8 +230,7 @@ static int marvell_config_aneg(struct phy_device *phydev)
        if (err < 0)
                return err;
 
-       err = phy_write(phydev, MII_M1011_PHY_SCR,
-                       MII_M1011_PHY_SCR_AUTO_CROSS);
+       err = marvell_set_polarity(phydev, phydev->mdix);
        if (err < 0)
                return err;
 
index b2197b506acbe86f3540d5ae1d8334129c2bbe57..84b1fba58ac3c8efcbbb0bf9311b442ac52614c1 100644 (file)
@@ -353,6 +353,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd)
 
        phydev->duplex = cmd->duplex;
 
+       phydev->mdix = cmd->eth_tp_mdix_ctrl;
+
        /* Restart the PHY */
        phy_start_aneg(phydev);
 
@@ -377,6 +379,7 @@ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd)
        cmd->transceiver = phy_is_internal(phydev) ?
                XCVR_INTERNAL : XCVR_EXTERNAL;
        cmd->autoneg = phydev->autoneg;
+       cmd->eth_tp_mdix_ctrl = phydev->mdix;
 
        return 0;
 }
index 8a495b318b6f23bf66b19f4d77e557506cab5b0f..c6cb85a85c896fd6dcab466fa80f6dc2b81e8e04 100644 (file)
@@ -325,9 +325,6 @@ static inline pending_ring_idx_t nr_pending_reqs(struct xenvif_queue *queue)
                queue->pending_prod + queue->pending_cons;
 }
 
-/* Callback from stack when TX packet can be released */
-void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
-
 irqreturn_t xenvif_interrupt(int irq, void *dev_id);
 
 extern bool separate_tx_rx_irq;
index 82806c60aa4273d67ff5592dfc11e4eaa57c51d7..1319a6bb6b82bc36f1b9199e00f038301afb24e1 100644 (file)
@@ -94,7 +94,6 @@ static inline struct ipv6hdr *ipipv6_hdr(const struct sk_buff *skb)
 struct inet6_skb_parm {
        int                     iif;
        __be16                  ra;
-       __u16                   hop;
        __u16                   dst0;
        __u16                   srcrt;
        __u16                   dst1;
@@ -111,6 +110,7 @@ struct inet6_skb_parm {
 #define IP6SKB_REROUTED                4
 #define IP6SKB_ROUTERALERT     8
 #define IP6SKB_FRAGMENTED      16
+#define IP6SKB_HOPBYHOP        32
 };
 
 #define IP6CB(skb)     ((struct inet6_skb_parm*)((skb)->cb))
index a26c3f84b8ddc6c15e2abbecf47a588419534b11..e5fb1d4159619f7ecad0fca5515e627fd7016e06 100644 (file)
@@ -424,6 +424,8 @@ struct phy_device {
 
        struct net_device *attached_dev;
 
+       u8 mdix;
+
        void (*adjust_link)(struct net_device *dev);
 };
 #define to_phy_device(d) container_of(d, struct phy_device, dev)
index 3ee4c92afd1bd2baf2b90201a9b4af896d020b5f..8d2a707a9e876e7779fcf5b09f8ed629e9b6cc21 100644 (file)
@@ -21,6 +21,8 @@ struct tcf_common {
        struct gnet_stats_rate_est64    tcfc_rate_est;
        spinlock_t                      tcfc_lock;
        struct rcu_head                 tcfc_rcu;
+       struct gnet_stats_basic_cpu __percpu *cpu_bstats;
+       struct gnet_stats_queue __percpu *cpu_qstats;
 };
 #define tcf_head       common.tcfc_head
 #define tcf_index      common.tcfc_index
@@ -68,6 +70,17 @@ static inline void tcf_hashinfo_destroy(struct tcf_hashinfo *hf)
        kfree(hf->htab);
 }
 
+/* Update lastuse only if needed, to avoid dirtying a cache line.
+ * We use a temp variable to avoid fetching jiffies twice.
+ *
+ * @tm: record whose lastuse field is set to the sampled jiffies value
+ *      when the two differ; left untouched otherwise.
+ */
+static inline void tcf_lastuse_update(struct tcf_t *tm)
+{
+       unsigned long now = jiffies;
+
+       if (tm->lastuse != now)
+               tm->lastuse = now;
+}
+
 #ifdef CONFIG_NET_CLS_ACT
 
 #define ACT_P_CREATED 1
@@ -103,7 +116,7 @@ int tcf_hash_release(struct tc_action *a, int bind);
 u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
 int tcf_hash_check(u32 index, struct tc_action *a, int bind);
 int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
-                   int size, int bind);
+                   int size, int bind, bool cpustats);
 void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est);
 void tcf_hash_insert(struct tc_action *a);
 
index b73c88a19dd408f0de41f87c80242816fac4b19d..b07d126694a7aa5d5910e2d4126522aebd602a98 100644 (file)
@@ -205,8 +205,8 @@ void inet_put_port(struct sock *sk);
 
 void inet_hashinfo_init(struct inet_hashinfo *h);
 
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw);
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw);
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk);
+void __inet_hash(struct sock *sk, struct sock *osk);
 void inet_hash(struct sock *sk);
 void inet_unhash(struct sock *sk);
 
index 360c4802288db91a38b435bcf5b5d2eb71a8cd1f..879d6e5a973b4ae1af54d6b0c6103c02ee774991 100644 (file)
@@ -100,10 +100,8 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 void inet_twsk_free(struct inet_timewait_sock *tw);
 void inet_twsk_put(struct inet_timewait_sock *tw);
 
-int inet_twsk_unhash(struct inet_timewait_sock *tw);
-
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
-                         struct inet_hashinfo *hashinfo);
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+                          struct inet_hashinfo *hashinfo);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
                                           struct inet_timewait_death_row *dr,
@@ -113,7 +111,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
                           struct inet_hashinfo *hashinfo);
 
 void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo);
-void inet_twsk_deschedule(struct inet_timewait_sock *tw);
+void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);
 
 void inet_twsk_purge(struct inet_hashinfo *hashinfo,
                     struct inet_timewait_death_row *twdr, int family);
index 8d93544a2d2b5f21c7ed8b8137394df0758dbef3..c0368db6df54d78a1122c45aff412bcafa84b412 100644 (file)
@@ -31,6 +31,7 @@ struct netns_sysctl_ipv6 {
        int auto_flowlabels;
        int icmpv6_time;
        int anycast_src_echo_reply;
+       int ip_nonlocal_bind;
        int fwmark_reflect;
        int idgen_retries;
        int idgen_delay;
index 2738f6f8790836b1b88d5163e5ba297b0f4421c0..2eab08c38e3283efd696bfff4198a16ad27c1d16 100644 (file)
@@ -513,17 +513,20 @@ static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
        bstats->packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 }
 
-static inline void qdisc_bstats_update_cpu(struct Qdisc *sch,
-                                          const struct sk_buff *skb)
+static inline void bstats_cpu_update(struct gnet_stats_basic_cpu *bstats,
+                                    const struct sk_buff *skb)
 {
-       struct gnet_stats_basic_cpu *bstats =
-                               this_cpu_ptr(sch->cpu_bstats);
-
        u64_stats_update_begin(&bstats->syncp);
        bstats_update(&bstats->bstats, skb);
        u64_stats_update_end(&bstats->syncp);
 }
 
+/* Account @skb in the current CPU's entry of sch->cpu_bstats (looked up
+ * with this_cpu_ptr()) by delegating to bstats_cpu_update().
+ */
+static inline void qdisc_bstats_cpu_update(struct Qdisc *sch,
+                                          const struct sk_buff *skb)
+{
+       bstats_cpu_update(this_cpu_ptr(sch->cpu_bstats), skb);
+}
+
 static inline void qdisc_bstats_update(struct Qdisc *sch,
                                       const struct sk_buff *skb)
 {
@@ -547,16 +550,24 @@ static inline void __qdisc_qstats_drop(struct Qdisc *sch, int count)
        sch->qstats.drops += count;
 }
 
-static inline void qdisc_qstats_drop(struct Qdisc *sch)
+static inline void qstats_drop_inc(struct gnet_stats_queue *qstats)
 {
-       sch->qstats.drops++;
+       qstats->drops++;
 }
 
-static inline void qdisc_qstats_drop_cpu(struct Qdisc *sch)
+static inline void qstats_overlimit_inc(struct gnet_stats_queue *qstats)
 {
-       struct gnet_stats_queue *qstats = this_cpu_ptr(sch->cpu_qstats);
+       qstats->overlimits++;
+}
 
-       qstats->drops++;
+static inline void qdisc_qstats_drop(struct Qdisc *sch)
+{
+       qstats_drop_inc(&sch->qstats);
+}
+
+static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch)
+{
+       qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats));
 }
 
 static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
index 9fc9b578908ab868dcc0ef5986358ccb603bf5f9..592a6bc02b0b535087e9f2afee844b5ea37c666b 100644 (file)
@@ -6,9 +6,10 @@
 struct tcf_gact {
        struct tcf_common       common;
 #ifdef CONFIG_GACT_PROB
-        u16                    tcfg_ptype;
-        u16                    tcfg_pval;
-        int                    tcfg_paction;
+       u16                     tcfg_ptype;
+       u16                     tcfg_pval;
+       int                     tcfg_paction;
+       atomic_t                packets;
 #endif
 };
 #define to_gact(a) \
index 4dd77a1c106b246b0abc9d8af3d6dc67fa748b5c..dae96bae1c19c2d71fa7c0ea65e74d064e3b3757 100644 (file)
@@ -8,7 +8,7 @@ struct tcf_mirred {
        int                     tcfm_eaction;
        int                     tcfm_ifindex;
        int                     tcfm_ok_push;
-       struct net_device       *tcfm_dev;
+       struct net_device __rcu *tcfm_dev;
        struct list_head        tcfm_list;
 };
 #define to_mirred(a) \
index 950cfecaad3c0d01c646c4fd111eca8d0cf8aef3..364426a2be5a0f7f0a2e6daaf6ce9b9a2f3e3304 100644 (file)
@@ -989,6 +989,11 @@ static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
 
 #define TCP_INFINITE_SSTHRESH  0x7fffffff
 
+/* Returns true while tp->snd_cwnd is strictly below tp->snd_ssthresh. */
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+       return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
 static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp)
 {
        return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
@@ -1065,7 +1070,7 @@ static inline bool tcp_is_cwnd_limited(const struct sock *sk)
        const struct tcp_sock *tp = tcp_sk(sk);
 
        /* If in slow start, ensure cwnd grows to twice what was ACKed. */
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                return tp->snd_cwnd < 2 * tp->max_packets_out;
 
        return tp->is_cwnd_limited;
index 68f0ecad6c6e211e8f6dac90214fff93c539eb0f..1a47946f95ba46a9ad9c2de262c9638f6a6776f4 100644 (file)
@@ -33,9 +33,6 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 
 static inline void twsk_destructor(struct sock *sk)
 {
-       BUG_ON(sk == NULL);
-       BUG_ON(sk->sk_prot == NULL);
-       BUG_ON(sk->sk_prot->twsk_prot == NULL);
        if (sk->sk_prot->twsk_prot->twsk_destructor != NULL)
                sk->sk_prot->twsk_prot->twsk_destructor(sk);
 }
index eaaea6208b424e7ef4fd361646b07fc497180a12..3635b77975085a5801d9d4a5555beaf70a623441 100644 (file)
@@ -182,6 +182,7 @@ struct br_mdb_entry {
 #define MDB_TEMPORARY 0
 #define MDB_PERMANENT 1
        __u8 state;
+       __u16 vid;
        struct {
                union {
                        __be32  ip4;
index c5bedc82bc1c540bf466c4d2e64f8663a974536d..bf38f5e8196c1bfc307405eff2b11a91059738d9 100644 (file)
@@ -453,7 +453,11 @@ select_insn:
                if (unlikely(!prog))
                        goto out;
 
-               ARG1 = BPF_R1;
+               /* ARG1 at this point is guaranteed to point to CTX from
+                * the verifier side due to the fact that the tail call is
+                * handled like a helper, that is, bpf_tail_call_proto,
+                * where arg1_type is ARG_PTR_TO_CTX.
+                */
                insn = prog->insnsi;
                goto select_insn;
 out:
index 7f58c735d745049025407806e972bb4c7f124888..9198f28a5528f2a27158e2ac67c5b06a44949c2e 100644 (file)
@@ -3674,6 +3674,9 @@ static struct bpf_test tests[] = {
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
                        BPF_ENDIAN(BPF_FROM_BE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
@@ -3708,6 +3711,9 @@ static struct bpf_test tests[] = {
                .u.insns_int = {
                        BPF_LD_IMM64(R0, 0x0123456789abcdefLL),
                        BPF_ENDIAN(BPF_FROM_LE, R0, 32),
+                       BPF_ALU64_REG(BPF_MOV, R1, R0),
+                       BPF_ALU64_IMM(BPF_RSH, R1, 32),
+                       BPF_ALU32_REG(BPF_ADD, R0, R1), /* R1 = 0 */
                        BPF_EXIT_INSN(),
                },
                INTERNAL,
index c11cf2611db0c870542969b6847d0a61d18b64d4..9f7cdd27b762b65cbb66fc7279785cddf040d275 100644 (file)
@@ -85,6 +85,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
                                        memset(&e, 0, sizeof(e));
                                        e.ifindex = port->dev->ifindex;
                                        e.state = p->state;
+                                       e.vid = p->addr.vid;
                                        if (p->addr.proto == htons(ETH_P_IP))
                                                e.addr.u.ip4 = p->addr.u.ip4;
 #if IS_ENABLED(CONFIG_IPV6)
@@ -230,7 +231,7 @@ errout:
 }
 
 void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
-                  struct br_ip *group, int type)
+                  struct br_ip *group, int type, u8 state)
 {
        struct br_mdb_entry entry;
 
@@ -241,6 +242,8 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
 #if IS_ENABLED(CONFIG_IPV6)
        entry.addr.u.ip6 = group->u.ip6;
 #endif
+       entry.state = state;
+       entry.vid = group->vid;
        __br_mdb_notify(dev, &entry, type);
 }
 
@@ -263,6 +266,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
                return false;
        if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
                return false;
+       if (entry->vid >= VLAN_VID_MASK)
+               return false;
 
        return true;
 }
@@ -351,7 +356,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
        if (state == MDB_TEMPORARY)
                mod_timer(&p->timer, now + br->multicast_membership_interval);
 
-       br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+       br_mdb_notify(br->dev, port, group, RTM_NEWMDB, state);
        return 0;
 }
 
@@ -375,6 +380,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
                return -EINVAL;
 
        memset(&ip, 0, sizeof(ip));
+       ip.vid = entry->vid;
        ip.proto = entry->addr.proto;
        if (ip.proto == htons(ETH_P_IP))
                ip.u.ip4 = entry->addr.u.ip4;
@@ -422,6 +428,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
                return -EINVAL;
 
        memset(&ip, 0, sizeof(ip));
+       ip.vid = entry->vid;
        ip.proto = entry->addr.proto;
        if (ip.proto == htons(ETH_P_IP))
                ip.u.ip4 = entry->addr.u.ip4;
index 742a6c27d7a222bc3c53c288b4a6915194310fe6..5a44cd9473f2e03506f809f817f5b6904c6314a7 100644 (file)
@@ -694,7 +694,7 @@ static int br_multicast_add_group(struct net_bridge *br,
        if (unlikely(!p))
                goto err;
        rcu_assign_pointer(*pp, p);
-       br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+       br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY);
 
 found:
        mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -1439,8 +1439,9 @@ br_multicast_leave_group(struct net_bridge *br,
                        rcu_assign_pointer(*pp, p->next);
                        hlist_del_init(&p->mglist);
                        del_timer(&p->timer);
+                       br_mdb_notify(br->dev, port, group, RTM_DELMDB,
+                                     p->state);
                        call_rcu_bh(&p->rcu, br_multicast_free_pg);
-                       br_mdb_notify(br->dev, port, group, RTM_DELMDB);
 
                        if (!mp->ports && !mp->mglist &&
                            netif_running(br->dev))
index 8b21146b24a055652be0c7d74fd875ed875da918..c73fd785654da3e2284c3a4f07ed769aaed344af 100644 (file)
@@ -488,7 +488,7 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
 void br_mdb_init(void);
 void br_mdb_uninit(void);
 void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
-                  struct br_ip *group, int type);
+                  struct br_ip *group, int type, u8 state);
 
 #define mlock_dereference(X, br) \
        rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
index a8e4dd4302853702fef7fb1462fbdeb8a38f45e1..69445a33ace6583c91eedbde46c1d0d0193c8721 100644 (file)
@@ -3645,7 +3645,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 
        qdisc_skb_cb(skb)->pkt_len = skb->len;
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
-       qdisc_bstats_update_cpu(cl->q, skb);
+       qdisc_bstats_cpu_update(cl->q, skb);
 
        switch (tc_classify(skb, cl, &cl_res)) {
        case TC_ACT_OK:
@@ -3653,7 +3653,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                skb->tc_index = TC_H_MIN(cl_res.classid);
                break;
        case TC_ACT_SHOT:
-               qdisc_qstats_drop_cpu(cl->q);
+               qdisc_qstats_cpu_drop(cl->q);
        case TC_ACT_STOLEN:
        case TC_ACT_QUEUED:
                kfree_skb(skb);
index 43d3dd62fcc8eccd95a4618f68b0553cf7309c01..42689d5c468cb4f53baa058c74cdee58099137c7 100644 (file)
@@ -60,11 +60,15 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
        struct phy_device *phydev;
        unsigned int type;
 
+       if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+               return false;
+
        if (skb_headroom(skb) < ETH_HLEN)
                return false;
+
        __skb_push(skb, ETH_HLEN);
 
-       type = classify(skb);
+       type = ptp_classify_raw(skb);
 
        __skb_pull(skb, ETH_HLEN);
 
index 5f9b063bbe8ab4f3755a5711ae19b816a3bc2026..f8b3701a6c3c66dafe8d9a1e01aaf4836e88ec71 100644 (file)
@@ -343,7 +343,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw = NULL;
-       int twrefcnt = 0;
 
        spin_lock(lock);
 
@@ -371,21 +370,17 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
        if (tw) {
-               twrefcnt = inet_twsk_unhash(tw);
+               sk_nulls_del_node_init_rcu((struct sock *)tw);
                NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
        }
        spin_unlock(lock);
-       if (twrefcnt)
-               inet_twsk_put(tw);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
        if (twp) {
                *twp = tw;
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
-               inet_twsk_deschedule(tw);
-
-               inet_twsk_put(tw);
+               inet_twsk_deschedule_put(tw);
        }
        return 0;
 
@@ -403,13 +398,12 @@ static u32 inet_sk_port_offset(const struct sock *sk)
                                          inet->inet_dport);
 }
 
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct hlist_nulls_head *list;
        struct inet_ehash_bucket *head;
        spinlock_t *lock;
-       int twrefcnt = 0;
 
        WARN_ON(!sk_unhashed(sk));
 
@@ -420,23 +414,22 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
 
        spin_lock(lock);
        __sk_nulls_add_node_rcu(sk, list);
-       if (tw) {
-               WARN_ON(sk->sk_hash != tw->tw_hash);
-               twrefcnt = inet_twsk_unhash(tw);
+       if (osk) {
+               WARN_ON(sk->sk_hash != osk->sk_hash);
+               sk_nulls_del_node_init_rcu(osk);
        }
        spin_unlock(lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-       return twrefcnt;
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
 
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash(struct sock *sk, struct sock *osk)
 {
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct inet_listen_hashbucket *ilb;
 
        if (sk->sk_state != TCP_LISTEN)
-               return __inet_hash_nolisten(sk, tw);
+               return __inet_hash_nolisten(sk, osk);
 
        WARN_ON(!sk_unhashed(sk));
        ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -445,7 +438,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
        __sk_nulls_add_node_rcu(sk, &ilb->head);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
        spin_unlock(&ilb->lock);
-       return 0;
 }
 EXPORT_SYMBOL(__inet_hash);
 
@@ -492,7 +484,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
        struct inet_bind_bucket *tb;
        int ret;
        struct net *net = sock_net(sk);
-       int twrefcnt = 1;
 
        if (!snum) {
                int i, remaining, low, high, port;
@@ -560,19 +551,14 @@ ok:
                inet_bind_hash(sk, tb, port);
                if (sk_unhashed(sk)) {
                        inet_sk(sk)->inet_sport = htons(port);
-                       twrefcnt += __inet_hash_nolisten(sk, tw);
+                       __inet_hash_nolisten(sk, (struct sock *)tw);
                }
                if (tw)
-                       twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
+                       inet_twsk_bind_unhash(tw, hinfo);
                spin_unlock(&head->lock);
 
-               if (tw) {
-                       inet_twsk_deschedule(tw);
-                       while (twrefcnt) {
-                               twrefcnt--;
-                               inet_twsk_put(tw);
-                       }
-               }
+               if (tw)
+                       inet_twsk_deschedule_put(tw);
 
                ret = 0;
                goto out;
index 2ffbd16b79e00279235244c3412046062a86fec5..ae22cc24fbe89b32be1f2142450c198e78026851 100644 (file)
 #include <net/ip.h>
 
 
-/**
- *     inet_twsk_unhash - unhash a timewait socket from established hash
- *     @tw: timewait socket
- *
- *     unhash a timewait socket from established hash, if hashed.
- *     ehash lock must be held by caller.
- *     Returns 1 if caller should call inet_twsk_put() after lock release.
- */
-int inet_twsk_unhash(struct inet_timewait_sock *tw)
-{
-       if (hlist_nulls_unhashed(&tw->tw_node))
-               return 0;
-
-       hlist_nulls_del_rcu(&tw->tw_node);
-       sk_nulls_node_init(&tw->tw_node);
-       /*
-        * We cannot call inet_twsk_put() ourself under lock,
-        * caller must call it for us.
-        */
-       return 1;
-}
-
 /**
  *     inet_twsk_bind_unhash - unhash a timewait socket from bind hash
  *     @tw: timewait socket
@@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw)
  *     bind hash lock must be held by caller.
- *     Returns 1 if caller should call inet_twsk_put() after lock release.
+ *     Releases one reference on @tw with __sock_put() if it was bound.
  */
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
                          struct inet_hashinfo *hashinfo)
 {
        struct inet_bind_bucket *tb = tw->tw_tb;
 
        if (!tb)
-               return 0;
+               return;
 
        __hlist_del(&tw->tw_bind_node);
        tw->tw_tb = NULL;
        inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-       /*
-        * We cannot call inet_twsk_put() ourself under lock,
-        * caller must call it for us.
-        */
-       return 1;
+       __sock_put((struct sock *)tw);
 }
 
 /* Must be called with locally disabled BHs. */
 static void inet_twsk_kill(struct inet_timewait_sock *tw)
 {
        struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
-       struct inet_bind_hashbucket *bhead;
-       int refcnt;
-       /* Unlink from established hashes. */
        spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+       struct inet_bind_hashbucket *bhead;
 
        spin_lock(lock);
-       refcnt = inet_twsk_unhash(tw);
+       sk_nulls_del_node_init_rcu((struct sock *)tw);
        spin_unlock(lock);
 
        /* Disassociate with bind bucket. */
@@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
                        hashinfo->bhash_size)];
 
        spin_lock(&bhead->lock);
-       refcnt += inet_twsk_bind_unhash(tw, hashinfo);
+       inet_twsk_bind_unhash(tw, hashinfo);
        spin_unlock(&bhead->lock);
 
-       BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
-       atomic_sub(refcnt, &tw->tw_refcnt);
        atomic_dec(&tw->tw_dr->tw_count);
        inet_twsk_put(tw);
 }
@@ -235,13 +205,17 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc);
  * tcp_input.c to verify this.
  */
 
-/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw)
+/* This is for handling early-kills of TIME_WAIT sockets.
+ * Warning: this consumes the caller's reference to tw.
+ * Caller should not access tw anymore.
+ */
+void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
 {
        if (del_timer_sync(&tw->tw_timer))
                inet_twsk_kill(tw);
+       inet_twsk_put(tw);
 }
-EXPORT_SYMBOL(inet_twsk_deschedule);
+EXPORT_SYMBOL(inet_twsk_deschedule_put);
 
 void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
 {
@@ -311,9 +285,8 @@ restart:
 
                        rcu_read_unlock();
                        local_bh_disable();
-                       inet_twsk_deschedule(tw);
+                       inet_twsk_deschedule_put(tw);
                        local_bh_enable();
-                       inet_twsk_put(tw);
                        goto restart_rcu;
                }
                /* If the nulls value we got at the end of this lookup is
index a50dc6d408d11c339b38f2436216c8568c4149cf..4d3fffafbe2473f12ac27d9896033fe36b5dc518 100644 (file)
@@ -522,7 +522,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
        int len;
        int ihlen;
        int err;
-       int sum_truesize;
        u8 ecn;
 
        ipq_kill(qp);
@@ -590,32 +589,19 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
                add_frag_mem_limit(&qp->q, clone->truesize);
        }
 
+       skb_shinfo(head)->frag_list = head->next;
        skb_push(head, head->data - skb_network_header(head));
 
-       sum_truesize = head->truesize;
-       for (fp = head->next; fp;) {
-               bool headstolen;
-               int delta;
-               struct sk_buff *next = fp->next;
-
-               sum_truesize += fp->truesize;
+       for (fp=head->next; fp; fp = fp->next) {
+               head->data_len += fp->len;
+               head->len += fp->len;
                if (head->ip_summed != fp->ip_summed)
                        head->ip_summed = CHECKSUM_NONE;
                else if (head->ip_summed == CHECKSUM_COMPLETE)
                        head->csum = csum_add(head->csum, fp->csum);
-
-               if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
-                       kfree_skb_partial(fp, headstolen);
-               } else {
-                       if (!skb_shinfo(head)->frag_list)
-                               skb_shinfo(head)->frag_list = fp;
-                       head->data_len += fp->len;
-                       head->len += fp->len;
-                       head->truesize += fp->truesize;
-               }
-               fp = next;
+               head->truesize += fp->truesize;
        }
-       sub_frag_mem_limit(&qp->q, sum_truesize);
+       sub_frag_mem_limit(&qp->q, head->truesize);
 
        head->next = NULL;
        head->dev = dev;
index 05ff44b758dfee1e02996a3726ac63854a96ad16..e89094ab5ddb8ce2b6eb2d78a9a9046b42287bd5 100644 (file)
@@ -363,7 +363,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                                                    scoped);
                rcu_read_unlock();
 
-               if (!(isk->freebind || isk->transparent || has_addr ||
+               if (!(net->ipv6.sysctl.ip_nonlocal_bind ||
+                     isk->freebind || isk->transparent || has_addr ||
                      addr_type == IPV6_ADDR_ANY))
                        return -EADDRNOTAVAIL;
 
index d0362a2de3d3805260c878f5e3a9341e225cade9..04c83de4f79e31bd0960a374c0e95948f03c8f6d 100644 (file)
@@ -1546,7 +1546,6 @@ static int __mkroute_input(struct sk_buff *skb,
        struct rtable *rth;
        int err;
        struct in_device *out_dev;
-       unsigned int flags = 0;
        bool do_cache;
        u32 itag = 0;
 
@@ -1610,7 +1609,7 @@ static int __mkroute_input(struct sk_buff *skb,
        }
 
        rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
-       rth->rt_flags = flags;
+       rth->rt_flags = 0;
        rth->rt_type = res->type;
        rth->rt_is_input = 1;
        rth->rt_iif     = 0;
index c037644eafb7caadcb196b1c8b676bbc42abdb93..fd1405d37c149309882742fb12b07331e7282a95 100644 (file)
@@ -146,7 +146,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        if (!tcp_is_cwnd_limited(sk))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
        else {
                bictcp_update(ca, tp->snd_cwnd);
index 8c6fd3d5e40feeb3c0b422d0e697e1a674b4f576..167b6a3e1b9868c88e5553b114556ae312dfb99f 100644 (file)
@@ -264,7 +264,7 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        u32 prior_snd_cwnd;
        u32 incr;
 
-       if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect)
+       if (tcp_in_slow_start(tp) && hystart_detect)
                tcp_cdg_hystart_update(sk);
 
        if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
index 84be008c945c654b692211b943f83e909a622516..a2ed23c595cf185cadbebcdf19e801012a64250a 100644 (file)
@@ -365,10 +365,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
  */
 u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
 {
-       u32 cwnd = tp->snd_cwnd + acked;
+       u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
 
-       if (cwnd > tp->snd_ssthresh)
-               cwnd = tp->snd_ssthresh + 1;
        acked -= cwnd - tp->snd_cwnd;
        tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
 
@@ -413,7 +411,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
                return;
 
        /* In "safe" area, increase. */
-       if (tp->snd_cwnd <= tp->snd_ssthresh) {
+       if (tcp_in_slow_start(tp)) {
                acked = tcp_slow_start(tp, acked);
                if (!acked)
                        return;
index 06d3d665a9fd1bfda5688907a284de83697273f6..28011fb1f4a2104a34f81fc0c9fb4a4382bdadac 100644 (file)
@@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        if (!tcp_is_cwnd_limited(sk))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh) {
+       if (tcp_in_slow_start(tp)) {
                if (hystart && after(ack, ca->end_seq))
                        bictcp_hystart_reset(sk);
                acked = tcp_slow_start(tp, acked);
@@ -439,7 +439,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
                ca->delay_min = delay;
 
        /* hystart triggers when cwnd is larger than some threshold */
-       if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
+       if (hystart && tcp_in_slow_start(tp) &&
            tp->snd_cwnd >= hystart_low_window)
                hystart_update(sk, delay);
 }
index 882c08aae2f58d02bb78212a4eba4d25d7e9c123..db7842495a641829a8725cb436ed2fb3aa5d53e4 100644 (file)
@@ -116,7 +116,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        if (!tcp_is_cwnd_limited(sk))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
        else {
                /* Update AIMD parameters.
index 58469fff6c18fd444c95366caa04ab60965d654a..82f0d9ed60f50f27854fdb62a95281beed9df819 100644 (file)
@@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        if (!tcp_is_cwnd_limited(sk))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
        else {
                /* In dangerous area, increase slowly.
index f963b274f2b0436755ebe8bb5586b1ec9682c336..083831e359df92ca9ba0fe7dd5a7a76fe41a94b0 100644 (file)
@@ -112,7 +112,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 
        rho_fractions = ca->rho_3ls - (ca->rho << 3);
 
-       if (tp->snd_cwnd < tp->snd_ssthresh) {
+       if (tcp_in_slow_start(tp)) {
                /*
                 * slow start
                 *      INC = 2^RHO - 1
index f71002e4db0ba7fe8dfe35bb2196bbaae751ed59..2ab9bbb6faffb799560df98b093d4cbc1207d816 100644 (file)
@@ -268,7 +268,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
                return;
 
        /* In slow start */
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
 
        else {
index 684f095d196e20333adb235fc96a8fb8f0dd691c..1578fc2a6f39b276ef7e2538d4ff4bc6d8054c22 100644 (file)
@@ -109,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 #define FLAG_SYN_ACKED         0x10 /* This ACK acknowledged SYN.              */
 #define FLAG_DATA_SACKED       0x20 /* New SACK.                               */
 #define FLAG_ECE               0x40 /* ECE in this ACK                         */
+#define FLAG_LOST_RETRANS      0x80 /* This ACK marks some retransmission lost */
 #define FLAG_SLOWPATH          0x100 /* Do not skip RFC checks for window update.*/
 #define FLAG_ORIG_SACK_ACKED   0x200 /* Never retransmitted data are (s)acked  */
 #define FLAG_SND_UNA_ADVANCED  0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -196,11 +197,13 @@ static void tcp_enter_quickack_mode(struct sock *sk)
  * and the session is not interactive.
  */
 
-static inline bool tcp_in_quickack_mode(const struct sock *sk)
+static bool tcp_in_quickack_mode(struct sock *sk)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
+       const struct dst_entry *dst = __sk_dst_get(sk);
 
-       return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
+       return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
+               (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
 }
 
 static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -1037,7 +1040,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
  * highest SACK block). Also calculate the lowest snd_nxt among the remaining
  * retransmitted skbs to avoid some costly processing per ACKs.
  */
-static void tcp_mark_lost_retrans(struct sock *sk)
+static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
 {
        const struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_sock *tp = tcp_sk(sk);
@@ -1078,7 +1081,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
                if (after(received_upto, ack_seq)) {
                        TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                        tp->retrans_out -= tcp_skb_pcount(skb);
-
+                       *flag |= FLAG_LOST_RETRANS;
                        tcp_skb_mark_lost_uncond_verify(tp, skb);
                        NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
                } else {
@@ -1818,7 +1821,7 @@ advance_sp:
            ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
                tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
 
-       tcp_mark_lost_retrans(sk);
+       tcp_mark_lost_retrans(sk, &state->flag);
        tcp_verify_left_out(tp);
 out:
 
@@ -2475,15 +2478,14 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
        return false;
 }
 
-/* The cwnd reduction in CWR and Recovery use the PRR algorithm
- * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
  * It computes the number of packets to send (sndcnt) based on packets newly
  * delivered:
  *   1) If the packets in flight is larger than ssthresh, PRR spreads the
  *     cwnd reductions across a full RTT.
- *   2) If packets in flight is lower than ssthresh (such as due to excess
- *     losses and/or application stalls), do not perform any further cwnd
- *     reductions, but instead slow start up to ssthresh.
+ *   2) Otherwise PRR uses packet conservation to send as much as delivered.
+ *      But when the retransmits are acked without further losses, PRR
+ *      slow starts cwnd up to ssthresh to speed up the recovery.
  */
 static void tcp_init_cwnd_reduction(struct sock *sk)
 {
@@ -2500,7 +2502,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
 }
 
 static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
-                              int fast_rexmit)
+                              int fast_rexmit, int flag)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        int sndcnt = 0;
@@ -2509,16 +2511,18 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
                                 (tp->packets_out - tp->sacked_out);
 
        tp->prr_delivered += newly_acked_sacked;
-       if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+       if (delta < 0) {
                u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
                               tp->prior_cwnd - 1;
                sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
-       } else {
+       } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
+                  !(flag & FLAG_LOST_RETRANS)) {
                sndcnt = min_t(int, delta,
                               max_t(int, tp->prr_delivered - tp->prr_out,
                                     newly_acked_sacked) + 1);
+       } else {
+               sndcnt = min(delta, newly_acked_sacked);
        }
-
        sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
        tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
 }
@@ -2579,7 +2583,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
        if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
                tcp_try_keep_open(sk);
        } else {
-               tcp_cwnd_reduction(sk, prior_unsacked, 0);
+               tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
        }
 }
 
@@ -2676,7 +2680,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
        tp->prior_ssthresh = 0;
        tcp_init_undo(tp);
 
-       if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+       if (!tcp_in_cwnd_reduction(sk)) {
                if (!ece_ack)
                        tp->prior_ssthresh = tcp_current_ssthresh(sk);
                tcp_init_cwnd_reduction(sk);
@@ -2736,7 +2740,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
 
 /* Undo during fast recovery after partial ACK. */
 static bool tcp_try_undo_partial(struct sock *sk, const int acked,
-                                const int prior_unsacked)
+                                const int prior_unsacked, int flag)
 {
        struct tcp_sock *tp = tcp_sk(sk);
 
@@ -2752,7 +2756,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
                 * mark more packets lost or retransmit more.
                 */
                if (tp->retrans_out) {
-                       tcp_cwnd_reduction(sk, prior_unsacked, 0);
+                       tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
                        return true;
                }
 
@@ -2839,7 +2843,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                        if (tcp_is_reno(tp) && is_dupack)
                                tcp_add_reno_sack(sk);
                } else {
-                       if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+                       if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
                                return;
                        /* Partial ACK arrived. Force fast retransmit. */
                        do_lost = tcp_is_reno(tp) ||
@@ -2852,9 +2856,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
                break;
        case TCP_CA_Loss:
                tcp_process_loss(sk, flag, is_dupack);
-               if (icsk->icsk_ca_state != TCP_CA_Open)
+               if (icsk->icsk_ca_state != TCP_CA_Open &&
+                   !(flag & FLAG_LOST_RETRANS))
                        return;
-               /* Fall through to processing in Open state. */
+               /* Change state if cwnd is undone or retransmits are lost */
        default:
                if (tcp_is_reno(tp)) {
                        if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2889,7 +2894,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
 
        if (do_lost)
                tcp_update_scoreboard(sk, fast_rexmit);
-       tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
+       tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
        tcp_xmit_retransmit_queue(sk);
 }
 
@@ -3563,10 +3568,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
                                    &sack_state);
        acked -= tp->packets_out;
 
-       /* Advance cwnd if state allows */
-       if (tcp_may_raise_cwnd(sk, flag))
-               tcp_cong_avoid(sk, ack, acked);
-
        if (tcp_ack_is_dubious(sk, flag)) {
                is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
                tcp_fastretrans_alert(sk, acked, prior_unsacked,
@@ -3575,6 +3576,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
        if (tp->tlp_high_seq)
                tcp_process_tlp_ack(sk, ack, flag);
 
+       /* Advance cwnd if state allows */
+       if (tcp_may_raise_cwnd(sk, flag))
+               tcp_cong_avoid(sk, ack, acked);
+
        if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
                struct dst_entry *dst = __sk_dst_get(sk);
                if (dst)
@@ -3948,7 +3953,6 @@ void tcp_reset(struct sock *sk)
 static void tcp_fin(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       const struct dst_entry *dst;
 
        inet_csk_schedule_ack(sk);
 
@@ -3960,9 +3964,7 @@ static void tcp_fin(struct sock *sk)
        case TCP_ESTABLISHED:
                /* Move to CLOSE_WAIT */
                tcp_set_state(sk, TCP_CLOSE_WAIT);
-               dst = __sk_dst_get(sk);
-               if (!dst || !dst_metric(dst, RTAX_QUICKACK))
-                       inet_csk(sk)->icsk_ack.pingpong = 1;
+               inet_csk(sk)->icsk_ack.pingpong = 1;
                break;
 
        case TCP_CLOSE_WAIT:
index d7d4c2b79cf2f516f9e3f62c6fe4415e9bc137a0..486ba96ae91a7f1553bb6d6025e14fc0da2ba3a3 100644 (file)
@@ -1683,8 +1683,7 @@ do_time_wait:
                                                        iph->daddr, th->dest,
                                                        inet_iif(skb));
                if (sk2) {
-                       inet_twsk_deschedule(inet_twsk(sk));
-                       inet_twsk_put(inet_twsk(sk));
+                       inet_twsk_deschedule_put(inet_twsk(sk));
                        sk = sk2;
                        goto process;
                }
index a51d63a43e33af5fc751e4f0f3369b9394776975..b3d64f61d922e1ec10aa31b4e19ea0fb6c6876be 100644 (file)
@@ -461,7 +461,7 @@ void tcp_update_metrics(struct sock *sk)
                                tcp_metric_set(tm, TCP_METRIC_CWND,
                                               tp->snd_cwnd);
                }
-       } else if (tp->snd_cwnd > tp->snd_ssthresh &&
+       } else if (!tcp_in_slow_start(tp) &&
                   icsk->icsk_ca_state == TCP_CA_Open) {
                /* Cong. avoidance phase, cwnd is reliable. */
                if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
index 4bc00cb79e603553076adf750712377586f4b2fb..6d8795b066aca708df47de3c9211f36bee5eb1d4 100644 (file)
@@ -147,8 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
                if (!th->fin ||
                    TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
 kill_with_rst:
-                       inet_twsk_deschedule(tw);
-                       inet_twsk_put(tw);
+                       inet_twsk_deschedule_put(tw);
                        return TCP_TW_RST;
                }
 
@@ -198,8 +197,7 @@ kill_with_rst:
                         */
                        if (sysctl_tcp_rfc1337 == 0) {
 kill:
-                               inet_twsk_deschedule(tw);
-                               inet_twsk_put(tw);
+                               inet_twsk_deschedule_put(tw);
                                return TCP_TW_SUCCESS;
                        }
                }
index b1c218df2c855bc56594ffdd86d75ef5e146731a..71057849593ac44785d14c72c6abeec84612f9b1 100644 (file)
@@ -163,7 +163,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
        const u32 now = tcp_time_stamp;
-       const struct dst_entry *dst = __sk_dst_get(sk);
 
        if (sysctl_tcp_slow_start_after_idle &&
            (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
@@ -174,9 +173,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
        /* If it is a reply for ato after last received
         * packet, enter pingpong mode.
         */
-       if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato &&
-           (!dst || !dst_metric(dst, RTAX_QUICKACK)))
-                       icsk->icsk_ack.pingpong = 1;
+       if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+               icsk->icsk_ack.pingpong = 1;
 }
 
 /* Account for an ACK we sent. */
index 333bcb2415ffca51e06f3042ae3d94b8e21c0725..bf5ea9e9bbc1ed3c07c03f9db69b9848cf83ec8e 100644 (file)
@@ -22,7 +22,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
        if (!tcp_is_cwnd_limited(sk))
                return;
 
-       if (tp->snd_cwnd <= tp->snd_ssthresh)
+       if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
        else
                tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
index 5b752f58a90063e7468b11f2853c7c006b679e60..7149ebc820c7d87afef856ce641ac63678abafa3 100644 (file)
@@ -649,4 +649,3 @@ void tcp_init_xmit_timers(struct sock *sk)
        inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
                                  &tcp_keepalive_timer);
 }
-EXPORT_SYMBOL(tcp_init_xmit_timers);
index a6cea1d5e20d47f06eab95f3344a3e3b7c44da89..13951c4087d407b72cb5bc2ee75822203244e3f3 100644 (file)
@@ -225,7 +225,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
                         */
                        diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
 
-                       if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
+                       if (diff > gamma && tcp_in_slow_start(tp)) {
                                /* Going too fast. Time to slow down
                                 * and switch to congestion avoidance.
                                 */
@@ -240,7 +240,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
                                tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
                                tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
 
-                       } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
+                       } else if (tcp_in_slow_start(tp)) {
                                /* Slow start.  */
                                tcp_slow_start(tp, acked);
                        } else {
@@ -281,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
                vegas->minRTT = 0x7fffffff;
        }
        /* Use normal slow start */
-       else if (tp->snd_cwnd <= tp->snd_ssthresh)
+       else if (tcp_in_slow_start(tp))
                tcp_slow_start(tp, acked);
 }
 
index 112151eeee45bff0c37ac92d78d165ba92bd4d0a..0d094b995cd96f8c5150daf586cdde0f495843f5 100644 (file)
@@ -150,7 +150,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 
                veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
 
-               if (tp->snd_cwnd <= tp->snd_ssthresh) {
+               if (tcp_in_slow_start(tp)) {
                        /* Slow start.  */
                        tcp_slow_start(tp, acked);
                } else {
index 438a73aa777cf560f38a87801b03b8ce20a315b1..643f61339e7b4fc9d4dcba75c4bb772c99d39292 100644 (file)
@@ -5,16 +5,15 @@
 #   IPv6 as module will cause a CRASH if you try to unload it
 menuconfig IPV6
        tristate "The IPv6 protocol"
-       default m
+       default y
        ---help---
-         This is complemental support for the IP version 6.
-         You will still be able to do traditional IPv4 networking as well.
+         Support for IP version 6 (IPv6).
 
          For general information about IPv6, see
          <https://en.wikipedia.org/wiki/IPv6>.
-         For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
-         For specific information about IPv6 under Linux, read the HOWTO at
-         <http://www.bieringer.de/linux/IPv6/>.
+         For specific information about IPv6 under Linux, see
+         Documentation/networking/ipv6.txt and read the HOWTO at
+         <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
 
          To compile this protocol support as a module, choose M here: the 
          module will be called ipv6.
index 21c2c818df3b8379226555268ef526c08553d00d..4ab74d56f65a13f53e93a494b21987a043df53a9 100644 (file)
@@ -1358,15 +1358,94 @@ out:
        return ret;
 }
 
+static void __ipv6_dev_get_saddr(struct net *net,
+                                struct ipv6_saddr_dst *dst,
+                                unsigned int prefs,
+                                const struct in6_addr *saddr,
+                                struct inet6_dev *idev,
+                                struct ipv6_saddr_score *scores)
+{
+       struct ipv6_saddr_score *score = &scores[0], *hiscore = &scores[1];
+
+       read_lock_bh(&idev->lock);
+       list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+               int i;
+
+               /*
+                * - Tentative Address (RFC2462 section 5.4)
+                *  - A tentative address is not considered
+                *    "assigned to an interface" in the traditional
+                *    sense, unless it is also flagged as optimistic.
+                * - Candidate Source Address (section 4)
+                *  - In any case, anycast addresses, multicast
+                *    addresses, and the unspecified address MUST
+                *    NOT be included in a candidate set.
+                */
+               if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+                   (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+                       continue;
+
+               score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+               if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+                            score->addr_type & IPV6_ADDR_MULTICAST)) {
+                       net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+                                           idev->dev->name);
+                       continue;
+               }
+
+               score->rule = -1;
+               bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+               for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
+                       int minihiscore, miniscore;
+
+                       minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i);
+                       miniscore = ipv6_get_saddr_eval(net, score, dst, i);
+
+                       if (minihiscore > miniscore) {
+                               if (i == IPV6_SADDR_RULE_SCOPE &&
+                                   score->scopedist > 0) {
+                                       /*
+                                        * special case:
+                                        * each remaining entry
+                                        * has too small (not enough)
+                                        * scope, because ifa entries
+                                        * are sorted by their scope
+                                        * values.
+                                        */
+                                       goto out;
+                               }
+                               break;
+                       } else if (minihiscore < miniscore) {
+                               if (hiscore->ifa)
+                                       in6_ifa_put(hiscore->ifa);
+
+                               in6_ifa_hold(score->ifa);
+
+                               swap(hiscore, score);
+
+                               /* restore our iterator */
+                               score->ifa = hiscore->ifa;
+
+                               break;
+                       }
+               }
+       }
+out:
+       read_unlock_bh(&idev->lock);
+}
+
 int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
                       const struct in6_addr *daddr, unsigned int prefs,
                       struct in6_addr *saddr)
 {
-       struct ipv6_saddr_score scores[2],
-                               *score = &scores[0], *hiscore = &scores[1];
+       struct ipv6_saddr_score scores[2], *hiscore = &scores[1];
        struct ipv6_saddr_dst dst;
+       struct inet6_dev *idev;
        struct net_device *dev;
        int dst_type;
+       bool use_oif_addr = false;
 
        dst_type = __ipv6_addr_type(daddr);
        dst.addr = daddr;
@@ -1380,97 +1459,35 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
 
        rcu_read_lock();
 
-       for_each_netdev_rcu(net, dev) {
-               struct inet6_dev *idev;
-
-               /* Candidate Source Address (section 4)
-                *  - multicast and link-local destination address,
-                *    the set of candidate source address MUST only
-                *    include addresses assigned to interfaces
-                *    belonging to the same link as the outgoing
-                *    interface.
-                * (- For site-local destination addresses, the
-                *    set of candidate source addresses MUST only
-                *    include addresses assigned to interfaces
-                *    belonging to the same site as the outgoing
-                *    interface.)
-                */
-               if (((dst_type & IPV6_ADDR_MULTICAST) ||
-                    dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
-                   dst.ifindex && dev->ifindex != dst.ifindex)
-                       continue;
-
-               idev = __in6_dev_get(dev);
-               if (!idev)
-                       continue;
-
-               read_lock_bh(&idev->lock);
-               list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
-                       int i;
-
-                       /*
-                        * - Tentative Address (RFC2462 section 5.4)
-                        *  - A tentative address is not considered
-                        *    "assigned to an interface" in the traditional
-                        *    sense, unless it is also flagged as optimistic.
-                        * - Candidate Source Address (section 4)
-                        *  - In any case, anycast addresses, multicast
-                        *    addresses, and the unspecified address MUST
-                        *    NOT be included in a candidate set.
-                        */
-                       if ((score->ifa->flags & IFA_F_TENTATIVE) &&
-                           (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
-                               continue;
-
-                       score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+       /* Candidate Source Address (section 4)
+        *  - multicast and link-local destination address,
+        *    the set of candidate source address MUST only
+        *    include addresses assigned to interfaces
+        *    belonging to the same link as the outgoing
+        *    interface.
+        * (- For site-local destination addresses, the
+        *    set of candidate source addresses MUST only
+        *    include addresses assigned to interfaces
+        *    belonging to the same site as the outgoing
+        *    interface.)
+        */
+       if (dst_dev) {
+               if ((dst_type & IPV6_ADDR_MULTICAST) ||
+                   dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) {
+                       idev = __in6_dev_get(dst_dev);
+                       use_oif_addr = true;
+               }
+       }
 
-                       if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
-                                    score->addr_type & IPV6_ADDR_MULTICAST)) {
-                               net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
-                                                   dev->name);
+       if (use_oif_addr) {
+               __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores);
+       } else {
+               for_each_netdev_rcu(net, dev) {
+                       idev = __in6_dev_get(dev);
+                       if (!idev)
                                continue;
-                       }
-
-                       score->rule = -1;
-                       bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
-
-                       for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
-                               int minihiscore, miniscore;
-
-                               minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
-                               miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
-
-                               if (minihiscore > miniscore) {
-                                       if (i == IPV6_SADDR_RULE_SCOPE &&
-                                           score->scopedist > 0) {
-                                               /*
-                                                * special case:
-                                                * each remaining entry
-                                                * has too small (not enough)
-                                                * scope, because ifa entries
-                                                * are sorted by their scope
-                                                * values.
-                                                */
-                                               goto try_nextdev;
-                                       }
-                                       break;
-                               } else if (minihiscore < miniscore) {
-                                       if (hiscore->ifa)
-                                               in6_ifa_put(hiscore->ifa);
-
-                                       in6_ifa_hold(score->ifa);
-
-                                       swap(hiscore, score);
-
-                                       /* restore our iterator */
-                                       score->ifa = hiscore->ifa;
-
-                                       break;
-                               }
-                       }
+                       __ipv6_dev_get_saddr(net, &dst, prefs, saddr, idev, scores);
                }
-try_nextdev:
-               read_unlock_bh(&idev->lock);
        }
        rcu_read_unlock();
 
index 7de52b65173fa6a1b344b13e67106ad39591ed06..7bc92ea4ae8fd17701f65aaa1a3063a3912ba259 100644 (file)
@@ -342,7 +342,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                         */
                        v4addr = LOOPBACK4_IPV6;
                        if (!(addr_type & IPV6_ADDR_MULTICAST)) {
-                               if (!(inet->freebind || inet->transparent) &&
+                               if (!net->ipv6.sysctl.ip_nonlocal_bind &&
+                                   !(inet->freebind || inet->transparent) &&
                                    !ipv6_chk_addr(net, &addr->sin6_addr,
                                                   dev, 0)) {
                                        err = -EADDRNOTAVAIL;
@@ -679,8 +680,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
        const struct ipv6_pinfo *np = inet6_sk(sk);
 
        if (np->rxopt.all) {
-               if ((opt->hop && (np->rxopt.bits.hopopts ||
-                                 np->rxopt.bits.ohopopts)) ||
+               if (((opt->flags & IP6SKB_HOPBYHOP) &&
+                    (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
                    (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
                     np->rxopt.bits.rxflow) ||
                    (opt->srcrt && (np->rxopt.bits.srcrt ||
index 62d908e64eeb53740d53ddfd57e26867c4e7e4d3..50115522e80f1ed949195fe8916126e7d3d27443 100644 (file)
@@ -558,8 +558,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
        }
 
        /* HbH is allowed only once */
-       if (np->rxopt.bits.hopopts && opt->hop) {
-               u8 *ptr = nh + opt->hop;
+       if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+               u8 *ptr = nh + sizeof(struct ipv6hdr);
                put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
        }
 
@@ -620,8 +620,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
                int hlim = ipv6_hdr(skb)->hop_limit;
                put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
        }
-       if (np->rxopt.bits.ohopopts && opt->hop) {
-               u8 *ptr = nh + opt->hop;
+       if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+               u8 *ptr = nh + sizeof(struct ipv6hdr);
                put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
        }
        if (np->rxopt.bits.odstopts && opt->dst0) {
index a7bbbe45570b287eb05b9f13d0a73a830b767dd2..ce203b0402bea3b16deb34b4835cd2e89e94f899 100644 (file)
@@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
                return -1;
        }
 
-       opt->hop = sizeof(struct ipv6hdr);
+       opt->flags |= IP6SKB_HOPBYHOP;
        if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
                skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
                opt = IP6CB(skb);
index b4fd96de97e61627003eff220e10bdd05a899e28..6ac8dad0138a6b41395f306bffc2d9b47d8d91bc 100644 (file)
@@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
        struct sock *sk2;
        const struct hlist_nulls_node *node;
        struct inet_timewait_sock *tw = NULL;
-       int twrefcnt = 0;
 
        spin_lock(lock);
 
@@ -234,21 +233,17 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
        WARN_ON(!sk_unhashed(sk));
        __sk_nulls_add_node_rcu(sk, &head->chain);
        if (tw) {
-               twrefcnt = inet_twsk_unhash(tw);
+               sk_nulls_del_node_init_rcu((struct sock *)tw);
                NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
        }
        spin_unlock(lock);
-       if (twrefcnt)
-               inet_twsk_put(tw);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 
        if (twp) {
                *twp = tw;
        } else if (tw) {
                /* Silly. Should hash-dance instead... */
-               inet_twsk_deschedule(tw);
-
-               inet_twsk_put(tw);
+               inet_twsk_deschedule_put(tw);
        }
        return 0;
 
index ca4700cb26c4feec258c8e5034389522125b113e..fdbada1569a37348b47b60769f7d679741b21d0a 100644 (file)
@@ -295,7 +295,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                 * unspecified and mapped address have a v4 equivalent.
                 */
                v4addr = LOOPBACK4_IPV6;
-               if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+               if (!(addr_type & IPV6_ADDR_MULTICAST) &&
+                   !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) {
                        err = -EADDRNOTAVAIL;
                        if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
                                           dev, 0)) {
index 4e705add4f187c69b1b11202d47808308e87cf80..db48aebd9c470c2d3a40c7a4250b67887250985e 100644 (file)
@@ -75,6 +75,13 @@ static struct ctl_table ipv6_table_template[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec
        },
+       {
+               .procname       = "ip_nonlocal_bind",
+               .data           = &init_net.ipv6.sysctl.ip_nonlocal_bind,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec
+       },
        { }
 };
 
@@ -117,6 +124,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
        ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
        ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
        ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
+       ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
 
        ipv6_route_table = ipv6_route_sysctl_init(net);
        if (!ipv6_route_table)
index 6748c4277affad71cd721e3a985af10c31c047ad..d540846a1a79e1f263d593343b6d240ad780527a 100644 (file)
@@ -1481,8 +1481,7 @@ do_time_wait:
                                            ntohs(th->dest), tcp_v6_iif(skb));
                if (sk2) {
                        struct inet_timewait_sock *tw = inet_twsk(sk);
-                       inet_twsk_deschedule(tw);
-                       inet_twsk_put(tw);
+                       inet_twsk_deschedule_put(tw);
                        sk = sk2;
                        tcp_v6_restore_cb(skb);
                        goto process;
index cca96cec1b689fcd104e273a64db6eda44171beb..d0c96c5ae29aa84057e93a5d9796014dca4c52e8 100644 (file)
@@ -272,8 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
                                            hp->source, lport ? lport : hp->dest,
                                            skb->dev, NFT_LOOKUP_LISTENER);
                if (sk2) {
-                       inet_twsk_deschedule(inet_twsk(sk));
-                       inet_twsk_put(inet_twsk(sk));
+                       inet_twsk_deschedule_put(inet_twsk(sk));
                        sk = sk2;
                }
        }
@@ -437,8 +436,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
                                            tgi->lport ? tgi->lport : hp->dest,
                                            skb->dev, NFT_LOOKUP_LISTENER);
                if (sk2) {
-                       inet_twsk_deschedule(inet_twsk(sk));
-                       inet_twsk_put(inet_twsk(sk));
+                       inet_twsk_deschedule_put(inet_twsk(sk));
                        sk = sk2;
                }
        }
index af427a3dbcba238103169ab2a58005feda5fa2f1..074a32f466f8f9168186828cdad6c6484fae1265 100644 (file)
 #include <net/act_api.h>
 #include <net/netlink.h>
 
+static void free_tcf(struct rcu_head *head)
+{
+       struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu);
+
+       free_percpu(p->cpu_bstats);
+       free_percpu(p->cpu_qstats);
+       kfree(p);
+}
+
 void tcf_hash_destroy(struct tc_action *a)
 {
        struct tcf_common *p = a->priv;
@@ -41,7 +50,7 @@ void tcf_hash_destroy(struct tc_action *a)
         * gen_estimator est_timer() might access p->tcfc_lock
         * or bstats, wait a RCU grace period before freeing p
         */
-       kfree_rcu(p, tcfc_rcu);
+       call_rcu(&p->tcfc_rcu, free_tcf);
 }
 EXPORT_SYMBOL(tcf_hash_destroy);
 
@@ -230,15 +239,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
        if (est)
                gen_kill_estimator(&pc->tcfc_bstats,
                                   &pc->tcfc_rate_est);
-       kfree_rcu(pc, tcfc_rcu);
+       call_rcu(&pc->tcfc_rcu, free_tcf);
 }
 EXPORT_SYMBOL(tcf_hash_cleanup);
 
 int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
-                   int size, int bind)
+                   int size, int bind, bool cpustats)
 {
        struct tcf_hashinfo *hinfo = a->ops->hinfo;
        struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+       int err = -ENOMEM;
 
        if (unlikely(!p))
                return -ENOMEM;
@@ -246,18 +256,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
        if (bind)
                p->tcfc_bindcnt = 1;
 
+       if (cpustats) {
+               p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+               if (!p->cpu_bstats) {
+err1:
+                       kfree(p);
+                       return err;
+               }
+               p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+               if (!p->cpu_qstats) {
+err2:
+                       free_percpu(p->cpu_bstats);
+                       goto err1;
+               }
+       }
        spin_lock_init(&p->tcfc_lock);
        INIT_HLIST_NODE(&p->tcfc_head);
        p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
        p->tcfc_tm.install = jiffies;
        p->tcfc_tm.lastuse = jiffies;
        if (est) {
-               int err = gen_new_estimator(&p->tcfc_bstats, NULL,
-                                           &p->tcfc_rate_est,
-                                           &p->tcfc_lock, est);
+               err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
+                                       &p->tcfc_rate_est,
+                                       &p->tcfc_lock, est);
                if (err) {
-                       kfree(p);
-                       return err;
+                       free_percpu(p->cpu_qstats);
+                       goto err2;
                }
        }
 
@@ -615,10 +639,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
        if (err < 0)
                goto errout;
 
-       if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
+       if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
            gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
                                     &p->tcfc_rate_est) < 0 ||
-           gnet_stats_copy_queue(&d, NULL,
+           gnet_stats_copy_queue(&d, p->cpu_qstats,
                                  &p->tcfc_qstats,
                                  p->tcfc_qstats.qlen) < 0)
                goto errout;
index 1d56903fd4c79aa008c4c540aabd8b4c099e81a1..99aa271633e9011e7fb10042a97a07eea5b6375e 100644 (file)
@@ -281,7 +281,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 
        if (!tcf_hash_check(parm->index, act, bind)) {
                ret = tcf_hash_create(parm->index, est, act,
-                                     sizeof(*prog), bind);
+                                     sizeof(*prog), bind, false);
                if (ret < 0)
                        goto destroy_fp;
 
index 295d14bd6c678c31b56219371df83d4ebe3b0a2c..f2b540220ad02f1f8e3b2add9c7477a334081c3d 100644 (file)
@@ -108,7 +108,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
        parm = nla_data(tb[TCA_CONNMARK_PARMS]);
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*ci),
+                                     bind, false);
                if (ret)
                        return ret;
 
index 4cd5cf1aedf8b14bc8a8fb0529db868ee74433fd..b07c535ba8e7c6f8dcbc52f4eb69cf4a1ab3d0c2 100644 (file)
@@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
        parm = nla_data(tb[TCA_CSUM_PARMS]);
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+                                     bind, false);
                if (ret)
                        return ret;
                ret = ACT_P_CREATED;
index 7fffc2272701adb42d109318a80e8fef6fc3289f..5c1b051707363e19a779fcca2de6ae38ba0239fe 100644 (file)
 #ifdef CONFIG_GACT_PROB
 static int gact_net_rand(struct tcf_gact *gact)
 {
-       if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval)
+       smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+       if (prandom_u32() % gact->tcfg_pval)
                return gact->tcf_action;
        return gact->tcfg_paction;
 }
 
 static int gact_determ(struct tcf_gact *gact)
 {
-       if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
+       u32 pack = atomic_inc_return(&gact->packets);
+
+       smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+       if (pack % gact->tcfg_pval)
                return gact->tcf_action;
        return gact->tcfg_paction;
 }
@@ -85,7 +89,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 #endif
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+                                     bind, true);
                if (ret)
                        return ret;
                ret = ACT_P_CREATED;
@@ -99,16 +104,19 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 
        gact = to_gact(a);
 
-       spin_lock_bh(&gact->tcf_lock);
+       ASSERT_RTNL();
        gact->tcf_action = parm->action;
 #ifdef CONFIG_GACT_PROB
        if (p_parm) {
                gact->tcfg_paction = p_parm->paction;
-               gact->tcfg_pval    = p_parm->pval;
+               gact->tcfg_pval    = max_t(u16, 1, p_parm->pval);
+               /* Make sure tcfg_pval is written before tcfg_ptype
+                * coupled with smp_rmb() in gact_net_rand() & gact_determ()
+                */
+               smp_wmb();
                gact->tcfg_ptype   = p_parm->ptype;
        }
 #endif
-       spin_unlock_bh(&gact->tcf_lock);
        if (ret == ACT_P_CREATED)
                tcf_hash_insert(a);
        return ret;
@@ -118,23 +126,21 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
                    struct tcf_result *res)
 {
        struct tcf_gact *gact = a->priv;
-       int action = TC_ACT_SHOT;
+       int action = READ_ONCE(gact->tcf_action);
 
-       spin_lock(&gact->tcf_lock);
 #ifdef CONFIG_GACT_PROB
-       if (gact->tcfg_ptype)
-               action = gact_rand[gact->tcfg_ptype](gact);
-       else
-               action = gact->tcf_action;
-#else
-       action = gact->tcf_action;
+       {
+       u32 ptype = READ_ONCE(gact->tcfg_ptype);
+
+       if (ptype)
+               action = gact_rand[ptype](gact);
+       }
 #endif
-       gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
-       gact->tcf_bstats.packets++;
+       bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
        if (action == TC_ACT_SHOT)
-               gact->tcf_qstats.drops++;
-       gact->tcf_tm.lastuse = jiffies;
-       spin_unlock(&gact->tcf_lock);
+               qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+
+       tcf_lastuse_update(&gact->tcf_tm);
 
        return action;
 }
index cbc8dd7dd48a50e77fdafa7b8cf4041659995cbb..99c9cc1c7af9240f9df444ae158df4fa7f7f8c73 100644 (file)
@@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
                index = nla_get_u32(tb[TCA_IPT_INDEX]);
 
        if (!tcf_hash_check(index, a, bind) ) {
-               ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+               ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
                if (ret)
                        return ret;
                ret = ACT_P_CREATED;
index a42a3b257226178eb5af04054a17813c04368613..19cd8904efa0a46b9d659f36322a78e4cbb64c38 100644 (file)
@@ -35,9 +35,11 @@ static LIST_HEAD(mirred_list);
 static void tcf_mirred_release(struct tc_action *a, int bind)
 {
        struct tcf_mirred *m = to_mirred(a);
+       struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1);
+
        list_del(&m->tcfm_list);
-       if (m->tcfm_dev)
-               dev_put(m->tcfm_dev);
+       if (dev)
+               dev_put(dev);
 }
 
 static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -93,7 +95,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        if (!tcf_hash_check(parm->index, a, bind)) {
                if (dev == NULL)
                        return -EINVAL;
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*m),
+                                     bind, true);
                if (ret)
                        return ret;
                ret = ACT_P_CREATED;
@@ -105,18 +108,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
        }
        m = to_mirred(a);
 
-       spin_lock_bh(&m->tcf_lock);
+       ASSERT_RTNL();
        m->tcf_action = parm->action;
        m->tcfm_eaction = parm->eaction;
        if (dev != NULL) {
                m->tcfm_ifindex = parm->ifindex;
                if (ret != ACT_P_CREATED)
-                       dev_put(m->tcfm_dev);
+                       dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
                dev_hold(dev);
-               m->tcfm_dev = dev;
+               rcu_assign_pointer(m->tcfm_dev, dev);
                m->tcfm_ok_push = ok_push;
        }
-       spin_unlock_bh(&m->tcf_lock);
+
        if (ret == ACT_P_CREATED) {
                list_add(&m->tcfm_list, &mirred_list);
                tcf_hash_insert(a);
@@ -131,20 +134,22 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
        struct tcf_mirred *m = a->priv;
        struct net_device *dev;
        struct sk_buff *skb2;
+       int retval, err;
        u32 at;
-       int retval, err = 1;
 
-       spin_lock(&m->tcf_lock);
-       m->tcf_tm.lastuse = jiffies;
-       bstats_update(&m->tcf_bstats, skb);
+       tcf_lastuse_update(&m->tcf_tm);
+
+       bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
 
-       dev = m->tcfm_dev;
-       if (!dev) {
-               printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
+       rcu_read_lock();
+       retval = READ_ONCE(m->tcf_action);
+       dev = rcu_dereference(m->tcfm_dev);
+       if (unlikely(!dev)) {
+               pr_notice_once("tc mirred: target device is gone\n");
                goto out;
        }
 
-       if (!(dev->flags & IFF_UP)) {
+       if (unlikely(!(dev->flags & IFF_UP))) {
                net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
                                       dev->name);
                goto out;
@@ -152,7 +157,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 
        at = G_TC_AT(skb->tc_verd);
        skb2 = skb_clone(skb, GFP_ATOMIC);
-       if (skb2 == NULL)
+       if (!skb2)
                goto out;
 
        if (!(at & AT_EGRESS)) {
@@ -168,16 +173,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
        skb2->dev = dev;
        err = dev_queue_xmit(skb2);
 
-out:
        if (err) {
-               m->tcf_qstats.overlimits++;
+out:
+               qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
                if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
                        retval = TC_ACT_SHOT;
-               else
-                       retval = m->tcf_action;
-       } else
-               retval = m->tcf_action;
-       spin_unlock(&m->tcf_lock);
+       }
+       rcu_read_unlock();
 
        return retval;
 }
@@ -216,14 +218,16 @@ static int mirred_device_event(struct notifier_block *unused,
        struct net_device *dev = netdev_notifier_info_to_dev(ptr);
        struct tcf_mirred *m;
 
+       ASSERT_RTNL();
        if (event == NETDEV_UNREGISTER)
                list_for_each_entry(m, &mirred_list, tcfm_list) {
-                       spin_lock_bh(&m->tcf_lock);
-                       if (m->tcfm_dev == dev) {
+                       if (rcu_access_pointer(m->tcfm_dev) == dev) {
                                dev_put(dev);
-                               m->tcfm_dev = NULL;
+                               /* Note: no RCU grace period is necessary, as
+                                * net_device objects are already RCU protected.
+                                */
+                               RCU_INIT_POINTER(m->tcfm_dev, NULL);
                        }
-                       spin_unlock_bh(&m->tcf_lock);
                }
 
        return NOTIFY_DONE;
index 270a030d5fd099ee7b6f6d74d51b6015aa690647..5be0b3c1c5b0c9f17e3fbd4e1dc1c92c7a8e5aed 100644 (file)
@@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
        parm = nla_data(tb[TCA_NAT_PARMS]);
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+                                     bind, false);
                if (ret)
                        return ret;
                ret = ACT_P_CREATED;
index 17e6d6669c7fdf138915ac9549d3f06d9535d745..ce8676ad892f20b2ea921073b655ef41dcf2ccec 100644 (file)
@@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
        if (!tcf_hash_check(parm->index, a, bind)) {
                if (!parm->nkeys)
                        return -EINVAL;
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+                                     bind, false);
                if (ret)
                        return ret;
                p = to_pedit(a);
index 6a8d9488613a76d9cb2bd03e932ef95487ca0745..d6b708d6afdf37e7c1af4e47873755fc84b1167f 100644 (file)
@@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
        defdata = nla_data(tb[TCA_DEF_DATA]);
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+                                     bind, false);
                if (ret)
                        return ret;
 
index fcfeeaf838beb9e75f07f7cbda7fb2b73237a17f..6751b5f8c046a59912b78762855e51af8e6f29e7 100644 (file)
@@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
        parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+                                     bind, false);
                if (ret)
                        return ret;
 
index d735ecf0b1a78d3fac6ac80b95931cc6cf6caba0..796785e0bf96b0e65f598d3b2dad8256485d034a 100644 (file)
@@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
        action = parm->v_action;
 
        if (!tcf_hash_check(parm->index, a, bind)) {
-               ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind);
+               ret = tcf_hash_create(parm->index, est, a, sizeof(*v),
+                                     bind, false);
                if (ret)
                        return ret;
 
index bdf1c1607b808e9ac0d7ada164a25694c56b6965..c77c872fe8ee477c7cfabf5fd539824fc676b173 100644 (file)
@@ -60,4 +60,29 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag
 static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
        (void *) BPF_FUNC_l4_csum_replace;
 
+#if defined(__x86_64__) /* struct pt_regs accessors used by the kprobe samples, x86-64 flavour */
+
+#define PT_REGS_PARM1(x) ((x)->di) /* PARM1..PARM5: registers holding the probed function's arguments 1..5 */
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp) /* address the samples pass to bpf_probe_read() to fetch the caller's ip; same field as PT_REGS_SP */
+#define PT_REGS_FP(x) ((x)->bp) /* frame pointer; the samples read the caller's ip at FP + sizeof(long) */
+#define PT_REGS_RC(x) ((x)->ax) /* return value, as read by the kretprobe samples */
+#define PT_REGS_SP(x) ((x)->sp) /* stack pointer */
+
+#elif defined(__s390x__) /* same accessor names, mapped onto the s390x gprs[] array */
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2]) /* PARM1..PARM5 map to gprs[2]..gprs[6] */
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14]) /* NOTE(review): users hand this to bpf_probe_read() as an address to load from - confirm gprs[14] is meant to be dereferenced rather than used directly as the return ip */
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2]) /* return value; same register as PT_REGS_PARM1 */
+#define PT_REGS_SP(x) ((x)->gprs[15]) /* stack pointer */
+
+#endif /* __x86_64__ / __s390x__; no fallback, so other architectures get no PT_REGS_* definitions */
 #endif
index 31620463701a55edc0afc54d45a5cabaef890410..3f450a8fa1f3487b2cf2d9b261f63dc3d25f6599 100644 (file)
@@ -29,7 +29,7 @@ int bpf_prog1(struct pt_regs *ctx)
        int len;
 
        /* non-portable! works for the given kernel only */
-       skb = (struct sk_buff *) ctx->di;
+       skb = (struct sk_buff *) PT_REGS_PARM1(ctx);
 
        dev = _(skb->dev);
 
index dc50f4f2943f937d5c389e5384d39a0153d3f273..b32367cfbff4aff3020bb9c36c8faf6c981dd0f8 100644 (file)
@@ -27,10 +27,10 @@ int bpf_prog2(struct pt_regs *ctx)
        long init_val = 1;
        long *value;
 
-       /* x64 specific: read ip of kfree_skb caller.
+       /* x64/s390x specific: read ip of kfree_skb caller.
         * non-portable version of __builtin_return_address(0)
         */
-       bpf_probe_read(&loc, sizeof(loc), (void *)ctx->sp);
+       bpf_probe_read(&loc, sizeof(loc), (void *)PT_REGS_RET(ctx));
 
        value = bpf_map_lookup_elem(&my_map, &loc);
        if (value)
@@ -79,7 +79,7 @@ struct bpf_map_def SEC("maps") my_hist_map = {
 SEC("kprobe/sys_write")
 int bpf_prog3(struct pt_regs *ctx)
 {
-       long write_size = ctx->dx; /* arg3 */
+       long write_size = PT_REGS_PARM3(ctx);
        long init_val = 1;
        long *value;
        struct hist_key key = {};
index 255ff27923666a844e3cb2881e25661278c7c1d6..bf337fbb09472cbe32bfbaff2d4313b7cafb58c6 100644 (file)
@@ -23,7 +23,7 @@ struct bpf_map_def SEC("maps") my_map = {
 SEC("kprobe/blk_mq_start_request")
 int bpf_prog1(struct pt_regs *ctx)
 {
-       long rq = ctx->di;
+       long rq = PT_REGS_PARM1(ctx);
        u64 val = bpf_ktime_get_ns();
 
        bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY);
@@ -51,7 +51,7 @@ struct bpf_map_def SEC("maps") lat_map = {
 SEC("kprobe/blk_update_request")
 int bpf_prog2(struct pt_regs *ctx)
 {
-       long rq = ctx->di;
+       long rq = PT_REGS_PARM1(ctx);
        u64 *value, l, base;
        u32 index;
 
index 126b80512228aa6493c8332e75933ab5852acab1..ac4671420cf15949c4087b8c2847c69978646650 100644 (file)
@@ -27,7 +27,7 @@ struct bpf_map_def SEC("maps") my_map = {
 SEC("kprobe/kmem_cache_free")
 int bpf_prog1(struct pt_regs *ctx)
 {
-       long ptr = ctx->si;
+       long ptr = PT_REGS_PARM2(ctx);
 
        bpf_map_delete_elem(&my_map, &ptr);
        return 0;
@@ -36,11 +36,11 @@ int bpf_prog1(struct pt_regs *ctx)
 SEC("kretprobe/kmem_cache_alloc_node")
 int bpf_prog2(struct pt_regs *ctx)
 {
-       long ptr = ctx->ax;
+       long ptr = PT_REGS_RC(ctx);
        long ip = 0;
 
        /* get ip address of kmem_cache_alloc_node() caller */
-       bpf_probe_read(&ip, sizeof(ip), (void *)(ctx->bp + sizeof(ip)));
+       bpf_probe_read(&ip, sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip)));
 
        struct pair v = {
                .val = bpf_ktime_get_ns(),
index b71fe07a7a7a4820a77e7cb0d5a112febdbaa49c..b3f4295bf288536c1f9ae7500b542a77cf8aaec1 100644 (file)
@@ -24,7 +24,7 @@ int bpf_prog1(struct pt_regs *ctx)
 {
        struct seccomp_data sd = {};
 
-       bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+       bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
 
        /* dispatch into next BPF program depending on syscall number */
        bpf_tail_call(ctx, &progs, sd.nr);
@@ -42,7 +42,7 @@ PROG(__NR_write)(struct pt_regs *ctx)
 {
        struct seccomp_data sd = {};
 
-       bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+       bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
        if (sd.args[2] == 512) {
                char fmt[] = "write(fd=%d, buf=%p, size=%d)\n";
                bpf_trace_printk(fmt, sizeof(fmt),
@@ -55,7 +55,7 @@ PROG(__NR_read)(struct pt_regs *ctx)
 {
        struct seccomp_data sd = {};
 
-       bpf_probe_read(&sd, sizeof(sd), (void *)ctx->di);
+       bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx));
        if (sd.args[2] > 128 && sd.args[2] <= 1024) {
                char fmt[] = "read(fd=%d, buf=%p, size=%d)\n";
                bpf_trace_printk(fmt, sizeof(fmt),