net: let net.core.dev_weight always be non-zero
authorLiu Jian <liujian56@huawei.com>
Thu, 16 Jan 2025 14:30:53 +0000 (22:30 +0800)
committerJakub Kicinski <kuba@kernel.org>
Sat, 18 Jan 2025 03:00:11 +0000 (19:00 -0800)
The following problem was encountered during stability test:

(NULL net_device): NAPI poll function process_backlog+0x0/0x530 \
returned 1, exceeding its budget of 0.
------------[ cut here ]------------
list_add double add: new=ffff88905f746f48, prev=ffff88905f746f48, \
next=ffff88905f746e40.
WARNING: CPU: 18 PID: 5462 at lib/list_debug.c:35 \
__list_add_valid_or_report+0xf3/0x130
CPU: 18 UID: 0 PID: 5462 Comm: ping Kdump: loaded Not tainted 6.13.0-rc7+
RIP: 0010:__list_add_valid_or_report+0xf3/0x130
Call Trace:
? __warn+0xcd/0x250
? __list_add_valid_or_report+0xf3/0x130
enqueue_to_backlog+0x923/0x1070
netif_rx_internal+0x92/0x2b0
__netif_rx+0x15/0x170
loopback_xmit+0x2ef/0x450
dev_hard_start_xmit+0x103/0x490
__dev_queue_xmit+0xeac/0x1950
ip_finish_output2+0x6cc/0x1620
ip_output+0x161/0x270
ip_push_pending_frames+0x155/0x1a0
raw_sendmsg+0xe13/0x1550
__sys_sendto+0x3bf/0x4e0
__x64_sys_sendto+0xdc/0x1b0
do_syscall_64+0x5b/0x170
entry_SYSCALL_64_after_hwframe+0x76/0x7e

The reproduction command is as follows:
  sysctl -w net.core.dev_weight=0
  ping 127.0.0.1

This is because when the napi's weight is set to 0, process_backlog() may
return 0 and clear the NAPI_STATE_SCHED bit of napi->state, causing this
napi to be re-polled in net_rx_action() until __do_softirq() times out.
Since the NAPI_STATE_SCHED bit has been cleared, napi_schedule_rps() can
be retriggered in enqueue_to_backlog(), causing this issue.

Making the napi's weight always non-zero solves this problem.

Triggering this issue requires system-wide admin (setting is
not namespaced).

Fixes: e38766054509 ("[NET]: Fix sysctl net.core.dev_weight")
Fixes: 3d48b53fb2ae ("net: dev_weight: TX/RX orthogonality")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Link: https://patch.msgid.link/20250116143053.4146855-1-liujian56@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/core/sysctl_net_core.c

index cb8d32e5c14e67dd50321c43d4ff1fcc2694b31c..ad2741f1346af2b1674099347c5dbe9914e41ab5 100644 (file)
@@ -319,7 +319,7 @@ static int proc_do_dev_weight(const struct ctl_table *table, int write,
        int ret, weight;
 
        mutex_lock(&dev_weight_mutex);
-       ret = proc_dointvec(table, write, buffer, lenp, ppos);
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (!ret && write) {
                weight = READ_ONCE(weight_p);
                WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
@@ -412,6 +412,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_dev_weight,
+               .extra1         = SYSCTL_ONE,
        },
        {
                .procname       = "dev_weight_rx_bias",
@@ -419,6 +420,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_dev_weight,
+               .extra1         = SYSCTL_ONE,
        },
        {
                .procname       = "dev_weight_tx_bias",
@@ -426,6 +428,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_do_dev_weight,
+               .extra1         = SYSCTL_ONE,
        },
        {
                .procname       = "netdev_max_backlog",