openvswitch: Allocate struct ovs_pcpu_storage dynamically
author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Fri, 13 Jun 2025 12:36:29 +0000 (14:36 +0200)
committer: Paolo Abeni <pabeni@redhat.com>
Tue, 17 Jun 2025 12:47:46 +0000 (14:47 +0200)
PERCPU_MODULE_RESERVE defines the maximum size that can be used for the
per-CPU data of modules. This is 8KiB.

Commit 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into
one") restructured the per-CPU memory allocation for the module and
moved the separate alloc_percpu() invocations at module init time to a
static per-CPU variable which is allocated by the module loader.

The size of the per-CPU data section for openvswitch is 6488 bytes which
is ~80% of the available per-CPU memory. Together with a few other
modules it is easy to exhaust the available 8KiB of memory.

Allocate ovs_pcpu_storage dynamically at module init time.

Reported-by: Gal Pressman <gal@nvidia.com>
Closes: https://lore.kernel.org/all/c401e017-f8db-4f57-a1cd-89beb979a277@nvidia.com
Fixes: 035fcdc4d240c ("openvswitch: Merge three per-CPU structures into one")
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Aaron Conole <aconole@redhat.com>
Link: https://patch.msgid.link/20250613123629.-XSoQTCu@linutronix.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
net/openvswitch/actions.c
net/openvswitch/datapath.c
net/openvswitch/datapath.h

index e7269a3eec79edc61acf1c094d232e4325b03b57..3add108340bfd9dec30032daeb3f928e6d255cc5 100644 (file)
 #include "flow_netlink.h"
 #include "openvswitch_trace.h"
 
-DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = {
-       .bh_lock = INIT_LOCAL_LOCK(bh_lock),
-};
+struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
 
 /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys'
  * space. Return NULL if out of key spaces.
  */
 static struct sw_flow_key *clone_key(const struct sw_flow_key *key_)
 {
-       struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+       struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
        struct action_flow_keys *keys = &ovs_pcpu->flow_keys;
        int level = ovs_pcpu->exec_level;
        struct sw_flow_key *key = NULL;
@@ -94,7 +92,7 @@ static struct deferred_action *add_deferred_actions(struct sk_buff *skb,
                                    const struct nlattr *actions,
                                    const int actions_len)
 {
-       struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+       struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
        struct deferred_action *da;
 
        da = action_fifo_put(fifo);
@@ -755,7 +753,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
 static int ovs_vport_output(struct net *net, struct sock *sk,
                            struct sk_buff *skb)
 {
-       struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+       struct ovs_frag_data *data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
        struct vport *vport = data->vport;
 
        if (skb_cow_head(skb, data->l2_len) < 0) {
@@ -807,7 +805,7 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb,
        unsigned int hlen = skb_network_offset(skb);
        struct ovs_frag_data *data;
 
-       data = this_cpu_ptr(&ovs_pcpu_storage.frag_data);
+       data = this_cpu_ptr(&ovs_pcpu_storage->frag_data);
        data->dst = skb->_skb_refdst;
        data->vport = vport;
        data->cb = *OVS_CB(skb);
@@ -1566,16 +1564,15 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
        clone = clone_flow_key ? clone_key(key) : key;
        if (clone) {
                int err = 0;
-
                if (actions) { /* Sample action */
                        if (clone_flow_key)
-                               __this_cpu_inc(ovs_pcpu_storage.exec_level);
+                               __this_cpu_inc(ovs_pcpu_storage->exec_level);
 
                        err = do_execute_actions(dp, skb, clone,
                                                 actions, len);
 
                        if (clone_flow_key)
-                               __this_cpu_dec(ovs_pcpu_storage.exec_level);
+                               __this_cpu_dec(ovs_pcpu_storage->exec_level);
                } else { /* Recirc action */
                        clone->recirc_id = recirc_id;
                        ovs_dp_process_packet(skb, clone);
@@ -1611,7 +1608,7 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 
 static void process_deferred_actions(struct datapath *dp)
 {
-       struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage.action_fifos);
+       struct action_fifo *fifo = this_cpu_ptr(&ovs_pcpu_storage->action_fifos);
 
        /* Do not touch the FIFO in case there is no deferred actions. */
        if (action_fifo_is_empty(fifo))
@@ -1642,7 +1639,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
 {
        int err, level;
 
-       level = __this_cpu_inc_return(ovs_pcpu_storage.exec_level);
+       level = __this_cpu_inc_return(ovs_pcpu_storage->exec_level);
        if (unlikely(level > OVS_RECURSION_LIMIT)) {
                net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
                                     ovs_dp_name(dp));
@@ -1659,6 +1656,6 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
                process_deferred_actions(dp);
 
 out:
-       __this_cpu_dec(ovs_pcpu_storage.exec_level);
+       __this_cpu_dec(ovs_pcpu_storage->exec_level);
        return err;
 }
index 6a304ae2d959cdba771bbb2a1ac3e2a1503c975b..b990dc83504f432de74e43369571c593ba239635 100644 (file)
@@ -244,7 +244,7 @@ void ovs_dp_detach_port(struct vport *p)
 /* Must be called with rcu_read_lock. */
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 {
-       struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage);
+       struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(ovs_pcpu_storage);
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
@@ -299,7 +299,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
         * avoided.
         */
        if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) {
-               local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+               local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
                ovs_pcpu->owner = current;
                ovs_pcpu_locked = true;
        }
@@ -310,7 +310,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
                                    ovs_dp_name(dp), error);
        if (ovs_pcpu_locked) {
                ovs_pcpu->owner = NULL;
-               local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+               local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
        }
 
        stats_counter = &stats->n_hit;
@@ -689,13 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
        sf_acts = rcu_dereference(flow->sf_acts);
 
        local_bh_disable();
-       local_lock_nested_bh(&ovs_pcpu_storage.bh_lock);
+       local_lock_nested_bh(&ovs_pcpu_storage->bh_lock);
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
-               this_cpu_write(ovs_pcpu_storage.owner, current);
+               this_cpu_write(ovs_pcpu_storage->owner, current);
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
-               this_cpu_write(ovs_pcpu_storage.owner, NULL);
-       local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock);
+               this_cpu_write(ovs_pcpu_storage->owner, NULL);
+       local_unlock_nested_bh(&ovs_pcpu_storage->bh_lock);
        local_bh_enable();
        rcu_read_unlock();
 
@@ -2744,6 +2744,28 @@ static struct drop_reason_list drop_reason_list_ovs = {
        .n_reasons = ARRAY_SIZE(ovs_drop_reasons),
 };
 
+static int __init ovs_alloc_percpu_storage(void)
+{
+       unsigned int cpu;
+
+       ovs_pcpu_storage = alloc_percpu(*ovs_pcpu_storage);
+       if (!ovs_pcpu_storage)
+               return -ENOMEM;
+
+       for_each_possible_cpu(cpu) {
+               struct ovs_pcpu_storage *ovs_pcpu;
+
+               ovs_pcpu = per_cpu_ptr(ovs_pcpu_storage, cpu);
+               local_lock_init(&ovs_pcpu->bh_lock);
+       }
+       return 0;
+}
+
+static void ovs_free_percpu_storage(void)
+{
+       free_percpu(ovs_pcpu_storage);
+}
+
 static int __init dp_init(void)
 {
        int err;
@@ -2753,6 +2775,10 @@ static int __init dp_init(void)
 
        pr_info("Open vSwitch switching datapath\n");
 
+       err = ovs_alloc_percpu_storage();
+       if (err)
+               goto error;
+
        err = ovs_internal_dev_rtnl_link_register();
        if (err)
                goto error;
@@ -2799,6 +2825,7 @@ error_flow_exit:
 error_unreg_rtnl_link:
        ovs_internal_dev_rtnl_link_unregister();
 error:
+       ovs_free_percpu_storage();
        return err;
 }
 
@@ -2813,6 +2840,7 @@ static void dp_cleanup(void)
        ovs_vport_exit();
        ovs_flow_exit();
        ovs_internal_dev_rtnl_link_unregister();
+       ovs_free_percpu_storage();
 }
 
 module_init(dp_init);
index 1b5348b0f55948332f65a078ae013bda76f53093..cfeb817a18894d688661cdc24c1c595f3ab20984 100644 (file)
@@ -220,7 +220,8 @@ struct ovs_pcpu_storage {
        struct task_struct *owner;
        local_lock_t bh_lock;
 };
-DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage);
+
+extern struct ovs_pcpu_storage __percpu *ovs_pcpu_storage;
 
 /**
  * enum ovs_pkt_hash_types - hash info to include with a packet