Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
5bc1421e NH |
2 | /* |
3 | * net/core/netprio_cgroup.c Priority Control Group | |
4 | * | |
5bc1421e NH |
5 | * Authors: Neil Horman <nhorman@tuxdriver.com> |
6 | */ | |
7 | ||
e005d193 JP |
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
9 | ||
c6e970a0 | 10 | #include <linux/module.h> |
5bc1421e NH |
11 | #include <linux/slab.h> |
12 | #include <linux/types.h> | |
13 | #include <linux/string.h> | |
14 | #include <linux/errno.h> | |
15 | #include <linux/skbuff.h> | |
16 | #include <linux/cgroup.h> | |
17 | #include <linux/rcupdate.h> | |
18 | #include <linux/atomic.h> | |
f719ff9b IM |
19 | #include <linux/sched/task.h> |
20 | ||
5bc1421e NH |
21 | #include <net/rtnetlink.h> |
22 | #include <net/pkt_cls.h> | |
23 | #include <net/sock.h> | |
24 | #include <net/netprio_cgroup.h> | |
25 | ||
406a3c63 JF |
26 | #include <linux/fdtable.h> |
27 | ||
297dbde1 TH |
28 | /* |
29 | * netprio allocates per-net_device priomap array which is indexed by | |
30 | * css->id. Limiting css ID to 16bits doesn't lose anything. | |
31 | */ | |
32 | #define NETPRIO_ID_MAX USHRT_MAX | |
33 | ||
4a6ee25c | 34 | #define PRIOMAP_MIN_SZ 128 |
5bc1421e | 35 | |
4a6ee25c | 36 | /* |
8e3bff96 | 37 | * Extend @dev->priomap so that it's large enough to accommodate |
4a6ee25c TH |
38 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful |
39 | * return. Must be called under rtnl lock. | |
40 | */ | |
41 | static int extend_netdev_table(struct net_device *dev, u32 target_idx) | |
5bc1421e | 42 | { |
4a6ee25c TH |
43 | struct netprio_map *old, *new; |
44 | size_t new_sz, new_len; | |
5bc1421e | 45 | |
4a6ee25c | 46 | /* is the existing priomap large enough? */ |
52bca930 | 47 | old = rtnl_dereference(dev->priomap); |
4a6ee25c TH |
48 | if (old && old->priomap_len > target_idx) |
49 | return 0; | |
50 | ||
51 | /* | |
52 | * Determine the new size. Let's keep it power-of-two. We start | |
53 | * from PRIOMAP_MIN_SZ and double it until it's large enough to | |
54 | * accommodate @target_idx. | |
55 | */ | |
56 | new_sz = PRIOMAP_MIN_SZ; | |
57 | while (true) { | |
58 | new_len = (new_sz - offsetof(struct netprio_map, priomap)) / | |
59 | sizeof(new->priomap[0]); | |
60 | if (new_len > target_idx) | |
61 | break; | |
62 | new_sz *= 2; | |
63 | /* overflowed? */ | |
64 | if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) | |
65 | return -ENOSPC; | |
66 | } | |
5bc1421e | 67 | |
4a6ee25c TH |
68 | /* allocate & copy */ |
69 | new = kzalloc(new_sz, GFP_KERNEL); | |
62b5942a | 70 | if (!new) |
ef209f15 | 71 | return -ENOMEM; |
5bc1421e | 72 | |
52bca930 TH |
73 | if (old) |
74 | memcpy(new->priomap, old->priomap, | |
75 | old->priomap_len * sizeof(old->priomap[0])); | |
5bc1421e | 76 | |
52bca930 | 77 | new->priomap_len = new_len; |
5bc1421e | 78 | |
4a6ee25c | 79 | /* install the new priomap */ |
52bca930 TH |
80 | rcu_assign_pointer(dev->priomap, new); |
81 | if (old) | |
82 | kfree_rcu(old, rcu); | |
ef209f15 G |
83 | return 0; |
84 | } | |
85 | ||
666b0ebe TH |
86 | /** |
87 | * netprio_prio - return the effective netprio of a cgroup-net_device pair | |
6d37b974 | 88 | * @css: css part of the target pair |
666b0ebe TH |
89 | * @dev: net_device part of the target pair |
90 | * | |
91 | * Should be called under RCU read or rtnl lock. | |
92 | */ | |
6d37b974 | 93 | static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) |
666b0ebe TH |
94 | { |
95 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); | |
db53c73a | 96 | int id = css->id; |
666b0ebe | 97 | |
6d37b974 TH |
98 | if (map && id < map->priomap_len) |
99 | return map->priomap[id]; | |
666b0ebe TH |
100 | return 0; |
101 | } | |
102 | ||
103 | /** | |
104 | * netprio_set_prio - set netprio on a cgroup-net_device pair | |
6d37b974 | 105 | * @css: css part of the target pair |
666b0ebe TH |
106 | * @dev: net_device part of the target pair |
107 | * @prio: prio to set | |
108 | * | |
6d37b974 | 109 | * Set netprio to @prio on @css-@dev pair. Should be called under rtnl |
666b0ebe TH |
110 | * lock and may fail under memory pressure for non-zero @prio. |
111 | */ | |
6d37b974 TH |
112 | static int netprio_set_prio(struct cgroup_subsys_state *css, |
113 | struct net_device *dev, u32 prio) | |
666b0ebe TH |
114 | { |
115 | struct netprio_map *map; | |
db53c73a | 116 | int id = css->id; |
666b0ebe TH |
117 | int ret; |
118 | ||
119 | /* avoid extending priomap for zero writes */ | |
120 | map = rtnl_dereference(dev->priomap); | |
6d37b974 | 121 | if (!prio && (!map || map->priomap_len <= id)) |
666b0ebe TH |
122 | return 0; |
123 | ||
6d37b974 | 124 | ret = extend_netdev_table(dev, id); |
666b0ebe TH |
125 | if (ret) |
126 | return ret; | |
127 | ||
128 | map = rtnl_dereference(dev->priomap); | |
6d37b974 | 129 | map->priomap[id] = prio; |
666b0ebe TH |
130 | return 0; |
131 | } | |
132 | ||
eb95419b TH |
133 | static struct cgroup_subsys_state * |
134 | cgrp_css_alloc(struct cgroup_subsys_state *parent_css) | |
5bc1421e | 135 | { |
6d37b974 | 136 | struct cgroup_subsys_state *css; |
88d642fa | 137 | |
6d37b974 TH |
138 | css = kzalloc(sizeof(*css), GFP_KERNEL); |
139 | if (!css) | |
5bc1421e NH |
140 | return ERR_PTR(-ENOMEM); |
141 | ||
6d37b974 | 142 | return css; |
5bc1421e NH |
143 | } |
144 | ||
eb95419b | 145 | static int cgrp_css_online(struct cgroup_subsys_state *css) |
5bc1421e | 146 | { |
5c9d535b | 147 | struct cgroup_subsys_state *parent_css = css->parent; |
5bc1421e | 148 | struct net_device *dev; |
811d8d6f TH |
149 | int ret = 0; |
150 | ||
297dbde1 TH |
151 | if (css->id > NETPRIO_ID_MAX) |
152 | return -ENOSPC; | |
153 | ||
eb95419b | 154 | if (!parent_css) |
811d8d6f | 155 | return 0; |
5bc1421e | 156 | |
5bc1421e | 157 | rtnl_lock(); |
811d8d6f TH |
158 | /* |
159 | * Inherit prios from the parent. As all prios are set during | |
160 | * onlining, there is no need to clear them on offline. | |
161 | */ | |
162 | for_each_netdev(&init_net, dev) { | |
6d37b974 | 163 | u32 prio = netprio_prio(parent_css, dev); |
811d8d6f | 164 | |
6d37b974 | 165 | ret = netprio_set_prio(css, dev, prio); |
811d8d6f TH |
166 | if (ret) |
167 | break; | |
168 | } | |
5bc1421e | 169 | rtnl_unlock(); |
811d8d6f TH |
170 | return ret; |
171 | } | |
172 | ||
/* css_free callback: release the memory of @css with kfree(). */
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
177 | ||
182446d0 | 178 | static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) |
5bc1421e | 179 | { |
db53c73a | 180 | return css->id; |
5bc1421e NH |
181 | } |
182 | ||
2da8ca82 | 183 | static int read_priomap(struct seq_file *sf, void *v) |
5bc1421e NH |
184 | { |
185 | struct net_device *dev; | |
5bc1421e NH |
186 | |
187 | rcu_read_lock(); | |
666b0ebe | 188 | for_each_netdev_rcu(&init_net, dev) |
2da8ca82 TH |
189 | seq_printf(sf, "%s %u\n", dev->name, |
190 | netprio_prio(seq_css(sf), dev)); | |
5bc1421e NH |
191 | rcu_read_unlock(); |
192 | return 0; | |
193 | } | |
194 | ||
451af504 TH |
195 | static ssize_t write_priomap(struct kernfs_open_file *of, |
196 | char *buf, size_t nbytes, loff_t off) | |
5bc1421e | 197 | { |
6d5759dd | 198 | char devname[IFNAMSIZ + 1]; |
5bc1421e | 199 | struct net_device *dev; |
6d5759dd TH |
200 | u32 prio; |
201 | int ret; | |
5bc1421e | 202 | |
451af504 | 203 | if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) |
6d5759dd | 204 | return -EINVAL; |
5bc1421e NH |
205 | |
206 | dev = dev_get_by_name(&init_net, devname); | |
207 | if (!dev) | |
6d5759dd | 208 | return -ENODEV; |
5bc1421e | 209 | |
bd1060a1 TH |
210 | cgroup_sk_alloc_disable(); |
211 | ||
476ad154 | 212 | rtnl_lock(); |
6d5759dd | 213 | |
451af504 | 214 | ret = netprio_set_prio(of_css(of), dev, prio); |
ef209f15 | 215 | |
476ad154 | 216 | rtnl_unlock(); |
5bc1421e | 217 | dev_put(dev); |
451af504 | 218 | return ret ?: nbytes; |
5bc1421e NH |
219 | } |
220 | ||
c3c073f8 AV |
221 | static int update_netprio(const void *v, struct file *file, unsigned n) |
222 | { | |
dba4a925 | 223 | struct socket *sock = sock_from_file(file); |
bd1060a1 TH |
224 | if (sock) { |
225 | spin_lock(&cgroup_sk_update_lock); | |
2a56a1fe TH |
226 | sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, |
227 | (unsigned long)v); | |
bd1060a1 TH |
228 | spin_unlock(&cgroup_sk_update_lock); |
229 | } | |
c3c073f8 AV |
230 | return 0; |
231 | } | |
232 | ||
1f7dd3e5 | 233 | static void net_prio_attach(struct cgroup_taskset *tset) |
406a3c63 JF |
234 | { |
235 | struct task_struct *p; | |
1f7dd3e5 TH |
236 | struct cgroup_subsys_state *css; |
237 | ||
090e28b2 ZL |
238 | cgroup_sk_alloc_disable(); |
239 | ||
1f7dd3e5 | 240 | cgroup_taskset_for_each(p, css, tset) { |
db53c73a | 241 | void *v = (void *)(unsigned long)css->id; |
406a3c63 | 242 | |
406a3c63 | 243 | task_lock(p); |
c3c073f8 | 244 | iterate_fd(p->files, 0, update_netprio, v); |
406a3c63 JF |
245 | task_unlock(p); |
246 | } | |
406a3c63 JF |
247 | } |
248 | ||
5bc1421e NH |
249 | static struct cftype ss_files[] = { |
250 | { | |
251 | .name = "prioidx", | |
252 | .read_u64 = read_prioidx, | |
253 | }, | |
254 | { | |
255 | .name = "ifpriomap", | |
2da8ca82 | 256 | .seq_show = read_priomap, |
451af504 | 257 | .write = write_priomap, |
5bc1421e | 258 | }, |
4baf6e33 | 259 | { } /* terminate */ |
5bc1421e NH |
260 | }; |
261 | ||
073219e9 | 262 | struct cgroup_subsys net_prio_cgrp_subsys = { |
92fb9748 | 263 | .css_alloc = cgrp_css_alloc, |
811d8d6f | 264 | .css_online = cgrp_css_online, |
92fb9748 | 265 | .css_free = cgrp_css_free, |
406a3c63 | 266 | .attach = net_prio_attach, |
5577964e | 267 | .legacy_cftypes = ss_files, |
676f7c8f | 268 | }; |
5bc1421e NH |
269 | |
270 | static int netprio_device_event(struct notifier_block *unused, | |
271 | unsigned long event, void *ptr) | |
272 | { | |
351638e7 | 273 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
5bc1421e | 274 | struct netprio_map *old; |
5bc1421e NH |
275 | |
276 | /* | |
277 | * Note this is called with rtnl_lock held so we have update side | |
278 | * protection on our rcu assignments | |
279 | */ | |
280 | ||
281 | switch (event) { | |
5bc1421e NH |
282 | case NETDEV_UNREGISTER: |
283 | old = rtnl_dereference(dev->priomap); | |
2cfa5a04 | 284 | RCU_INIT_POINTER(dev->priomap, NULL); |
5bc1421e NH |
285 | if (old) |
286 | kfree_rcu(old, rcu); | |
287 | break; | |
288 | } | |
289 | return NOTIFY_DONE; | |
290 | } | |
291 | ||
292 | static struct notifier_block netprio_device_notifier = { | |
293 | .notifier_call = netprio_device_event | |
294 | }; | |
295 | ||
296 | static int __init init_cgroup_netprio(void) | |
297 | { | |
5bc1421e | 298 | register_netdevice_notifier(&netprio_device_notifier); |
af636337 | 299 | return 0; |
5bc1421e | 300 | } |
af636337 | 301 | subsys_initcall(init_cgroup_netprio); |