Commit | Line | Data |
---|---|---|
5bc1421e NH |
1 | /* |
2 | * net/core/netprio_cgroup.c Priority Control Group | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Neil Horman <nhorman@tuxdriver.com> | |
10 | */ | |
11 | ||
e005d193 JP |
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | ||
5bc1421e NH |
14 | #include <linux/slab.h> |
15 | #include <linux/types.h> | |
43cc277a | 16 | #include <linux/module.h> |
5bc1421e NH |
17 | #include <linux/string.h> |
18 | #include <linux/errno.h> | |
19 | #include <linux/skbuff.h> | |
20 | #include <linux/cgroup.h> | |
21 | #include <linux/rcupdate.h> | |
22 | #include <linux/atomic.h> | |
f719ff9b IM |
23 | #include <linux/sched/task.h> |
24 | ||
5bc1421e NH |
25 | #include <net/rtnetlink.h> |
26 | #include <net/pkt_cls.h> | |
27 | #include <net/sock.h> | |
28 | #include <net/netprio_cgroup.h> | |
29 | ||
406a3c63 JF |
30 | #include <linux/fdtable.h> |
31 | ||
/*
 * Largest css ID that cgrp_css_online() accepts; anything above it is
 * rejected with -ENOSPC.  netprio keeps a per-net_device priomap array,
 * and limiting the ID to 16 bits doesn't lose anything.
 *
 * NOTE(review): the limit is checked against css->id, while the priomap
 * accessors in this file index by css->cgroup->id - confirm the two ID
 * spaces are meant to be interchangeable here.
 */
#define NETPRIO_ID_MAX		USHRT_MAX

/* Starting allocation size, in bytes, used when sizing a priomap. */
#define PRIOMAP_MIN_SZ		128
5bc1421e | 39 | |
4a6ee25c | 40 | /* |
8e3bff96 | 41 | * Extend @dev->priomap so that it's large enough to accommodate |
4a6ee25c TH |
42 | * @target_idx. @dev->priomap.priomap_len > @target_idx after successful |
43 | * return. Must be called under rtnl lock. | |
44 | */ | |
45 | static int extend_netdev_table(struct net_device *dev, u32 target_idx) | |
5bc1421e | 46 | { |
4a6ee25c TH |
47 | struct netprio_map *old, *new; |
48 | size_t new_sz, new_len; | |
5bc1421e | 49 | |
4a6ee25c | 50 | /* is the existing priomap large enough? */ |
52bca930 | 51 | old = rtnl_dereference(dev->priomap); |
4a6ee25c TH |
52 | if (old && old->priomap_len > target_idx) |
53 | return 0; | |
54 | ||
55 | /* | |
56 | * Determine the new size. Let's keep it power-of-two. We start | |
57 | * from PRIOMAP_MIN_SZ and double it until it's large enough to | |
58 | * accommodate @target_idx. | |
59 | */ | |
60 | new_sz = PRIOMAP_MIN_SZ; | |
61 | while (true) { | |
62 | new_len = (new_sz - offsetof(struct netprio_map, priomap)) / | |
63 | sizeof(new->priomap[0]); | |
64 | if (new_len > target_idx) | |
65 | break; | |
66 | new_sz *= 2; | |
67 | /* overflowed? */ | |
68 | if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) | |
69 | return -ENOSPC; | |
70 | } | |
5bc1421e | 71 | |
4a6ee25c TH |
72 | /* allocate & copy */ |
73 | new = kzalloc(new_sz, GFP_KERNEL); | |
62b5942a | 74 | if (!new) |
ef209f15 | 75 | return -ENOMEM; |
5bc1421e | 76 | |
52bca930 TH |
77 | if (old) |
78 | memcpy(new->priomap, old->priomap, | |
79 | old->priomap_len * sizeof(old->priomap[0])); | |
5bc1421e | 80 | |
52bca930 | 81 | new->priomap_len = new_len; |
5bc1421e | 82 | |
4a6ee25c | 83 | /* install the new priomap */ |
52bca930 TH |
84 | rcu_assign_pointer(dev->priomap, new); |
85 | if (old) | |
86 | kfree_rcu(old, rcu); | |
ef209f15 G |
87 | return 0; |
88 | } | |
89 | ||
666b0ebe TH |
90 | /** |
91 | * netprio_prio - return the effective netprio of a cgroup-net_device pair | |
6d37b974 | 92 | * @css: css part of the target pair |
666b0ebe TH |
93 | * @dev: net_device part of the target pair |
94 | * | |
95 | * Should be called under RCU read or rtnl lock. | |
96 | */ | |
6d37b974 | 97 | static u32 netprio_prio(struct cgroup_subsys_state *css, struct net_device *dev) |
666b0ebe TH |
98 | { |
99 | struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); | |
6d37b974 | 100 | int id = css->cgroup->id; |
666b0ebe | 101 | |
6d37b974 TH |
102 | if (map && id < map->priomap_len) |
103 | return map->priomap[id]; | |
666b0ebe TH |
104 | return 0; |
105 | } | |
106 | ||
107 | /** | |
108 | * netprio_set_prio - set netprio on a cgroup-net_device pair | |
6d37b974 | 109 | * @css: css part of the target pair |
666b0ebe TH |
110 | * @dev: net_device part of the target pair |
111 | * @prio: prio to set | |
112 | * | |
6d37b974 | 113 | * Set netprio to @prio on @css-@dev pair. Should be called under rtnl |
666b0ebe TH |
114 | * lock and may fail under memory pressure for non-zero @prio. |
115 | */ | |
6d37b974 TH |
116 | static int netprio_set_prio(struct cgroup_subsys_state *css, |
117 | struct net_device *dev, u32 prio) | |
666b0ebe TH |
118 | { |
119 | struct netprio_map *map; | |
6d37b974 | 120 | int id = css->cgroup->id; |
666b0ebe TH |
121 | int ret; |
122 | ||
123 | /* avoid extending priomap for zero writes */ | |
124 | map = rtnl_dereference(dev->priomap); | |
6d37b974 | 125 | if (!prio && (!map || map->priomap_len <= id)) |
666b0ebe TH |
126 | return 0; |
127 | ||
6d37b974 | 128 | ret = extend_netdev_table(dev, id); |
666b0ebe TH |
129 | if (ret) |
130 | return ret; | |
131 | ||
132 | map = rtnl_dereference(dev->priomap); | |
6d37b974 | 133 | map->priomap[id] = prio; |
666b0ebe TH |
134 | return 0; |
135 | } | |
136 | ||
eb95419b TH |
137 | static struct cgroup_subsys_state * |
138 | cgrp_css_alloc(struct cgroup_subsys_state *parent_css) | |
5bc1421e | 139 | { |
6d37b974 | 140 | struct cgroup_subsys_state *css; |
88d642fa | 141 | |
6d37b974 TH |
142 | css = kzalloc(sizeof(*css), GFP_KERNEL); |
143 | if (!css) | |
5bc1421e NH |
144 | return ERR_PTR(-ENOMEM); |
145 | ||
6d37b974 | 146 | return css; |
5bc1421e NH |
147 | } |
148 | ||
eb95419b | 149 | static int cgrp_css_online(struct cgroup_subsys_state *css) |
5bc1421e | 150 | { |
5c9d535b | 151 | struct cgroup_subsys_state *parent_css = css->parent; |
5bc1421e | 152 | struct net_device *dev; |
811d8d6f TH |
153 | int ret = 0; |
154 | ||
297dbde1 TH |
155 | if (css->id > NETPRIO_ID_MAX) |
156 | return -ENOSPC; | |
157 | ||
eb95419b | 158 | if (!parent_css) |
811d8d6f | 159 | return 0; |
5bc1421e | 160 | |
5bc1421e | 161 | rtnl_lock(); |
811d8d6f TH |
162 | /* |
163 | * Inherit prios from the parent. As all prios are set during | |
164 | * onlining, there is no need to clear them on offline. | |
165 | */ | |
166 | for_each_netdev(&init_net, dev) { | |
6d37b974 | 167 | u32 prio = netprio_prio(parent_css, dev); |
811d8d6f | 168 | |
6d37b974 | 169 | ret = netprio_set_prio(css, dev, prio); |
811d8d6f TH |
170 | if (ret) |
171 | break; | |
172 | } | |
5bc1421e | 173 | rtnl_unlock(); |
811d8d6f TH |
174 | return ret; |
175 | } | |
176 | ||
/*
 * Free @css with kfree(); installed as .css_free next to
 * cgrp_css_alloc() in net_prio_cgrp_subsys.
 */
static void cgrp_css_free(struct cgroup_subsys_state *css)
{
	kfree(css);
}
181 | ||
182446d0 | 182 | static u64 read_prioidx(struct cgroup_subsys_state *css, struct cftype *cft) |
5bc1421e | 183 | { |
182446d0 | 184 | return css->cgroup->id; |
5bc1421e NH |
185 | } |
186 | ||
2da8ca82 | 187 | static int read_priomap(struct seq_file *sf, void *v) |
5bc1421e NH |
188 | { |
189 | struct net_device *dev; | |
5bc1421e NH |
190 | |
191 | rcu_read_lock(); | |
666b0ebe | 192 | for_each_netdev_rcu(&init_net, dev) |
2da8ca82 TH |
193 | seq_printf(sf, "%s %u\n", dev->name, |
194 | netprio_prio(seq_css(sf), dev)); | |
5bc1421e NH |
195 | rcu_read_unlock(); |
196 | return 0; | |
197 | } | |
198 | ||
451af504 TH |
199 | static ssize_t write_priomap(struct kernfs_open_file *of, |
200 | char *buf, size_t nbytes, loff_t off) | |
5bc1421e | 201 | { |
6d5759dd | 202 | char devname[IFNAMSIZ + 1]; |
5bc1421e | 203 | struct net_device *dev; |
6d5759dd TH |
204 | u32 prio; |
205 | int ret; | |
5bc1421e | 206 | |
451af504 | 207 | if (sscanf(buf, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) |
6d5759dd | 208 | return -EINVAL; |
5bc1421e NH |
209 | |
210 | dev = dev_get_by_name(&init_net, devname); | |
211 | if (!dev) | |
6d5759dd | 212 | return -ENODEV; |
5bc1421e | 213 | |
bd1060a1 TH |
214 | cgroup_sk_alloc_disable(); |
215 | ||
476ad154 | 216 | rtnl_lock(); |
6d5759dd | 217 | |
451af504 | 218 | ret = netprio_set_prio(of_css(of), dev, prio); |
ef209f15 | 219 | |
476ad154 | 220 | rtnl_unlock(); |
5bc1421e | 221 | dev_put(dev); |
451af504 | 222 | return ret ?: nbytes; |
5bc1421e NH |
223 | } |
224 | ||
c3c073f8 AV |
225 | static int update_netprio(const void *v, struct file *file, unsigned n) |
226 | { | |
227 | int err; | |
228 | struct socket *sock = sock_from_file(file, &err); | |
bd1060a1 TH |
229 | if (sock) { |
230 | spin_lock(&cgroup_sk_update_lock); | |
2a56a1fe TH |
231 | sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, |
232 | (unsigned long)v); | |
bd1060a1 TH |
233 | spin_unlock(&cgroup_sk_update_lock); |
234 | } | |
c3c073f8 AV |
235 | return 0; |
236 | } | |
237 | ||
1f7dd3e5 | 238 | static void net_prio_attach(struct cgroup_taskset *tset) |
406a3c63 JF |
239 | { |
240 | struct task_struct *p; | |
1f7dd3e5 TH |
241 | struct cgroup_subsys_state *css; |
242 | ||
243 | cgroup_taskset_for_each(p, css, tset) { | |
244 | void *v = (void *)(unsigned long)css->cgroup->id; | |
406a3c63 | 245 | |
406a3c63 | 246 | task_lock(p); |
c3c073f8 | 247 | iterate_fd(p->files, 0, update_netprio, v); |
406a3c63 JF |
248 | task_unlock(p); |
249 | } | |
406a3c63 JF |
250 | } |
251 | ||
5bc1421e NH |
252 | static struct cftype ss_files[] = { |
253 | { | |
254 | .name = "prioidx", | |
255 | .read_u64 = read_prioidx, | |
256 | }, | |
257 | { | |
258 | .name = "ifpriomap", | |
2da8ca82 | 259 | .seq_show = read_priomap, |
451af504 | 260 | .write = write_priomap, |
5bc1421e | 261 | }, |
4baf6e33 | 262 | { } /* terminate */ |
5bc1421e NH |
263 | }; |
264 | ||
073219e9 | 265 | struct cgroup_subsys net_prio_cgrp_subsys = { |
92fb9748 | 266 | .css_alloc = cgrp_css_alloc, |
811d8d6f | 267 | .css_online = cgrp_css_online, |
92fb9748 | 268 | .css_free = cgrp_css_free, |
406a3c63 | 269 | .attach = net_prio_attach, |
5577964e | 270 | .legacy_cftypes = ss_files, |
676f7c8f | 271 | }; |
5bc1421e NH |
272 | |
273 | static int netprio_device_event(struct notifier_block *unused, | |
274 | unsigned long event, void *ptr) | |
275 | { | |
351638e7 | 276 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); |
5bc1421e | 277 | struct netprio_map *old; |
5bc1421e NH |
278 | |
279 | /* | |
280 | * Note this is called with rtnl_lock held so we have update side | |
281 | * protection on our rcu assignments | |
282 | */ | |
283 | ||
284 | switch (event) { | |
5bc1421e NH |
285 | case NETDEV_UNREGISTER: |
286 | old = rtnl_dereference(dev->priomap); | |
2cfa5a04 | 287 | RCU_INIT_POINTER(dev->priomap, NULL); |
5bc1421e NH |
288 | if (old) |
289 | kfree_rcu(old, rcu); | |
290 | break; | |
291 | } | |
292 | return NOTIFY_DONE; | |
293 | } | |
294 | ||
295 | static struct notifier_block netprio_device_notifier = { | |
296 | .notifier_call = netprio_device_event | |
297 | }; | |
298 | ||
299 | static int __init init_cgroup_netprio(void) | |
300 | { | |
5bc1421e | 301 | register_netdevice_notifier(&netprio_device_notifier); |
af636337 | 302 | return 0; |
5bc1421e NH |
303 | } |
304 | ||
af636337 | 305 | subsys_initcall(init_cgroup_netprio); |
5bc1421e | 306 | MODULE_LICENSE("GPL v2"); |