Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 LT |
2 | /* |
3 | * ip6_flowlabel.c IPv6 flowlabel manager. | |
4 | * | |
1da177e4 LT |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | */ | |
7 | ||
4fc268d2 | 8 | #include <linux/capability.h> |
1da177e4 LT |
9 | #include <linux/errno.h> |
10 | #include <linux/types.h> | |
11 | #include <linux/socket.h> | |
12 | #include <linux/net.h> | |
13 | #include <linux/netdevice.h> | |
1da177e4 | 14 | #include <linux/in6.h> |
1da177e4 LT |
15 | #include <linux/proc_fs.h> |
16 | #include <linux/seq_file.h> | |
5a0e3ad6 | 17 | #include <linux/slab.h> |
bc3b2d7f | 18 | #include <linux/export.h> |
4f82f457 | 19 | #include <linux/pid_namespace.h> |
59c820b2 | 20 | #include <linux/jump_label_ratelimit.h> |
1da177e4 | 21 | |
457c4cbc | 22 | #include <net/net_namespace.h> |
1da177e4 LT |
23 | #include <net/sock.h> |
24 | ||
25 | #include <net/ipv6.h> | |
1da177e4 | 26 | #include <net/rawv6.h> |
1da177e4 LT |
27 | #include <net/transp_v6.h> |
28 | ||
7c0f6ba6 | 29 | #include <linux/uaccess.h> |
1da177e4 LT |
30 | |
/*
 * Linger bounds, expressed in seconds (check_linger() multiplies by HZ).
 * The 6 second minimum comes from an old IPv6 RFC.
 */
#define FL_MIN_LINGER	6
#define FL_MAX_LINGER	150	/* largest linger accepted without CAP_NET_ADMIN */

/* Flow label hash table sizing */

#define FL_MAX_PER_SOCK	32	/* per-socket list threshold used by mem_check() */
#define FL_MAX_SIZE	4096	/* table-wide entry budget used by mem_check() */
#define FL_HASH_MASK	255	/* fl_ht has FL_HASH_MASK + 1 buckets */
#define FL_HASH(l)	(ntohl(l) & FL_HASH_MASK)
42 | ||
43 | static atomic_t fl_size = ATOMIC_INIT(0); | |
d3aedd5e | 44 | static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; |
1da177e4 | 45 | |
24ed960a | 46 | static void ip6_fl_gc(struct timer_list *unused); |
1d27e3e2 | 47 | static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); |
1da177e4 LT |
48 | |
49 | /* FL hash table lock: it protects only of GC */ | |
50 | ||
d3aedd5e | 51 | static DEFINE_SPINLOCK(ip6_fl_lock); |
1da177e4 LT |
52 | |
53 | /* Big socket sock */ | |
54 | ||
18367681 | 55 | static DEFINE_SPINLOCK(ip6_sk_fl_lock); |
1da177e4 | 56 | |
59c820b2 WB |
57 | DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ); |
58 | EXPORT_SYMBOL(ipv6_flowlabel_exclusive); | |
59 | ||
/*
 * Iterate over the entries chained in bucket @hash of fl_ht.
 * @fl is the cursor and must be an lvalue of type struct ip6_flowlabel *.
 * Every link is loaded with rcu_dereference_bh().
 */
#define for_each_fl_rcu(hash, fl)				\
	for ((fl) = rcu_dereference_bh(fl_ht[(hash)]);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

/* Resume a bucket walk at the entry following @fl. */
#define for_each_fl_continue_rcu(fl)				\
	for ((fl) = rcu_dereference_bh((fl)->next);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

/*
 * Iterate over the flow label list hanging off the ipv6_pinfo @np.
 * @sfl is the cursor (struct ipv6_fl_socklist *).
 */
#define for_each_sk_fl_rcu(np, sfl)				\
	for ((sfl) = rcu_dereference_bh((np)->ipv6_fl_list);	\
	     (sfl) != NULL;					\
	     (sfl) = rcu_dereference_bh((sfl)->next))
73 | ||
60e8fbc4 | 74 | static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) |
1da177e4 LT |
75 | { |
76 | struct ip6_flowlabel *fl; | |
77 | ||
d3aedd5e | 78 | for_each_fl_rcu(FL_HASH(label), fl) { |
09ad9bc7 | 79 | if (fl->label == label && net_eq(fl->fl_net, net)) |
1da177e4 LT |
80 | return fl; |
81 | } | |
82 | return NULL; | |
83 | } | |
84 | ||
60e8fbc4 | 85 | static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) |
1da177e4 LT |
86 | { |
87 | struct ip6_flowlabel *fl; | |
88 | ||
d3aedd5e | 89 | rcu_read_lock_bh(); |
60e8fbc4 | 90 | fl = __fl_lookup(net, label); |
d3aedd5e YH |
91 | if (fl && !atomic_inc_not_zero(&fl->users)) |
92 | fl = NULL; | |
93 | rcu_read_unlock_bh(); | |
1da177e4 LT |
94 | return fl; |
95 | } | |
96 | ||
59c820b2 WB |
97 | static bool fl_shared_exclusive(struct ip6_flowlabel *fl) |
98 | { | |
99 | return fl->share == IPV6_FL_S_EXCL || | |
100 | fl->share == IPV6_FL_S_PROCESS || | |
101 | fl->share == IPV6_FL_S_USER; | |
102 | } | |
103 | ||
6c0afef5 ED |
104 | static void fl_free_rcu(struct rcu_head *head) |
105 | { | |
106 | struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); | |
107 | ||
108 | if (fl->share == IPV6_FL_S_PROCESS) | |
109 | put_pid(fl->owner.pid); | |
110 | kfree(fl->opt); | |
111 | kfree(fl); | |
112 | } | |
113 | ||
1da177e4 LT |
114 | |
115 | static void fl_free(struct ip6_flowlabel *fl) | |
116 | { | |
59c820b2 WB |
117 | if (!fl) |
118 | return; | |
119 | ||
120 | if (fl_shared_exclusive(fl) || fl->opt) | |
121 | static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive); | |
122 | ||
123 | call_rcu(&fl->rcu, fl_free_rcu); | |
1da177e4 LT |
124 | } |
125 | ||
126 | static void fl_release(struct ip6_flowlabel *fl) | |
127 | { | |
d3aedd5e | 128 | spin_lock_bh(&ip6_fl_lock); |
1da177e4 LT |
129 | |
130 | fl->lastuse = jiffies; | |
131 | if (atomic_dec_and_test(&fl->users)) { | |
132 | unsigned long ttd = fl->lastuse + fl->linger; | |
133 | if (time_after(ttd, fl->expires)) | |
134 | fl->expires = ttd; | |
135 | ttd = fl->expires; | |
136 | if (fl->opt && fl->share == IPV6_FL_S_EXCL) { | |
137 | struct ipv6_txoptions *opt = fl->opt; | |
138 | fl->opt = NULL; | |
139 | kfree(opt); | |
140 | } | |
141 | if (!timer_pending(&ip6_fl_gc_timer) || | |
142 | time_after(ip6_fl_gc_timer.expires, ttd)) | |
143 | mod_timer(&ip6_fl_gc_timer, ttd); | |
144 | } | |
d3aedd5e | 145 | spin_unlock_bh(&ip6_fl_lock); |
1da177e4 LT |
146 | } |
147 | ||
24ed960a | 148 | static void ip6_fl_gc(struct timer_list *unused) |
1da177e4 LT |
149 | { |
150 | int i; | |
151 | unsigned long now = jiffies; | |
152 | unsigned long sched = 0; | |
153 | ||
d3aedd5e | 154 | spin_lock(&ip6_fl_lock); |
1da177e4 | 155 | |
67ba4152 | 156 | for (i = 0; i <= FL_HASH_MASK; i++) { |
7f0e44ac ED |
157 | struct ip6_flowlabel *fl; |
158 | struct ip6_flowlabel __rcu **flp; | |
159 | ||
1da177e4 | 160 | flp = &fl_ht[i]; |
d3aedd5e YH |
161 | while ((fl = rcu_dereference_protected(*flp, |
162 | lockdep_is_held(&ip6_fl_lock))) != NULL) { | |
1da177e4 LT |
163 | if (atomic_read(&fl->users) == 0) { |
164 | unsigned long ttd = fl->lastuse + fl->linger; | |
165 | if (time_after(ttd, fl->expires)) | |
166 | fl->expires = ttd; | |
167 | ttd = fl->expires; | |
168 | if (time_after_eq(now, ttd)) { | |
169 | *flp = fl->next; | |
170 | fl_free(fl); | |
171 | atomic_dec(&fl_size); | |
172 | continue; | |
173 | } | |
174 | if (!sched || time_before(ttd, sched)) | |
175 | sched = ttd; | |
176 | } | |
177 | flp = &fl->next; | |
178 | } | |
179 | } | |
180 | if (!sched && atomic_read(&fl_size)) | |
181 | sched = now + FL_MAX_LINGER; | |
182 | if (sched) { | |
60e8fbc4 | 183 | mod_timer(&ip6_fl_gc_timer, sched); |
1da177e4 | 184 | } |
d3aedd5e | 185 | spin_unlock(&ip6_fl_lock); |
1da177e4 LT |
186 | } |
187 | ||
2c8c1e72 | 188 | static void __net_exit ip6_fl_purge(struct net *net) |
60e8fbc4 BT |
189 | { |
190 | int i; | |
191 | ||
4762fb98 | 192 | spin_lock_bh(&ip6_fl_lock); |
60e8fbc4 | 193 | for (i = 0; i <= FL_HASH_MASK; i++) { |
7f0e44ac ED |
194 | struct ip6_flowlabel *fl; |
195 | struct ip6_flowlabel __rcu **flp; | |
196 | ||
60e8fbc4 | 197 | flp = &fl_ht[i]; |
d3aedd5e YH |
198 | while ((fl = rcu_dereference_protected(*flp, |
199 | lockdep_is_held(&ip6_fl_lock))) != NULL) { | |
09ad9bc7 OP |
200 | if (net_eq(fl->fl_net, net) && |
201 | atomic_read(&fl->users) == 0) { | |
60e8fbc4 BT |
202 | *flp = fl->next; |
203 | fl_free(fl); | |
204 | atomic_dec(&fl_size); | |
205 | continue; | |
206 | } | |
207 | flp = &fl->next; | |
208 | } | |
209 | } | |
4762fb98 | 210 | spin_unlock_bh(&ip6_fl_lock); |
60e8fbc4 BT |
211 | } |
212 | ||
213 | static struct ip6_flowlabel *fl_intern(struct net *net, | |
214 | struct ip6_flowlabel *fl, __be32 label) | |
1da177e4 | 215 | { |
78c2e502 PE |
216 | struct ip6_flowlabel *lfl; |
217 | ||
1da177e4 LT |
218 | fl->label = label & IPV6_FLOWLABEL_MASK; |
219 | ||
d3aedd5e | 220 | spin_lock_bh(&ip6_fl_lock); |
1da177e4 LT |
221 | if (label == 0) { |
222 | for (;;) { | |
63862b5b | 223 | fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; |
1da177e4 | 224 | if (fl->label) { |
60e8fbc4 | 225 | lfl = __fl_lookup(net, fl->label); |
63159f29 | 226 | if (!lfl) |
1da177e4 LT |
227 | break; |
228 | } | |
229 | } | |
78c2e502 PE |
230 | } else { |
231 | /* | |
232 | * we dropper the ip6_fl_lock, so this entry could reappear | |
233 | * and we need to recheck with it. | |
234 | * | |
235 | * OTOH no need to search the active socket first, like it is | |
236 | * done in ipv6_flowlabel_opt - sock is locked, so new entry | |
237 | * with the same label can only appear on another sock | |
238 | */ | |
60e8fbc4 | 239 | lfl = __fl_lookup(net, fl->label); |
53b24b8f | 240 | if (lfl) { |
78c2e502 | 241 | atomic_inc(&lfl->users); |
d3aedd5e | 242 | spin_unlock_bh(&ip6_fl_lock); |
78c2e502 PE |
243 | return lfl; |
244 | } | |
1da177e4 LT |
245 | } |
246 | ||
247 | fl->lastuse = jiffies; | |
248 | fl->next = fl_ht[FL_HASH(fl->label)]; | |
d3aedd5e | 249 | rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); |
1da177e4 | 250 | atomic_inc(&fl_size); |
d3aedd5e | 251 | spin_unlock_bh(&ip6_fl_lock); |
78c2e502 | 252 | return NULL; |
1da177e4 LT |
253 | } |
254 | ||
255 | ||
256 | ||
257 | /* Socket flowlabel lists */ | |
258 | ||
59c820b2 | 259 | struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label) |
1da177e4 LT |
260 | { |
261 | struct ipv6_fl_socklist *sfl; | |
262 | struct ipv6_pinfo *np = inet6_sk(sk); | |
263 | ||
264 | label &= IPV6_FLOWLABEL_MASK; | |
265 | ||
18367681 YH |
266 | rcu_read_lock_bh(); |
267 | for_each_sk_fl_rcu(np, sfl) { | |
1da177e4 | 268 | struct ip6_flowlabel *fl = sfl->fl; |
65a3c497 ED |
269 | |
270 | if (fl->label == label && atomic_inc_not_zero(&fl->users)) { | |
1da177e4 | 271 | fl->lastuse = jiffies; |
18367681 | 272 | rcu_read_unlock_bh(); |
1da177e4 LT |
273 | return fl; |
274 | } | |
275 | } | |
18367681 | 276 | rcu_read_unlock_bh(); |
1da177e4 LT |
277 | return NULL; |
278 | } | |
59c820b2 | 279 | EXPORT_SYMBOL_GPL(__fl6_sock_lookup); |
3cf3dc6c | 280 | |
1da177e4 LT |
281 | void fl6_free_socklist(struct sock *sk) |
282 | { | |
283 | struct ipv6_pinfo *np = inet6_sk(sk); | |
284 | struct ipv6_fl_socklist *sfl; | |
285 | ||
18367681 | 286 | if (!rcu_access_pointer(np->ipv6_fl_list)) |
f256dc59 YH |
287 | return; |
288 | ||
18367681 YH |
289 | spin_lock_bh(&ip6_sk_fl_lock); |
290 | while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, | |
291 | lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { | |
292 | np->ipv6_fl_list = sfl->next; | |
293 | spin_unlock_bh(&ip6_sk_fl_lock); | |
f256dc59 | 294 | |
1da177e4 | 295 | fl_release(sfl->fl); |
18367681 YH |
296 | kfree_rcu(sfl, rcu); |
297 | ||
298 | spin_lock_bh(&ip6_sk_fl_lock); | |
1da177e4 | 299 | } |
18367681 | 300 | spin_unlock_bh(&ip6_sk_fl_lock); |
1da177e4 LT |
301 | } |
302 | ||
303 | /* Service routines */ | |
304 | ||
305 | ||
306 | /* | |
307 | It is the only difficult place. flowlabel enforces equal headers | |
308 | before and including routing header, however user may supply options | |
309 | following rthdr. | |
310 | */ | |
311 | ||
67ba4152 IM |
312 | struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, |
313 | struct ip6_flowlabel *fl, | |
314 | struct ipv6_txoptions *fopt) | |
1da177e4 | 315 | { |
67ba4152 | 316 | struct ipv6_txoptions *fl_opt = fl->opt; |
1ab1457c | 317 | |
63159f29 | 318 | if (!fopt || fopt->opt_flen == 0) |
df9890c3 | 319 | return fl_opt; |
1ab1457c | 320 | |
53b24b8f | 321 | if (fl_opt) { |
1da177e4 | 322 | opt_space->hopopt = fl_opt->hopopt; |
df9890c3 | 323 | opt_space->dst0opt = fl_opt->dst0opt; |
1da177e4 LT |
324 | opt_space->srcrt = fl_opt->srcrt; |
325 | opt_space->opt_nflen = fl_opt->opt_nflen; | |
326 | } else { | |
327 | if (fopt->opt_nflen == 0) | |
328 | return fopt; | |
329 | opt_space->hopopt = NULL; | |
330 | opt_space->dst0opt = NULL; | |
331 | opt_space->srcrt = NULL; | |
332 | opt_space->opt_nflen = 0; | |
333 | } | |
334 | opt_space->dst1opt = fopt->dst1opt; | |
1da177e4 | 335 | opt_space->opt_flen = fopt->opt_flen; |
864e2a1f | 336 | opt_space->tot_len = fopt->tot_len; |
1da177e4 LT |
337 | return opt_space; |
338 | } | |
a495f836 | 339 | EXPORT_SYMBOL_GPL(fl6_merge_options); |
1da177e4 LT |
340 | |
341 | static unsigned long check_linger(unsigned long ttl) | |
342 | { | |
343 | if (ttl < FL_MIN_LINGER) | |
344 | return FL_MIN_LINGER*HZ; | |
345 | if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN)) | |
346 | return 0; | |
347 | return ttl*HZ; | |
348 | } | |
349 | ||
350 | static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires) | |
351 | { | |
352 | linger = check_linger(linger); | |
353 | if (!linger) | |
354 | return -EPERM; | |
355 | expires = check_linger(expires); | |
356 | if (!expires) | |
357 | return -EPERM; | |
394055f6 FF |
358 | |
359 | spin_lock_bh(&ip6_fl_lock); | |
1da177e4 LT |
360 | fl->lastuse = jiffies; |
361 | if (time_before(fl->linger, linger)) | |
362 | fl->linger = linger; | |
363 | if (time_before(expires, fl->linger)) | |
364 | expires = fl->linger; | |
365 | if (time_before(fl->expires, fl->lastuse + expires)) | |
366 | fl->expires = fl->lastuse + expires; | |
394055f6 FF |
367 | spin_unlock_bh(&ip6_fl_lock); |
368 | ||
1da177e4 LT |
369 | return 0; |
370 | } | |
371 | ||
372 | static struct ip6_flowlabel * | |
ec0506db MŻ |
373 | fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, |
374 | char __user *optval, int optlen, int *err_p) | |
1da177e4 | 375 | { |
684de409 | 376 | struct ip6_flowlabel *fl = NULL; |
1da177e4 LT |
377 | int olen; |
378 | int addr_type; | |
379 | int err; | |
380 | ||
684de409 DM |
381 | olen = optlen - CMSG_ALIGN(sizeof(*freq)); |
382 | err = -EINVAL; | |
383 | if (olen > 64 * 1024) | |
384 | goto done; | |
385 | ||
1da177e4 | 386 | err = -ENOMEM; |
0c600eda | 387 | fl = kzalloc(sizeof(*fl), GFP_KERNEL); |
63159f29 | 388 | if (!fl) |
1da177e4 | 389 | goto done; |
1da177e4 | 390 | |
1da177e4 LT |
391 | if (olen > 0) { |
392 | struct msghdr msg; | |
4c9483b2 | 393 | struct flowi6 flowi6; |
26879da5 | 394 | struct ipcm6_cookie ipc6; |
1da177e4 LT |
395 | |
396 | err = -ENOMEM; | |
397 | fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); | |
63159f29 | 398 | if (!fl->opt) |
1da177e4 LT |
399 | goto done; |
400 | ||
401 | memset(fl->opt, 0, sizeof(*fl->opt)); | |
402 | fl->opt->tot_len = sizeof(*fl->opt) + olen; | |
403 | err = -EFAULT; | |
404 | if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen)) | |
405 | goto done; | |
406 | ||
407 | msg.msg_controllen = olen; | |
67ba4152 | 408 | msg.msg_control = (void *)(fl->opt+1); |
4c9483b2 | 409 | memset(&flowi6, 0, sizeof(flowi6)); |
1da177e4 | 410 | |
26879da5 | 411 | ipc6.opt = fl->opt; |
5fdaa88d | 412 | err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6); |
1da177e4 LT |
413 | if (err) |
414 | goto done; | |
415 | err = -EINVAL; | |
416 | if (fl->opt->opt_flen) | |
417 | goto done; | |
418 | if (fl->opt->opt_nflen == 0) { | |
419 | kfree(fl->opt); | |
420 | fl->opt = NULL; | |
421 | } | |
422 | } | |
423 | ||
efd7ef1c | 424 | fl->fl_net = net; |
1da177e4 LT |
425 | fl->expires = jiffies; |
426 | err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); | |
427 | if (err) | |
428 | goto done; | |
429 | fl->share = freq->flr_share; | |
430 | addr_type = ipv6_addr_type(&freq->flr_dst); | |
35700212 JP |
431 | if ((addr_type & IPV6_ADDR_MAPPED) || |
432 | addr_type == IPV6_ADDR_ANY) { | |
c6817e4c | 433 | err = -EINVAL; |
1da177e4 | 434 | goto done; |
c6817e4c | 435 | } |
4e3fd7a0 | 436 | fl->dst = freq->flr_dst; |
1da177e4 LT |
437 | atomic_set(&fl->users, 1); |
438 | switch (fl->share) { | |
439 | case IPV6_FL_S_EXCL: | |
440 | case IPV6_FL_S_ANY: | |
441 | break; | |
442 | case IPV6_FL_S_PROCESS: | |
4f82f457 | 443 | fl->owner.pid = get_task_pid(current, PIDTYPE_PID); |
1da177e4 LT |
444 | break; |
445 | case IPV6_FL_S_USER: | |
4f82f457 | 446 | fl->owner.uid = current_euid(); |
1da177e4 LT |
447 | break; |
448 | default: | |
449 | err = -EINVAL; | |
450 | goto done; | |
451 | } | |
d44e3fa5 ED |
452 | if (fl_shared_exclusive(fl) || fl->opt) |
453 | static_branch_deferred_inc(&ipv6_flowlabel_exclusive); | |
1da177e4 LT |
454 | return fl; |
455 | ||
456 | done: | |
d44e3fa5 ED |
457 | if (fl) { |
458 | kfree(fl->opt); | |
459 | kfree(fl); | |
460 | } | |
1da177e4 LT |
461 | *err_p = err; |
462 | return NULL; | |
463 | } | |
464 | ||
465 | static int mem_check(struct sock *sk) | |
466 | { | |
467 | struct ipv6_pinfo *np = inet6_sk(sk); | |
468 | struct ipv6_fl_socklist *sfl; | |
469 | int room = FL_MAX_SIZE - atomic_read(&fl_size); | |
470 | int count = 0; | |
471 | ||
472 | if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) | |
473 | return 0; | |
474 | ||
f8c31c8f | 475 | rcu_read_lock_bh(); |
18367681 | 476 | for_each_sk_fl_rcu(np, sfl) |
1da177e4 | 477 | count++; |
f8c31c8f | 478 | rcu_read_unlock_bh(); |
1da177e4 LT |
479 | |
480 | if (room <= 0 || | |
481 | ((count >= FL_MAX_PER_SOCK || | |
35700212 JP |
482 | (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && |
483 | !capable(CAP_NET_ADMIN))) | |
1da177e4 LT |
484 | return -ENOBUFS; |
485 | ||
486 | return 0; | |
487 | } | |
488 | ||
04028045 PE |
489 | static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, |
490 | struct ip6_flowlabel *fl) | |
491 | { | |
18367681 | 492 | spin_lock_bh(&ip6_sk_fl_lock); |
04028045 PE |
493 | sfl->fl = fl; |
494 | sfl->next = np->ipv6_fl_list; | |
18367681 YH |
495 | rcu_assign_pointer(np->ipv6_fl_list, sfl); |
496 | spin_unlock_bh(&ip6_sk_fl_lock); | |
04028045 PE |
497 | } |
498 | ||
46e5f401 FF |
499 | int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, |
500 | int flags) | |
3fdfa5ff FF |
501 | { |
502 | struct ipv6_pinfo *np = inet6_sk(sk); | |
503 | struct ipv6_fl_socklist *sfl; | |
504 | ||
46e5f401 FF |
505 | if (flags & IPV6_FL_F_REMOTE) { |
506 | freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; | |
507 | return 0; | |
508 | } | |
509 | ||
df3687ff FF |
510 | if (np->repflow) { |
511 | freq->flr_label = np->flow_label; | |
512 | return 0; | |
513 | } | |
514 | ||
3fdfa5ff FF |
515 | rcu_read_lock_bh(); |
516 | ||
517 | for_each_sk_fl_rcu(np, sfl) { | |
518 | if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { | |
519 | spin_lock_bh(&ip6_fl_lock); | |
520 | freq->flr_label = sfl->fl->label; | |
521 | freq->flr_dst = sfl->fl->dst; | |
522 | freq->flr_share = sfl->fl->share; | |
523 | freq->flr_expires = (sfl->fl->expires - jiffies) / HZ; | |
524 | freq->flr_linger = sfl->fl->linger / HZ; | |
525 | ||
526 | spin_unlock_bh(&ip6_fl_lock); | |
527 | rcu_read_unlock_bh(); | |
528 | return 0; | |
529 | } | |
530 | } | |
531 | rcu_read_unlock_bh(); | |
532 | ||
533 | return -ENOENT; | |
534 | } | |
535 | ||
1da177e4 LT |
536 | int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) |
537 | { | |
55205d40 | 538 | int uninitialized_var(err); |
60e8fbc4 | 539 | struct net *net = sock_net(sk); |
1da177e4 LT |
540 | struct ipv6_pinfo *np = inet6_sk(sk); |
541 | struct in6_flowlabel_req freq; | |
67ba4152 | 542 | struct ipv6_fl_socklist *sfl1 = NULL; |
7f0e44ac ED |
543 | struct ipv6_fl_socklist *sfl; |
544 | struct ipv6_fl_socklist __rcu **sflp; | |
78c2e502 PE |
545 | struct ip6_flowlabel *fl, *fl1 = NULL; |
546 | ||
1da177e4 LT |
547 | |
548 | if (optlen < sizeof(freq)) | |
549 | return -EINVAL; | |
550 | ||
551 | if (copy_from_user(&freq, optval, sizeof(freq))) | |
552 | return -EFAULT; | |
553 | ||
554 | switch (freq.flr_action) { | |
555 | case IPV6_FL_A_PUT: | |
df3687ff FF |
556 | if (freq.flr_flags & IPV6_FL_F_REFLECT) { |
557 | if (sk->sk_protocol != IPPROTO_TCP) | |
558 | return -ENOPROTOOPT; | |
559 | if (!np->repflow) | |
560 | return -ESRCH; | |
561 | np->flow_label = 0; | |
562 | np->repflow = 0; | |
563 | return 0; | |
564 | } | |
18367681 YH |
565 | spin_lock_bh(&ip6_sk_fl_lock); |
566 | for (sflp = &np->ipv6_fl_list; | |
44c3d0c1 ED |
567 | (sfl = rcu_dereference_protected(*sflp, |
568 | lockdep_is_held(&ip6_sk_fl_lock))) != NULL; | |
18367681 | 569 | sflp = &sfl->next) { |
1da177e4 LT |
570 | if (sfl->fl->label == freq.flr_label) { |
571 | if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) | |
572 | np->flow_label &= ~IPV6_FLOWLABEL_MASK; | |
44c3d0c1 | 573 | *sflp = sfl->next; |
18367681 | 574 | spin_unlock_bh(&ip6_sk_fl_lock); |
1da177e4 | 575 | fl_release(sfl->fl); |
18367681 | 576 | kfree_rcu(sfl, rcu); |
1da177e4 LT |
577 | return 0; |
578 | } | |
579 | } | |
18367681 | 580 | spin_unlock_bh(&ip6_sk_fl_lock); |
1da177e4 LT |
581 | return -ESRCH; |
582 | ||
583 | case IPV6_FL_A_RENEW: | |
18367681 YH |
584 | rcu_read_lock_bh(); |
585 | for_each_sk_fl_rcu(np, sfl) { | |
1da177e4 LT |
586 | if (sfl->fl->label == freq.flr_label) { |
587 | err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); | |
18367681 | 588 | rcu_read_unlock_bh(); |
1da177e4 LT |
589 | return err; |
590 | } | |
591 | } | |
18367681 | 592 | rcu_read_unlock_bh(); |
1da177e4 | 593 | |
af31f412 EB |
594 | if (freq.flr_share == IPV6_FL_S_NONE && |
595 | ns_capable(net->user_ns, CAP_NET_ADMIN)) { | |
60e8fbc4 | 596 | fl = fl_lookup(net, freq.flr_label); |
1da177e4 LT |
597 | if (fl) { |
598 | err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); | |
599 | fl_release(fl); | |
600 | return err; | |
601 | } | |
602 | } | |
603 | return -ESRCH; | |
604 | ||
605 | case IPV6_FL_A_GET: | |
df3687ff | 606 | if (freq.flr_flags & IPV6_FL_F_REFLECT) { |
6444f72b FF |
607 | struct net *net = sock_net(sk); |
608 | if (net->ipv6.sysctl.flowlabel_consistency) { | |
609 | net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); | |
610 | return -EPERM; | |
611 | } | |
612 | ||
df3687ff FF |
613 | if (sk->sk_protocol != IPPROTO_TCP) |
614 | return -ENOPROTOOPT; | |
6444f72b | 615 | |
df3687ff FF |
616 | np->repflow = 1; |
617 | return 0; | |
618 | } | |
619 | ||
1da177e4 LT |
620 | if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) |
621 | return -EINVAL; | |
622 | ||
82a584b7 TH |
623 | if (net->ipv6.sysctl.flowlabel_state_ranges && |
624 | (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) | |
625 | return -ERANGE; | |
626 | ||
ec0506db | 627 | fl = fl_create(net, sk, &freq, optval, optlen, &err); |
63159f29 | 628 | if (!fl) |
1da177e4 LT |
629 | return err; |
630 | sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); | |
631 | ||
632 | if (freq.flr_label) { | |
1da177e4 | 633 | err = -EEXIST; |
18367681 YH |
634 | rcu_read_lock_bh(); |
635 | for_each_sk_fl_rcu(np, sfl) { | |
1da177e4 LT |
636 | if (sfl->fl->label == freq.flr_label) { |
637 | if (freq.flr_flags&IPV6_FL_F_EXCL) { | |
18367681 | 638 | rcu_read_unlock_bh(); |
1da177e4 LT |
639 | goto done; |
640 | } | |
641 | fl1 = sfl->fl; | |
65a3c497 ED |
642 | if (!atomic_inc_not_zero(&fl1->users)) |
643 | fl1 = NULL; | |
1da177e4 LT |
644 | break; |
645 | } | |
646 | } | |
18367681 | 647 | rcu_read_unlock_bh(); |
1da177e4 | 648 | |
63159f29 | 649 | if (!fl1) |
60e8fbc4 | 650 | fl1 = fl_lookup(net, freq.flr_label); |
1da177e4 | 651 | if (fl1) { |
78c2e502 | 652 | recheck: |
1da177e4 LT |
653 | err = -EEXIST; |
654 | if (freq.flr_flags&IPV6_FL_F_EXCL) | |
655 | goto release; | |
656 | err = -EPERM; | |
657 | if (fl1->share == IPV6_FL_S_EXCL || | |
658 | fl1->share != fl->share || | |
4f82f457 | 659 | ((fl1->share == IPV6_FL_S_PROCESS) && |
95c16925 | 660 | (fl1->owner.pid != fl->owner.pid)) || |
4f82f457 | 661 | ((fl1->share == IPV6_FL_S_USER) && |
95c16925 | 662 | !uid_eq(fl1->owner.uid, fl->owner.uid))) |
1da177e4 LT |
663 | goto release; |
664 | ||
1da177e4 | 665 | err = -ENOMEM; |
63159f29 | 666 | if (!sfl1) |
1da177e4 LT |
667 | goto release; |
668 | if (fl->linger > fl1->linger) | |
669 | fl1->linger = fl->linger; | |
670 | if ((long)(fl->expires - fl1->expires) > 0) | |
671 | fl1->expires = fl->expires; | |
04028045 | 672 | fl_link(np, sfl1, fl1); |
1da177e4 LT |
673 | fl_free(fl); |
674 | return 0; | |
675 | ||
676 | release: | |
677 | fl_release(fl1); | |
678 | goto done; | |
679 | } | |
680 | } | |
681 | err = -ENOENT; | |
682 | if (!(freq.flr_flags&IPV6_FL_F_CREATE)) | |
683 | goto done; | |
684 | ||
685 | err = -ENOMEM; | |
63159f29 | 686 | if (!sfl1) |
e5d08d71 IM |
687 | goto done; |
688 | ||
689 | err = mem_check(sk); | |
690 | if (err != 0) | |
1da177e4 LT |
691 | goto done; |
692 | ||
60e8fbc4 | 693 | fl1 = fl_intern(net, fl, freq.flr_label); |
53b24b8f | 694 | if (fl1) |
78c2e502 | 695 | goto recheck; |
1da177e4 | 696 | |
6c94d361 DM |
697 | if (!freq.flr_label) { |
698 | if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, | |
699 | &fl->label, sizeof(fl->label))) { | |
700 | /* Intentionally ignore fault. */ | |
701 | } | |
702 | } | |
1da177e4 | 703 | |
04028045 | 704 | fl_link(np, sfl1, fl); |
1da177e4 LT |
705 | return 0; |
706 | ||
707 | default: | |
708 | return -EINVAL; | |
709 | } | |
710 | ||
711 | done: | |
712 | fl_free(fl); | |
713 | kfree(sfl1); | |
714 | return err; | |
715 | } | |
716 | ||
717 | #ifdef CONFIG_PROC_FS | |
718 | ||
719 | struct ip6fl_iter_state { | |
5983a3df | 720 | struct seq_net_private p; |
4f82f457 | 721 | struct pid_namespace *pid_ns; |
1da177e4 LT |
722 | int bucket; |
723 | }; | |
724 | ||
725 | #define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private) | |
726 | ||
727 | static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) | |
728 | { | |
729 | struct ip6_flowlabel *fl = NULL; | |
730 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); | |
5983a3df | 731 | struct net *net = seq_file_net(seq); |
1da177e4 LT |
732 | |
733 | for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { | |
d3aedd5e YH |
734 | for_each_fl_rcu(state->bucket, fl) { |
735 | if (net_eq(fl->fl_net, net)) | |
736 | goto out; | |
737 | } | |
1da177e4 | 738 | } |
d3aedd5e YH |
739 | fl = NULL; |
740 | out: | |
1da177e4 LT |
741 | return fl; |
742 | } | |
743 | ||
744 | static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl) | |
745 | { | |
746 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); | |
5983a3df | 747 | struct net *net = seq_file_net(seq); |
1da177e4 | 748 | |
d3aedd5e YH |
749 | for_each_fl_continue_rcu(fl) { |
750 | if (net_eq(fl->fl_net, net)) | |
751 | goto out; | |
752 | } | |
753 | ||
5983a3df | 754 | try_again: |
d3aedd5e YH |
755 | if (++state->bucket <= FL_HASH_MASK) { |
756 | for_each_fl_rcu(state->bucket, fl) { | |
757 | if (net_eq(fl->fl_net, net)) | |
758 | goto out; | |
759 | } | |
760 | goto try_again; | |
1da177e4 | 761 | } |
d3aedd5e YH |
762 | fl = NULL; |
763 | ||
764 | out: | |
1da177e4 LT |
765 | return fl; |
766 | } | |
767 | ||
768 | static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) | |
769 | { | |
770 | struct ip6_flowlabel *fl = ip6fl_get_first(seq); | |
771 | if (fl) | |
772 | while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL) | |
773 | --pos; | |
774 | return pos ? NULL : fl; | |
775 | } | |
776 | ||
777 | static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) | |
d3aedd5e | 778 | __acquires(RCU) |
1da177e4 | 779 | { |
ad08978a CH |
780 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
781 | ||
782 | state->pid_ns = proc_pid_ns(file_inode(seq->file)); | |
783 | ||
d3aedd5e | 784 | rcu_read_lock_bh(); |
1da177e4 LT |
785 | return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
786 | } | |
787 | ||
788 | static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |
789 | { | |
790 | struct ip6_flowlabel *fl; | |
791 | ||
792 | if (v == SEQ_START_TOKEN) | |
793 | fl = ip6fl_get_first(seq); | |
794 | else | |
795 | fl = ip6fl_get_next(seq, v); | |
796 | ++*pos; | |
797 | return fl; | |
798 | } | |
799 | ||
800 | static void ip6fl_seq_stop(struct seq_file *seq, void *v) | |
d3aedd5e | 801 | __releases(RCU) |
1da177e4 | 802 | { |
d3aedd5e | 803 | rcu_read_unlock_bh(); |
1da177e4 LT |
804 | } |
805 | ||
1b7c2dbc | 806 | static int ip6fl_seq_show(struct seq_file *seq, void *v) |
1da177e4 | 807 | { |
4f82f457 | 808 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
869ba988 | 809 | if (v == SEQ_START_TOKEN) { |
1744bea1 | 810 | seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n"); |
869ba988 | 811 | } else { |
1b7c2dbc | 812 | struct ip6_flowlabel *fl = v; |
1da177e4 | 813 | seq_printf(seq, |
4b7a4274 | 814 | "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", |
95c96174 | 815 | (unsigned int)ntohl(fl->label), |
1da177e4 | 816 | fl->share, |
4f82f457 EB |
817 | ((fl->share == IPV6_FL_S_PROCESS) ? |
818 | pid_nr_ns(fl->owner.pid, state->pid_ns) : | |
819 | ((fl->share == IPV6_FL_S_USER) ? | |
820 | from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : | |
821 | 0)), | |
1da177e4 LT |
822 | atomic_read(&fl->users), |
823 | fl->linger/HZ, | |
824 | (long)(fl->expires - jiffies)/HZ, | |
b071195d | 825 | &fl->dst, |
1da177e4 | 826 | fl->opt ? fl->opt->opt_nflen : 0); |
1da177e4 | 827 | } |
1da177e4 LT |
828 | return 0; |
829 | } | |
830 | ||
56b3d975 | 831 | static const struct seq_operations ip6fl_seq_ops = { |
1da177e4 LT |
832 | .start = ip6fl_seq_start, |
833 | .next = ip6fl_seq_next, | |
834 | .stop = ip6fl_seq_stop, | |
835 | .show = ip6fl_seq_show, | |
836 | }; | |
837 | ||
2c8c1e72 | 838 | static int __net_init ip6_flowlabel_proc_init(struct net *net) |
0a3e78ac | 839 | { |
c3506372 CH |
840 | if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net, |
841 | &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state))) | |
0a3e78ac DL |
842 | return -ENOMEM; |
843 | return 0; | |
844 | } | |
1da177e4 | 845 | |
2c8c1e72 | 846 | static void __net_exit ip6_flowlabel_proc_fini(struct net *net) |
1da177e4 | 847 | { |
ece31ffd | 848 | remove_proc_entry("ip6_flowlabel", net->proc_net); |
0a3e78ac DL |
849 | } |
850 | #else | |
/* Without CONFIG_PROC_FS there is nothing to create; always succeeds. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
1da177e4 | 858 | #endif |
0a3e78ac | 859 | |
2c8c1e72 | 860 | static void __net_exit ip6_flowlabel_net_exit(struct net *net) |
60e8fbc4 BT |
861 | { |
862 | ip6_fl_purge(net); | |
5983a3df | 863 | ip6_flowlabel_proc_fini(net); |
60e8fbc4 BT |
864 | } |
865 | ||
866 | static struct pernet_operations ip6_flowlabel_net_ops = { | |
5983a3df | 867 | .init = ip6_flowlabel_proc_init, |
60e8fbc4 BT |
868 | .exit = ip6_flowlabel_net_exit, |
869 | }; | |
870 | ||
0a3e78ac DL |
871 | int ip6_flowlabel_init(void) |
872 | { | |
5983a3df | 873 | return register_pernet_subsys(&ip6_flowlabel_net_ops); |
1da177e4 LT |
874 | } |
875 | ||
876 | void ip6_flowlabel_cleanup(void) | |
877 | { | |
59c820b2 | 878 | static_key_deferred_flush(&ipv6_flowlabel_exclusive); |
1da177e4 | 879 | del_timer(&ip6_fl_gc_timer); |
60e8fbc4 | 880 | unregister_pernet_subsys(&ip6_flowlabel_net_ops); |
1da177e4 | 881 | } |