Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
1da177e4 LT |
2 | /* |
3 | * ip6_flowlabel.c IPv6 flowlabel manager. | |
4 | * | |
1da177e4 LT |
5 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
6 | */ | |
7 | ||
4fc268d2 | 8 | #include <linux/capability.h> |
1da177e4 LT |
9 | #include <linux/errno.h> |
10 | #include <linux/types.h> | |
11 | #include <linux/socket.h> | |
12 | #include <linux/net.h> | |
13 | #include <linux/netdevice.h> | |
1da177e4 | 14 | #include <linux/in6.h> |
1da177e4 LT |
15 | #include <linux/proc_fs.h> |
16 | #include <linux/seq_file.h> | |
5a0e3ad6 | 17 | #include <linux/slab.h> |
bc3b2d7f | 18 | #include <linux/export.h> |
4f82f457 | 19 | #include <linux/pid_namespace.h> |
1da177e4 | 20 | |
457c4cbc | 21 | #include <net/net_namespace.h> |
1da177e4 LT |
22 | #include <net/sock.h> |
23 | ||
24 | #include <net/ipv6.h> | |
1da177e4 | 25 | #include <net/rawv6.h> |
1da177e4 LT |
26 | #include <net/transp_v6.h> |
27 | ||
7c0f6ba6 | 28 | #include <linux/uaccess.h> |
1da177e4 LT |
29 | |
30 | #define FL_MIN_LINGER 6 /* Minimal linger. It is set to 6sec specified | |
31 | in old IPv6 RFC. Well, it was reasonable value. | |
32 | */ | |
53b47106 | 33 | #define FL_MAX_LINGER 150 /* Maximal linger timeout */ |
1da177e4 LT |
34 | |
35 | /* FL hash table */ | |
36 | ||
37 | #define FL_MAX_PER_SOCK 32 | |
38 | #define FL_MAX_SIZE 4096 | |
39 | #define FL_HASH_MASK 255 | |
40 | #define FL_HASH(l) (ntohl(l)&FL_HASH_MASK) | |
41 | ||
42 | static atomic_t fl_size = ATOMIC_INIT(0); | |
d3aedd5e | 43 | static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; |
1da177e4 | 44 | |
24ed960a | 45 | static void ip6_fl_gc(struct timer_list *unused); |
1d27e3e2 | 46 | static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); |
1da177e4 LT |
47 | |
48 | /* FL hash table lock: it only protects against the GC */ | |
49 | ||
d3aedd5e | 50 | static DEFINE_SPINLOCK(ip6_fl_lock); |
1da177e4 LT |
51 | |
52 | /* Big socket lock: one global lock for every socket's flowlabel list */ | |
53 | ||
18367681 | 54 | static DEFINE_SPINLOCK(ip6_sk_fl_lock); |
1da177e4 | 55 | |
/*
 * Iterate over every flow label in hash bucket @hash.  Each link is read
 * with rcu_dereference_bh().  Macro arguments are parenthesized so that
 * arbitrary expressions can be passed safely.
 */
#define for_each_fl_rcu(hash, fl)				\
	for ((fl) = rcu_dereference_bh(fl_ht[(hash)]);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

/* Resume a bucket walk at the entry that follows @fl. */
#define for_each_fl_continue_rcu(fl)				\
	for ((fl) = rcu_dereference_bh((fl)->next);		\
	     (fl) != NULL;					\
	     (fl) = rcu_dereference_bh((fl)->next))

/* Iterate over the flow label list hanging off the ipv6_pinfo @np. */
#define for_each_sk_fl_rcu(np, sfl)				\
	for ((sfl) = rcu_dereference_bh((np)->ipv6_fl_list);	\
	     (sfl) != NULL;					\
	     (sfl) = rcu_dereference_bh((sfl)->next))
69 | ||
60e8fbc4 | 70 | static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label) |
1da177e4 LT |
71 | { |
72 | struct ip6_flowlabel *fl; | |
73 | ||
d3aedd5e | 74 | for_each_fl_rcu(FL_HASH(label), fl) { |
09ad9bc7 | 75 | if (fl->label == label && net_eq(fl->fl_net, net)) |
1da177e4 LT |
76 | return fl; |
77 | } | |
78 | return NULL; | |
79 | } | |
80 | ||
60e8fbc4 | 81 | static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) |
1da177e4 LT |
82 | { |
83 | struct ip6_flowlabel *fl; | |
84 | ||
d3aedd5e | 85 | rcu_read_lock_bh(); |
60e8fbc4 | 86 | fl = __fl_lookup(net, label); |
d3aedd5e YH |
87 | if (fl && !atomic_inc_not_zero(&fl->users)) |
88 | fl = NULL; | |
89 | rcu_read_unlock_bh(); | |
1da177e4 LT |
90 | return fl; |
91 | } | |
92 | ||
6c0afef5 ED |
93 | static void fl_free_rcu(struct rcu_head *head) |
94 | { | |
95 | struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu); | |
96 | ||
97 | if (fl->share == IPV6_FL_S_PROCESS) | |
98 | put_pid(fl->owner.pid); | |
99 | kfree(fl->opt); | |
100 | kfree(fl); | |
101 | } | |
102 | ||
1da177e4 LT |
103 | |
104 | static void fl_free(struct ip6_flowlabel *fl) | |
105 | { | |
6c0afef5 ED |
106 | if (fl) |
107 | call_rcu(&fl->rcu, fl_free_rcu); | |
1da177e4 LT |
108 | } |
109 | ||
110 | static void fl_release(struct ip6_flowlabel *fl) | |
111 | { | |
d3aedd5e | 112 | spin_lock_bh(&ip6_fl_lock); |
1da177e4 LT |
113 | |
114 | fl->lastuse = jiffies; | |
115 | if (atomic_dec_and_test(&fl->users)) { | |
116 | unsigned long ttd = fl->lastuse + fl->linger; | |
117 | if (time_after(ttd, fl->expires)) | |
118 | fl->expires = ttd; | |
119 | ttd = fl->expires; | |
120 | if (fl->opt && fl->share == IPV6_FL_S_EXCL) { | |
121 | struct ipv6_txoptions *opt = fl->opt; | |
122 | fl->opt = NULL; | |
123 | kfree(opt); | |
124 | } | |
125 | if (!timer_pending(&ip6_fl_gc_timer) || | |
126 | time_after(ip6_fl_gc_timer.expires, ttd)) | |
127 | mod_timer(&ip6_fl_gc_timer, ttd); | |
128 | } | |
d3aedd5e | 129 | spin_unlock_bh(&ip6_fl_lock); |
1da177e4 LT |
130 | } |
131 | ||
24ed960a | 132 | static void ip6_fl_gc(struct timer_list *unused) |
1da177e4 LT |
133 | { |
134 | int i; | |
135 | unsigned long now = jiffies; | |
136 | unsigned long sched = 0; | |
137 | ||
d3aedd5e | 138 | spin_lock(&ip6_fl_lock); |
1da177e4 | 139 | |
67ba4152 | 140 | for (i = 0; i <= FL_HASH_MASK; i++) { |
7f0e44ac ED |
141 | struct ip6_flowlabel *fl; |
142 | struct ip6_flowlabel __rcu **flp; | |
143 | ||
1da177e4 | 144 | flp = &fl_ht[i]; |
d3aedd5e YH |
145 | while ((fl = rcu_dereference_protected(*flp, |
146 | lockdep_is_held(&ip6_fl_lock))) != NULL) { | |
1da177e4 LT |
147 | if (atomic_read(&fl->users) == 0) { |
148 | unsigned long ttd = fl->lastuse + fl->linger; | |
149 | if (time_after(ttd, fl->expires)) | |
150 | fl->expires = ttd; | |
151 | ttd = fl->expires; | |
152 | if (time_after_eq(now, ttd)) { | |
153 | *flp = fl->next; | |
154 | fl_free(fl); | |
155 | atomic_dec(&fl_size); | |
156 | continue; | |
157 | } | |
158 | if (!sched || time_before(ttd, sched)) | |
159 | sched = ttd; | |
160 | } | |
161 | flp = &fl->next; | |
162 | } | |
163 | } | |
164 | if (!sched && atomic_read(&fl_size)) | |
165 | sched = now + FL_MAX_LINGER; | |
166 | if (sched) { | |
60e8fbc4 | 167 | mod_timer(&ip6_fl_gc_timer, sched); |
1da177e4 | 168 | } |
d3aedd5e | 169 | spin_unlock(&ip6_fl_lock); |
1da177e4 LT |
170 | } |
171 | ||
2c8c1e72 | 172 | static void __net_exit ip6_fl_purge(struct net *net) |
60e8fbc4 BT |
173 | { |
174 | int i; | |
175 | ||
4762fb98 | 176 | spin_lock_bh(&ip6_fl_lock); |
60e8fbc4 | 177 | for (i = 0; i <= FL_HASH_MASK; i++) { |
7f0e44ac ED |
178 | struct ip6_flowlabel *fl; |
179 | struct ip6_flowlabel __rcu **flp; | |
180 | ||
60e8fbc4 | 181 | flp = &fl_ht[i]; |
d3aedd5e YH |
182 | while ((fl = rcu_dereference_protected(*flp, |
183 | lockdep_is_held(&ip6_fl_lock))) != NULL) { | |
09ad9bc7 OP |
184 | if (net_eq(fl->fl_net, net) && |
185 | atomic_read(&fl->users) == 0) { | |
60e8fbc4 BT |
186 | *flp = fl->next; |
187 | fl_free(fl); | |
188 | atomic_dec(&fl_size); | |
189 | continue; | |
190 | } | |
191 | flp = &fl->next; | |
192 | } | |
193 | } | |
4762fb98 | 194 | spin_unlock_bh(&ip6_fl_lock); |
60e8fbc4 BT |
195 | } |
196 | ||
197 | static struct ip6_flowlabel *fl_intern(struct net *net, | |
198 | struct ip6_flowlabel *fl, __be32 label) | |
1da177e4 | 199 | { |
78c2e502 PE |
200 | struct ip6_flowlabel *lfl; |
201 | ||
1da177e4 LT |
202 | fl->label = label & IPV6_FLOWLABEL_MASK; |
203 | ||
d3aedd5e | 204 | spin_lock_bh(&ip6_fl_lock); |
1da177e4 LT |
205 | if (label == 0) { |
206 | for (;;) { | |
63862b5b | 207 | fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; |
1da177e4 | 208 | if (fl->label) { |
60e8fbc4 | 209 | lfl = __fl_lookup(net, fl->label); |
63159f29 | 210 | if (!lfl) |
1da177e4 LT |
211 | break; |
212 | } | |
213 | } | |
78c2e502 PE |
214 | } else { |
215 | /* | |
216 | * we dropper the ip6_fl_lock, so this entry could reappear | |
217 | * and we need to recheck with it. | |
218 | * | |
219 | * OTOH no need to search the active socket first, like it is | |
220 | * done in ipv6_flowlabel_opt - sock is locked, so new entry | |
221 | * with the same label can only appear on another sock | |
222 | */ | |
60e8fbc4 | 223 | lfl = __fl_lookup(net, fl->label); |
53b24b8f | 224 | if (lfl) { |
78c2e502 | 225 | atomic_inc(&lfl->users); |
d3aedd5e | 226 | spin_unlock_bh(&ip6_fl_lock); |
78c2e502 PE |
227 | return lfl; |
228 | } | |
1da177e4 LT |
229 | } |
230 | ||
231 | fl->lastuse = jiffies; | |
232 | fl->next = fl_ht[FL_HASH(fl->label)]; | |
d3aedd5e | 233 | rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl); |
1da177e4 | 234 | atomic_inc(&fl_size); |
d3aedd5e | 235 | spin_unlock_bh(&ip6_fl_lock); |
78c2e502 | 236 | return NULL; |
1da177e4 LT |
237 | } |
238 | ||
239 | ||
240 | ||
241 | /* Socket flowlabel lists */ | |
242 | ||
67ba4152 | 243 | struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) |
1da177e4 LT |
244 | { |
245 | struct ipv6_fl_socklist *sfl; | |
246 | struct ipv6_pinfo *np = inet6_sk(sk); | |
247 | ||
248 | label &= IPV6_FLOWLABEL_MASK; | |
249 | ||
18367681 YH |
250 | rcu_read_lock_bh(); |
251 | for_each_sk_fl_rcu(np, sfl) { | |
1da177e4 LT |
252 | struct ip6_flowlabel *fl = sfl->fl; |
253 | if (fl->label == label) { | |
254 | fl->lastuse = jiffies; | |
255 | atomic_inc(&fl->users); | |
18367681 | 256 | rcu_read_unlock_bh(); |
1da177e4 LT |
257 | return fl; |
258 | } | |
259 | } | |
18367681 | 260 | rcu_read_unlock_bh(); |
1da177e4 LT |
261 | return NULL; |
262 | } | |
3cf3dc6c ACM |
263 | EXPORT_SYMBOL_GPL(fl6_sock_lookup); |
264 | ||
1da177e4 LT |
265 | void fl6_free_socklist(struct sock *sk) |
266 | { | |
267 | struct ipv6_pinfo *np = inet6_sk(sk); | |
268 | struct ipv6_fl_socklist *sfl; | |
269 | ||
18367681 | 270 | if (!rcu_access_pointer(np->ipv6_fl_list)) |
f256dc59 YH |
271 | return; |
272 | ||
18367681 YH |
273 | spin_lock_bh(&ip6_sk_fl_lock); |
274 | while ((sfl = rcu_dereference_protected(np->ipv6_fl_list, | |
275 | lockdep_is_held(&ip6_sk_fl_lock))) != NULL) { | |
276 | np->ipv6_fl_list = sfl->next; | |
277 | spin_unlock_bh(&ip6_sk_fl_lock); | |
f256dc59 | 278 | |
1da177e4 | 279 | fl_release(sfl->fl); |
18367681 YH |
280 | kfree_rcu(sfl, rcu); |
281 | ||
282 | spin_lock_bh(&ip6_sk_fl_lock); | |
1da177e4 | 283 | } |
18367681 | 284 | spin_unlock_bh(&ip6_sk_fl_lock); |
1da177e4 LT |
285 | } |
286 | ||
287 | /* Service routines */ | |
288 | ||
289 | ||
290 | /* | |
291 | It is the only difficult place. flowlabel enforces equal headers | |
292 | before and including routing header, however user may supply options | |
293 | following rthdr. | |
294 | */ | |
295 | ||
67ba4152 IM |
296 | struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, |
297 | struct ip6_flowlabel *fl, | |
298 | struct ipv6_txoptions *fopt) | |
1da177e4 | 299 | { |
67ba4152 | 300 | struct ipv6_txoptions *fl_opt = fl->opt; |
1ab1457c | 301 | |
63159f29 | 302 | if (!fopt || fopt->opt_flen == 0) |
df9890c3 | 303 | return fl_opt; |
1ab1457c | 304 | |
53b24b8f | 305 | if (fl_opt) { |
1da177e4 | 306 | opt_space->hopopt = fl_opt->hopopt; |
df9890c3 | 307 | opt_space->dst0opt = fl_opt->dst0opt; |
1da177e4 LT |
308 | opt_space->srcrt = fl_opt->srcrt; |
309 | opt_space->opt_nflen = fl_opt->opt_nflen; | |
310 | } else { | |
311 | if (fopt->opt_nflen == 0) | |
312 | return fopt; | |
313 | opt_space->hopopt = NULL; | |
314 | opt_space->dst0opt = NULL; | |
315 | opt_space->srcrt = NULL; | |
316 | opt_space->opt_nflen = 0; | |
317 | } | |
318 | opt_space->dst1opt = fopt->dst1opt; | |
1da177e4 | 319 | opt_space->opt_flen = fopt->opt_flen; |
864e2a1f | 320 | opt_space->tot_len = fopt->tot_len; |
1da177e4 LT |
321 | return opt_space; |
322 | } | |
a495f836 | 323 | EXPORT_SYMBOL_GPL(fl6_merge_options); |
1da177e4 LT |
324 | |
325 | static unsigned long check_linger(unsigned long ttl) | |
326 | { | |
327 | if (ttl < FL_MIN_LINGER) | |
328 | return FL_MIN_LINGER*HZ; | |
329 | if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN)) | |
330 | return 0; | |
331 | return ttl*HZ; | |
332 | } | |
333 | ||
334 | static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires) | |
335 | { | |
336 | linger = check_linger(linger); | |
337 | if (!linger) | |
338 | return -EPERM; | |
339 | expires = check_linger(expires); | |
340 | if (!expires) | |
341 | return -EPERM; | |
394055f6 FF |
342 | |
343 | spin_lock_bh(&ip6_fl_lock); | |
1da177e4 LT |
344 | fl->lastuse = jiffies; |
345 | if (time_before(fl->linger, linger)) | |
346 | fl->linger = linger; | |
347 | if (time_before(expires, fl->linger)) | |
348 | expires = fl->linger; | |
349 | if (time_before(fl->expires, fl->lastuse + expires)) | |
350 | fl->expires = fl->lastuse + expires; | |
394055f6 FF |
351 | spin_unlock_bh(&ip6_fl_lock); |
352 | ||
1da177e4 LT |
353 | return 0; |
354 | } | |
355 | ||
356 | static struct ip6_flowlabel * | |
ec0506db MŻ |
357 | fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, |
358 | char __user *optval, int optlen, int *err_p) | |
1da177e4 | 359 | { |
684de409 | 360 | struct ip6_flowlabel *fl = NULL; |
1da177e4 LT |
361 | int olen; |
362 | int addr_type; | |
363 | int err; | |
364 | ||
684de409 DM |
365 | olen = optlen - CMSG_ALIGN(sizeof(*freq)); |
366 | err = -EINVAL; | |
367 | if (olen > 64 * 1024) | |
368 | goto done; | |
369 | ||
1da177e4 | 370 | err = -ENOMEM; |
0c600eda | 371 | fl = kzalloc(sizeof(*fl), GFP_KERNEL); |
63159f29 | 372 | if (!fl) |
1da177e4 | 373 | goto done; |
1da177e4 | 374 | |
1da177e4 LT |
375 | if (olen > 0) { |
376 | struct msghdr msg; | |
4c9483b2 | 377 | struct flowi6 flowi6; |
26879da5 | 378 | struct ipcm6_cookie ipc6; |
1da177e4 LT |
379 | |
380 | err = -ENOMEM; | |
381 | fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); | |
63159f29 | 382 | if (!fl->opt) |
1da177e4 LT |
383 | goto done; |
384 | ||
385 | memset(fl->opt, 0, sizeof(*fl->opt)); | |
386 | fl->opt->tot_len = sizeof(*fl->opt) + olen; | |
387 | err = -EFAULT; | |
388 | if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen)) | |
389 | goto done; | |
390 | ||
391 | msg.msg_controllen = olen; | |
67ba4152 | 392 | msg.msg_control = (void *)(fl->opt+1); |
4c9483b2 | 393 | memset(&flowi6, 0, sizeof(flowi6)); |
1da177e4 | 394 | |
26879da5 | 395 | ipc6.opt = fl->opt; |
5fdaa88d | 396 | err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6); |
1da177e4 LT |
397 | if (err) |
398 | goto done; | |
399 | err = -EINVAL; | |
400 | if (fl->opt->opt_flen) | |
401 | goto done; | |
402 | if (fl->opt->opt_nflen == 0) { | |
403 | kfree(fl->opt); | |
404 | fl->opt = NULL; | |
405 | } | |
406 | } | |
407 | ||
efd7ef1c | 408 | fl->fl_net = net; |
1da177e4 LT |
409 | fl->expires = jiffies; |
410 | err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); | |
411 | if (err) | |
412 | goto done; | |
413 | fl->share = freq->flr_share; | |
414 | addr_type = ipv6_addr_type(&freq->flr_dst); | |
35700212 JP |
415 | if ((addr_type & IPV6_ADDR_MAPPED) || |
416 | addr_type == IPV6_ADDR_ANY) { | |
c6817e4c | 417 | err = -EINVAL; |
1da177e4 | 418 | goto done; |
c6817e4c | 419 | } |
4e3fd7a0 | 420 | fl->dst = freq->flr_dst; |
1da177e4 LT |
421 | atomic_set(&fl->users, 1); |
422 | switch (fl->share) { | |
423 | case IPV6_FL_S_EXCL: | |
424 | case IPV6_FL_S_ANY: | |
425 | break; | |
426 | case IPV6_FL_S_PROCESS: | |
4f82f457 | 427 | fl->owner.pid = get_task_pid(current, PIDTYPE_PID); |
1da177e4 LT |
428 | break; |
429 | case IPV6_FL_S_USER: | |
4f82f457 | 430 | fl->owner.uid = current_euid(); |
1da177e4 LT |
431 | break; |
432 | default: | |
433 | err = -EINVAL; | |
434 | goto done; | |
435 | } | |
436 | return fl; | |
437 | ||
438 | done: | |
439 | fl_free(fl); | |
440 | *err_p = err; | |
441 | return NULL; | |
442 | } | |
443 | ||
444 | static int mem_check(struct sock *sk) | |
445 | { | |
446 | struct ipv6_pinfo *np = inet6_sk(sk); | |
447 | struct ipv6_fl_socklist *sfl; | |
448 | int room = FL_MAX_SIZE - atomic_read(&fl_size); | |
449 | int count = 0; | |
450 | ||
451 | if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) | |
452 | return 0; | |
453 | ||
f8c31c8f | 454 | rcu_read_lock_bh(); |
18367681 | 455 | for_each_sk_fl_rcu(np, sfl) |
1da177e4 | 456 | count++; |
f8c31c8f | 457 | rcu_read_unlock_bh(); |
1da177e4 LT |
458 | |
459 | if (room <= 0 || | |
460 | ((count >= FL_MAX_PER_SOCK || | |
35700212 JP |
461 | (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) && |
462 | !capable(CAP_NET_ADMIN))) | |
1da177e4 LT |
463 | return -ENOBUFS; |
464 | ||
465 | return 0; | |
466 | } | |
467 | ||
04028045 PE |
468 | static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, |
469 | struct ip6_flowlabel *fl) | |
470 | { | |
18367681 | 471 | spin_lock_bh(&ip6_sk_fl_lock); |
04028045 PE |
472 | sfl->fl = fl; |
473 | sfl->next = np->ipv6_fl_list; | |
18367681 YH |
474 | rcu_assign_pointer(np->ipv6_fl_list, sfl); |
475 | spin_unlock_bh(&ip6_sk_fl_lock); | |
04028045 PE |
476 | } |
477 | ||
46e5f401 FF |
478 | int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, |
479 | int flags) | |
3fdfa5ff FF |
480 | { |
481 | struct ipv6_pinfo *np = inet6_sk(sk); | |
482 | struct ipv6_fl_socklist *sfl; | |
483 | ||
46e5f401 FF |
484 | if (flags & IPV6_FL_F_REMOTE) { |
485 | freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; | |
486 | return 0; | |
487 | } | |
488 | ||
df3687ff FF |
489 | if (np->repflow) { |
490 | freq->flr_label = np->flow_label; | |
491 | return 0; | |
492 | } | |
493 | ||
3fdfa5ff FF |
494 | rcu_read_lock_bh(); |
495 | ||
496 | for_each_sk_fl_rcu(np, sfl) { | |
497 | if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { | |
498 | spin_lock_bh(&ip6_fl_lock); | |
499 | freq->flr_label = sfl->fl->label; | |
500 | freq->flr_dst = sfl->fl->dst; | |
501 | freq->flr_share = sfl->fl->share; | |
502 | freq->flr_expires = (sfl->fl->expires - jiffies) / HZ; | |
503 | freq->flr_linger = sfl->fl->linger / HZ; | |
504 | ||
505 | spin_unlock_bh(&ip6_fl_lock); | |
506 | rcu_read_unlock_bh(); | |
507 | return 0; | |
508 | } | |
509 | } | |
510 | rcu_read_unlock_bh(); | |
511 | ||
512 | return -ENOENT; | |
513 | } | |
514 | ||
1da177e4 LT |
515 | int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) |
516 | { | |
55205d40 | 517 | int uninitialized_var(err); |
60e8fbc4 | 518 | struct net *net = sock_net(sk); |
1da177e4 LT |
519 | struct ipv6_pinfo *np = inet6_sk(sk); |
520 | struct in6_flowlabel_req freq; | |
67ba4152 | 521 | struct ipv6_fl_socklist *sfl1 = NULL; |
7f0e44ac ED |
522 | struct ipv6_fl_socklist *sfl; |
523 | struct ipv6_fl_socklist __rcu **sflp; | |
78c2e502 PE |
524 | struct ip6_flowlabel *fl, *fl1 = NULL; |
525 | ||
1da177e4 LT |
526 | |
527 | if (optlen < sizeof(freq)) | |
528 | return -EINVAL; | |
529 | ||
530 | if (copy_from_user(&freq, optval, sizeof(freq))) | |
531 | return -EFAULT; | |
532 | ||
533 | switch (freq.flr_action) { | |
534 | case IPV6_FL_A_PUT: | |
df3687ff FF |
535 | if (freq.flr_flags & IPV6_FL_F_REFLECT) { |
536 | if (sk->sk_protocol != IPPROTO_TCP) | |
537 | return -ENOPROTOOPT; | |
538 | if (!np->repflow) | |
539 | return -ESRCH; | |
540 | np->flow_label = 0; | |
541 | np->repflow = 0; | |
542 | return 0; | |
543 | } | |
18367681 YH |
544 | spin_lock_bh(&ip6_sk_fl_lock); |
545 | for (sflp = &np->ipv6_fl_list; | |
44c3d0c1 ED |
546 | (sfl = rcu_dereference_protected(*sflp, |
547 | lockdep_is_held(&ip6_sk_fl_lock))) != NULL; | |
18367681 | 548 | sflp = &sfl->next) { |
1da177e4 LT |
549 | if (sfl->fl->label == freq.flr_label) { |
550 | if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK)) | |
551 | np->flow_label &= ~IPV6_FLOWLABEL_MASK; | |
44c3d0c1 | 552 | *sflp = sfl->next; |
18367681 | 553 | spin_unlock_bh(&ip6_sk_fl_lock); |
1da177e4 | 554 | fl_release(sfl->fl); |
18367681 | 555 | kfree_rcu(sfl, rcu); |
1da177e4 LT |
556 | return 0; |
557 | } | |
558 | } | |
18367681 | 559 | spin_unlock_bh(&ip6_sk_fl_lock); |
1da177e4 LT |
560 | return -ESRCH; |
561 | ||
562 | case IPV6_FL_A_RENEW: | |
18367681 YH |
563 | rcu_read_lock_bh(); |
564 | for_each_sk_fl_rcu(np, sfl) { | |
1da177e4 LT |
565 | if (sfl->fl->label == freq.flr_label) { |
566 | err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires); | |
18367681 | 567 | rcu_read_unlock_bh(); |
1da177e4 LT |
568 | return err; |
569 | } | |
570 | } | |
18367681 | 571 | rcu_read_unlock_bh(); |
1da177e4 | 572 | |
af31f412 EB |
573 | if (freq.flr_share == IPV6_FL_S_NONE && |
574 | ns_capable(net->user_ns, CAP_NET_ADMIN)) { | |
60e8fbc4 | 575 | fl = fl_lookup(net, freq.flr_label); |
1da177e4 LT |
576 | if (fl) { |
577 | err = fl6_renew(fl, freq.flr_linger, freq.flr_expires); | |
578 | fl_release(fl); | |
579 | return err; | |
580 | } | |
581 | } | |
582 | return -ESRCH; | |
583 | ||
584 | case IPV6_FL_A_GET: | |
df3687ff | 585 | if (freq.flr_flags & IPV6_FL_F_REFLECT) { |
6444f72b FF |
586 | struct net *net = sock_net(sk); |
587 | if (net->ipv6.sysctl.flowlabel_consistency) { | |
588 | net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); | |
589 | return -EPERM; | |
590 | } | |
591 | ||
df3687ff FF |
592 | if (sk->sk_protocol != IPPROTO_TCP) |
593 | return -ENOPROTOOPT; | |
6444f72b | 594 | |
df3687ff FF |
595 | np->repflow = 1; |
596 | return 0; | |
597 | } | |
598 | ||
1da177e4 LT |
599 | if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) |
600 | return -EINVAL; | |
601 | ||
82a584b7 TH |
602 | if (net->ipv6.sysctl.flowlabel_state_ranges && |
603 | (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) | |
604 | return -ERANGE; | |
605 | ||
ec0506db | 606 | fl = fl_create(net, sk, &freq, optval, optlen, &err); |
63159f29 | 607 | if (!fl) |
1da177e4 LT |
608 | return err; |
609 | sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); | |
610 | ||
611 | if (freq.flr_label) { | |
1da177e4 | 612 | err = -EEXIST; |
18367681 YH |
613 | rcu_read_lock_bh(); |
614 | for_each_sk_fl_rcu(np, sfl) { | |
1da177e4 LT |
615 | if (sfl->fl->label == freq.flr_label) { |
616 | if (freq.flr_flags&IPV6_FL_F_EXCL) { | |
18367681 | 617 | rcu_read_unlock_bh(); |
1da177e4 LT |
618 | goto done; |
619 | } | |
620 | fl1 = sfl->fl; | |
4ea6a804 | 621 | atomic_inc(&fl1->users); |
1da177e4 LT |
622 | break; |
623 | } | |
624 | } | |
18367681 | 625 | rcu_read_unlock_bh(); |
1da177e4 | 626 | |
63159f29 | 627 | if (!fl1) |
60e8fbc4 | 628 | fl1 = fl_lookup(net, freq.flr_label); |
1da177e4 | 629 | if (fl1) { |
78c2e502 | 630 | recheck: |
1da177e4 LT |
631 | err = -EEXIST; |
632 | if (freq.flr_flags&IPV6_FL_F_EXCL) | |
633 | goto release; | |
634 | err = -EPERM; | |
635 | if (fl1->share == IPV6_FL_S_EXCL || | |
636 | fl1->share != fl->share || | |
4f82f457 | 637 | ((fl1->share == IPV6_FL_S_PROCESS) && |
95c16925 | 638 | (fl1->owner.pid != fl->owner.pid)) || |
4f82f457 | 639 | ((fl1->share == IPV6_FL_S_USER) && |
95c16925 | 640 | !uid_eq(fl1->owner.uid, fl->owner.uid))) |
1da177e4 LT |
641 | goto release; |
642 | ||
1da177e4 | 643 | err = -ENOMEM; |
63159f29 | 644 | if (!sfl1) |
1da177e4 LT |
645 | goto release; |
646 | if (fl->linger > fl1->linger) | |
647 | fl1->linger = fl->linger; | |
648 | if ((long)(fl->expires - fl1->expires) > 0) | |
649 | fl1->expires = fl->expires; | |
04028045 | 650 | fl_link(np, sfl1, fl1); |
1da177e4 LT |
651 | fl_free(fl); |
652 | return 0; | |
653 | ||
654 | release: | |
655 | fl_release(fl1); | |
656 | goto done; | |
657 | } | |
658 | } | |
659 | err = -ENOENT; | |
660 | if (!(freq.flr_flags&IPV6_FL_F_CREATE)) | |
661 | goto done; | |
662 | ||
663 | err = -ENOMEM; | |
63159f29 | 664 | if (!sfl1) |
e5d08d71 IM |
665 | goto done; |
666 | ||
667 | err = mem_check(sk); | |
668 | if (err != 0) | |
1da177e4 LT |
669 | goto done; |
670 | ||
60e8fbc4 | 671 | fl1 = fl_intern(net, fl, freq.flr_label); |
53b24b8f | 672 | if (fl1) |
78c2e502 | 673 | goto recheck; |
1da177e4 | 674 | |
6c94d361 DM |
675 | if (!freq.flr_label) { |
676 | if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, | |
677 | &fl->label, sizeof(fl->label))) { | |
678 | /* Intentionally ignore fault. */ | |
679 | } | |
680 | } | |
1da177e4 | 681 | |
04028045 | 682 | fl_link(np, sfl1, fl); |
1da177e4 LT |
683 | return 0; |
684 | ||
685 | default: | |
686 | return -EINVAL; | |
687 | } | |
688 | ||
689 | done: | |
690 | fl_free(fl); | |
691 | kfree(sfl1); | |
692 | return err; | |
693 | } | |
694 | ||
695 | #ifdef CONFIG_PROC_FS | |
696 | ||
697 | struct ip6fl_iter_state { | |
5983a3df | 698 | struct seq_net_private p; |
4f82f457 | 699 | struct pid_namespace *pid_ns; |
1da177e4 LT |
700 | int bucket; |
701 | }; | |
702 | ||
703 | #define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private) | |
704 | ||
705 | static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq) | |
706 | { | |
707 | struct ip6_flowlabel *fl = NULL; | |
708 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); | |
5983a3df | 709 | struct net *net = seq_file_net(seq); |
1da177e4 LT |
710 | |
711 | for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) { | |
d3aedd5e YH |
712 | for_each_fl_rcu(state->bucket, fl) { |
713 | if (net_eq(fl->fl_net, net)) | |
714 | goto out; | |
715 | } | |
1da177e4 | 716 | } |
d3aedd5e YH |
717 | fl = NULL; |
718 | out: | |
1da177e4 LT |
719 | return fl; |
720 | } | |
721 | ||
722 | static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl) | |
723 | { | |
724 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); | |
5983a3df | 725 | struct net *net = seq_file_net(seq); |
1da177e4 | 726 | |
d3aedd5e YH |
727 | for_each_fl_continue_rcu(fl) { |
728 | if (net_eq(fl->fl_net, net)) | |
729 | goto out; | |
730 | } | |
731 | ||
5983a3df | 732 | try_again: |
d3aedd5e YH |
733 | if (++state->bucket <= FL_HASH_MASK) { |
734 | for_each_fl_rcu(state->bucket, fl) { | |
735 | if (net_eq(fl->fl_net, net)) | |
736 | goto out; | |
737 | } | |
738 | goto try_again; | |
1da177e4 | 739 | } |
d3aedd5e YH |
740 | fl = NULL; |
741 | ||
742 | out: | |
1da177e4 LT |
743 | return fl; |
744 | } | |
745 | ||
746 | static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos) | |
747 | { | |
748 | struct ip6_flowlabel *fl = ip6fl_get_first(seq); | |
749 | if (fl) | |
750 | while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL) | |
751 | --pos; | |
752 | return pos ? NULL : fl; | |
753 | } | |
754 | ||
755 | static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos) | |
d3aedd5e | 756 | __acquires(RCU) |
1da177e4 | 757 | { |
ad08978a CH |
758 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
759 | ||
760 | state->pid_ns = proc_pid_ns(file_inode(seq->file)); | |
761 | ||
d3aedd5e | 762 | rcu_read_lock_bh(); |
1da177e4 LT |
763 | return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; |
764 | } | |
765 | ||
766 | static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |
767 | { | |
768 | struct ip6_flowlabel *fl; | |
769 | ||
770 | if (v == SEQ_START_TOKEN) | |
771 | fl = ip6fl_get_first(seq); | |
772 | else | |
773 | fl = ip6fl_get_next(seq, v); | |
774 | ++*pos; | |
775 | return fl; | |
776 | } | |
777 | ||
778 | static void ip6fl_seq_stop(struct seq_file *seq, void *v) | |
d3aedd5e | 779 | __releases(RCU) |
1da177e4 | 780 | { |
d3aedd5e | 781 | rcu_read_unlock_bh(); |
1da177e4 LT |
782 | } |
783 | ||
1b7c2dbc | 784 | static int ip6fl_seq_show(struct seq_file *seq, void *v) |
1da177e4 | 785 | { |
4f82f457 | 786 | struct ip6fl_iter_state *state = ip6fl_seq_private(seq); |
869ba988 | 787 | if (v == SEQ_START_TOKEN) { |
1744bea1 | 788 | seq_puts(seq, "Label S Owner Users Linger Expires Dst Opt\n"); |
869ba988 | 789 | } else { |
1b7c2dbc | 790 | struct ip6_flowlabel *fl = v; |
1da177e4 | 791 | seq_printf(seq, |
4b7a4274 | 792 | "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", |
95c96174 | 793 | (unsigned int)ntohl(fl->label), |
1da177e4 | 794 | fl->share, |
4f82f457 EB |
795 | ((fl->share == IPV6_FL_S_PROCESS) ? |
796 | pid_nr_ns(fl->owner.pid, state->pid_ns) : | |
797 | ((fl->share == IPV6_FL_S_USER) ? | |
798 | from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : | |
799 | 0)), | |
1da177e4 LT |
800 | atomic_read(&fl->users), |
801 | fl->linger/HZ, | |
802 | (long)(fl->expires - jiffies)/HZ, | |
b071195d | 803 | &fl->dst, |
1da177e4 | 804 | fl->opt ? fl->opt->opt_nflen : 0); |
1da177e4 | 805 | } |
1da177e4 LT |
806 | return 0; |
807 | } | |
808 | ||
56b3d975 | 809 | static const struct seq_operations ip6fl_seq_ops = { |
1da177e4 LT |
810 | .start = ip6fl_seq_start, |
811 | .next = ip6fl_seq_next, | |
812 | .stop = ip6fl_seq_stop, | |
813 | .show = ip6fl_seq_show, | |
814 | }; | |
815 | ||
2c8c1e72 | 816 | static int __net_init ip6_flowlabel_proc_init(struct net *net) |
0a3e78ac | 817 | { |
c3506372 CH |
818 | if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net, |
819 | &ip6fl_seq_ops, sizeof(struct ip6fl_iter_state))) | |
0a3e78ac DL |
820 | return -ENOMEM; |
821 | return 0; | |
822 | } | |
1da177e4 | 823 | |
2c8c1e72 | 824 | static void __net_exit ip6_flowlabel_proc_fini(struct net *net) |
1da177e4 | 825 | { |
ece31ffd | 826 | remove_proc_entry("ip6_flowlabel", net->proc_net); |
0a3e78ac DL |
827 | } |
828 | #else | |
/* Without CONFIG_PROC_FS there is no proc entry to create: report success. */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}

/* Without CONFIG_PROC_FS there is no proc entry to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
1da177e4 | 836 | #endif |
0a3e78ac | 837 | |
2c8c1e72 | 838 | static void __net_exit ip6_flowlabel_net_exit(struct net *net) |
60e8fbc4 BT |
839 | { |
840 | ip6_fl_purge(net); | |
5983a3df | 841 | ip6_flowlabel_proc_fini(net); |
60e8fbc4 BT |
842 | } |
843 | ||
844 | static struct pernet_operations ip6_flowlabel_net_ops = { | |
5983a3df | 845 | .init = ip6_flowlabel_proc_init, |
60e8fbc4 BT |
846 | .exit = ip6_flowlabel_net_exit, |
847 | }; | |
848 | ||
0a3e78ac DL |
849 | int ip6_flowlabel_init(void) |
850 | { | |
5983a3df | 851 | return register_pernet_subsys(&ip6_flowlabel_net_ops); |
1da177e4 LT |
852 | } |
853 | ||
854 | void ip6_flowlabel_cleanup(void) | |
855 | { | |
856 | del_timer(&ip6_fl_gc_timer); | |
60e8fbc4 | 857 | unregister_pernet_subsys(&ip6_flowlabel_net_ops); |
1da177e4 | 858 | } |