[DCCP]: Use maximum-RTO backoff from DCCP spec
[linux-2.6-block.git] / net / dccp / proto.c
CommitLineData
7c657876
ACM
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
7c657876
ACM
12#include <linux/dccp.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/skbuff.h>
18#include <linux/netdevice.h>
19#include <linux/in.h>
20#include <linux/if_arp.h>
21#include <linux/init.h>
22#include <linux/random.h>
23#include <net/checksum.h>
24
14c85021 25#include <net/inet_sock.h>
7c657876
ACM
26#include <net/sock.h>
27#include <net/xfrm.h>
28
6273172e 29#include <asm/ioctls.h>
7c657876
ACM
30#include <asm/semaphore.h>
31#include <linux/spinlock.h>
32#include <linux/timer.h>
33#include <linux/delay.h>
34#include <linux/poll.h>
7c657876
ACM
35
36#include "ccid.h"
37#include "dccp.h"
afe00251 38#include "feat.h"
7c657876 39
ba89966c 40DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
7c657876 41
f21e68ca
ACM
42EXPORT_SYMBOL_GPL(dccp_statistics);
43
7c657876
ACM
44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
f21e68ca
ACM
46EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
075ae866
ACM
48struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49 .lhash_lock = RW_LOCK_UNLOCKED,
50 .lhash_users = ATOMIC_INIT(0),
51 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52};
53
54EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
b1308dc0
IM
56/* the maximum queue length for tx in packets. 0 is no limit */
57int sysctl_dccp_tx_qlen __read_mostly = 5;
58
c25a18ba
ACM
59void dccp_set_state(struct sock *sk, const int state)
60{
61 const int oldstate = sk->sk_state;
62
f11135a3 63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
c25a18ba
ACM
64 dccp_state_name(oldstate), dccp_state_name(state));
65 WARN_ON(state == oldstate);
66
67 switch (state) {
68 case DCCP_OPEN:
69 if (oldstate != DCCP_OPEN)
70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71 break;
72
73 case DCCP_CLOSED:
0c869620
GR
74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
c25a18ba
ACM
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77
78 sk->sk_prot->unhash(sk);
79 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81 inet_put_port(&dccp_hashinfo, sk);
82 /* fall through */
83 default:
84 if (oldstate == DCCP_OPEN)
85 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
86 }
87
88 /* Change state AFTER socket is unhashed to avoid closed
89 * socket sitting in hash tables.
90 */
91 sk->sk_state = state;
92}
93
94EXPORT_SYMBOL_GPL(dccp_set_state);
95
0c869620
GR
96static void dccp_finish_passive_close(struct sock *sk)
97{
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
103 break;
104 case DCCP_PASSIVE_CLOSEREQ:
105 /*
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 */
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
111 }
112}
113
c25a18ba
ACM
114void dccp_done(struct sock *sk)
115{
116 dccp_set_state(sk, DCCP_CLOSED);
117 dccp_clear_xmit_timers(sk);
118
119 sk->sk_shutdown = SHUTDOWN_MASK;
120
121 if (!sock_flag(sk, SOCK_DEAD))
122 sk->sk_state_change(sk);
123 else
124 inet_csk_destroy_sock(sk);
125}
126
127EXPORT_SYMBOL_GPL(dccp_done);
128
7c657876
ACM
129const char *dccp_packet_name(const int type)
130{
131 static const char *dccp_packet_names[] = {
132 [DCCP_PKT_REQUEST] = "REQUEST",
133 [DCCP_PKT_RESPONSE] = "RESPONSE",
134 [DCCP_PKT_DATA] = "DATA",
135 [DCCP_PKT_ACK] = "ACK",
136 [DCCP_PKT_DATAACK] = "DATAACK",
137 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138 [DCCP_PKT_CLOSE] = "CLOSE",
139 [DCCP_PKT_RESET] = "RESET",
140 [DCCP_PKT_SYNC] = "SYNC",
141 [DCCP_PKT_SYNCACK] = "SYNCACK",
142 };
143
144 if (type >= DCCP_NR_PKT_TYPES)
145 return "INVALID";
146 else
147 return dccp_packet_names[type];
148}
149
150EXPORT_SYMBOL_GPL(dccp_packet_name);
151
152const char *dccp_state_name(const int state)
153{
154 static char *dccp_state_names[] = {
f11135a3
GR
155 [DCCP_OPEN] = "OPEN",
156 [DCCP_REQUESTING] = "REQUESTING",
157 [DCCP_PARTOPEN] = "PARTOPEN",
158 [DCCP_LISTEN] = "LISTEN",
159 [DCCP_RESPOND] = "RESPOND",
160 [DCCP_CLOSING] = "CLOSING",
161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
7c657876
ACM
166 };
167
168 if (state >= DCCP_MAX_STATES)
169 return "INVALID STATE!";
170 else
171 return dccp_state_names[state];
172}
173
174EXPORT_SYMBOL_GPL(dccp_state_name);
175
c985ed70
ACM
176void dccp_hash(struct sock *sk)
177{
178 inet_hash(&dccp_hashinfo, sk);
179}
180
181EXPORT_SYMBOL_GPL(dccp_hash);
182
183void dccp_unhash(struct sock *sk)
184{
185 inet_unhash(&dccp_hashinfo, sk);
186}
187
188EXPORT_SYMBOL_GPL(dccp_unhash);
189
72478873 190int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
3e0fadc5
ACM
191{
192 struct dccp_sock *dp = dccp_sk(sk);
a4bf3902 193 struct dccp_minisock *dmsk = dccp_msk(sk);
3e0fadc5 194 struct inet_connection_sock *icsk = inet_csk(sk);
3e0fadc5 195
a4bf3902 196 dccp_minisock_init(&dp->dccps_minisock);
3e0fadc5 197
e18d7a98
ACM
198 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
199 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
200 sk->sk_state = DCCP_CLOSED;
201 sk->sk_write_space = dccp_write_space;
202 icsk->icsk_sync_mss = dccp_sync_mss;
203 dp->dccps_mss_cache = 536;
204 dp->dccps_rate_last = jiffies;
205 dp->dccps_role = DCCP_ROLE_UNDEFINED;
206 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
207 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
208
209 dccp_init_xmit_timers(sk);
210
3e0fadc5
ACM
211 /*
212 * FIXME: We're hardcoding the CCID, and doing this at this point makes
213 * the listening (master) sock get CCID control blocks, which is not
214 * necessary, but for now, to not mess with the test userspace apps,
215 * lets leave it here, later the real solution is to do this in a
216 * setsockopt(CCIDs-I-want/accept). -acme
217 */
72478873 218 if (likely(ctl_sock_initialized)) {
8ca0d17b 219 int rc = dccp_feat_init(dmsk);
3e0fadc5
ACM
220
221 if (rc)
222 return rc;
223
a4bf3902 224 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
225 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
226 if (dp->dccps_hc_rx_ackvec == NULL)
227 return -ENOMEM;
228 }
a4bf3902
ACM
229 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
230 sk, GFP_KERNEL);
231 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
232 sk, GFP_KERNEL);
8109b02b 233 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
3e0fadc5
ACM
234 dp->dccps_hc_tx_ccid == NULL)) {
235 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
236 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
a4bf3902 237 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
238 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
239 dp->dccps_hc_rx_ackvec = NULL;
240 }
241 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
242 return -ENOMEM;
243 }
244 } else {
245 /* control socket doesn't need feat nego */
a4bf3902
ACM
246 INIT_LIST_HEAD(&dmsk->dccpms_pending);
247 INIT_LIST_HEAD(&dmsk->dccpms_conf);
3e0fadc5
ACM
248 }
249
3e0fadc5
ACM
250 return 0;
251}
252
253EXPORT_SYMBOL_GPL(dccp_init_sock);
254
255int dccp_destroy_sock(struct sock *sk)
256{
257 struct dccp_sock *dp = dccp_sk(sk);
8ca0d17b 258 struct dccp_minisock *dmsk = dccp_msk(sk);
3e0fadc5
ACM
259
260 /*
261 * DCCP doesn't use sk_write_queue, just sk_send_head
262 * for retransmissions
263 */
264 if (sk->sk_send_head != NULL) {
265 kfree_skb(sk->sk_send_head);
266 sk->sk_send_head = NULL;
267 }
268
269 /* Clean up a referenced DCCP bind bucket. */
270 if (inet_csk(sk)->icsk_bind_hash != NULL)
271 inet_put_port(&dccp_hashinfo, sk);
272
273 kfree(dp->dccps_service_list);
274 dp->dccps_service_list = NULL;
275
8ca0d17b 276 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
277 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
278 dp->dccps_hc_rx_ackvec = NULL;
279 }
280 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
281 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
282 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
283
284 /* clean up feature negotiation state */
8ca0d17b 285 dccp_feat_clean(dmsk);
3e0fadc5
ACM
286
287 return 0;
288}
289
290EXPORT_SYMBOL_GPL(dccp_destroy_sock);
291
72a3effa 292static inline int dccp_listen_start(struct sock *sk, int backlog)
7c657876 293{
67e6b629
ACM
294 struct dccp_sock *dp = dccp_sk(sk);
295
296 dp->dccps_role = DCCP_ROLE_LISTEN;
72a3effa 297 return inet_csk_listen_start(sk, backlog);
7c657876
ACM
298}
299
ce865a61
GR
300static inline int dccp_need_reset(int state)
301{
302 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303 state != DCCP_REQUESTING;
304}
305
7c657876
ACM
306int dccp_disconnect(struct sock *sk, int flags)
307{
308 struct inet_connection_sock *icsk = inet_csk(sk);
309 struct inet_sock *inet = inet_sk(sk);
310 int err = 0;
311 const int old_state = sk->sk_state;
312
313 if (old_state != DCCP_CLOSED)
314 dccp_set_state(sk, DCCP_CLOSED);
315
ce865a61
GR
316 /*
317 * This corresponds to the ABORT function of RFC793, sec. 3.8
318 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
319 */
7c657876
ACM
320 if (old_state == DCCP_LISTEN) {
321 inet_csk_listen_stop(sk);
ce865a61
GR
322 } else if (dccp_need_reset(old_state)) {
323 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
324 sk->sk_err = ECONNRESET;
7c657876
ACM
325 } else if (old_state == DCCP_REQUESTING)
326 sk->sk_err = ECONNRESET;
327
328 dccp_clear_xmit_timers(sk);
329 __skb_queue_purge(&sk->sk_receive_queue);
330 if (sk->sk_send_head != NULL) {
331 __kfree_skb(sk->sk_send_head);
332 sk->sk_send_head = NULL;
333 }
334
335 inet->dport = 0;
336
337 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
338 inet_reset_saddr(sk);
339
340 sk->sk_shutdown = 0;
341 sock_reset_flag(sk, SOCK_DONE);
342
343 icsk->icsk_backoff = 0;
344 inet_csk_delack_init(sk);
345 __sk_dst_reset(sk);
346
347 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
348
349 sk->sk_error_report(sk);
350 return err;
351}
352
f21e68ca
ACM
353EXPORT_SYMBOL_GPL(dccp_disconnect);
354
331968bd
ACM
355/*
356 * Wait for a DCCP event.
357 *
358 * Note that we don't need to lock the socket, as the upper poll layers
359 * take care of normal races (between the test and the event) and we don't
360 * go look at any of the socket buffers directly.
361 */
f21e68ca
ACM
362unsigned int dccp_poll(struct file *file, struct socket *sock,
363 poll_table *wait)
331968bd
ACM
364{
365 unsigned int mask;
366 struct sock *sk = sock->sk;
367
368 poll_wait(file, sk->sk_sleep, wait);
369 if (sk->sk_state == DCCP_LISTEN)
370 return inet_csk_listen_poll(sk);
371
372 /* Socket is not locked. We are protected from async events
373 by poll logic and correct handling of state changes
374 made by another threads is impossible in any case.
375 */
376
377 mask = 0;
378 if (sk->sk_err)
379 mask = POLLERR;
380
381 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
382 mask |= POLLHUP;
383 if (sk->sk_shutdown & RCV_SHUTDOWN)
f348d70a 384 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
331968bd
ACM
385
386 /* Connected? */
387 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
388 if (atomic_read(&sk->sk_rmem_alloc) > 0)
389 mask |= POLLIN | POLLRDNORM;
390
391 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
392 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
393 mask |= POLLOUT | POLLWRNORM;
394 } else { /* send SIGIO later */
395 set_bit(SOCK_ASYNC_NOSPACE,
396 &sk->sk_socket->flags);
397 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
398
399 /* Race breaker. If space is freed after
400 * wspace test but before the flags are set,
401 * IO signal will be lost.
402 */
403 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
404 mask |= POLLOUT | POLLWRNORM;
405 }
406 }
407 }
408 return mask;
409}
410
f21e68ca
ACM
411EXPORT_SYMBOL_GPL(dccp_poll);
412
7c657876
ACM
413int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
414{
6273172e
ACM
415 int rc = -ENOTCONN;
416
417 lock_sock(sk);
418
419 if (sk->sk_state == DCCP_LISTEN)
420 goto out;
421
422 switch (cmd) {
423 case SIOCINQ: {
424 struct sk_buff *skb;
425 unsigned long amount = 0;
426
427 skb = skb_peek(&sk->sk_receive_queue);
428 if (skb != NULL) {
429 /*
430 * We will only return the amount of this packet since
431 * that is all that will be read.
432 */
433 amount = skb->len;
434 }
435 rc = put_user(amount, (int __user *)arg);
436 }
437 break;
438 default:
439 rc = -ENOIOCTLCMD;
440 break;
441 }
442out:
443 release_sock(sk);
444 return rc;
7c657876
ACM
445}
446
f21e68ca
ACM
447EXPORT_SYMBOL_GPL(dccp_ioctl);
448
60fe62e7 449static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
67e6b629
ACM
450 char __user *optval, int optlen)
451{
452 struct dccp_sock *dp = dccp_sk(sk);
453 struct dccp_service_list *sl = NULL;
454
8109b02b 455 if (service == DCCP_SERVICE_INVALID_VALUE ||
67e6b629
ACM
456 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
457 return -EINVAL;
458
459 if (optlen > sizeof(service)) {
460 sl = kmalloc(optlen, GFP_KERNEL);
461 if (sl == NULL)
462 return -ENOMEM;
463
464 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
465 if (copy_from_user(sl->dccpsl_list,
466 optval + sizeof(service),
467 optlen - sizeof(service)) ||
468 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
469 kfree(sl);
470 return -EFAULT;
471 }
472 }
473
474 lock_sock(sk);
475 dp->dccps_service = service;
476
a51482bd 477 kfree(dp->dccps_service_list);
67e6b629
ACM
478
479 dp->dccps_service_list = sl;
480 release_sock(sk);
481 return 0;
482}
483
afe00251
AB
484/* byte 1 is feature. the rest is the preference list */
485static int dccp_setsockopt_change(struct sock *sk, int type,
486 struct dccp_so_feat __user *optval)
487{
488 struct dccp_so_feat opt;
489 u8 *val;
490 int rc;
491
492 if (copy_from_user(&opt, optval, sizeof(opt)))
493 return -EFAULT;
494
495 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
496 if (!val)
497 return -ENOMEM;
498
499 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
500 rc = -EFAULT;
501 goto out_free_val;
502 }
503
8ca0d17b
ACM
504 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505 val, opt.dccpsf_len, GFP_KERNEL);
afe00251
AB
506 if (rc)
507 goto out_free_val;
508
509out:
510 return rc;
511
512out_free_val:
513 kfree(val);
514 goto out;
515}
516
3fdadf7d
DM
517static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
518 char __user *optval, int optlen)
7c657876 519{
09dbc389
GR
520 struct dccp_sock *dp = dccp_sk(sk);
521 int val, err = 0;
7c657876 522
a84ffe43
ACM
523 if (optlen < sizeof(int))
524 return -EINVAL;
525
526 if (get_user(val, (int __user *)optval))
527 return -EFAULT;
528
67e6b629
ACM
529 if (optname == DCCP_SOCKOPT_SERVICE)
530 return dccp_setsockopt_service(sk, val, optval, optlen);
a84ffe43 531
67e6b629 532 lock_sock(sk);
a84ffe43
ACM
533 switch (optname) {
534 case DCCP_SOCKOPT_PACKET_SIZE:
5aed3243 535 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
841bac1d 536 err = 0;
a84ffe43 537 break;
afe00251
AB
538 case DCCP_SOCKOPT_CHANGE_L:
539 if (optlen != sizeof(struct dccp_so_feat))
540 err = -EINVAL;
541 else
542 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
c9eaf173 543 (struct dccp_so_feat __user *)
afe00251
AB
544 optval);
545 break;
afe00251
AB
546 case DCCP_SOCKOPT_CHANGE_R:
547 if (optlen != sizeof(struct dccp_so_feat))
548 err = -EINVAL;
549 else
550 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
9faefb6d 551 (struct dccp_so_feat __user *)
afe00251
AB
552 optval);
553 break;
6f4e5fff
GR
554 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
555 if (val < 0 || val > 15)
556 err = -EINVAL;
557 else
558 dp->dccps_pcslen = val;
559 break;
560 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
561 if (val < 0 || val > 15)
562 err = -EINVAL;
563 else {
564 dp->dccps_pcrlen = val;
565 /* FIXME: add feature negotiation,
566 * ChangeL(MinimumChecksumCoverage, val) */
567 }
568 break;
a84ffe43
ACM
569 default:
570 err = -ENOPROTOOPT;
571 break;
572 }
6f4e5fff 573
a84ffe43
ACM
574 release_sock(sk);
575 return err;
7c657876
ACM
576}
577
3fdadf7d
DM
578int dccp_setsockopt(struct sock *sk, int level, int optname,
579 char __user *optval, int optlen)
580{
581 if (level != SOL_DCCP)
582 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
583 optname, optval,
584 optlen);
585 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
586}
543d9cfe 587
f21e68ca
ACM
588EXPORT_SYMBOL_GPL(dccp_setsockopt);
589
3fdadf7d
DM
590#ifdef CONFIG_COMPAT
591int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
543d9cfe 592 char __user *optval, int optlen)
3fdadf7d 593{
dec73ff0
ACM
594 if (level != SOL_DCCP)
595 return inet_csk_compat_setsockopt(sk, level, optname,
596 optval, optlen);
3fdadf7d
DM
597 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
598}
543d9cfe 599
3fdadf7d
DM
600EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
601#endif
602
67e6b629 603static int dccp_getsockopt_service(struct sock *sk, int len,
60fe62e7 604 __be32 __user *optval,
67e6b629
ACM
605 int __user *optlen)
606{
607 const struct dccp_sock *dp = dccp_sk(sk);
608 const struct dccp_service_list *sl;
609 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
610
611 lock_sock(sk);
67e6b629
ACM
612 if ((sl = dp->dccps_service_list) != NULL) {
613 slen = sl->dccpsl_nr * sizeof(u32);
614 total_len += slen;
615 }
616
617 err = -EINVAL;
618 if (total_len > len)
619 goto out;
620
621 err = 0;
622 if (put_user(total_len, optlen) ||
623 put_user(dp->dccps_service, optval) ||
624 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
625 err = -EFAULT;
626out:
627 release_sock(sk);
628 return err;
629}
630
3fdadf7d 631static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
a1d3a355 632 char __user *optval, int __user *optlen)
7c657876 633{
a84ffe43
ACM
634 struct dccp_sock *dp;
635 int val, len;
7c657876 636
a84ffe43
ACM
637 if (get_user(len, optlen))
638 return -EFAULT;
639
39ebc027 640 if (len < (int)sizeof(int))
a84ffe43
ACM
641 return -EINVAL;
642
643 dp = dccp_sk(sk);
644
645 switch (optname) {
646 case DCCP_SOCKOPT_PACKET_SIZE:
5aed3243 647 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
841bac1d 648 return 0;
88f964db
ACM
649 case DCCP_SOCKOPT_SERVICE:
650 return dccp_getsockopt_service(sk, len,
60fe62e7 651 (__be32 __user *)optval, optlen);
7c559a9e
GR
652 case DCCP_SOCKOPT_GET_CUR_MPS:
653 val = dp->dccps_mss_cache;
654 len = sizeof(val);
655 break;
6f4e5fff
GR
656 case DCCP_SOCKOPT_SEND_CSCOV:
657 val = dp->dccps_pcslen;
39ebc027 658 len = sizeof(val);
6f4e5fff
GR
659 break;
660 case DCCP_SOCKOPT_RECV_CSCOV:
661 val = dp->dccps_pcrlen;
39ebc027 662 len = sizeof(val);
6f4e5fff 663 break;
88f964db
ACM
664 case 128 ... 191:
665 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
666 len, (u32 __user *)optval, optlen);
667 case 192 ... 255:
668 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
669 len, (u32 __user *)optval, optlen);
a84ffe43
ACM
670 default:
671 return -ENOPROTOOPT;
672 }
673
674 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
675 return -EFAULT;
676
677 return 0;
7c657876
ACM
678}
679
3fdadf7d
DM
680int dccp_getsockopt(struct sock *sk, int level, int optname,
681 char __user *optval, int __user *optlen)
682{
683 if (level != SOL_DCCP)
684 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
685 optname, optval,
686 optlen);
687 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
688}
543d9cfe 689
f21e68ca
ACM
690EXPORT_SYMBOL_GPL(dccp_getsockopt);
691
3fdadf7d
DM
692#ifdef CONFIG_COMPAT
693int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
543d9cfe 694 char __user *optval, int __user *optlen)
3fdadf7d 695{
dec73ff0
ACM
696 if (level != SOL_DCCP)
697 return inet_csk_compat_getsockopt(sk, level, optname,
698 optval, optlen);
3fdadf7d
DM
699 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
700}
543d9cfe 701
3fdadf7d
DM
702EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
703#endif
704
7c657876
ACM
705int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
706 size_t len)
707{
708 const struct dccp_sock *dp = dccp_sk(sk);
709 const int flags = msg->msg_flags;
710 const int noblock = flags & MSG_DONTWAIT;
711 struct sk_buff *skb;
712 int rc, size;
713 long timeo;
714
715 if (len > dp->dccps_mss_cache)
716 return -EMSGSIZE;
717
718 lock_sock(sk);
b1308dc0
IM
719
720 if (sysctl_dccp_tx_qlen &&
721 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
722 rc = -EAGAIN;
723 goto out_release;
724 }
725
27258ee5 726 timeo = sock_sndtimeo(sk, noblock);
7c657876
ACM
727
728 /*
729 * We have to use sk_stream_wait_connect here to set sk_write_pending,
730 * so that the trick in dccp_rcv_request_sent_state_process.
731 */
732 /* Wait for a connection to finish. */
cecd8d0e 733 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
7c657876 734 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
27258ee5 735 goto out_release;
7c657876
ACM
736
737 size = sk->sk_prot->max_header + len;
738 release_sock(sk);
739 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
740 lock_sock(sk);
7c657876
ACM
741 if (skb == NULL)
742 goto out_release;
743
744 skb_reserve(skb, sk->sk_prot->max_header);
745 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
27258ee5
ACM
746 if (rc != 0)
747 goto out_discard;
748
97e5848d
IM
749 skb_queue_tail(&sk->sk_write_queue, skb);
750 dccp_write_xmit(sk,0);
7c657876
ACM
751out_release:
752 release_sock(sk);
753 return rc ? : len;
27258ee5
ACM
754out_discard:
755 kfree_skb(skb);
7c657876 756 goto out_release;
7c657876
ACM
757}
758
f21e68ca
ACM
759EXPORT_SYMBOL_GPL(dccp_sendmsg);
760
7c657876
ACM
761int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
762 size_t len, int nonblock, int flags, int *addr_len)
763{
764 const struct dccp_hdr *dh;
7c657876
ACM
765 long timeo;
766
767 lock_sock(sk);
768
531669a0
ACM
769 if (sk->sk_state == DCCP_LISTEN) {
770 len = -ENOTCONN;
7c657876 771 goto out;
7c657876 772 }
7c657876 773
531669a0 774 timeo = sock_rcvtimeo(sk, nonblock);
7c657876
ACM
775
776 do {
531669a0 777 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
7c657876 778
531669a0
ACM
779 if (skb == NULL)
780 goto verify_sock_status;
7c657876 781
531669a0 782 dh = dccp_hdr(skb);
7c657876 783
0c869620
GR
784 switch (dh->dccph_type) {
785 case DCCP_PKT_DATA:
786 case DCCP_PKT_DATAACK:
531669a0 787 goto found_ok_skb;
7c657876 788
0c869620
GR
789 case DCCP_PKT_CLOSE:
790 case DCCP_PKT_CLOSEREQ:
791 if (!(flags & MSG_PEEK))
792 dccp_finish_passive_close(sk);
793 /* fall through */
794 case DCCP_PKT_RESET:
795 dccp_pr_debug("found fin (%s) ok!\n",
796 dccp_packet_name(dh->dccph_type));
531669a0
ACM
797 len = 0;
798 goto found_fin_ok;
0c869620
GR
799 default:
800 dccp_pr_debug("packet_type=%s\n",
801 dccp_packet_name(dh->dccph_type));
802 sk_eat_skb(sk, skb, 0);
531669a0 803 }
531669a0
ACM
804verify_sock_status:
805 if (sock_flag(sk, SOCK_DONE)) {
806 len = 0;
7c657876 807 break;
531669a0 808 }
7c657876 809
531669a0
ACM
810 if (sk->sk_err) {
811 len = sock_error(sk);
812 break;
813 }
7c657876 814
531669a0
ACM
815 if (sk->sk_shutdown & RCV_SHUTDOWN) {
816 len = 0;
817 break;
818 }
7c657876 819
531669a0
ACM
820 if (sk->sk_state == DCCP_CLOSED) {
821 if (!sock_flag(sk, SOCK_DONE)) {
822 /* This occurs when user tries to read
823 * from never connected socket.
824 */
825 len = -ENOTCONN;
7c657876
ACM
826 break;
827 }
531669a0
ACM
828 len = 0;
829 break;
7c657876
ACM
830 }
831
531669a0
ACM
832 if (!timeo) {
833 len = -EAGAIN;
834 break;
835 }
7c657876 836
531669a0
ACM
837 if (signal_pending(current)) {
838 len = sock_intr_errno(timeo);
839 break;
840 }
7c657876 841
531669a0 842 sk_wait_data(sk, &timeo);
7c657876 843 continue;
7c657876 844 found_ok_skb:
531669a0
ACM
845 if (len > skb->len)
846 len = skb->len;
847 else if (len < skb->len)
848 msg->msg_flags |= MSG_TRUNC;
849
850 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
851 /* Exception. Bailout! */
852 len = -EFAULT;
853 break;
7c657876 854 }
7c657876
ACM
855 found_fin_ok:
856 if (!(flags & MSG_PEEK))
624d1164 857 sk_eat_skb(sk, skb, 0);
7c657876 858 break;
531669a0 859 } while (1);
7c657876
ACM
860out:
861 release_sock(sk);
531669a0 862 return len;
7c657876
ACM
863}
864
f21e68ca
ACM
865EXPORT_SYMBOL_GPL(dccp_recvmsg);
866
867int inet_dccp_listen(struct socket *sock, int backlog)
7c657876
ACM
868{
869 struct sock *sk = sock->sk;
870 unsigned char old_state;
871 int err;
872
873 lock_sock(sk);
874
875 err = -EINVAL;
876 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
877 goto out;
878
879 old_state = sk->sk_state;
880 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
881 goto out;
882
883 /* Really, if the socket is already in listen state
884 * we can only allow the backlog to be adjusted.
885 */
886 if (old_state != DCCP_LISTEN) {
887 /*
888 * FIXME: here it probably should be sk->sk_prot->listen_start
889 * see tcp_listen_start
890 */
72a3effa 891 err = dccp_listen_start(sk, backlog);
7c657876
ACM
892 if (err)
893 goto out;
894 }
895 sk->sk_max_ack_backlog = backlog;
896 err = 0;
897
898out:
899 release_sock(sk);
900 return err;
901}
902
f21e68ca
ACM
903EXPORT_SYMBOL_GPL(inet_dccp_listen);
904
0c869620 905static void dccp_terminate_connection(struct sock *sk)
7c657876 906{
0c869620 907 u8 next_state = DCCP_CLOSED;
7c657876 908
0c869620
GR
909 switch (sk->sk_state) {
910 case DCCP_PASSIVE_CLOSE:
911 case DCCP_PASSIVE_CLOSEREQ:
912 dccp_finish_passive_close(sk);
913 break;
914 case DCCP_PARTOPEN:
915 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
916 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
917 /* fall through */
918 case DCCP_OPEN:
919 dccp_send_close(sk, 1);
7c657876 920
0c869620
GR
921 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER)
922 next_state = DCCP_ACTIVE_CLOSEREQ;
923 else
924 next_state = DCCP_CLOSING;
925 /* fall through */
926 default:
927 dccp_set_state(sk, next_state);
928 }
7c657876
ACM
929}
930
931void dccp_close(struct sock *sk, long timeout)
932{
97e5848d 933 struct dccp_sock *dp = dccp_sk(sk);
7c657876 934 struct sk_buff *skb;
d83bd95b 935 u32 data_was_unread = 0;
134af346 936 int state;
7c657876
ACM
937
938 lock_sock(sk);
939
940 sk->sk_shutdown = SHUTDOWN_MASK;
941
942 if (sk->sk_state == DCCP_LISTEN) {
943 dccp_set_state(sk, DCCP_CLOSED);
944
945 /* Special case. */
946 inet_csk_listen_stop(sk);
947
948 goto adjudge_to_death;
949 }
950
97e5848d
IM
951 sk_stop_timer(sk, &dp->dccps_xmit_timer);
952
7c657876
ACM
953 /*
954 * We need to flush the recv. buffs. We do this only on the
955 * descriptor close, not protocol-sourced closes, because the
956 *reader process may not have drained the data yet!
957 */
7c657876 958 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
d83bd95b 959 data_was_unread += skb->len;
7c657876
ACM
960 __kfree_skb(skb);
961 }
962
d83bd95b
GR
963 if (data_was_unread) {
964 /* Unread data was tossed, send an appropriate Reset Code */
965 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
966 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
967 dccp_set_state(sk, DCCP_CLOSED);
968 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
7c657876
ACM
969 /* Check zero linger _after_ checking for unread data. */
970 sk->sk_prot->disconnect(sk, 0);
0c869620
GR
971 } else if (sk->sk_state != DCCP_CLOSED) {
972 dccp_terminate_connection(sk);
7c657876
ACM
973 }
974
975 sk_stream_wait_close(sk, timeout);
976
977adjudge_to_death:
134af346
HX
978 state = sk->sk_state;
979 sock_hold(sk);
980 sock_orphan(sk);
981 atomic_inc(sk->sk_prot->orphan_count);
982
7ad07e7c
ACM
983 /*
984 * It is the last release_sock in its life. It will remove backlog.
985 */
7c657876
ACM
986 release_sock(sk);
987 /*
988 * Now socket is owned by kernel and we acquire BH lock
989 * to finish close. No need to check for user refs.
990 */
991 local_bh_disable();
992 bh_lock_sock(sk);
993 BUG_TRAP(!sock_owned_by_user(sk));
994
134af346
HX
995 /* Have we already been destroyed by a softirq or backlog? */
996 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
997 goto out;
7ad07e7c 998
7c657876
ACM
999 if (sk->sk_state == DCCP_CLOSED)
1000 inet_csk_destroy_sock(sk);
1001
1002 /* Otherwise, socket is reprieved until protocol close. */
1003
134af346 1004out:
7c657876
ACM
1005 bh_unlock_sock(sk);
1006 local_bh_enable();
1007 sock_put(sk);
1008}
1009
f21e68ca
ACM
1010EXPORT_SYMBOL_GPL(dccp_close);
1011
7c657876
ACM
/*
 * shutdown() handler. It only emits a debug message with the @how value;
 * the socket itself is not touched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1018
46f09ffa 1019static int __init dccp_mib_init(void)
7c657876
ACM
1020{
1021 int rc = -ENOMEM;
1022
1023 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1024 if (dccp_statistics[0] == NULL)
1025 goto out;
1026
1027 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1028 if (dccp_statistics[1] == NULL)
1029 goto out_free_one;
1030
1031 rc = 0;
1032out:
1033 return rc;
1034out_free_one:
1035 free_percpu(dccp_statistics[0]);
1036 dccp_statistics[0] = NULL;
1037 goto out;
1038
1039}
1040
b61fafc4 1041static void dccp_mib_exit(void)
46f09ffa
ACM
1042{
1043 free_percpu(dccp_statistics[0]);
1044 free_percpu(dccp_statistics[1]);
1045 dccp_statistics[0] = dccp_statistics[1] = NULL;
1046}
1047
7c657876
ACM
1048static int thash_entries;
1049module_param(thash_entries, int, 0444);
1050MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1051
a1d3a355 1052#ifdef CONFIG_IP_DCCP_DEBUG
7c657876 1053int dccp_debug;
042d18f9 1054module_param(dccp_debug, bool, 0444);
7c657876 1055MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
f21e68ca
ACM
1056
1057EXPORT_SYMBOL_GPL(dccp_debug);
a1d3a355 1058#endif
7c657876
ACM
1059
1060static int __init dccp_init(void)
1061{
1062 unsigned long goal;
1063 int ehash_order, bhash_order, i;
b61fafc4 1064 int rc = -ENOBUFS;
7c657876 1065
7690af3f
ACM
1066 dccp_hashinfo.bind_bucket_cachep =
1067 kmem_cache_create("dccp_bind_bucket",
1068 sizeof(struct inet_bind_bucket), 0,
20c2df83 1069 SLAB_HWCACHE_ALIGN, NULL);
7c657876 1070 if (!dccp_hashinfo.bind_bucket_cachep)
b61fafc4 1071 goto out;
7c657876
ACM
1072
1073 /*
1074 * Size and allocate the main established and bind bucket
1075 * hash tables.
1076 *
1077 * The methodology is similar to that of the buffer cache.
1078 */
1079 if (num_physpages >= (128 * 1024))
1080 goal = num_physpages >> (21 - PAGE_SHIFT);
1081 else
1082 goal = num_physpages >> (23 - PAGE_SHIFT);
1083
1084 if (thash_entries)
7690af3f
ACM
1085 goal = (thash_entries *
1086 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
7c657876
ACM
1087 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1088 ;
1089 do {
1090 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1091 sizeof(struct inet_ehash_bucket);
7690af3f
ACM
1092 while (dccp_hashinfo.ehash_size &
1093 (dccp_hashinfo.ehash_size - 1))
7c657876
ACM
1094 dccp_hashinfo.ehash_size--;
1095 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1096 __get_free_pages(GFP_ATOMIC, ehash_order);
1097 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1098
1099 if (!dccp_hashinfo.ehash) {
59348b19 1100 DCCP_CRIT("Failed to allocate DCCP established hash table");
7c657876
ACM
1101 goto out_free_bind_bucket_cachep;
1102 }
1103
dbca9b27 1104 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
7c657876 1105 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
dbca9b27 1106 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
7c657876
ACM
1107 }
1108
230140cf
ED
1109 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1110 goto out_free_dccp_ehash;
1111
7c657876
ACM
1112 bhash_order = ehash_order;
1113
1114 do {
1115 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1116 sizeof(struct inet_bind_hashbucket);
7690af3f
ACM
1117 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1118 bhash_order > 0)
7c657876
ACM
1119 continue;
1120 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1121 __get_free_pages(GFP_ATOMIC, bhash_order);
1122 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1123
1124 if (!dccp_hashinfo.bhash) {
59348b19 1125 DCCP_CRIT("Failed to allocate DCCP bind hash table");
230140cf 1126 goto out_free_dccp_locks;
7c657876
ACM
1127 }
1128
1129 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1130 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1131 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1132 }
1133
46f09ffa 1134 rc = dccp_mib_init();
fa23e2ec 1135 if (rc)
7c657876
ACM
1136 goto out_free_dccp_bhash;
1137
9b07ef5d 1138 rc = dccp_ackvec_init();
7c657876 1139 if (rc)
b61fafc4 1140 goto out_free_dccp_mib;
9b07ef5d 1141
e55d912f 1142 rc = dccp_sysctl_init();
9b07ef5d
ACM
1143 if (rc)
1144 goto out_ackvec_exit;
4c70f383
GR
1145
1146 dccp_timestamping_init();
7c657876
ACM
1147out:
1148 return rc;
9b07ef5d
ACM
1149out_ackvec_exit:
1150 dccp_ackvec_exit();
b61fafc4 1151out_free_dccp_mib:
46f09ffa 1152 dccp_mib_exit();
7c657876
ACM
1153out_free_dccp_bhash:
1154 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1155 dccp_hashinfo.bhash = NULL;
230140cf
ED
1156out_free_dccp_locks:
1157 inet_ehash_locks_free(&dccp_hashinfo);
7c657876
ACM
1158out_free_dccp_ehash:
1159 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1160 dccp_hashinfo.ehash = NULL;
1161out_free_bind_bucket_cachep:
1162 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1163 dccp_hashinfo.bind_bucket_cachep = NULL;
7c657876
ACM
1164 goto out;
1165}
1166
7c657876
ACM
1167static void __exit dccp_fini(void)
1168{
46f09ffa 1169 dccp_mib_exit();
725ba8ee
ACM
1170 free_pages((unsigned long)dccp_hashinfo.bhash,
1171 get_order(dccp_hashinfo.bhash_size *
1172 sizeof(struct inet_bind_hashbucket)));
1173 free_pages((unsigned long)dccp_hashinfo.ehash,
1174 get_order(dccp_hashinfo.ehash_size *
1175 sizeof(struct inet_ehash_bucket)));
230140cf 1176 inet_ehash_locks_free(&dccp_hashinfo);
7c657876 1177 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
9b07ef5d 1178 dccp_ackvec_exit();
e55d912f 1179 dccp_sysctl_exit();
7c657876
ACM
1180}
1181
1182module_init(dccp_init);
1183module_exit(dccp_fini);
1184
7c657876
ACM
1185MODULE_LICENSE("GPL");
1186MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1187MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");