dccp: Remove manual influence on NDP Count feature
[linux-2.6-block.git] / net / dccp / proto.c
CommitLineData
7c657876
ACM
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
7c657876
ACM
12#include <linux/dccp.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/skbuff.h>
18#include <linux/netdevice.h>
19#include <linux/in.h>
20#include <linux/if_arp.h>
21#include <linux/init.h>
22#include <linux/random.h>
23#include <net/checksum.h>
24
14c85021 25#include <net/inet_sock.h>
7c657876
ACM
26#include <net/sock.h>
27#include <net/xfrm.h>
28
6273172e 29#include <asm/ioctls.h>
7c657876
ACM
30#include <linux/spinlock.h>
31#include <linux/timer.h>
32#include <linux/delay.h>
33#include <linux/poll.h>
7c657876
ACM
34
35#include "ccid.h"
36#include "dccp.h"
afe00251 37#include "feat.h"
7c657876 38
ba89966c 39DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
7c657876 40
f21e68ca
ACM
41EXPORT_SYMBOL_GPL(dccp_statistics);
42
dd24c001 43struct percpu_counter dccp_orphan_count;
f21e68ca
ACM
44EXPORT_SYMBOL_GPL(dccp_orphan_count);
45
5caea4ea 46struct inet_hashinfo dccp_hashinfo;
075ae866
ACM
47EXPORT_SYMBOL_GPL(dccp_hashinfo);
48
b1308dc0
IM
49/* the maximum queue length for tx in packets. 0 is no limit */
50int sysctl_dccp_tx_qlen __read_mostly = 5;
51
c25a18ba
ACM
52void dccp_set_state(struct sock *sk, const int state)
53{
54 const int oldstate = sk->sk_state;
55
f11135a3 56 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
c25a18ba
ACM
57 dccp_state_name(oldstate), dccp_state_name(state));
58 WARN_ON(state == oldstate);
59
60 switch (state) {
61 case DCCP_OPEN:
62 if (oldstate != DCCP_OPEN)
63 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
6eb55d17
GR
64 /* Client retransmits all Confirm options until entering OPEN */
65 if (oldstate == DCCP_PARTOPEN)
66 dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
c25a18ba
ACM
67 break;
68
69 case DCCP_CLOSED:
0c869620
GR
70 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
71 oldstate == DCCP_CLOSING)
c25a18ba
ACM
72 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
73
74 sk->sk_prot->unhash(sk);
75 if (inet_csk(sk)->icsk_bind_hash != NULL &&
76 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
ab1e0a13 77 inet_put_port(sk);
c25a18ba
ACM
78 /* fall through */
79 default:
80 if (oldstate == DCCP_OPEN)
81 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
82 }
83
84 /* Change state AFTER socket is unhashed to avoid closed
85 * socket sitting in hash tables.
86 */
87 sk->sk_state = state;
88}
89
90EXPORT_SYMBOL_GPL(dccp_set_state);
91
0c869620
GR
92static void dccp_finish_passive_close(struct sock *sk)
93{
94 switch (sk->sk_state) {
95 case DCCP_PASSIVE_CLOSE:
96 /* Node (client or server) has received Close packet. */
97 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
98 dccp_set_state(sk, DCCP_CLOSED);
99 break;
100 case DCCP_PASSIVE_CLOSEREQ:
101 /*
102 * Client received CloseReq. We set the `active' flag so that
103 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
104 */
105 dccp_send_close(sk, 1);
106 dccp_set_state(sk, DCCP_CLOSING);
107 }
108}
109
c25a18ba
ACM
110void dccp_done(struct sock *sk)
111{
112 dccp_set_state(sk, DCCP_CLOSED);
113 dccp_clear_xmit_timers(sk);
114
115 sk->sk_shutdown = SHUTDOWN_MASK;
116
117 if (!sock_flag(sk, SOCK_DEAD))
118 sk->sk_state_change(sk);
119 else
120 inet_csk_destroy_sock(sk);
121}
122
123EXPORT_SYMBOL_GPL(dccp_done);
124
7c657876
ACM
125const char *dccp_packet_name(const int type)
126{
127 static const char *dccp_packet_names[] = {
128 [DCCP_PKT_REQUEST] = "REQUEST",
129 [DCCP_PKT_RESPONSE] = "RESPONSE",
130 [DCCP_PKT_DATA] = "DATA",
131 [DCCP_PKT_ACK] = "ACK",
132 [DCCP_PKT_DATAACK] = "DATAACK",
133 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
134 [DCCP_PKT_CLOSE] = "CLOSE",
135 [DCCP_PKT_RESET] = "RESET",
136 [DCCP_PKT_SYNC] = "SYNC",
137 [DCCP_PKT_SYNCACK] = "SYNCACK",
138 };
139
140 if (type >= DCCP_NR_PKT_TYPES)
141 return "INVALID";
142 else
143 return dccp_packet_names[type];
144}
145
146EXPORT_SYMBOL_GPL(dccp_packet_name);
147
148const char *dccp_state_name(const int state)
149{
150 static char *dccp_state_names[] = {
f11135a3
GR
151 [DCCP_OPEN] = "OPEN",
152 [DCCP_REQUESTING] = "REQUESTING",
153 [DCCP_PARTOPEN] = "PARTOPEN",
154 [DCCP_LISTEN] = "LISTEN",
155 [DCCP_RESPOND] = "RESPOND",
156 [DCCP_CLOSING] = "CLOSING",
157 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
158 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
159 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
160 [DCCP_TIME_WAIT] = "TIME_WAIT",
161 [DCCP_CLOSED] = "CLOSED",
7c657876
ACM
162 };
163
164 if (state >= DCCP_MAX_STATES)
165 return "INVALID STATE!";
166 else
167 return dccp_state_names[state];
168}
169
170EXPORT_SYMBOL_GPL(dccp_state_name);
171
72478873 172int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
3e0fadc5
ACM
173{
174 struct dccp_sock *dp = dccp_sk(sk);
175 struct inet_connection_sock *icsk = inet_csk(sk);
3e0fadc5 176
410e27a4
GR
177 dccp_minisock_init(&dp->dccps_minisock);
178
e18d7a98
ACM
179 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
180 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
181 sk->sk_state = DCCP_CLOSED;
182 sk->sk_write_space = dccp_write_space;
183 icsk->icsk_sync_mss = dccp_sync_mss;
410e27a4 184 dp->dccps_mss_cache = 536;
e18d7a98
ACM
185 dp->dccps_rate_last = jiffies;
186 dp->dccps_role = DCCP_ROLE_UNDEFINED;
187 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
410e27a4 188 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
e18d7a98
ACM
189
190 dccp_init_xmit_timers(sk);
191
ac75773c 192 INIT_LIST_HEAD(&dp->dccps_featneg);
6eb55d17
GR
193 /* control socket doesn't need feat nego */
194 if (likely(ctl_sock_initialized))
195 return dccp_feat_init(sk);
3e0fadc5
ACM
196 return 0;
197}
198
199EXPORT_SYMBOL_GPL(dccp_init_sock);
200
7d06b2e0 201void dccp_destroy_sock(struct sock *sk)
3e0fadc5
ACM
202{
203 struct dccp_sock *dp = dccp_sk(sk);
410e27a4 204 struct dccp_minisock *dmsk = dccp_msk(sk);
3e0fadc5
ACM
205
206 /*
207 * DCCP doesn't use sk_write_queue, just sk_send_head
208 * for retransmissions
209 */
210 if (sk->sk_send_head != NULL) {
211 kfree_skb(sk->sk_send_head);
212 sk->sk_send_head = NULL;
213 }
214
215 /* Clean up a referenced DCCP bind bucket. */
216 if (inet_csk(sk)->icsk_bind_hash != NULL)
ab1e0a13 217 inet_put_port(sk);
3e0fadc5
ACM
218
219 kfree(dp->dccps_service_list);
220 dp->dccps_service_list = NULL;
221
410e27a4 222 if (dmsk->dccpms_send_ack_vector) {
3e0fadc5
ACM
223 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
224 dp->dccps_hc_rx_ackvec = NULL;
225 }
226 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
227 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
228 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
229
230 /* clean up feature negotiation state */
d99a7bd2 231 dccp_feat_list_purge(&dp->dccps_featneg);
3e0fadc5
ACM
232}
233
234EXPORT_SYMBOL_GPL(dccp_destroy_sock);
235
72a3effa 236static inline int dccp_listen_start(struct sock *sk, int backlog)
7c657876 237{
67e6b629
ACM
238 struct dccp_sock *dp = dccp_sk(sk);
239
240 dp->dccps_role = DCCP_ROLE_LISTEN;
9eca0a47
GR
241 /* do not start to listen if feature negotiation setup fails */
242 if (dccp_feat_finalise_settings(dp))
243 return -EPROTO;
72a3effa 244 return inet_csk_listen_start(sk, backlog);
7c657876
ACM
245}
246
ce865a61
GR
247static inline int dccp_need_reset(int state)
248{
249 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
250 state != DCCP_REQUESTING;
251}
252
7c657876
ACM
253int dccp_disconnect(struct sock *sk, int flags)
254{
255 struct inet_connection_sock *icsk = inet_csk(sk);
256 struct inet_sock *inet = inet_sk(sk);
257 int err = 0;
258 const int old_state = sk->sk_state;
259
260 if (old_state != DCCP_CLOSED)
261 dccp_set_state(sk, DCCP_CLOSED);
262
ce865a61
GR
263 /*
264 * This corresponds to the ABORT function of RFC793, sec. 3.8
265 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
266 */
7c657876
ACM
267 if (old_state == DCCP_LISTEN) {
268 inet_csk_listen_stop(sk);
ce865a61
GR
269 } else if (dccp_need_reset(old_state)) {
270 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
271 sk->sk_err = ECONNRESET;
7c657876
ACM
272 } else if (old_state == DCCP_REQUESTING)
273 sk->sk_err = ECONNRESET;
274
275 dccp_clear_xmit_timers(sk);
48816322 276
7c657876 277 __skb_queue_purge(&sk->sk_receive_queue);
48816322 278 __skb_queue_purge(&sk->sk_write_queue);
7c657876
ACM
279 if (sk->sk_send_head != NULL) {
280 __kfree_skb(sk->sk_send_head);
281 sk->sk_send_head = NULL;
282 }
283
284 inet->dport = 0;
285
286 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
287 inet_reset_saddr(sk);
288
289 sk->sk_shutdown = 0;
290 sock_reset_flag(sk, SOCK_DONE);
291
292 icsk->icsk_backoff = 0;
293 inet_csk_delack_init(sk);
294 __sk_dst_reset(sk);
295
547b792c 296 WARN_ON(inet->num && !icsk->icsk_bind_hash);
7c657876
ACM
297
298 sk->sk_error_report(sk);
299 return err;
300}
301
f21e68ca
ACM
302EXPORT_SYMBOL_GPL(dccp_disconnect);
303
331968bd
ACM
304/*
305 * Wait for a DCCP event.
306 *
307 * Note that we don't need to lock the socket, as the upper poll layers
308 * take care of normal races (between the test and the event) and we don't
309 * go look at any of the socket buffers directly.
310 */
f21e68ca
ACM
311unsigned int dccp_poll(struct file *file, struct socket *sock,
312 poll_table *wait)
331968bd
ACM
313{
314 unsigned int mask;
315 struct sock *sk = sock->sk;
316
317 poll_wait(file, sk->sk_sleep, wait);
318 if (sk->sk_state == DCCP_LISTEN)
319 return inet_csk_listen_poll(sk);
320
321 /* Socket is not locked. We are protected from async events
322 by poll logic and correct handling of state changes
323 made by another threads is impossible in any case.
324 */
325
326 mask = 0;
327 if (sk->sk_err)
328 mask = POLLERR;
329
330 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
331 mask |= POLLHUP;
332 if (sk->sk_shutdown & RCV_SHUTDOWN)
f348d70a 333 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
331968bd
ACM
334
335 /* Connected? */
336 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
337 if (atomic_read(&sk->sk_rmem_alloc) > 0)
338 mask |= POLLIN | POLLRDNORM;
339
340 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
341 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
342 mask |= POLLOUT | POLLWRNORM;
343 } else { /* send SIGIO later */
344 set_bit(SOCK_ASYNC_NOSPACE,
345 &sk->sk_socket->flags);
346 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
347
348 /* Race breaker. If space is freed after
349 * wspace test but before the flags are set,
350 * IO signal will be lost.
351 */
352 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
353 mask |= POLLOUT | POLLWRNORM;
354 }
355 }
356 }
357 return mask;
358}
359
f21e68ca
ACM
360EXPORT_SYMBOL_GPL(dccp_poll);
361
7c657876
ACM
362int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
363{
6273172e
ACM
364 int rc = -ENOTCONN;
365
366 lock_sock(sk);
367
368 if (sk->sk_state == DCCP_LISTEN)
369 goto out;
370
371 switch (cmd) {
372 case SIOCINQ: {
373 struct sk_buff *skb;
374 unsigned long amount = 0;
375
376 skb = skb_peek(&sk->sk_receive_queue);
377 if (skb != NULL) {
378 /*
379 * We will only return the amount of this packet since
380 * that is all that will be read.
381 */
382 amount = skb->len;
383 }
384 rc = put_user(amount, (int __user *)arg);
385 }
386 break;
387 default:
388 rc = -ENOIOCTLCMD;
389 break;
390 }
391out:
392 release_sock(sk);
393 return rc;
7c657876
ACM
394}
395
f21e68ca
ACM
396EXPORT_SYMBOL_GPL(dccp_ioctl);
397
60fe62e7 398static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
67e6b629
ACM
399 char __user *optval, int optlen)
400{
401 struct dccp_sock *dp = dccp_sk(sk);
402 struct dccp_service_list *sl = NULL;
403
8109b02b 404 if (service == DCCP_SERVICE_INVALID_VALUE ||
67e6b629
ACM
405 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
406 return -EINVAL;
407
408 if (optlen > sizeof(service)) {
409 sl = kmalloc(optlen, GFP_KERNEL);
410 if (sl == NULL)
411 return -ENOMEM;
412
413 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
414 if (copy_from_user(sl->dccpsl_list,
415 optval + sizeof(service),
416 optlen - sizeof(service)) ||
417 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
418 kfree(sl);
419 return -EFAULT;
420 }
421 }
422
423 lock_sock(sk);
424 dp->dccps_service = service;
425
a51482bd 426 kfree(dp->dccps_service_list);
67e6b629
ACM
427
428 dp->dccps_service_list = sl;
429 release_sock(sk);
430 return 0;
431}
432
29450559
GR
433static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
434{
435 u8 *list, len;
436 int i, rc;
437
438 if (cscov < 0 || cscov > 15)
439 return -EINVAL;
440 /*
441 * Populate a list of permissible values, in the range cscov...15. This
442 * is necessary since feature negotiation of single values only works if
443 * both sides incidentally choose the same value. Since the list starts
444 * lowest-value first, negotiation will pick the smallest shared value.
445 */
446 if (cscov == 0)
447 return 0;
448 len = 16 - cscov;
449
450 list = kmalloc(len, GFP_KERNEL);
451 if (list == NULL)
452 return -ENOBUFS;
453
454 for (i = 0; i < len; i++)
455 list[i] = cscov++;
456
457 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
458
459 if (rc == 0) {
460 if (rx)
461 dccp_sk(sk)->dccps_pcrlen = cscov;
462 else
463 dccp_sk(sk)->dccps_pcslen = cscov;
464 }
465 kfree(list);
466 return rc;
467}
468
b20a9c24
GR
469static int dccp_setsockopt_ccid(struct sock *sk, int type,
470 char __user *optval, int optlen)
471{
472 u8 *val;
473 int rc = 0;
474
475 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
476 return -EINVAL;
477
478 val = kmalloc(optlen, GFP_KERNEL);
479 if (val == NULL)
480 return -ENOMEM;
481
482 if (copy_from_user(val, optval, optlen)) {
483 kfree(val);
484 return -EFAULT;
485 }
486
487 lock_sock(sk);
488 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
489 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
490
491 if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
492 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
493 release_sock(sk);
494
495 kfree(val);
496 return rc;
497}
498
3fdadf7d
DM
499static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
500 char __user *optval, int optlen)
7c657876 501{
09dbc389
GR
502 struct dccp_sock *dp = dccp_sk(sk);
503 int val, err = 0;
7c657876 504
19102996
GR
505 switch (optname) {
506 case DCCP_SOCKOPT_PACKET_SIZE:
507 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
508 return 0;
509 case DCCP_SOCKOPT_CHANGE_L:
510 case DCCP_SOCKOPT_CHANGE_R:
511 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
512 return 0;
b20a9c24
GR
513 case DCCP_SOCKOPT_CCID:
514 case DCCP_SOCKOPT_RX_CCID:
515 case DCCP_SOCKOPT_TX_CCID:
516 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
19102996
GR
517 }
518
519 if (optlen < (int)sizeof(int))
a84ffe43
ACM
520 return -EINVAL;
521
522 if (get_user(val, (int __user *)optval))
523 return -EFAULT;
524
67e6b629
ACM
525 if (optname == DCCP_SOCKOPT_SERVICE)
526 return dccp_setsockopt_service(sk, val, optval, optlen);
a84ffe43 527
67e6b629 528 lock_sock(sk);
a84ffe43 529 switch (optname) {
b8599d20
GR
530 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
531 if (dp->dccps_role != DCCP_ROLE_SERVER)
532 err = -EOPNOTSUPP;
533 else
534 dp->dccps_server_timewait = (val != 0);
535 break;
29450559
GR
536 case DCCP_SOCKOPT_SEND_CSCOV:
537 err = dccp_setsockopt_cscov(sk, val, false);
d6da3511 538 break;
29450559
GR
539 case DCCP_SOCKOPT_RECV_CSCOV:
540 err = dccp_setsockopt_cscov(sk, val, true);
d6da3511 541 break;
a84ffe43
ACM
542 default:
543 err = -ENOPROTOOPT;
544 break;
545 }
410e27a4 546 release_sock(sk);
19102996 547
a84ffe43 548 return err;
7c657876
ACM
549}
550
3fdadf7d
DM
551int dccp_setsockopt(struct sock *sk, int level, int optname,
552 char __user *optval, int optlen)
553{
554 if (level != SOL_DCCP)
555 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
556 optname, optval,
557 optlen);
558 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
559}
543d9cfe 560
f21e68ca
ACM
561EXPORT_SYMBOL_GPL(dccp_setsockopt);
562
3fdadf7d
DM
563#ifdef CONFIG_COMPAT
564int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
543d9cfe 565 char __user *optval, int optlen)
3fdadf7d 566{
dec73ff0
ACM
567 if (level != SOL_DCCP)
568 return inet_csk_compat_setsockopt(sk, level, optname,
569 optval, optlen);
3fdadf7d
DM
570 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
571}
543d9cfe 572
3fdadf7d
DM
573EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
574#endif
575
67e6b629 576static int dccp_getsockopt_service(struct sock *sk, int len,
60fe62e7 577 __be32 __user *optval,
67e6b629
ACM
578 int __user *optlen)
579{
580 const struct dccp_sock *dp = dccp_sk(sk);
581 const struct dccp_service_list *sl;
582 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
583
584 lock_sock(sk);
67e6b629
ACM
585 if ((sl = dp->dccps_service_list) != NULL) {
586 slen = sl->dccpsl_nr * sizeof(u32);
587 total_len += slen;
588 }
589
590 err = -EINVAL;
591 if (total_len > len)
592 goto out;
593
594 err = 0;
595 if (put_user(total_len, optlen) ||
596 put_user(dp->dccps_service, optval) ||
597 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
598 err = -EFAULT;
599out:
600 release_sock(sk);
601 return err;
602}
603
3fdadf7d 604static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
a1d3a355 605 char __user *optval, int __user *optlen)
7c657876 606{
a84ffe43
ACM
607 struct dccp_sock *dp;
608 int val, len;
7c657876 609
a84ffe43
ACM
610 if (get_user(len, optlen))
611 return -EFAULT;
612
39ebc027 613 if (len < (int)sizeof(int))
a84ffe43
ACM
614 return -EINVAL;
615
616 dp = dccp_sk(sk);
617
618 switch (optname) {
619 case DCCP_SOCKOPT_PACKET_SIZE:
5aed3243 620 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
841bac1d 621 return 0;
88f964db
ACM
622 case DCCP_SOCKOPT_SERVICE:
623 return dccp_getsockopt_service(sk, len,
60fe62e7 624 (__be32 __user *)optval, optlen);
7c559a9e
GR
625 case DCCP_SOCKOPT_GET_CUR_MPS:
626 val = dp->dccps_mss_cache;
7c559a9e 627 break;
d90ebcbf
GR
628 case DCCP_SOCKOPT_AVAILABLE_CCIDS:
629 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
71c262a3
GR
630 case DCCP_SOCKOPT_TX_CCID:
631 val = ccid_get_current_tx_ccid(dp);
632 if (val < 0)
633 return -ENOPROTOOPT;
634 break;
635 case DCCP_SOCKOPT_RX_CCID:
636 val = ccid_get_current_rx_ccid(dp);
637 if (val < 0)
638 return -ENOPROTOOPT;
639 break;
b8599d20
GR
640 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
641 val = dp->dccps_server_timewait;
b8599d20 642 break;
6f4e5fff
GR
643 case DCCP_SOCKOPT_SEND_CSCOV:
644 val = dp->dccps_pcslen;
645 break;
646 case DCCP_SOCKOPT_RECV_CSCOV:
647 val = dp->dccps_pcrlen;
648 break;
88f964db
ACM
649 case 128 ... 191:
650 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
651 len, (u32 __user *)optval, optlen);
652 case 192 ... 255:
653 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
654 len, (u32 __user *)optval, optlen);
a84ffe43
ACM
655 default:
656 return -ENOPROTOOPT;
657 }
658
79133506 659 len = sizeof(val);
a84ffe43
ACM
660 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
661 return -EFAULT;
662
663 return 0;
7c657876
ACM
664}
665
3fdadf7d
DM
666int dccp_getsockopt(struct sock *sk, int level, int optname,
667 char __user *optval, int __user *optlen)
668{
669 if (level != SOL_DCCP)
670 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
671 optname, optval,
672 optlen);
673 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
674}
543d9cfe 675
f21e68ca
ACM
676EXPORT_SYMBOL_GPL(dccp_getsockopt);
677
3fdadf7d
DM
#ifdef CONFIG_COMPAT
/* 32-bit compat entry point; only non-DCCP levels need translation. */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
690
7c657876
ACM
691int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
692 size_t len)
693{
694 const struct dccp_sock *dp = dccp_sk(sk);
695 const int flags = msg->msg_flags;
696 const int noblock = flags & MSG_DONTWAIT;
697 struct sk_buff *skb;
698 int rc, size;
699 long timeo;
700
701 if (len > dp->dccps_mss_cache)
702 return -EMSGSIZE;
703
704 lock_sock(sk);
b1308dc0 705
410e27a4
GR
706 if (sysctl_dccp_tx_qlen &&
707 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
b1308dc0
IM
708 rc = -EAGAIN;
709 goto out_release;
710 }
711
27258ee5 712 timeo = sock_sndtimeo(sk, noblock);
7c657876
ACM
713
714 /*
715 * We have to use sk_stream_wait_connect here to set sk_write_pending,
716 * so that the trick in dccp_rcv_request_sent_state_process.
717 */
718 /* Wait for a connection to finish. */
cecd8d0e 719 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
7c657876 720 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
27258ee5 721 goto out_release;
7c657876
ACM
722
723 size = sk->sk_prot->max_header + len;
724 release_sock(sk);
725 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
726 lock_sock(sk);
7c657876
ACM
727 if (skb == NULL)
728 goto out_release;
729
730 skb_reserve(skb, sk->sk_prot->max_header);
731 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
27258ee5
ACM
732 if (rc != 0)
733 goto out_discard;
734
410e27a4
GR
735 skb_queue_tail(&sk->sk_write_queue, skb);
736 dccp_write_xmit(sk,0);
7c657876
ACM
737out_release:
738 release_sock(sk);
739 return rc ? : len;
27258ee5
ACM
740out_discard:
741 kfree_skb(skb);
7c657876 742 goto out_release;
7c657876
ACM
743}
744
f21e68ca
ACM
745EXPORT_SYMBOL_GPL(dccp_sendmsg);
746
7c657876
ACM
747int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
748 size_t len, int nonblock, int flags, int *addr_len)
749{
750 const struct dccp_hdr *dh;
7c657876
ACM
751 long timeo;
752
753 lock_sock(sk);
754
531669a0
ACM
755 if (sk->sk_state == DCCP_LISTEN) {
756 len = -ENOTCONN;
7c657876 757 goto out;
7c657876 758 }
7c657876 759
531669a0 760 timeo = sock_rcvtimeo(sk, nonblock);
7c657876
ACM
761
762 do {
531669a0 763 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
7c657876 764
531669a0
ACM
765 if (skb == NULL)
766 goto verify_sock_status;
7c657876 767
531669a0 768 dh = dccp_hdr(skb);
7c657876 769
0c869620
GR
770 switch (dh->dccph_type) {
771 case DCCP_PKT_DATA:
772 case DCCP_PKT_DATAACK:
531669a0 773 goto found_ok_skb;
7c657876 774
0c869620
GR
775 case DCCP_PKT_CLOSE:
776 case DCCP_PKT_CLOSEREQ:
777 if (!(flags & MSG_PEEK))
778 dccp_finish_passive_close(sk);
779 /* fall through */
780 case DCCP_PKT_RESET:
781 dccp_pr_debug("found fin (%s) ok!\n",
782 dccp_packet_name(dh->dccph_type));
531669a0
ACM
783 len = 0;
784 goto found_fin_ok;
0c869620
GR
785 default:
786 dccp_pr_debug("packet_type=%s\n",
787 dccp_packet_name(dh->dccph_type));
788 sk_eat_skb(sk, skb, 0);
531669a0 789 }
531669a0
ACM
790verify_sock_status:
791 if (sock_flag(sk, SOCK_DONE)) {
792 len = 0;
7c657876 793 break;
531669a0 794 }
7c657876 795
531669a0
ACM
796 if (sk->sk_err) {
797 len = sock_error(sk);
798 break;
799 }
7c657876 800
531669a0
ACM
801 if (sk->sk_shutdown & RCV_SHUTDOWN) {
802 len = 0;
803 break;
804 }
7c657876 805
531669a0
ACM
806 if (sk->sk_state == DCCP_CLOSED) {
807 if (!sock_flag(sk, SOCK_DONE)) {
808 /* This occurs when user tries to read
809 * from never connected socket.
810 */
811 len = -ENOTCONN;
7c657876
ACM
812 break;
813 }
531669a0
ACM
814 len = 0;
815 break;
7c657876
ACM
816 }
817
531669a0
ACM
818 if (!timeo) {
819 len = -EAGAIN;
820 break;
821 }
7c657876 822
531669a0
ACM
823 if (signal_pending(current)) {
824 len = sock_intr_errno(timeo);
825 break;
826 }
7c657876 827
531669a0 828 sk_wait_data(sk, &timeo);
7c657876 829 continue;
7c657876 830 found_ok_skb:
531669a0
ACM
831 if (len > skb->len)
832 len = skb->len;
833 else if (len < skb->len)
834 msg->msg_flags |= MSG_TRUNC;
835
836 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
837 /* Exception. Bailout! */
838 len = -EFAULT;
839 break;
7c657876 840 }
7c657876
ACM
841 found_fin_ok:
842 if (!(flags & MSG_PEEK))
624d1164 843 sk_eat_skb(sk, skb, 0);
7c657876 844 break;
531669a0 845 } while (1);
7c657876
ACM
846out:
847 release_sock(sk);
531669a0 848 return len;
7c657876
ACM
849}
850
f21e68ca
ACM
851EXPORT_SYMBOL_GPL(dccp_recvmsg);
852
853int inet_dccp_listen(struct socket *sock, int backlog)
7c657876
ACM
854{
855 struct sock *sk = sock->sk;
856 unsigned char old_state;
857 int err;
858
859 lock_sock(sk);
860
861 err = -EINVAL;
862 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
863 goto out;
864
865 old_state = sk->sk_state;
866 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
867 goto out;
868
869 /* Really, if the socket is already in listen state
870 * we can only allow the backlog to be adjusted.
871 */
872 if (old_state != DCCP_LISTEN) {
873 /*
874 * FIXME: here it probably should be sk->sk_prot->listen_start
875 * see tcp_listen_start
876 */
72a3effa 877 err = dccp_listen_start(sk, backlog);
7c657876
ACM
878 if (err)
879 goto out;
880 }
881 sk->sk_max_ack_backlog = backlog;
882 err = 0;
883
884out:
885 release_sock(sk);
886 return err;
887}
888
f21e68ca
ACM
889EXPORT_SYMBOL_GPL(inet_dccp_listen);
890
0c869620 891static void dccp_terminate_connection(struct sock *sk)
7c657876 892{
0c869620 893 u8 next_state = DCCP_CLOSED;
7c657876 894
0c869620
GR
895 switch (sk->sk_state) {
896 case DCCP_PASSIVE_CLOSE:
897 case DCCP_PASSIVE_CLOSEREQ:
898 dccp_finish_passive_close(sk);
899 break;
900 case DCCP_PARTOPEN:
901 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
902 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
903 /* fall through */
904 case DCCP_OPEN:
905 dccp_send_close(sk, 1);
7c657876 906
b8599d20
GR
907 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
908 !dccp_sk(sk)->dccps_server_timewait)
0c869620
GR
909 next_state = DCCP_ACTIVE_CLOSEREQ;
910 else
911 next_state = DCCP_CLOSING;
912 /* fall through */
913 default:
914 dccp_set_state(sk, next_state);
915 }
7c657876
ACM
916}
917
918void dccp_close(struct sock *sk, long timeout)
919{
97e5848d 920 struct dccp_sock *dp = dccp_sk(sk);
7c657876 921 struct sk_buff *skb;
d83bd95b 922 u32 data_was_unread = 0;
134af346 923 int state;
7c657876
ACM
924
925 lock_sock(sk);
926
927 sk->sk_shutdown = SHUTDOWN_MASK;
928
929 if (sk->sk_state == DCCP_LISTEN) {
930 dccp_set_state(sk, DCCP_CLOSED);
931
932 /* Special case. */
933 inet_csk_listen_stop(sk);
934
935 goto adjudge_to_death;
936 }
937
97e5848d
IM
938 sk_stop_timer(sk, &dp->dccps_xmit_timer);
939
7c657876
ACM
940 /*
941 * We need to flush the recv. buffs. We do this only on the
942 * descriptor close, not protocol-sourced closes, because the
943 *reader process may not have drained the data yet!
944 */
7c657876 945 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
d83bd95b 946 data_was_unread += skb->len;
7c657876
ACM
947 __kfree_skb(skb);
948 }
949
d83bd95b
GR
950 if (data_was_unread) {
951 /* Unread data was tossed, send an appropriate Reset Code */
952 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
953 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
954 dccp_set_state(sk, DCCP_CLOSED);
955 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
7c657876
ACM
956 /* Check zero linger _after_ checking for unread data. */
957 sk->sk_prot->disconnect(sk, 0);
0c869620
GR
958 } else if (sk->sk_state != DCCP_CLOSED) {
959 dccp_terminate_connection(sk);
7c657876
ACM
960 }
961
962 sk_stream_wait_close(sk, timeout);
963
964adjudge_to_death:
134af346
HX
965 state = sk->sk_state;
966 sock_hold(sk);
967 sock_orphan(sk);
dd24c001 968 percpu_counter_inc(sk->sk_prot->orphan_count);
134af346 969
7ad07e7c
ACM
970 /*
971 * It is the last release_sock in its life. It will remove backlog.
972 */
7c657876
ACM
973 release_sock(sk);
974 /*
975 * Now socket is owned by kernel and we acquire BH lock
976 * to finish close. No need to check for user refs.
977 */
978 local_bh_disable();
979 bh_lock_sock(sk);
547b792c 980 WARN_ON(sock_owned_by_user(sk));
7c657876 981
134af346
HX
982 /* Have we already been destroyed by a softirq or backlog? */
983 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
984 goto out;
7ad07e7c 985
7c657876
ACM
986 if (sk->sk_state == DCCP_CLOSED)
987 inet_csk_destroy_sock(sk);
988
989 /* Otherwise, socket is reprieved until protocol close. */
990
134af346 991out:
7c657876
ACM
992 bh_unlock_sock(sk);
993 local_bh_enable();
994 sock_put(sk);
995}
996
f21e68ca
ACM
997EXPORT_SYMBOL_GPL(dccp_close);
998
7c657876
ACM
/* shutdown(2) stub: DCCP currently only logs the request. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1005
24e8b7e4 1006static inline int dccp_mib_init(void)
7c657876 1007{
24e8b7e4 1008 return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
7c657876
ACM
1009}
1010
24e8b7e4 1011static inline void dccp_mib_exit(void)
46f09ffa 1012{
24e8b7e4 1013 snmp_mib_free((void**)dccp_statistics);
46f09ffa
ACM
1014}
1015
7c657876
ACM
1016static int thash_entries;
1017module_param(thash_entries, int, 0444);
1018MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1019
a1d3a355 1020#ifdef CONFIG_IP_DCCP_DEBUG
7c657876 1021int dccp_debug;
43264991 1022module_param(dccp_debug, bool, 0644);
7c657876 1023MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
f21e68ca
ACM
1024
1025EXPORT_SYMBOL_GPL(dccp_debug);
a1d3a355 1026#endif
7c657876
ACM
1027
1028static int __init dccp_init(void)
1029{
1030 unsigned long goal;
1031 int ehash_order, bhash_order, i;
dd24c001 1032 int rc;
7c657876 1033
028b0275
PM
1034 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1035 FIELD_SIZEOF(struct sk_buff, cb));
dd24c001
ED
1036 rc = percpu_counter_init(&dccp_orphan_count, 0);
1037 if (rc)
1038 goto out;
1039 rc = -ENOBUFS;
5caea4ea 1040 inet_hashinfo_init(&dccp_hashinfo);
7690af3f
ACM
1041 dccp_hashinfo.bind_bucket_cachep =
1042 kmem_cache_create("dccp_bind_bucket",
1043 sizeof(struct inet_bind_bucket), 0,
20c2df83 1044 SLAB_HWCACHE_ALIGN, NULL);
7c657876 1045 if (!dccp_hashinfo.bind_bucket_cachep)
dd24c001 1046 goto out_free_percpu;
7c657876
ACM
1047
1048 /*
1049 * Size and allocate the main established and bind bucket
1050 * hash tables.
1051 *
1052 * The methodology is similar to that of the buffer cache.
1053 */
1054 if (num_physpages >= (128 * 1024))
1055 goal = num_physpages >> (21 - PAGE_SHIFT);
1056 else
1057 goal = num_physpages >> (23 - PAGE_SHIFT);
1058
1059 if (thash_entries)
7690af3f
ACM
1060 goal = (thash_entries *
1061 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
7c657876
ACM
1062 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1063 ;
1064 do {
1065 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1066 sizeof(struct inet_ehash_bucket);
7690af3f
ACM
1067 while (dccp_hashinfo.ehash_size &
1068 (dccp_hashinfo.ehash_size - 1))
7c657876
ACM
1069 dccp_hashinfo.ehash_size--;
1070 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1071 __get_free_pages(GFP_ATOMIC, ehash_order);
1072 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1073
1074 if (!dccp_hashinfo.ehash) {
59348b19 1075 DCCP_CRIT("Failed to allocate DCCP established hash table");
7c657876
ACM
1076 goto out_free_bind_bucket_cachep;
1077 }
1078
dbca9b27 1079 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
3ab5aee7
ED
1080 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
1081 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i);
7c657876
ACM
1082 }
1083
230140cf
ED
1084 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1085 goto out_free_dccp_ehash;
1086
7c657876
ACM
1087 bhash_order = ehash_order;
1088
1089 do {
1090 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1091 sizeof(struct inet_bind_hashbucket);
7690af3f
ACM
1092 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1093 bhash_order > 0)
7c657876
ACM
1094 continue;
1095 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1096 __get_free_pages(GFP_ATOMIC, bhash_order);
1097 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1098
1099 if (!dccp_hashinfo.bhash) {
59348b19 1100 DCCP_CRIT("Failed to allocate DCCP bind hash table");
230140cf 1101 goto out_free_dccp_locks;
7c657876
ACM
1102 }
1103
1104 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1105 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1106 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1107 }
1108
46f09ffa 1109 rc = dccp_mib_init();
fa23e2ec 1110 if (rc)
7c657876
ACM
1111 goto out_free_dccp_bhash;
1112
9b07ef5d 1113 rc = dccp_ackvec_init();
7c657876 1114 if (rc)
b61fafc4 1115 goto out_free_dccp_mib;
9b07ef5d 1116
e55d912f 1117 rc = dccp_sysctl_init();
9b07ef5d
ACM
1118 if (rc)
1119 goto out_ackvec_exit;
4c70f383
GR
1120
1121 dccp_timestamping_init();
7c657876
ACM
1122out:
1123 return rc;
9b07ef5d
ACM
1124out_ackvec_exit:
1125 dccp_ackvec_exit();
b61fafc4 1126out_free_dccp_mib:
46f09ffa 1127 dccp_mib_exit();
7c657876
ACM
1128out_free_dccp_bhash:
1129 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1130 dccp_hashinfo.bhash = NULL;
230140cf
ED
1131out_free_dccp_locks:
1132 inet_ehash_locks_free(&dccp_hashinfo);
7c657876
ACM
1133out_free_dccp_ehash:
1134 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1135 dccp_hashinfo.ehash = NULL;
1136out_free_bind_bucket_cachep:
1137 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1138 dccp_hashinfo.bind_bucket_cachep = NULL;
dd24c001
ED
1139out_free_percpu:
1140 percpu_counter_destroy(&dccp_orphan_count);
7c657876
ACM
1141 goto out;
1142}
1143
7c657876
ACM
1144static void __exit dccp_fini(void)
1145{
46f09ffa 1146 dccp_mib_exit();
725ba8ee
ACM
1147 free_pages((unsigned long)dccp_hashinfo.bhash,
1148 get_order(dccp_hashinfo.bhash_size *
1149 sizeof(struct inet_bind_hashbucket)));
1150 free_pages((unsigned long)dccp_hashinfo.ehash,
1151 get_order(dccp_hashinfo.ehash_size *
1152 sizeof(struct inet_ehash_bucket)));
230140cf 1153 inet_ehash_locks_free(&dccp_hashinfo);
7c657876 1154 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
9b07ef5d 1155 dccp_ackvec_exit();
e55d912f 1156 dccp_sysctl_exit();
7c657876
ACM
1157}
1158
1159module_init(dccp_init);
1160module_exit(dccp_fini);
1161
7c657876
ACM
1162MODULE_LICENSE("GPL");
1163MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1164MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");