[DCCP]: Dont use dccp_v4_checksum in dccp_make_response
[linux-block.git] / net / dccp / proto.c
CommitLineData
7c657876
ACM
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/config.h>
13#include <linux/dccp.h>
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/skbuff.h>
19#include <linux/netdevice.h>
20#include <linux/in.h>
21#include <linux/if_arp.h>
22#include <linux/init.h>
23#include <linux/random.h>
24#include <net/checksum.h>
25
26#include <net/inet_common.h>
14c85021 27#include <net/inet_sock.h>
7c657876
ACM
28#include <net/protocol.h>
29#include <net/sock.h>
30#include <net/xfrm.h>
31
32#include <asm/semaphore.h>
33#include <linux/spinlock.h>
34#include <linux/timer.h>
35#include <linux/delay.h>
36#include <linux/poll.h>
7c657876
ACM
37
38#include "ccid.h"
39#include "dccp.h"
afe00251 40#include "feat.h"
7c657876 41
ba89966c 42DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
7c657876 43
f21e68ca
ACM
44EXPORT_SYMBOL_GPL(dccp_statistics);
45
7c657876
ACM
46atomic_t dccp_orphan_count = ATOMIC_INIT(0);
47
f21e68ca
ACM
48EXPORT_SYMBOL_GPL(dccp_orphan_count);
49
7c657876
ACM
50static struct net_protocol dccp_protocol = {
51 .handler = dccp_v4_rcv,
52 .err_handler = dccp_v4_err,
a516b049 53 .no_policy = 1,
7c657876
ACM
54};
55
56const char *dccp_packet_name(const int type)
57{
58 static const char *dccp_packet_names[] = {
59 [DCCP_PKT_REQUEST] = "REQUEST",
60 [DCCP_PKT_RESPONSE] = "RESPONSE",
61 [DCCP_PKT_DATA] = "DATA",
62 [DCCP_PKT_ACK] = "ACK",
63 [DCCP_PKT_DATAACK] = "DATAACK",
64 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65 [DCCP_PKT_CLOSE] = "CLOSE",
66 [DCCP_PKT_RESET] = "RESET",
67 [DCCP_PKT_SYNC] = "SYNC",
68 [DCCP_PKT_SYNCACK] = "SYNCACK",
69 };
70
71 if (type >= DCCP_NR_PKT_TYPES)
72 return "INVALID";
73 else
74 return dccp_packet_names[type];
75}
76
77EXPORT_SYMBOL_GPL(dccp_packet_name);
78
79const char *dccp_state_name(const int state)
80{
81 static char *dccp_state_names[] = {
82 [DCCP_OPEN] = "OPEN",
83 [DCCP_REQUESTING] = "REQUESTING",
84 [DCCP_PARTOPEN] = "PARTOPEN",
85 [DCCP_LISTEN] = "LISTEN",
86 [DCCP_RESPOND] = "RESPOND",
87 [DCCP_CLOSING] = "CLOSING",
88 [DCCP_TIME_WAIT] = "TIME_WAIT",
89 [DCCP_CLOSED] = "CLOSED",
90 };
91
92 if (state >= DCCP_MAX_STATES)
93 return "INVALID STATE!";
94 else
95 return dccp_state_names[state];
96}
97
98EXPORT_SYMBOL_GPL(dccp_state_name);
99
c985ed70
ACM
100void dccp_hash(struct sock *sk)
101{
102 inet_hash(&dccp_hashinfo, sk);
103}
104
105EXPORT_SYMBOL_GPL(dccp_hash);
106
107void dccp_unhash(struct sock *sk)
108{
109 inet_unhash(&dccp_hashinfo, sk);
110}
111
112EXPORT_SYMBOL_GPL(dccp_unhash);
113
3e0fadc5
ACM
114int dccp_init_sock(struct sock *sk)
115{
116 struct dccp_sock *dp = dccp_sk(sk);
117 struct inet_connection_sock *icsk = inet_csk(sk);
118 static int dccp_ctl_socket_init = 1;
119
120 dccp_options_init(&dp->dccps_options);
121 do_gettimeofday(&dp->dccps_epoch);
122
123 /*
124 * FIXME: We're hardcoding the CCID, and doing this at this point makes
125 * the listening (master) sock get CCID control blocks, which is not
126 * necessary, but for now, to not mess with the test userspace apps,
127 * lets leave it here, later the real solution is to do this in a
128 * setsockopt(CCIDs-I-want/accept). -acme
129 */
130 if (likely(!dccp_ctl_socket_init)) {
131 int rc = dccp_feat_init(sk);
132
133 if (rc)
134 return rc;
135
136 if (dp->dccps_options.dccpo_send_ack_vector) {
137 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
138 if (dp->dccps_hc_rx_ackvec == NULL)
139 return -ENOMEM;
140 }
141 dp->dccps_hc_rx_ccid =
142 ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
143 sk, GFP_KERNEL);
144 dp->dccps_hc_tx_ccid =
145 ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
146 sk, GFP_KERNEL);
147 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
148 dp->dccps_hc_tx_ccid == NULL)) {
149 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
150 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
151 if (dp->dccps_options.dccpo_send_ack_vector) {
152 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
153 dp->dccps_hc_rx_ackvec = NULL;
154 }
155 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
156 return -ENOMEM;
157 }
158 } else {
159 /* control socket doesn't need feat nego */
160 INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
161 INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
162 dccp_ctl_socket_init = 0;
163 }
164
165 dccp_init_xmit_timers(sk);
166 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
167 sk->sk_state = DCCP_CLOSED;
168 sk->sk_write_space = dccp_write_space;
169 icsk->icsk_sync_mss = dccp_sync_mss;
170 dp->dccps_mss_cache = 536;
171 dp->dccps_role = DCCP_ROLE_UNDEFINED;
172 dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
173 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
174
175 return 0;
176}
177
178EXPORT_SYMBOL_GPL(dccp_init_sock);
179
180int dccp_destroy_sock(struct sock *sk)
181{
182 struct dccp_sock *dp = dccp_sk(sk);
183
184 /*
185 * DCCP doesn't use sk_write_queue, just sk_send_head
186 * for retransmissions
187 */
188 if (sk->sk_send_head != NULL) {
189 kfree_skb(sk->sk_send_head);
190 sk->sk_send_head = NULL;
191 }
192
193 /* Clean up a referenced DCCP bind bucket. */
194 if (inet_csk(sk)->icsk_bind_hash != NULL)
195 inet_put_port(&dccp_hashinfo, sk);
196
197 kfree(dp->dccps_service_list);
198 dp->dccps_service_list = NULL;
199
200 if (dp->dccps_options.dccpo_send_ack_vector) {
201 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
202 dp->dccps_hc_rx_ackvec = NULL;
203 }
204 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
205 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
206 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
207
208 /* clean up feature negotiation state */
209 dccp_feat_clean(sk);
210
211 return 0;
212}
213
214EXPORT_SYMBOL_GPL(dccp_destroy_sock);
215
7c657876
ACM
216static inline int dccp_listen_start(struct sock *sk)
217{
67e6b629
ACM
218 struct dccp_sock *dp = dccp_sk(sk);
219
220 dp->dccps_role = DCCP_ROLE_LISTEN;
221 /*
222 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
223 * before calling listen()
224 */
225 if (dccp_service_not_initialized(sk))
226 return -EPROTO;
7c657876
ACM
227 return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
228}
229
230int dccp_disconnect(struct sock *sk, int flags)
231{
232 struct inet_connection_sock *icsk = inet_csk(sk);
233 struct inet_sock *inet = inet_sk(sk);
234 int err = 0;
235 const int old_state = sk->sk_state;
236
237 if (old_state != DCCP_CLOSED)
238 dccp_set_state(sk, DCCP_CLOSED);
239
240 /* ABORT function of RFC793 */
241 if (old_state == DCCP_LISTEN) {
242 inet_csk_listen_stop(sk);
243 /* FIXME: do the active reset thing */
244 } else if (old_state == DCCP_REQUESTING)
245 sk->sk_err = ECONNRESET;
246
247 dccp_clear_xmit_timers(sk);
248 __skb_queue_purge(&sk->sk_receive_queue);
249 if (sk->sk_send_head != NULL) {
250 __kfree_skb(sk->sk_send_head);
251 sk->sk_send_head = NULL;
252 }
253
254 inet->dport = 0;
255
256 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
257 inet_reset_saddr(sk);
258
259 sk->sk_shutdown = 0;
260 sock_reset_flag(sk, SOCK_DONE);
261
262 icsk->icsk_backoff = 0;
263 inet_csk_delack_init(sk);
264 __sk_dst_reset(sk);
265
266 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
267
268 sk->sk_error_report(sk);
269 return err;
270}
271
f21e68ca
ACM
272EXPORT_SYMBOL_GPL(dccp_disconnect);
273
331968bd
ACM
274/*
275 * Wait for a DCCP event.
276 *
277 * Note that we don't need to lock the socket, as the upper poll layers
278 * take care of normal races (between the test and the event) and we don't
279 * go look at any of the socket buffers directly.
280 */
f21e68ca
ACM
281unsigned int dccp_poll(struct file *file, struct socket *sock,
282 poll_table *wait)
331968bd
ACM
283{
284 unsigned int mask;
285 struct sock *sk = sock->sk;
286
287 poll_wait(file, sk->sk_sleep, wait);
288 if (sk->sk_state == DCCP_LISTEN)
289 return inet_csk_listen_poll(sk);
290
291 /* Socket is not locked. We are protected from async events
292 by poll logic and correct handling of state changes
293 made by another threads is impossible in any case.
294 */
295
296 mask = 0;
297 if (sk->sk_err)
298 mask = POLLERR;
299
300 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
301 mask |= POLLHUP;
302 if (sk->sk_shutdown & RCV_SHUTDOWN)
303 mask |= POLLIN | POLLRDNORM;
304
305 /* Connected? */
306 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
307 if (atomic_read(&sk->sk_rmem_alloc) > 0)
308 mask |= POLLIN | POLLRDNORM;
309
310 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
311 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
312 mask |= POLLOUT | POLLWRNORM;
313 } else { /* send SIGIO later */
314 set_bit(SOCK_ASYNC_NOSPACE,
315 &sk->sk_socket->flags);
316 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
317
318 /* Race breaker. If space is freed after
319 * wspace test but before the flags are set,
320 * IO signal will be lost.
321 */
322 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
323 mask |= POLLOUT | POLLWRNORM;
324 }
325 }
326 }
327 return mask;
328}
329
f21e68ca
ACM
330EXPORT_SYMBOL_GPL(dccp_poll);
331
7c657876
ACM
332int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
333{
334 dccp_pr_debug("entry\n");
335 return -ENOIOCTLCMD;
336}
337
f21e68ca
ACM
338EXPORT_SYMBOL_GPL(dccp_ioctl);
339
60fe62e7 340static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
67e6b629
ACM
341 char __user *optval, int optlen)
342{
343 struct dccp_sock *dp = dccp_sk(sk);
344 struct dccp_service_list *sl = NULL;
345
346 if (service == DCCP_SERVICE_INVALID_VALUE ||
347 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
348 return -EINVAL;
349
350 if (optlen > sizeof(service)) {
351 sl = kmalloc(optlen, GFP_KERNEL);
352 if (sl == NULL)
353 return -ENOMEM;
354
355 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
356 if (copy_from_user(sl->dccpsl_list,
357 optval + sizeof(service),
358 optlen - sizeof(service)) ||
359 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
360 kfree(sl);
361 return -EFAULT;
362 }
363 }
364
365 lock_sock(sk);
366 dp->dccps_service = service;
367
a51482bd 368 kfree(dp->dccps_service_list);
67e6b629
ACM
369
370 dp->dccps_service_list = sl;
371 release_sock(sk);
372 return 0;
373}
374
afe00251
AB
375/* byte 1 is feature. the rest is the preference list */
376static int dccp_setsockopt_change(struct sock *sk, int type,
377 struct dccp_so_feat __user *optval)
378{
379 struct dccp_so_feat opt;
380 u8 *val;
381 int rc;
382
383 if (copy_from_user(&opt, optval, sizeof(opt)))
384 return -EFAULT;
385
386 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
387 if (!val)
388 return -ENOMEM;
389
390 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
391 rc = -EFAULT;
392 goto out_free_val;
393 }
394
395 rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
396 GFP_KERNEL);
397 if (rc)
398 goto out_free_val;
399
400out:
401 return rc;
402
403out_free_val:
404 kfree(val);
405 goto out;
406}
407
7c657876 408int dccp_setsockopt(struct sock *sk, int level, int optname,
a1d3a355 409 char __user *optval, int optlen)
7c657876 410{
a84ffe43
ACM
411 struct dccp_sock *dp;
412 int err;
413 int val;
7c657876
ACM
414
415 if (level != SOL_DCCP)
57cca05a
ACM
416 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
417 optname, optval,
418 optlen);
7c657876 419
a84ffe43
ACM
420 if (optlen < sizeof(int))
421 return -EINVAL;
422
423 if (get_user(val, (int __user *)optval))
424 return -EFAULT;
425
67e6b629
ACM
426 if (optname == DCCP_SOCKOPT_SERVICE)
427 return dccp_setsockopt_service(sk, val, optval, optlen);
a84ffe43 428
67e6b629 429 lock_sock(sk);
a84ffe43
ACM
430 dp = dccp_sk(sk);
431 err = 0;
432
433 switch (optname) {
434 case DCCP_SOCKOPT_PACKET_SIZE:
435 dp->dccps_packet_size = val;
436 break;
afe00251
AB
437
438 case DCCP_SOCKOPT_CHANGE_L:
439 if (optlen != sizeof(struct dccp_so_feat))
440 err = -EINVAL;
441 else
442 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
443 (struct dccp_so_feat *)
444 optval);
445 break;
446
447 case DCCP_SOCKOPT_CHANGE_R:
448 if (optlen != sizeof(struct dccp_so_feat))
449 err = -EINVAL;
450 else
451 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
452 (struct dccp_so_feat *)
453 optval);
454 break;
455
a84ffe43
ACM
456 default:
457 err = -ENOPROTOOPT;
458 break;
459 }
460
461 release_sock(sk);
462 return err;
7c657876
ACM
463}
464
f21e68ca
ACM
465EXPORT_SYMBOL_GPL(dccp_setsockopt);
466
67e6b629 467static int dccp_getsockopt_service(struct sock *sk, int len,
60fe62e7 468 __be32 __user *optval,
67e6b629
ACM
469 int __user *optlen)
470{
471 const struct dccp_sock *dp = dccp_sk(sk);
472 const struct dccp_service_list *sl;
473 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
474
475 lock_sock(sk);
476 if (dccp_service_not_initialized(sk))
477 goto out;
478
479 if ((sl = dp->dccps_service_list) != NULL) {
480 slen = sl->dccpsl_nr * sizeof(u32);
481 total_len += slen;
482 }
483
484 err = -EINVAL;
485 if (total_len > len)
486 goto out;
487
488 err = 0;
489 if (put_user(total_len, optlen) ||
490 put_user(dp->dccps_service, optval) ||
491 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
492 err = -EFAULT;
493out:
494 release_sock(sk);
495 return err;
496}
497
7c657876 498int dccp_getsockopt(struct sock *sk, int level, int optname,
a1d3a355 499 char __user *optval, int __user *optlen)
7c657876 500{
a84ffe43
ACM
501 struct dccp_sock *dp;
502 int val, len;
7c657876
ACM
503
504 if (level != SOL_DCCP)
57cca05a
ACM
505 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
506 optname, optval,
507 optlen);
a84ffe43
ACM
508 if (get_user(len, optlen))
509 return -EFAULT;
510
88f964db 511 if (len < sizeof(int))
a84ffe43
ACM
512 return -EINVAL;
513
514 dp = dccp_sk(sk);
515
516 switch (optname) {
517 case DCCP_SOCKOPT_PACKET_SIZE:
518 val = dp->dccps_packet_size;
88f964db 519 len = sizeof(dp->dccps_packet_size);
a84ffe43 520 break;
88f964db
ACM
521 case DCCP_SOCKOPT_SERVICE:
522 return dccp_getsockopt_service(sk, len,
60fe62e7 523 (__be32 __user *)optval, optlen);
88f964db
ACM
524 case 128 ... 191:
525 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
526 len, (u32 __user *)optval, optlen);
527 case 192 ... 255:
528 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
529 len, (u32 __user *)optval, optlen);
a84ffe43
ACM
530 default:
531 return -ENOPROTOOPT;
532 }
533
534 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
535 return -EFAULT;
536
537 return 0;
7c657876
ACM
538}
539
f21e68ca
ACM
540EXPORT_SYMBOL_GPL(dccp_getsockopt);
541
7c657876
ACM
542int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
543 size_t len)
544{
545 const struct dccp_sock *dp = dccp_sk(sk);
546 const int flags = msg->msg_flags;
547 const int noblock = flags & MSG_DONTWAIT;
548 struct sk_buff *skb;
549 int rc, size;
550 long timeo;
551
552 if (len > dp->dccps_mss_cache)
553 return -EMSGSIZE;
554
555 lock_sock(sk);
27258ee5 556 timeo = sock_sndtimeo(sk, noblock);
7c657876
ACM
557
558 /*
559 * We have to use sk_stream_wait_connect here to set sk_write_pending,
560 * so that the trick in dccp_rcv_request_sent_state_process.
561 */
562 /* Wait for a connection to finish. */
563 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
564 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
27258ee5 565 goto out_release;
7c657876
ACM
566
567 size = sk->sk_prot->max_header + len;
568 release_sock(sk);
569 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
570 lock_sock(sk);
7c657876
ACM
571 if (skb == NULL)
572 goto out_release;
573
574 skb_reserve(skb, sk->sk_prot->max_header);
575 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
27258ee5
ACM
576 if (rc != 0)
577 goto out_discard;
578
d6809c12 579 rc = dccp_write_xmit(sk, skb, &timeo);
20472af9
ACM
580 /*
581 * XXX we don't use sk_write_queue, so just discard the packet.
582 * Current plan however is to _use_ sk_write_queue with
583 * an algorith similar to tcp_sendmsg, where the main difference
584 * is that in DCCP we have to respect packet boundaries, so
585 * no coalescing of skbs.
586 *
587 * This bug was _quickly_ found & fixed by just looking at an OSTRA
588 * generated callgraph 8) -acme
589 */
7c657876
ACM
590out_release:
591 release_sock(sk);
592 return rc ? : len;
27258ee5
ACM
593out_discard:
594 kfree_skb(skb);
7c657876 595 goto out_release;
7c657876
ACM
596}
597
f21e68ca
ACM
598EXPORT_SYMBOL_GPL(dccp_sendmsg);
599
7c657876
ACM
600int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
601 size_t len, int nonblock, int flags, int *addr_len)
602{
603 const struct dccp_hdr *dh;
7c657876
ACM
604 long timeo;
605
606 lock_sock(sk);
607
531669a0
ACM
608 if (sk->sk_state == DCCP_LISTEN) {
609 len = -ENOTCONN;
7c657876 610 goto out;
7c657876 611 }
7c657876 612
531669a0 613 timeo = sock_rcvtimeo(sk, nonblock);
7c657876
ACM
614
615 do {
531669a0 616 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
7c657876 617
531669a0
ACM
618 if (skb == NULL)
619 goto verify_sock_status;
7c657876 620
531669a0 621 dh = dccp_hdr(skb);
7c657876 622
531669a0
ACM
623 if (dh->dccph_type == DCCP_PKT_DATA ||
624 dh->dccph_type == DCCP_PKT_DATAACK)
625 goto found_ok_skb;
7c657876 626
531669a0
ACM
627 if (dh->dccph_type == DCCP_PKT_RESET ||
628 dh->dccph_type == DCCP_PKT_CLOSE) {
629 dccp_pr_debug("found fin ok!\n");
630 len = 0;
631 goto found_fin_ok;
632 }
633 dccp_pr_debug("packet_type=%s\n",
634 dccp_packet_name(dh->dccph_type));
635 sk_eat_skb(sk, skb);
636verify_sock_status:
637 if (sock_flag(sk, SOCK_DONE)) {
638 len = 0;
7c657876 639 break;
531669a0 640 }
7c657876 641
531669a0
ACM
642 if (sk->sk_err) {
643 len = sock_error(sk);
644 break;
645 }
7c657876 646
531669a0
ACM
647 if (sk->sk_shutdown & RCV_SHUTDOWN) {
648 len = 0;
649 break;
650 }
7c657876 651
531669a0
ACM
652 if (sk->sk_state == DCCP_CLOSED) {
653 if (!sock_flag(sk, SOCK_DONE)) {
654 /* This occurs when user tries to read
655 * from never connected socket.
656 */
657 len = -ENOTCONN;
7c657876
ACM
658 break;
659 }
531669a0
ACM
660 len = 0;
661 break;
7c657876
ACM
662 }
663
531669a0
ACM
664 if (!timeo) {
665 len = -EAGAIN;
666 break;
667 }
7c657876 668
531669a0
ACM
669 if (signal_pending(current)) {
670 len = sock_intr_errno(timeo);
671 break;
672 }
7c657876 673
531669a0 674 sk_wait_data(sk, &timeo);
7c657876 675 continue;
7c657876 676 found_ok_skb:
531669a0
ACM
677 if (len > skb->len)
678 len = skb->len;
679 else if (len < skb->len)
680 msg->msg_flags |= MSG_TRUNC;
681
682 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
683 /* Exception. Bailout! */
684 len = -EFAULT;
685 break;
7c657876 686 }
7c657876
ACM
687 found_fin_ok:
688 if (!(flags & MSG_PEEK))
689 sk_eat_skb(sk, skb);
690 break;
531669a0 691 } while (1);
7c657876
ACM
692out:
693 release_sock(sk);
531669a0 694 return len;
7c657876
ACM
695}
696
f21e68ca
ACM
697EXPORT_SYMBOL_GPL(dccp_recvmsg);
698
699int inet_dccp_listen(struct socket *sock, int backlog)
7c657876
ACM
700{
701 struct sock *sk = sock->sk;
702 unsigned char old_state;
703 int err;
704
705 lock_sock(sk);
706
707 err = -EINVAL;
708 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
709 goto out;
710
711 old_state = sk->sk_state;
712 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
713 goto out;
714
715 /* Really, if the socket is already in listen state
716 * we can only allow the backlog to be adjusted.
717 */
718 if (old_state != DCCP_LISTEN) {
719 /*
720 * FIXME: here it probably should be sk->sk_prot->listen_start
721 * see tcp_listen_start
722 */
723 err = dccp_listen_start(sk);
724 if (err)
725 goto out;
726 }
727 sk->sk_max_ack_backlog = backlog;
728 err = 0;
729
730out:
731 release_sock(sk);
732 return err;
733}
734
f21e68ca
ACM
735EXPORT_SYMBOL_GPL(inet_dccp_listen);
736
7c657876 737static const unsigned char dccp_new_state[] = {
7690af3f
ACM
738 /* current state: new state: action: */
739 [0] = DCCP_CLOSED,
740 [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
741 [DCCP_REQUESTING] = DCCP_CLOSED,
742 [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
743 [DCCP_LISTEN] = DCCP_CLOSED,
744 [DCCP_RESPOND] = DCCP_CLOSED,
745 [DCCP_CLOSING] = DCCP_CLOSED,
746 [DCCP_TIME_WAIT] = DCCP_CLOSED,
747 [DCCP_CLOSED] = DCCP_CLOSED,
7c657876
ACM
748};
749
750static int dccp_close_state(struct sock *sk)
751{
752 const int next = dccp_new_state[sk->sk_state];
753 const int ns = next & DCCP_STATE_MASK;
754
755 if (ns != sk->sk_state)
756 dccp_set_state(sk, ns);
757
758 return next & DCCP_ACTION_FIN;
759}
760
761void dccp_close(struct sock *sk, long timeout)
762{
763 struct sk_buff *skb;
764
765 lock_sock(sk);
766
767 sk->sk_shutdown = SHUTDOWN_MASK;
768
769 if (sk->sk_state == DCCP_LISTEN) {
770 dccp_set_state(sk, DCCP_CLOSED);
771
772 /* Special case. */
773 inet_csk_listen_stop(sk);
774
775 goto adjudge_to_death;
776 }
777
778 /*
779 * We need to flush the recv. buffs. We do this only on the
780 * descriptor close, not protocol-sourced closes, because the
781 *reader process may not have drained the data yet!
782 */
783 /* FIXME: check for unread data */
784 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
785 __kfree_skb(skb);
786 }
787
788 if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
789 /* Check zero linger _after_ checking for unread data. */
790 sk->sk_prot->disconnect(sk, 0);
791 } else if (dccp_close_state(sk)) {
7ad07e7c 792 dccp_send_close(sk, 1);
7c657876
ACM
793 }
794
795 sk_stream_wait_close(sk, timeout);
796
797adjudge_to_death:
7ad07e7c
ACM
798 /*
799 * It is the last release_sock in its life. It will remove backlog.
800 */
7c657876
ACM
801 release_sock(sk);
802 /*
803 * Now socket is owned by kernel and we acquire BH lock
804 * to finish close. No need to check for user refs.
805 */
806 local_bh_disable();
807 bh_lock_sock(sk);
808 BUG_TRAP(!sock_owned_by_user(sk));
809
810 sock_hold(sk);
811 sock_orphan(sk);
7ad07e7c
ACM
812
813 /*
814 * The last release_sock may have processed the CLOSE or RESET
815 * packet moving sock to CLOSED state, if not we have to fire
816 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
817 * in draft-ietf-dccp-spec-11. -acme
818 */
819 if (sk->sk_state == DCCP_CLOSING) {
820 /* FIXME: should start at 2 * RTT */
821 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
822 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
823 inet_csk(sk)->icsk_rto,
824 DCCP_RTO_MAX);
825#if 0
826 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
7c657876 827 dccp_set_state(sk, DCCP_CLOSED);
7ad07e7c
ACM
828#endif
829 }
7c657876 830
7ad07e7c 831 atomic_inc(sk->sk_prot->orphan_count);
7c657876
ACM
832 if (sk->sk_state == DCCP_CLOSED)
833 inet_csk_destroy_sock(sk);
834
835 /* Otherwise, socket is reprieved until protocol close. */
836
837 bh_unlock_sock(sk);
838 local_bh_enable();
839 sock_put(sk);
840}
841
f21e68ca
ACM
842EXPORT_SYMBOL_GPL(dccp_close);
843
7c657876
ACM
844void dccp_shutdown(struct sock *sk, int how)
845{
846 dccp_pr_debug("entry\n");
847}
848
f21e68ca
ACM
849EXPORT_SYMBOL_GPL(dccp_shutdown);
850
90ddc4f0 851static const struct proto_ops inet_dccp_ops = {
7c657876
ACM
852 .family = PF_INET,
853 .owner = THIS_MODULE,
854 .release = inet_release,
855 .bind = inet_bind,
856 .connect = inet_stream_connect,
857 .socketpair = sock_no_socketpair,
858 .accept = inet_accept,
859 .getname = inet_getname,
331968bd
ACM
860 /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
861 .poll = dccp_poll,
7c657876 862 .ioctl = inet_ioctl,
7690af3f
ACM
863 /* FIXME: work on inet_listen to rename it to sock_common_listen */
864 .listen = inet_dccp_listen,
7c657876
ACM
865 .shutdown = inet_shutdown,
866 .setsockopt = sock_common_setsockopt,
867 .getsockopt = sock_common_getsockopt,
868 .sendmsg = inet_sendmsg,
869 .recvmsg = sock_common_recvmsg,
870 .mmap = sock_no_mmap,
871 .sendpage = sock_no_sendpage,
872};
873
874extern struct net_proto_family inet_family_ops;
875
876static struct inet_protosw dccp_v4_protosw = {
877 .type = SOCK_DCCP,
878 .protocol = IPPROTO_DCCP,
34ca6860 879 .prot = &dccp_prot,
7c657876
ACM
880 .ops = &inet_dccp_ops,
881 .capability = -1,
882 .no_check = 0,
d83d8461 883 .flags = INET_PROTOSW_ICSK,
7c657876
ACM
884};
885
886/*
887 * This is the global socket data structure used for responding to
888 * the Out-of-the-blue (OOTB) packets. A control sock will be created
889 * for this socket at the initialization time.
890 */
891struct socket *dccp_ctl_socket;
892
893static char dccp_ctl_socket_err_msg[] __initdata =
894 KERN_ERR "DCCP: Failed to create the control socket.\n";
895
896static int __init dccp_ctl_sock_init(void)
897{
898 int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
899 &dccp_ctl_socket);
900 if (rc < 0)
901 printk(dccp_ctl_socket_err_msg);
902 else {
903 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
904 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
905
906 /* Unhash it so that IP input processing does not even
907 * see it, we do not wish this socket to see incoming
908 * packets.
909 */
910 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
911 }
912
913 return rc;
914}
915
725ba8ee
ACM
916#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
917void dccp_ctl_sock_exit(void)
7c657876 918{
5480855b 919 if (dccp_ctl_socket != NULL) {
7c657876 920 sock_release(dccp_ctl_socket);
5480855b
ACM
921 dccp_ctl_socket = NULL;
922 }
7c657876
ACM
923}
924
725ba8ee
ACM
925EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
926#endif
927
7c657876
ACM
928static int __init init_dccp_v4_mibs(void)
929{
930 int rc = -ENOMEM;
931
932 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
933 if (dccp_statistics[0] == NULL)
934 goto out;
935
936 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
937 if (dccp_statistics[1] == NULL)
938 goto out_free_one;
939
940 rc = 0;
941out:
942 return rc;
943out_free_one:
944 free_percpu(dccp_statistics[0]);
945 dccp_statistics[0] = NULL;
946 goto out;
947
948}
949
950static int thash_entries;
951module_param(thash_entries, int, 0444);
952MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
953
a1d3a355 954#ifdef CONFIG_IP_DCCP_DEBUG
7c657876
ACM
955int dccp_debug;
956module_param(dccp_debug, int, 0444);
957MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
f21e68ca
ACM
958
959EXPORT_SYMBOL_GPL(dccp_debug);
a1d3a355 960#endif
7c657876
ACM
961
962static int __init dccp_init(void)
963{
964 unsigned long goal;
965 int ehash_order, bhash_order, i;
34ca6860 966 int rc = proto_register(&dccp_prot, 1);
7c657876
ACM
967
968 if (rc)
969 goto out;
970
fa23e2ec 971 rc = -ENOBUFS;
7690af3f
ACM
972 dccp_hashinfo.bind_bucket_cachep =
973 kmem_cache_create("dccp_bind_bucket",
974 sizeof(struct inet_bind_bucket), 0,
975 SLAB_HWCACHE_ALIGN, NULL, NULL);
7c657876
ACM
976 if (!dccp_hashinfo.bind_bucket_cachep)
977 goto out_proto_unregister;
978
979 /*
980 * Size and allocate the main established and bind bucket
981 * hash tables.
982 *
983 * The methodology is similar to that of the buffer cache.
984 */
985 if (num_physpages >= (128 * 1024))
986 goal = num_physpages >> (21 - PAGE_SHIFT);
987 else
988 goal = num_physpages >> (23 - PAGE_SHIFT);
989
990 if (thash_entries)
7690af3f
ACM
991 goal = (thash_entries *
992 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
7c657876
ACM
993 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
994 ;
995 do {
996 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
997 sizeof(struct inet_ehash_bucket);
998 dccp_hashinfo.ehash_size >>= 1;
7690af3f
ACM
999 while (dccp_hashinfo.ehash_size &
1000 (dccp_hashinfo.ehash_size - 1))
7c657876
ACM
1001 dccp_hashinfo.ehash_size--;
1002 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1003 __get_free_pages(GFP_ATOMIC, ehash_order);
1004 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1005
1006 if (!dccp_hashinfo.ehash) {
1007 printk(KERN_CRIT "Failed to allocate DCCP "
1008 "established hash table\n");
1009 goto out_free_bind_bucket_cachep;
1010 }
1011
1012 for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1013 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1014 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1015 }
1016
1017 bhash_order = ehash_order;
1018
1019 do {
1020 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1021 sizeof(struct inet_bind_hashbucket);
7690af3f
ACM
1022 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1023 bhash_order > 0)
7c657876
ACM
1024 continue;
1025 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1026 __get_free_pages(GFP_ATOMIC, bhash_order);
1027 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1028
1029 if (!dccp_hashinfo.bhash) {
1030 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
1031 goto out_free_dccp_ehash;
1032 }
1033
1034 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1035 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1036 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1037 }
1038
fa23e2ec
ACM
1039 rc = init_dccp_v4_mibs();
1040 if (rc)
7c657876
ACM
1041 goto out_free_dccp_bhash;
1042
1043 rc = -EAGAIN;
1044 if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
1045 goto out_free_dccp_v4_mibs;
1046
1047 inet_register_protosw(&dccp_v4_protosw);
1048
9b07ef5d 1049 rc = dccp_ackvec_init();
7c657876
ACM
1050 if (rc)
1051 goto out_unregister_protosw;
9b07ef5d 1052
e55d912f 1053 rc = dccp_sysctl_init();
9b07ef5d
ACM
1054 if (rc)
1055 goto out_ackvec_exit;
e55d912f
ACM
1056
1057 rc = dccp_ctl_sock_init();
1058 if (rc)
1059 goto out_sysctl_exit;
7c657876
ACM
1060out:
1061 return rc;
e55d912f
ACM
1062out_sysctl_exit:
1063 dccp_sysctl_exit();
9b07ef5d
ACM
1064out_ackvec_exit:
1065 dccp_ackvec_exit();
7c657876
ACM
1066out_unregister_protosw:
1067 inet_unregister_protosw(&dccp_v4_protosw);
1068 inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
1069out_free_dccp_v4_mibs:
1070 free_percpu(dccp_statistics[0]);
1071 free_percpu(dccp_statistics[1]);
1072 dccp_statistics[0] = dccp_statistics[1] = NULL;
1073out_free_dccp_bhash:
1074 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1075 dccp_hashinfo.bhash = NULL;
1076out_free_dccp_ehash:
1077 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1078 dccp_hashinfo.ehash = NULL;
1079out_free_bind_bucket_cachep:
1080 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1081 dccp_hashinfo.bind_bucket_cachep = NULL;
1082out_proto_unregister:
34ca6860 1083 proto_unregister(&dccp_prot);
7c657876
ACM
1084 goto out;
1085}
1086
1087static const char dccp_del_proto_err_msg[] __exitdata =
1088 KERN_ERR "can't remove dccp net_protocol\n";
1089
1090static void __exit dccp_fini(void)
1091{
7c657876
ACM
1092 inet_unregister_protosw(&dccp_v4_protosw);
1093
1094 if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
1095 printk(dccp_del_proto_err_msg);
1096
725ba8ee
ACM
1097 free_percpu(dccp_statistics[0]);
1098 free_percpu(dccp_statistics[1]);
1099 free_pages((unsigned long)dccp_hashinfo.bhash,
1100 get_order(dccp_hashinfo.bhash_size *
1101 sizeof(struct inet_bind_hashbucket)));
1102 free_pages((unsigned long)dccp_hashinfo.ehash,
1103 get_order(dccp_hashinfo.ehash_size *
1104 sizeof(struct inet_ehash_bucket)));
7c657876 1105 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
34ca6860 1106 proto_unregister(&dccp_prot);
9b07ef5d 1107 dccp_ackvec_exit();
e55d912f 1108 dccp_sysctl_exit();
7c657876
ACM
1109}
1110
1111module_init(dccp_init);
1112module_exit(dccp_fini);
1113
bb97d31f
ACM
1114/*
1115 * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
1116 * values directly, Also cover the case where the protocol is not specified,
1117 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
1118 */
1119MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
1120MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
7c657876
ACM
1121MODULE_LICENSE("GPL");
1122MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1123MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");