[DCCP]: Move the IPv4 specific bits from proto.c to ipv4.c
[linux-2.6.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_sock.h>
27 #include <net/sock.h>
28 #include <net/xfrm.h>
29
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
/* Per-CPU DCCP SNMP counters; the two tables are allocated in dccp_mib_init() */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of orphaned DCCP sockets, bumped in dccp_close() */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * Shared socket hash tables.  Only the listen-hash members can be set up
 * statically; ehash/bhash are sized and allocated at runtime in dccp_init().
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
56 const char *dccp_packet_name(const int type)
57 {
58         static const char *dccp_packet_names[] = {
59                 [DCCP_PKT_REQUEST]  = "REQUEST",
60                 [DCCP_PKT_RESPONSE] = "RESPONSE",
61                 [DCCP_PKT_DATA]     = "DATA",
62                 [DCCP_PKT_ACK]      = "ACK",
63                 [DCCP_PKT_DATAACK]  = "DATAACK",
64                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
65                 [DCCP_PKT_CLOSE]    = "CLOSE",
66                 [DCCP_PKT_RESET]    = "RESET",
67                 [DCCP_PKT_SYNC]     = "SYNC",
68                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
69         };
70
71         if (type >= DCCP_NR_PKT_TYPES)
72                 return "INVALID";
73         else
74                 return dccp_packet_names[type];
75 }
76
77 EXPORT_SYMBOL_GPL(dccp_packet_name);
78
79 const char *dccp_state_name(const int state)
80 {
81         static char *dccp_state_names[] = {
82         [DCCP_OPEN]       = "OPEN",
83         [DCCP_REQUESTING] = "REQUESTING",
84         [DCCP_PARTOPEN]   = "PARTOPEN",
85         [DCCP_LISTEN]     = "LISTEN",
86         [DCCP_RESPOND]    = "RESPOND",
87         [DCCP_CLOSING]    = "CLOSING",
88         [DCCP_TIME_WAIT]  = "TIME_WAIT",
89         [DCCP_CLOSED]     = "CLOSED",
90         };
91
92         if (state >= DCCP_MAX_STATES)
93                 return "INVALID STATE!";
94         else
95                 return dccp_state_names[state];
96 }
97
98 EXPORT_SYMBOL_GPL(dccp_state_name);
99
/* Insert @sk into the shared DCCP hash tables (wrapper around inet_hash()) */
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);
106
/* Remove @sk from the shared DCCP hash tables (wrapper around inet_unhash()) */
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);
113
/**
 * dccp_init_sock  -  Per-socket initialisation at socket creation time
 * @sk: socket being set up
 *
 * Initialises default options, timers and role/service defaults.  For all
 * sockets except the very first one created (the control socket) it also
 * runs feature-negotiation init and allocates the ack vector and both
 * half-connection CCIDs.  Returns 0 or a negative errno.
 */
int dccp_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	/* 1 only for the first call ever: that call sets up the control socket */
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);
	do_gettimeofday(&dp->dccps_epoch);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		/* Ack vector is only allocated when the feature is on */
		if (dp->dccps_options.dccpo_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid =
				ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
					       sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid =
				ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
					       sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Unwind everything allocated above on partial failure */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dp->dccps_options.dccpo_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
		dccp_ctl_socket_init = 0;
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default MSS; presumably
					 * the classic 576-40 value - TODO confirm */
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_INVALID_VALUE;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
179
/**
 * dccp_destroy_sock  -  Final per-socket cleanup
 * @sk: socket being destroyed
 *
 * Frees the pending retransmit skb, releases the bind bucket, the service
 * list, the ack vector and both half-connection CCIDs, then clears the
 * feature-negotiation state.  Always returns 0.
 */
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	/* Ack vector exists only when the feature was enabled in init */
	if (dp->dccps_options.dccpo_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(sk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
215
216 static inline int dccp_listen_start(struct sock *sk)
217 {
218         struct dccp_sock *dp = dccp_sk(sk);
219
220         dp->dccps_role = DCCP_ROLE_LISTEN;
221         /*
222          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
223          * before calling listen()
224          */
225         if (dccp_service_not_initialized(sk))
226                 return -EPROTO;
227         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
228 }
229
/**
 * dccp_disconnect  -  Abort the connection and reset the socket
 * @sk: socket to disconnect (caller holds the socket lock)
 * @flags: unused here
 *
 * Roughly the ABORT function of RFC 793: moves the socket to CLOSED,
 * stops timers, purges queues and clears addressing state so the socket
 * can be reused.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	/* only sk_send_head is used for retransmissions, drop it too */
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* forget the source address unless the user explicitly bound it */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
273
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected?  DCCPF_* appear to be per-state (1 << state) masks,
	 * cf. the (1 << old_state) test in inet_dccp_listen() */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
331
/* No DCCP-level ioctls are implemented yet; tell the caller to fall back */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
339
/*
 * Set the socket's service code and optional extra service list.
 *
 * @service is word 0 of @optval (already fetched by the caller); any
 * further words in @optval become the dccps_service_list entries.
 * Returns 0 or a negative errno.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	/* more than one word passed: build the extra service list */
	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* entry count excludes the primary service code (word 0) */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* replace any previously installed list */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
374
375 /* byte 1 is feature.  the rest is the preference list */
376 static int dccp_setsockopt_change(struct sock *sk, int type,
377                                   struct dccp_so_feat __user *optval)
378 {
379         struct dccp_so_feat opt;
380         u8 *val;
381         int rc;
382
383         if (copy_from_user(&opt, optval, sizeof(opt)))
384                 return -EFAULT;
385
386         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
387         if (!val)
388                 return -ENOMEM;
389
390         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
391                 rc = -EFAULT;
392                 goto out_free_val;
393         }
394
395         rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
396                               GFP_KERNEL);
397         if (rc)
398                 goto out_free_val;
399
400 out:
401         return rc;
402
403 out_free_val:
404         kfree(val);
405         goto out;
406 }
407
408 int dccp_setsockopt(struct sock *sk, int level, int optname,
409                     char __user *optval, int optlen)
410 {
411         struct dccp_sock *dp;
412         int err;
413         int val;
414
415         if (level != SOL_DCCP)
416                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
417                                                              optname, optval,
418                                                              optlen);
419
420         if (optlen < sizeof(int))
421                 return -EINVAL;
422
423         if (get_user(val, (int __user *)optval))
424                 return -EFAULT;
425
426         if (optname == DCCP_SOCKOPT_SERVICE)
427                 return dccp_setsockopt_service(sk, val, optval, optlen);
428
429         lock_sock(sk);
430         dp = dccp_sk(sk);
431         err = 0;
432
433         switch (optname) {
434         case DCCP_SOCKOPT_PACKET_SIZE:
435                 dp->dccps_packet_size = val;
436                 break;
437
438         case DCCP_SOCKOPT_CHANGE_L:
439                 if (optlen != sizeof(struct dccp_so_feat))
440                         err = -EINVAL;
441                 else
442                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
443                                                      (struct dccp_so_feat *)
444                                                      optval);
445                 break;
446
447         case DCCP_SOCKOPT_CHANGE_R:
448                 if (optlen != sizeof(struct dccp_so_feat))
449                         err = -EINVAL;
450                 else
451                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
452                                                      (struct dccp_so_feat *)
453                                                      optval);
454                 break;
455
456         default:
457                 err = -ENOPROTOOPT;
458                 break;
459         }
460         
461         release_sock(sk);
462         return err;
463 }
464
465 EXPORT_SYMBOL_GPL(dccp_setsockopt);
466
/*
 * Copy the service code plus the optional extra service list to user
 * space.  The layout written to @optval mirrors what
 * dccp_setsockopt_service() accepts: word 0 is dccps_service, words 1..n
 * are the list entries.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	/* user buffer must fit the code plus the whole list */
	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
497
498 int dccp_getsockopt(struct sock *sk, int level, int optname,
499                     char __user *optval, int __user *optlen)
500 {
501         struct dccp_sock *dp;
502         int val, len;
503
504         if (level != SOL_DCCP)
505                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
506                                                              optname, optval,
507                                                              optlen);
508         if (get_user(len, optlen))
509                 return -EFAULT;
510
511         if (len < sizeof(int))
512                 return -EINVAL;
513
514         dp = dccp_sk(sk);
515
516         switch (optname) {
517         case DCCP_SOCKOPT_PACKET_SIZE:
518                 val = dp->dccps_packet_size;
519                 len = sizeof(dp->dccps_packet_size);
520                 break;
521         case DCCP_SOCKOPT_SERVICE:
522                 return dccp_getsockopt_service(sk, len,
523                                                (__be32 __user *)optval, optlen);
524         case 128 ... 191:
525                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
526                                              len, (u32 __user *)optval, optlen);
527         case 192 ... 255:
528                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
529                                              len, (u32 __user *)optval, optlen);
530         default:
531                 return -ENOPROTOOPT;
532         }
533
534         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
535                 return -EFAULT;
536
537         return 0;
538 }
539
540 EXPORT_SYMBOL_GPL(dccp_getsockopt);
541
/**
 * dccp_sendmsg  -  Transmit one DCCP packet worth of user data
 *
 * One sendmsg() call maps to at most one packet (no coalescing), so @len
 * may not exceed the cached MSS.  Returns @len on success or a negative
 * errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* packet boundaries are respected - no segmentation */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/*
	 * Lock dropped around the (possibly sleeping) skb allocation.
	 * NOTE(review): socket state may change while unlocked - confirm the
	 * code below tolerates that before relying on it.
	 */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 *     Current plan however is to _use_ sk_write_queue with
	 *     an algorith similar to tcp_sendmsg, where the main difference
	 *     is that in DCCP we have to respect packet boundaries, so
	 *     no coalescing of skbs.
	 *
	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
	 *     generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;	/* whole packet or an error, never partial */
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
599
/**
 * dccp_recvmsg  -  Receive the payload of one DCCP packet
 *
 * Only DATA/DATAACK packets carry user data; RESET/CLOSE end the read
 * like a FIN would, and every other packet type is consumed and skipped.
 * Returns the number of bytes copied, 0 at "connection finished", or a
 * negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		/* NOTE(review): non-data packets are eaten here even when
		 * MSG_PEEK is set - confirm that is intended */
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* block (up to timeo) until more packets arrive */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* truncate to the packet payload; flag if user asked for less */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
698
699 int inet_dccp_listen(struct socket *sock, int backlog)
700 {
701         struct sock *sk = sock->sk;
702         unsigned char old_state;
703         int err;
704
705         lock_sock(sk);
706
707         err = -EINVAL;
708         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
709                 goto out;
710
711         old_state = sk->sk_state;
712         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
713                 goto out;
714
715         /* Really, if the socket is already in listen state
716          * we can only allow the backlog to be adjusted.
717          */
718         if (old_state != DCCP_LISTEN) {
719                 /*
720                  * FIXME: here it probably should be sk->sk_prot->listen_start
721                  * see tcp_listen_start
722                  */
723                 err = dccp_listen_start(sk);
724                 if (err)
725                         goto out;
726         }
727         sk->sk_max_ack_backlog = backlog;
728         err = 0;
729
730 out:
731         release_sock(sk);
732         return err;
733 }
734
735 EXPORT_SYMBOL_GPL(inet_dccp_listen);
736
/*
 * Close-transition table used by dccp_close_state(): indexed by the
 * current socket state, the DCCP_STATE_MASK bits give the state to move
 * to and the DCCP_ACTION_FIN bit says a CLOSE still has to be sent.
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:     */
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
749
750 static int dccp_close_state(struct sock *sk)
751 {
752         const int next = dccp_new_state[sk->sk_state];
753         const int ns = next & DCCP_STATE_MASK;
754
755         if (ns != sk->sk_state)
756                 dccp_set_state(sk, ns);
757
758         return next & DCCP_ACTION_FIN;
759 }
760
/**
 * dccp_close  -  Protocol half of close(2) for a DCCP socket
 * @sk: socket to close
 * @timeout: lingering time, passed to sk_stream_wait_close()
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		/* transition table requested a CLOSE to be sent */
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
843
/* shutdown(2) is not implemented for DCCP yet; this stub only logs entry */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
850
851 static int __init dccp_mib_init(void)
852 {
853         int rc = -ENOMEM;
854
855         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
856         if (dccp_statistics[0] == NULL)
857                 goto out;
858
859         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
860         if (dccp_statistics[1] == NULL)
861                 goto out_free_one;
862
863         rc = 0;
864 out:
865         return rc;
866 out_free_one:
867         free_percpu(dccp_statistics[0]);
868         dccp_statistics[0] = NULL;
869         goto out;
870
871 }
872
873 static void dccp_mib_exit(void)
874 {
875         free_percpu(dccp_statistics[0]);
876         free_percpu(dccp_statistics[1]);
877         dccp_statistics[0] = dccp_statistics[1] = NULL;
878 }
879
/* Established-hash bucket count, overridable at module load time
 * (used when sizing ehash in dccp_init()) */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Debug-message toggle, exported to the other DCCP modules;
 * presumably consulted by dccp_pr_debug() - TODO confirm */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
891
/*
 * dccp_init - protocol bring-up.
 *
 * Sizes and allocates the shared established-connection (ehash) and
 * bound-port (bhash) lookup tables, then initialises the MIB counters,
 * ack vector subsystem and sysctl entries.  Returns 0 on success or a
 * negative errno; on failure everything already set up is released via
 * the goto ladder at the bottom, in reverse order of acquisition.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* An explicit thash_entries module parameter overrides the heuristic. */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	/* Smallest page order whose page count reaches the goal. */
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/*
	 * Try the target order first, halving on allocation failure.
	 * ehash_size is halved and then rounded down to a power of two,
	 * so only part of the allocated area may end up in use.
	 */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	/*
	 * Twice ehash_size buckets are initialised; the second half is
	 * presumably the time-wait chains -- see struct inet_hashinfo.
	 */
	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	/*
	 * Same probing scheme for the bind table; "continue" in a
	 * do/while jumps to the condition, which decrements bhash_order,
	 * so over-large tables are retried one order smaller.
	 */
	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;
out:
	return rc;
	/* Error unwinding: each label frees what the step above it acquired. */
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
993
/*
 * dccp_fini - module teardown; releases everything dccp_init() set up.
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	/*
	 * NOTE(review): the page orders here are recomputed from
	 * {b,e}hash_size, while dccp_init() allocated with the orders it
	 * probed ({b,e}hash_order) and rounded the sizes down afterwards
	 * (ehash even uses ehash_size << 1 buckets).  These computations
	 * look like they can disagree, under-freeing pages -- confirm
	 * the counts match, or free with the saved allocation orders.
	 */
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1007
/* Register the load/unload entry points and module metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");