[DCCP]: Introduce DCCP_SOCKOPT_PACKET_SIZE
[linux-3.10.git] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/ip.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
38
39 #include "ccid.h"
40 #include "dccp.h"
41
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46 static struct net_protocol dccp_protocol = {
47         .handler        = dccp_v4_rcv,
48         .err_handler    = dccp_v4_err,
49 };
50
51 const char *dccp_packet_name(const int type)
52 {
53         static const char *dccp_packet_names[] = {
54                 [DCCP_PKT_REQUEST]  = "REQUEST",
55                 [DCCP_PKT_RESPONSE] = "RESPONSE",
56                 [DCCP_PKT_DATA]     = "DATA",
57                 [DCCP_PKT_ACK]      = "ACK",
58                 [DCCP_PKT_DATAACK]  = "DATAACK",
59                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60                 [DCCP_PKT_CLOSE]    = "CLOSE",
61                 [DCCP_PKT_RESET]    = "RESET",
62                 [DCCP_PKT_SYNC]     = "SYNC",
63                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
64         };
65
66         if (type >= DCCP_NR_PKT_TYPES)
67                 return "INVALID";
68         else
69                 return dccp_packet_names[type];
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74 const char *dccp_state_name(const int state)
75 {
76         static char *dccp_state_names[] = {
77         [DCCP_OPEN]       = "OPEN",
78         [DCCP_REQUESTING] = "REQUESTING",
79         [DCCP_PARTOPEN]   = "PARTOPEN",
80         [DCCP_LISTEN]     = "LISTEN",
81         [DCCP_RESPOND]    = "RESPOND",
82         [DCCP_CLOSING]    = "CLOSING",
83         [DCCP_TIME_WAIT]  = "TIME_WAIT",
84         [DCCP_CLOSED]     = "CLOSED",
85         };
86
87         if (state >= DCCP_MAX_STATES)
88                 return "INVALID STATE!";
89         else
90                 return dccp_state_names[state];
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_state_name);
94
95 static inline int dccp_listen_start(struct sock *sk)
96 {
97         dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
98         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
99 }
100
101 int dccp_disconnect(struct sock *sk, int flags)
102 {
103         struct inet_connection_sock *icsk = inet_csk(sk);
104         struct inet_sock *inet = inet_sk(sk);
105         int err = 0;
106         const int old_state = sk->sk_state;
107
108         if (old_state != DCCP_CLOSED)
109                 dccp_set_state(sk, DCCP_CLOSED);
110
111         /* ABORT function of RFC793 */
112         if (old_state == DCCP_LISTEN) {
113                 inet_csk_listen_stop(sk);
114         /* FIXME: do the active reset thing */
115         } else if (old_state == DCCP_REQUESTING)
116                 sk->sk_err = ECONNRESET;
117
118         dccp_clear_xmit_timers(sk);
119         __skb_queue_purge(&sk->sk_receive_queue);
120         if (sk->sk_send_head != NULL) {
121                 __kfree_skb(sk->sk_send_head);
122                 sk->sk_send_head = NULL;
123         }
124
125         inet->dport = 0;
126
127         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
128                 inet_reset_saddr(sk);
129
130         sk->sk_shutdown = 0;
131         sock_reset_flag(sk, SOCK_DONE);
132
133         icsk->icsk_backoff = 0;
134         inet_csk_delack_init(sk);
135         __sk_dst_reset(sk);
136
137         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
138
139         sk->sk_error_report(sk);
140         return err;
141 }
142
143 /*
144  *      Wait for a DCCP event.
145  *
146  *      Note that we don't need to lock the socket, as the upper poll layers
147  *      take care of normal races (between the test and the event) and we don't
148  *      go look at any of the socket buffers directly.
149  */
150 static unsigned int dccp_poll(struct file *file, struct socket *sock,
151                               poll_table *wait)
152 {
153         unsigned int mask;
154         struct sock *sk = sock->sk;
155
156         poll_wait(file, sk->sk_sleep, wait);
157         if (sk->sk_state == DCCP_LISTEN)
158                 return inet_csk_listen_poll(sk);
159
160         /* Socket is not locked. We are protected from async events
161            by poll logic and correct handling of state changes
162            made by another threads is impossible in any case.
163          */
164
165         mask = 0;
166         if (sk->sk_err)
167                 mask = POLLERR;
168
169         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
170                 mask |= POLLHUP;
171         if (sk->sk_shutdown & RCV_SHUTDOWN)
172                 mask |= POLLIN | POLLRDNORM;
173
174         /* Connected? */
175         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
176                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
177                         mask |= POLLIN | POLLRDNORM;
178
179                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
180                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
181                                 mask |= POLLOUT | POLLWRNORM;
182                         } else {  /* send SIGIO later */
183                                 set_bit(SOCK_ASYNC_NOSPACE,
184                                         &sk->sk_socket->flags);
185                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
186
187                                 /* Race breaker. If space is freed after
188                                  * wspace test but before the flags are set,
189                                  * IO signal will be lost.
190                                  */
191                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
192                                         mask |= POLLOUT | POLLWRNORM;
193                         }
194                 }
195         }
196         return mask;
197 }
198
199 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
200 {
201         dccp_pr_debug("entry\n");
202         return -ENOIOCTLCMD;
203 }
204
205 int dccp_setsockopt(struct sock *sk, int level, int optname,
206                     char __user *optval, int optlen)
207 {
208         struct dccp_sock *dp;
209         int err;
210         int val;
211
212         if (level != SOL_DCCP)
213                 return ip_setsockopt(sk, level, optname, optval, optlen);
214
215         if (optlen < sizeof(int))
216                 return -EINVAL;
217
218         if (get_user(val, (int __user *)optval))
219                 return -EFAULT;
220
221         lock_sock(sk);
222
223         dp = dccp_sk(sk);
224         err = 0;
225
226         switch (optname) {
227         case DCCP_SOCKOPT_PACKET_SIZE:
228                 dp->dccps_packet_size = val;
229                 break;
230         default:
231                 err = -ENOPROTOOPT;
232                 break;
233         }
234         
235         release_sock(sk);
236         return err;
237 }
238
239 int dccp_getsockopt(struct sock *sk, int level, int optname,
240                     char __user *optval, int __user *optlen)
241 {
242         struct dccp_sock *dp;
243         int val, len;
244
245         if (level != SOL_DCCP)
246                 return ip_getsockopt(sk, level, optname, optval, optlen);
247
248         if (get_user(len, optlen))
249                 return -EFAULT;
250
251         len = min_t(unsigned int, len, sizeof(int));
252         if (len < 0)
253                 return -EINVAL;
254
255         dp = dccp_sk(sk);
256
257         switch (optname) {
258         case DCCP_SOCKOPT_PACKET_SIZE:
259                 val = dp->dccps_packet_size;
260                 break;
261         default:
262                 return -ENOPROTOOPT;
263         }
264
265         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
266                 return -EFAULT;
267
268         return 0;
269 }
270
271 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
272                  size_t len)
273 {
274         const struct dccp_sock *dp = dccp_sk(sk);
275         const int flags = msg->msg_flags;
276         const int noblock = flags & MSG_DONTWAIT;
277         struct sk_buff *skb;
278         int rc, size;
279         long timeo;
280
281         if (len > dp->dccps_mss_cache)
282                 return -EMSGSIZE;
283
284         lock_sock(sk);
285         timeo = sock_sndtimeo(sk, noblock);
286
287         /*
288          * We have to use sk_stream_wait_connect here to set sk_write_pending,
289          * so that the trick in dccp_rcv_request_sent_state_process.
290          */
291         /* Wait for a connection to finish. */
292         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
293                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
294                         goto out_release;
295
296         size = sk->sk_prot->max_header + len;
297         release_sock(sk);
298         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
299         lock_sock(sk);
300         if (skb == NULL)
301                 goto out_release;
302
303         skb_reserve(skb, sk->sk_prot->max_header);
304         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
305         if (rc != 0)
306                 goto out_discard;
307
308         rc = dccp_write_xmit(sk, skb, &timeo);
309         /*
310          * XXX we don't use sk_write_queue, so just discard the packet.
311          *     Current plan however is to _use_ sk_write_queue with
312          *     an algorith similar to tcp_sendmsg, where the main difference
313          *     is that in DCCP we have to respect packet boundaries, so
314          *     no coalescing of skbs.
315          *
316          *     This bug was _quickly_ found & fixed by just looking at an OSTRA
317          *     generated callgraph 8) -acme
318          */
319         if (rc != 0)
320                 goto out_discard;
321 out_release:
322         release_sock(sk);
323         return rc ? : len;
324 out_discard:
325         kfree_skb(skb);
326         goto out_release;
327 }
328
329 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
330                  size_t len, int nonblock, int flags, int *addr_len)
331 {
332         const struct dccp_hdr *dh;
333         long timeo;
334
335         lock_sock(sk);
336
337         if (sk->sk_state == DCCP_LISTEN) {
338                 len = -ENOTCONN;
339                 goto out;
340         }
341
342         timeo = sock_rcvtimeo(sk, nonblock);
343
344         do {
345                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
346
347                 if (skb == NULL)
348                         goto verify_sock_status;
349
350                 dh = dccp_hdr(skb);
351
352                 if (dh->dccph_type == DCCP_PKT_DATA ||
353                     dh->dccph_type == DCCP_PKT_DATAACK)
354                         goto found_ok_skb;
355
356                 if (dh->dccph_type == DCCP_PKT_RESET ||
357                     dh->dccph_type == DCCP_PKT_CLOSE) {
358                         dccp_pr_debug("found fin ok!\n");
359                         len = 0;
360                         goto found_fin_ok;
361                 }
362                 dccp_pr_debug("packet_type=%s\n",
363                               dccp_packet_name(dh->dccph_type));
364                 sk_eat_skb(sk, skb);
365 verify_sock_status:
366                 if (sock_flag(sk, SOCK_DONE)) {
367                         len = 0;
368                         break;
369                 }
370
371                 if (sk->sk_err) {
372                         len = sock_error(sk);
373                         break;
374                 }
375
376                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
377                         len = 0;
378                         break;
379                 }
380
381                 if (sk->sk_state == DCCP_CLOSED) {
382                         if (!sock_flag(sk, SOCK_DONE)) {
383                                 /* This occurs when user tries to read
384                                  * from never connected socket.
385                                  */
386                                 len = -ENOTCONN;
387                                 break;
388                         }
389                         len = 0;
390                         break;
391                 }
392
393                 if (!timeo) {
394                         len = -EAGAIN;
395                         break;
396                 }
397
398                 if (signal_pending(current)) {
399                         len = sock_intr_errno(timeo);
400                         break;
401                 }
402
403                 sk_wait_data(sk, &timeo);
404                 continue;
405         found_ok_skb:
406                 if (len > skb->len)
407                         len = skb->len;
408                 else if (len < skb->len)
409                         msg->msg_flags |= MSG_TRUNC;
410
411                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
412                         /* Exception. Bailout! */
413                         len = -EFAULT;
414                         break;
415                 }
416         found_fin_ok:
417                 if (!(flags & MSG_PEEK))
418                         sk_eat_skb(sk, skb);
419                 break;
420         } while (1);
421 out:
422         release_sock(sk);
423         return len;
424 }
425
426 static int inet_dccp_listen(struct socket *sock, int backlog)
427 {
428         struct sock *sk = sock->sk;
429         unsigned char old_state;
430         int err;
431
432         lock_sock(sk);
433
434         err = -EINVAL;
435         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
436                 goto out;
437
438         old_state = sk->sk_state;
439         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
440                 goto out;
441
442         /* Really, if the socket is already in listen state
443          * we can only allow the backlog to be adjusted.
444          */
445         if (old_state != DCCP_LISTEN) {
446                 /*
447                  * FIXME: here it probably should be sk->sk_prot->listen_start
448                  * see tcp_listen_start
449                  */
450                 err = dccp_listen_start(sk);
451                 if (err)
452                         goto out;
453         }
454         sk->sk_max_ack_backlog = backlog;
455         err = 0;
456
457 out:
458         release_sock(sk);
459         return err;
460 }
461
462 static const unsigned char dccp_new_state[] = {
463         /* current state:   new state:      action:     */
464         [0]               = DCCP_CLOSED,
465         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
466         [DCCP_REQUESTING] = DCCP_CLOSED,
467         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
468         [DCCP_LISTEN]     = DCCP_CLOSED,
469         [DCCP_RESPOND]    = DCCP_CLOSED,
470         [DCCP_CLOSING]    = DCCP_CLOSED,
471         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
472         [DCCP_CLOSED]     = DCCP_CLOSED,
473 };
474
475 static int dccp_close_state(struct sock *sk)
476 {
477         const int next = dccp_new_state[sk->sk_state];
478         const int ns = next & DCCP_STATE_MASK;
479
480         if (ns != sk->sk_state)
481                 dccp_set_state(sk, ns);
482
483         return next & DCCP_ACTION_FIN;
484 }
485
486 void dccp_close(struct sock *sk, long timeout)
487 {
488         struct sk_buff *skb;
489
490         lock_sock(sk);
491
492         sk->sk_shutdown = SHUTDOWN_MASK;
493
494         if (sk->sk_state == DCCP_LISTEN) {
495                 dccp_set_state(sk, DCCP_CLOSED);
496
497                 /* Special case. */
498                 inet_csk_listen_stop(sk);
499
500                 goto adjudge_to_death;
501         }
502
503         /*
504          * We need to flush the recv. buffs.  We do this only on the
505          * descriptor close, not protocol-sourced closes, because the
506           *reader process may not have drained the data yet!
507          */
508         /* FIXME: check for unread data */
509         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
510                 __kfree_skb(skb);
511         }
512
513         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
514                 /* Check zero linger _after_ checking for unread data. */
515                 sk->sk_prot->disconnect(sk, 0);
516         } else if (dccp_close_state(sk)) {
517                 dccp_send_close(sk, 1);
518         }
519
520         sk_stream_wait_close(sk, timeout);
521
522 adjudge_to_death:
523         /*
524          * It is the last release_sock in its life. It will remove backlog.
525          */
526         release_sock(sk);
527         /*
528          * Now socket is owned by kernel and we acquire BH lock
529          * to finish close. No need to check for user refs.
530          */
531         local_bh_disable();
532         bh_lock_sock(sk);
533         BUG_TRAP(!sock_owned_by_user(sk));
534
535         sock_hold(sk);
536         sock_orphan(sk);
537
538         /*
539          * The last release_sock may have processed the CLOSE or RESET
540          * packet moving sock to CLOSED state, if not we have to fire
541          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
542          * in draft-ietf-dccp-spec-11. -acme
543          */
544         if (sk->sk_state == DCCP_CLOSING) {
545                 /* FIXME: should start at 2 * RTT */
546                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
547                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
548                                           inet_csk(sk)->icsk_rto,
549                                           DCCP_RTO_MAX);
550 #if 0
551                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
552                 dccp_set_state(sk, DCCP_CLOSED);
553 #endif
554         }
555
556         atomic_inc(sk->sk_prot->orphan_count);
557         if (sk->sk_state == DCCP_CLOSED)
558                 inet_csk_destroy_sock(sk);
559
560         /* Otherwise, socket is reprieved until protocol close. */
561
562         bh_unlock_sock(sk);
563         local_bh_enable();
564         sock_put(sk);
565 }
566
/*
 * dccp_shutdown - shutdown hook for DCCP sockets.
 *
 * Only traces the call; neither @sk nor @how is acted upon.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}
571
572 static struct proto_ops inet_dccp_ops = {
573         .family         = PF_INET,
574         .owner          = THIS_MODULE,
575         .release        = inet_release,
576         .bind           = inet_bind,
577         .connect        = inet_stream_connect,
578         .socketpair     = sock_no_socketpair,
579         .accept         = inet_accept,
580         .getname        = inet_getname,
581         /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
582         .poll           = dccp_poll,
583         .ioctl          = inet_ioctl,
584         /* FIXME: work on inet_listen to rename it to sock_common_listen */
585         .listen         = inet_dccp_listen,
586         .shutdown       = inet_shutdown,
587         .setsockopt     = sock_common_setsockopt,
588         .getsockopt     = sock_common_getsockopt,
589         .sendmsg        = inet_sendmsg,
590         .recvmsg        = sock_common_recvmsg,
591         .mmap           = sock_no_mmap,
592         .sendpage       = sock_no_sendpage,
593 };
594
595 extern struct net_proto_family inet_family_ops;
596
597 static struct inet_protosw dccp_v4_protosw = {
598         .type           = SOCK_DCCP,
599         .protocol       = IPPROTO_DCCP,
600         .prot           = &dccp_v4_prot,
601         .ops            = &inet_dccp_ops,
602         .capability     = -1,
603         .no_check       = 0,
604         .flags          = 0,
605 };
606
607 /*
608  * This is the global socket data structure used for responding to
609  * the Out-of-the-blue (OOTB) packets. A control sock will be created
610  * for this socket at the initialization time.
611  */
612 struct socket *dccp_ctl_socket;
613
614 static char dccp_ctl_socket_err_msg[] __initdata =
615         KERN_ERR "DCCP: Failed to create the control socket.\n";
616
617 static int __init dccp_ctl_sock_init(void)
618 {
619         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
620                                   &dccp_ctl_socket);
621         if (rc < 0)
622                 printk(dccp_ctl_socket_err_msg);
623         else {
624                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
625                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
626
627                 /* Unhash it so that IP input processing does not even
628                  * see it, we do not wish this socket to see incoming
629                  * packets.
630                  */
631                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
632         }
633
634         return rc;
635 }
636
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/*
 * Release the control socket, if there is one, and clear the global
 * pointer so that a second call does nothing.
 */
void dccp_ctl_sock_exit(void)
{
        if (dccp_ctl_socket == NULL)
                return;

        sock_release(dccp_ctl_socket);
        dccp_ctl_socket = NULL;
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
648
649 static int __init init_dccp_v4_mibs(void)
650 {
651         int rc = -ENOMEM;
652
653         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
654         if (dccp_statistics[0] == NULL)
655                 goto out;
656
657         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
658         if (dccp_statistics[1] == NULL)
659                 goto out_free_one;
660
661         rc = 0;
662 out:
663         return rc;
664 out_free_one:
665         free_percpu(dccp_statistics[0]);
666         dccp_statistics[0] = NULL;
667         goto out;
668
669 }
670
671 static int thash_entries;
672 module_param(thash_entries, int, 0444);
673 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
674
675 #ifdef CONFIG_IP_DCCP_DEBUG
676 int dccp_debug;
677 module_param(dccp_debug, int, 0444);
678 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
679 #endif
680
681 static int __init dccp_init(void)
682 {
683         unsigned long goal;
684         int ehash_order, bhash_order, i;
685         int rc = proto_register(&dccp_v4_prot, 1);
686
687         if (rc)
688                 goto out;
689
690         dccp_hashinfo.bind_bucket_cachep =
691                 kmem_cache_create("dccp_bind_bucket",
692                                   sizeof(struct inet_bind_bucket), 0,
693                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
694         if (!dccp_hashinfo.bind_bucket_cachep)
695                 goto out_proto_unregister;
696
697         /*
698          * Size and allocate the main established and bind bucket
699          * hash tables.
700          *
701          * The methodology is similar to that of the buffer cache.
702          */
703         if (num_physpages >= (128 * 1024))
704                 goal = num_physpages >> (21 - PAGE_SHIFT);
705         else
706                 goal = num_physpages >> (23 - PAGE_SHIFT);
707
708         if (thash_entries)
709                 goal = (thash_entries *
710                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
711         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
712                 ;
713         do {
714                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
715                                         sizeof(struct inet_ehash_bucket);
716                 dccp_hashinfo.ehash_size >>= 1;
717                 while (dccp_hashinfo.ehash_size &
718                        (dccp_hashinfo.ehash_size - 1))
719                         dccp_hashinfo.ehash_size--;
720                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
721                         __get_free_pages(GFP_ATOMIC, ehash_order);
722         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
723
724         if (!dccp_hashinfo.ehash) {
725                 printk(KERN_CRIT "Failed to allocate DCCP "
726                                  "established hash table\n");
727                 goto out_free_bind_bucket_cachep;
728         }
729
730         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
731                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
732                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
733         }
734
735         bhash_order = ehash_order;
736
737         do {
738                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
739                                         sizeof(struct inet_bind_hashbucket);
740                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
741                     bhash_order > 0)
742                         continue;
743                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
744                         __get_free_pages(GFP_ATOMIC, bhash_order);
745         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
746
747         if (!dccp_hashinfo.bhash) {
748                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
749                 goto out_free_dccp_ehash;
750         }
751
752         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
753                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
754                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
755         }
756
757         if (init_dccp_v4_mibs())
758                 goto out_free_dccp_bhash;
759
760         rc = -EAGAIN;
761         if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
762                 goto out_free_dccp_v4_mibs;
763
764         inet_register_protosw(&dccp_v4_protosw);
765
766         rc = dccp_ctl_sock_init();
767         if (rc)
768                 goto out_unregister_protosw;
769 out:
770         return rc;
771 out_unregister_protosw:
772         inet_unregister_protosw(&dccp_v4_protosw);
773         inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
774 out_free_dccp_v4_mibs:
775         free_percpu(dccp_statistics[0]);
776         free_percpu(dccp_statistics[1]);
777         dccp_statistics[0] = dccp_statistics[1] = NULL;
778 out_free_dccp_bhash:
779         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
780         dccp_hashinfo.bhash = NULL;
781 out_free_dccp_ehash:
782         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
783         dccp_hashinfo.ehash = NULL;
784 out_free_bind_bucket_cachep:
785         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
786         dccp_hashinfo.bind_bucket_cachep = NULL;
787 out_proto_unregister:
788         proto_unregister(&dccp_v4_prot);
789         goto out;
790 }
791
792 static const char dccp_del_proto_err_msg[] __exitdata =
793         KERN_ERR "can't remove dccp net_protocol\n";
794
795 static void __exit dccp_fini(void)
796 {
797         inet_unregister_protosw(&dccp_v4_protosw);
798
799         if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
800                 printk(dccp_del_proto_err_msg);
801
802         free_percpu(dccp_statistics[0]);
803         free_percpu(dccp_statistics[1]);
804         free_pages((unsigned long)dccp_hashinfo.bhash,
805                    get_order(dccp_hashinfo.bhash_size *
806                              sizeof(struct inet_bind_hashbucket)));
807         free_pages((unsigned long)dccp_hashinfo.ehash,
808                    get_order(dccp_hashinfo.ehash_size *
809                              sizeof(struct inet_ehash_bucket)));
810         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
811         proto_unregister(&dccp_v4_prot);
812 }
813
814 module_init(dccp_init);
815 module_exit(dccp_fini);
816
817 /*
818  * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
819  * values directly, Also cover the case where the protocol is not specified,
820  * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
821  */
822 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
823 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
824 MODULE_LICENSE("GPL");
825 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
826 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");