cb9d0cb5f2707f351dc29bc4749cd69b337a30ce
[linux-2.6.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge number
40  *                                      of socks hashed (this is for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119
120 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; /* hash buckets plus one extra trailing bucket */
121 static DEFINE_SPINLOCK(unix_table_lock); /* taken around every lookup in / change to unix_socket_table */
122 static atomic_t unix_nr_socks = ATOMIC_INIT(0); /* bumped in unix_create1(), dropped in unix_sock_destructor() */
123
124 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE]) /* the extra bucket: where freshly created sockets are hashed */
125
126 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE) /* filesystem binds store hash == UNIX_HASH_SIZE */
127
/*
 * Security-id plumbing between a scm_cookie and the per-skb UNIXSID() slot.
 * Both helpers compile to nothing when CONFIG_SECURITY_NETWORK is off.
 */
#ifdef CONFIG_SECURITY_NETWORK

/* Copy scm->secid into the skb's UNIXSID() slot. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Copy the skb's UNIXSID() slot into scm->secid. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = *UNIXSID(skb);
}

#else /* !CONFIG_SECURITY_NETWORK */

static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

#endif /* CONFIG_SECURITY_NETWORK */
145
146 /*
147  *  SMP locking strategy:
148  *    hash table is protected with spinlock unix_table_lock
149  *    each socket's state is protected by its own lock (u->lock, a spinlock).
150  */
151
152 static inline unsigned unix_hash_fold(__wsum n)
153 {
154         unsigned hash = (__force unsigned)n;
155         hash ^= hash>>16;
156         hash ^= hash>>8;
157         return hash&(UNIX_HASH_SIZE-1);
158 }
159
160 #define unix_peer(sk) (unix_sk(sk)->peer)
161
162 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
163 {
164         return unix_peer(osk) == sk;
165 }
166
167 static inline int unix_may_send(struct sock *sk, struct sock *osk)
168 {
169         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
170 }
171
172 static struct sock *unix_peer_get(struct sock *s)
173 {
174         struct sock *peer;
175
176         unix_state_lock(s);
177         peer = unix_peer(s);
178         if (peer)
179                 sock_hold(peer);
180         unix_state_unlock(s);
181         return peer;
182 }
183
184 static inline void unix_release_addr(struct unix_address *addr)
185 {
186         if (atomic_dec_and_test(&addr->refcnt))
187                 kfree(addr);
188 }
189
190 /*
191  *      Check unix socket name:
192  *              - should be not zero length.
193  *              - if started by not zero, should be NULL terminated (FS object)
194  *              - if started by zero, it is abstract name.
195  */
196
197 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
198 {
199         if (len <= sizeof(short) || len > sizeof(*sunaddr))
200                 return -EINVAL;
201         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
202                 return -EINVAL;
203         if (sunaddr->sun_path[0]) {
204                 /*
205                  * This may look like an off by one error but it is a bit more
206                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
207                  * sun_path[108] doesnt as such exist.  However in kernel space
208                  * we are guaranteed that it is a valid memory location in our
209                  * kernel address buffer.
210                  */
211                 ((char *)sunaddr)[len]=0;
212                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
213                 return len;
214         }
215
216         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
217         return len;
218 }
219
/*
 * Unhash @sk from whatever list it is on. No locking here: the callers in
 * this file hold unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
        (void)sk_del_node_init(sk);
}
224
/*
 * Hash @sk onto @list; @sk is expected to be unhashed at this point.
 * No locking here: the callers in this file hold unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        BUG_TRAP(sk_unhashed(sk));

        sk_add_node(sk, list);
}
230
231 static inline void unix_remove_socket(struct sock *sk)
232 {
233         spin_lock(&unix_table_lock);
234         __unix_remove_socket(sk);
235         spin_unlock(&unix_table_lock);
236 }
237
238 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
239 {
240         spin_lock(&unix_table_lock);
241         __unix_insert_socket(list, sk);
242         spin_unlock(&unix_table_lock);
243 }
244
245 static struct sock *__unix_find_socket_byname(struct net *net,
246                                               struct sockaddr_un *sunname,
247                                               int len, int type, unsigned hash)
248 {
249         struct sock *s;
250         struct hlist_node *node;
251
252         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253                 struct unix_sock *u = unix_sk(s);
254
255                 if (sock_net(s) != net)
256                         continue;
257
258                 if (u->addr->len == len &&
259                     !memcmp(u->addr->name, sunname, len))
260                         goto found;
261         }
262         s = NULL;
263 found:
264         return s;
265 }
266
267 static inline struct sock *unix_find_socket_byname(struct net *net,
268                                                    struct sockaddr_un *sunname,
269                                                    int len, int type,
270                                                    unsigned hash)
271 {
272         struct sock *s;
273
274         spin_lock(&unix_table_lock);
275         s = __unix_find_socket_byname(net, sunname, len, type, hash);
276         if (s)
277                 sock_hold(s);
278         spin_unlock(&unix_table_lock);
279         return s;
280 }
281
282 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
283 {
284         struct sock *s;
285         struct hlist_node *node;
286
287         spin_lock(&unix_table_lock);
288         sk_for_each(s, node,
289                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
290                 struct dentry *dentry = unix_sk(s)->dentry;
291
292                 if (sock_net(s) != net)
293                         continue;
294
295                 if(dentry && dentry->d_inode == i)
296                 {
297                         sock_hold(s);
298                         goto found;
299                 }
300         }
301         s = NULL;
302 found:
303         spin_unlock(&unix_table_lock);
304         return s;
305 }
306
307 static inline int unix_writable(struct sock *sk)
308 {
309         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
310 }
311
312 static void unix_write_space(struct sock *sk)
313 {
314         read_lock(&sk->sk_callback_lock);
315         if (unix_writable(sk)) {
316                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
317                         wake_up_interruptible_sync(sk->sk_sleep);
318                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
319         }
320         read_unlock(&sk->sk_callback_lock);
321 }
322
323 /* When dgram socket disconnects (or changes its peer), we clear its receive
324  * queue of packets arrived from previous peer. First, it allows to do
325  * flow control based only on wmem_alloc; second, sk connected to peer
326  * may receive messages only from that peer. */
327 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
328 {
329         if (!skb_queue_empty(&sk->sk_receive_queue)) {
330                 skb_queue_purge(&sk->sk_receive_queue);
331                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
332
333                 /* If one link of bidirectional dgram pipe is disconnected,
334                  * we signal error. Messages are lost. Do not make this,
335                  * when peer was not connected to us.
336                  */
337                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
338                         other->sk_err = ECONNRESET;
339                         other->sk_error_report(other);
340                 }
341         }
342 }
343
344 static void unix_sock_destructor(struct sock *sk)
345 {
346         struct unix_sock *u = unix_sk(sk);
347
348         skb_queue_purge(&sk->sk_receive_queue);
349
350         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
351         BUG_TRAP(sk_unhashed(sk));
352         BUG_TRAP(!sk->sk_socket);
353         if (!sock_flag(sk, SOCK_DEAD)) {
354                 printk("Attempt to release alive unix socket: %p\n", sk);
355                 return;
356         }
357
358         if (u->addr)
359                 unix_release_addr(u->addr);
360
361         atomic_dec(&unix_nr_socks);
362 #ifdef UNIX_REFCNT_DEBUG
363         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
364 #endif
365 }
366
367 static int unix_release_sock (struct sock *sk, int embrion)
368 {
369         struct unix_sock *u = unix_sk(sk);
370         struct dentry *dentry;
371         struct vfsmount *mnt;
372         struct sock *skpair;
373         struct sk_buff *skb;
374         int state;
375
376         unix_remove_socket(sk);
377
378         /* Clear state */
379         unix_state_lock(sk);
380         sock_orphan(sk);
381         sk->sk_shutdown = SHUTDOWN_MASK;
382         dentry       = u->dentry;
383         u->dentry    = NULL;
384         mnt          = u->mnt;
385         u->mnt       = NULL;
386         state = sk->sk_state;
387         sk->sk_state = TCP_CLOSE;
388         unix_state_unlock(sk);
389
390         wake_up_interruptible_all(&u->peer_wait);
391
392         skpair=unix_peer(sk);
393
394         if (skpair!=NULL) {
395                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
396                         unix_state_lock(skpair);
397                         /* No more writes */
398                         skpair->sk_shutdown = SHUTDOWN_MASK;
399                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
400                                 skpair->sk_err = ECONNRESET;
401                         unix_state_unlock(skpair);
402                         skpair->sk_state_change(skpair);
403                         read_lock(&skpair->sk_callback_lock);
404                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
405                         read_unlock(&skpair->sk_callback_lock);
406                 }
407                 sock_put(skpair); /* It may now die */
408                 unix_peer(sk) = NULL;
409         }
410
411         /* Try to flush out this socket. Throw out buffers at least */
412
413         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
414                 if (state==TCP_LISTEN)
415                         unix_release_sock(skb->sk, 1);
416                 /* passed fds are erased in the kfree_skb hook        */
417                 kfree_skb(skb);
418         }
419
420         if (dentry) {
421                 dput(dentry);
422                 mntput(mnt);
423         }
424
425         sock_put(sk);
426
427         /* ---- Socket is dead now and most probably destroyed ---- */
428
429         /*
430          * Fixme: BSD difference: In BSD all sockets connected to use get
431          *        ECONNRESET and we die on the spot. In Linux we behave
432          *        like files and pipes do and wait for the last
433          *        dereference.
434          *
435          * Can't we simply set sock->err?
436          *
437          *        What the above comment does talk about? --ANK(980817)
438          */
439
440         if (unix_tot_inflight)
441                 unix_gc();              /* Garbage collect fds */
442
443         return 0;
444 }
445
446 static int unix_listen(struct socket *sock, int backlog)
447 {
448         int err;
449         struct sock *sk = sock->sk;
450         struct unix_sock *u = unix_sk(sk);
451
452         err = -EOPNOTSUPP;
453         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
454                 goto out;                       /* Only stream/seqpacket sockets accept */
455         err = -EINVAL;
456         if (!u->addr)
457                 goto out;                       /* No listens on an unbound socket */
458         unix_state_lock(sk);
459         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
460                 goto out_unlock;
461         if (backlog > sk->sk_max_ack_backlog)
462                 wake_up_interruptible_all(&u->peer_wait);
463         sk->sk_max_ack_backlog  = backlog;
464         sk->sk_state            = TCP_LISTEN;
465         /* set credentials so connect can copy them */
466         sk->sk_peercred.pid     = task_tgid_vnr(current);
467         sk->sk_peercred.uid     = current->euid;
468         sk->sk_peercred.gid     = current->egid;
469         err = 0;
470
471 out_unlock:
472         unix_state_unlock(sk);
473 out:
474         return err;
475 }
476
477 static int unix_release(struct socket *);
478 static int unix_bind(struct socket *, struct sockaddr *, int);
479 static int unix_stream_connect(struct socket *, struct sockaddr *,
480                                int addr_len, int flags);
481 static int unix_socketpair(struct socket *, struct socket *);
482 static int unix_accept(struct socket *, struct socket *, int);
483 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
484 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
485 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
486 static int unix_shutdown(struct socket *, int);
487 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
488                                struct msghdr *, size_t);
489 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
490                                struct msghdr *, size_t, int);
491 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
492                               struct msghdr *, size_t);
493 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
494                               struct msghdr *, size_t, int);
495 static int unix_dgram_connect(struct socket *, struct sockaddr *,
496                               int, int);
497 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
498                                   struct msghdr *, size_t);
499
500 static const struct proto_ops unix_stream_ops = {
501         .family =       PF_UNIX,
502         .owner =        THIS_MODULE,
503         .release =      unix_release,
504         .bind =         unix_bind,
505         .connect =      unix_stream_connect,
506         .socketpair =   unix_socketpair,
507         .accept =       unix_accept,
508         .getname =      unix_getname,
509         .poll =         unix_poll,
510         .ioctl =        unix_ioctl,
511         .listen =       unix_listen,
512         .shutdown =     unix_shutdown,
513         .setsockopt =   sock_no_setsockopt,
514         .getsockopt =   sock_no_getsockopt,
515         .sendmsg =      unix_stream_sendmsg,
516         .recvmsg =      unix_stream_recvmsg,
517         .mmap =         sock_no_mmap,
518         .sendpage =     sock_no_sendpage,
519 };
520
521 static const struct proto_ops unix_dgram_ops = {
522         .family =       PF_UNIX,
523         .owner =        THIS_MODULE,
524         .release =      unix_release,
525         .bind =         unix_bind,
526         .connect =      unix_dgram_connect,
527         .socketpair =   unix_socketpair,
528         .accept =       sock_no_accept,
529         .getname =      unix_getname,
530         .poll =         datagram_poll,
531         .ioctl =        unix_ioctl,
532         .listen =       sock_no_listen,
533         .shutdown =     unix_shutdown,
534         .setsockopt =   sock_no_setsockopt,
535         .getsockopt =   sock_no_getsockopt,
536         .sendmsg =      unix_dgram_sendmsg,
537         .recvmsg =      unix_dgram_recvmsg,
538         .mmap =         sock_no_mmap,
539         .sendpage =     sock_no_sendpage,
540 };
541
542 static const struct proto_ops unix_seqpacket_ops = {
543         .family =       PF_UNIX,
544         .owner =        THIS_MODULE,
545         .release =      unix_release,
546         .bind =         unix_bind,
547         .connect =      unix_stream_connect,
548         .socketpair =   unix_socketpair,
549         .accept =       unix_accept,
550         .getname =      unix_getname,
551         .poll =         datagram_poll,
552         .ioctl =        unix_ioctl,
553         .listen =       unix_listen,
554         .shutdown =     unix_shutdown,
555         .setsockopt =   sock_no_setsockopt,
556         .getsockopt =   sock_no_getsockopt,
557         .sendmsg =      unix_seqpacket_sendmsg,
558         .recvmsg =      unix_dgram_recvmsg,
559         .mmap =         sock_no_mmap,
560         .sendpage =     sock_no_sendpage,
561 };
562
563 static struct proto unix_proto = {
564         .name     = "UNIX",
565         .owner    = THIS_MODULE,
566         .obj_size = sizeof(struct unix_sock),
567 };
568
569 /*
570  * AF_UNIX sockets do not interact with hardware, hence they
571  * don't trigger interrupts - so it's safe for them to have
572  * bh-unsafe locking for their sk_receive_queue.lock. Split off
573  * this special lock-class by reinitializing the spinlock key:
574  */
575 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
576
577 static struct sock * unix_create1(struct net *net, struct socket *sock)
578 {
579         struct sock *sk = NULL;
580         struct unix_sock *u;
581
582         atomic_inc(&unix_nr_socks);
583         if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
584                 goto out;
585
586         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
587         if (!sk)
588                 goto out;
589
590         sock_init_data(sock,sk);
591         lockdep_set_class(&sk->sk_receive_queue.lock,
592                                 &af_unix_sk_receive_queue_lock_key);
593
594         sk->sk_write_space      = unix_write_space;
595         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
596         sk->sk_destruct         = unix_sock_destructor;
597         u         = unix_sk(sk);
598         u->dentry = NULL;
599         u->mnt    = NULL;
600         spin_lock_init(&u->lock);
601         atomic_set(&u->inflight, 0);
602         INIT_LIST_HEAD(&u->link);
603         mutex_init(&u->readlock); /* single task reading lock */
604         init_waitqueue_head(&u->peer_wait);
605         unix_insert_socket(unix_sockets_unbound, sk);
606 out:
607         if (sk == NULL)
608                 atomic_dec(&unix_nr_socks);
609         return sk;
610 }
611
612 static int unix_create(struct net *net, struct socket *sock, int protocol)
613 {
614         if (protocol && protocol != PF_UNIX)
615                 return -EPROTONOSUPPORT;
616
617         sock->state = SS_UNCONNECTED;
618
619         switch (sock->type) {
620         case SOCK_STREAM:
621                 sock->ops = &unix_stream_ops;
622                 break;
623                 /*
624                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
625                  *      nothing uses it.
626                  */
627         case SOCK_RAW:
628                 sock->type=SOCK_DGRAM;
629         case SOCK_DGRAM:
630                 sock->ops = &unix_dgram_ops;
631                 break;
632         case SOCK_SEQPACKET:
633                 sock->ops = &unix_seqpacket_ops;
634                 break;
635         default:
636                 return -ESOCKTNOSUPPORT;
637         }
638
639         return unix_create1(net, sock) ? 0 : -ENOMEM;
640 }
641
642 static int unix_release(struct socket *sock)
643 {
644         struct sock *sk = sock->sk;
645
646         if (!sk)
647                 return 0;
648
649         sock->sk = NULL;
650
651         return unix_release_sock (sk, 0);
652 }
653
654 static int unix_autobind(struct socket *sock)
655 {
656         struct sock *sk = sock->sk;
657         struct net *net = sock_net(sk);
658         struct unix_sock *u = unix_sk(sk);
659         static u32 ordernum = 1;
660         struct unix_address * addr;
661         int err;
662
663         mutex_lock(&u->readlock);
664
665         err = 0;
666         if (u->addr)
667                 goto out;
668
669         err = -ENOMEM;
670         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
671         if (!addr)
672                 goto out;
673
674         addr->name->sun_family = AF_UNIX;
675         atomic_set(&addr->refcnt, 1);
676
677 retry:
678         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
679         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
680
681         spin_lock(&unix_table_lock);
682         ordernum = (ordernum+1)&0xFFFFF;
683
684         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
685                                       addr->hash)) {
686                 spin_unlock(&unix_table_lock);
687                 /* Sanity yield. It is unusual case, but yet... */
688                 if (!(ordernum&0xFF))
689                         yield();
690                 goto retry;
691         }
692         addr->hash ^= sk->sk_type;
693
694         __unix_remove_socket(sk);
695         u->addr = addr;
696         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
697         spin_unlock(&unix_table_lock);
698         err = 0;
699
700 out:    mutex_unlock(&u->readlock);
701         return err;
702 }
703
/*
 * Resolve the destination socket named by @sunname.
 *
 * A name whose first path byte is non-zero is looked up in the filesystem:
 * the path must resolve, be writable by the caller and be a socket inode
 * with a unix socket attached. Any other name is looked up in the hash
 * table by its bytes.
 *
 * Returns the socket with a reference held. On failure returns NULL and
 * stores the reason in *@error: the lookup/permission error,
 * -ECONNREFUSED when no socket is found, or -EPROTOTYPE when a
 * filesystem socket has a different type. *@error is not written on
 * success.
 */
static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned hash, int *error)
{
        struct sock *other;
        struct nameidata nd;
        struct dentry *dentry;
        int err = 0;

        if (!sunname->sun_path[0]) {
                other = unix_find_socket_byname(net, sunname, len, type, hash);
                if (other == NULL) {
                        err = -ECONNREFUSED;
                        goto fail;
                }

                dentry = unix_sk(other)->dentry;
                if (dentry)
                        touch_atime(unix_sk(other)->mnt, dentry);
                return other;
        }

        err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
        if (err)
                goto fail;

        err = vfs_permission(&nd, MAY_WRITE);
        if (err)
                goto put_fail;

        err = -ECONNREFUSED;
        if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
                goto put_fail;

        other = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
        if (other == NULL)
                goto put_fail;

        if (other->sk_type != type) {
                /* Wrong kind of socket: drop the path, then the socket. */
                path_put(&nd.path);
                sock_put(other);
                err = -EPROTOTYPE;
                goto fail;
        }

        touch_atime(nd.path.mnt, nd.path.dentry);
        path_put(&nd.path);
        return other;

put_fail:
        path_put(&nd.path);
fail:
        *error = err;
        return NULL;
}
756
757
758 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
759 {
760         struct sock *sk = sock->sk;
761         struct net *net = sock_net(sk);
762         struct unix_sock *u = unix_sk(sk);
763         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
764         struct dentry * dentry = NULL;
765         struct nameidata nd;
766         int err;
767         unsigned hash;
768         struct unix_address *addr;
769         struct hlist_head *list;
770
771         err = -EINVAL;
772         if (sunaddr->sun_family != AF_UNIX)
773                 goto out;
774
775         if (addr_len==sizeof(short)) {
776                 err = unix_autobind(sock);
777                 goto out;
778         }
779
780         err = unix_mkname(sunaddr, addr_len, &hash);
781         if (err < 0)
782                 goto out;
783         addr_len = err;
784
785         mutex_lock(&u->readlock);
786
787         err = -EINVAL;
788         if (u->addr)
789                 goto out_up;
790
791         err = -ENOMEM;
792         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
793         if (!addr)
794                 goto out_up;
795
796         memcpy(addr->name, sunaddr, addr_len);
797         addr->len = addr_len;
798         addr->hash = hash ^ sk->sk_type;
799         atomic_set(&addr->refcnt, 1);
800
801         if (sunaddr->sun_path[0]) {
802                 unsigned int mode;
803                 err = 0;
804                 /*
805                  * Get the parent directory, calculate the hash for last
806                  * component.
807                  */
808                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
809                 if (err)
810                         goto out_mknod_parent;
811
812                 dentry = lookup_create(&nd, 0);
813                 err = PTR_ERR(dentry);
814                 if (IS_ERR(dentry))
815                         goto out_mknod_unlock;
816
817                 /*
818                  * All right, let's create it.
819                  */
820                 mode = S_IFSOCK |
821                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
822                 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
823                 if (err)
824                         goto out_mknod_dput;
825                 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
826                 dput(nd.path.dentry);
827                 nd.path.dentry = dentry;
828
829                 addr->hash = UNIX_HASH_SIZE;
830         }
831
832         spin_lock(&unix_table_lock);
833
834         if (!sunaddr->sun_path[0]) {
835                 err = -EADDRINUSE;
836                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
837                                               sk->sk_type, hash)) {
838                         unix_release_addr(addr);
839                         goto out_unlock;
840                 }
841
842                 list = &unix_socket_table[addr->hash];
843         } else {
844                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
845                 u->dentry = nd.path.dentry;
846                 u->mnt    = nd.path.mnt;
847         }
848
849         err = 0;
850         __unix_remove_socket(sk);
851         u->addr = addr;
852         __unix_insert_socket(list, sk);
853
854 out_unlock:
855         spin_unlock(&unix_table_lock);
856 out_up:
857         mutex_unlock(&u->readlock);
858 out:
859         return err;
860
861 out_mknod_dput:
862         dput(dentry);
863 out_mknod_unlock:
864         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
865         path_put(&nd.path);
866 out_mknod_parent:
867         if (err==-EEXIST)
868                 err=-EADDRINUSE;
869         unix_release_addr(addr);
870         goto out_up;
871 }
872
/*
 * Take the state locks of sk1 and sk2.
 *
 * When sk2 is NULL or is the same socket as sk1, only sk1 is locked.
 * Otherwise the socket at the lower address is locked first and the other
 * one is taken with the nested variant, so any two callers locking the
 * same pair always do so in the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first, *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        /* Order the pair by address. */
        first = (sk1 < sk2) ? sk1 : sk2;
        second = (sk1 < sk2) ? sk2 : sk1;

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
887
/*
 * Release the locks taken by unix_state_double_lock().  sk1 is always
 * unlocked; sk2 is unlocked afterwards unless it is NULL or is the same
 * socket as sk1.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        int single = unlikely(sk1 == sk2) || !sk2;

        unix_state_unlock(sk1);
        if (!single)
                unix_state_unlock(sk2);
}
897
898 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
899                               int alen, int flags)
900 {
901         struct sock *sk = sock->sk;
902         struct net *net = sock_net(sk);
903         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
904         struct sock *other;
905         unsigned hash;
906         int err;
907
908         if (addr->sa_family != AF_UNSPEC) {
909                 err = unix_mkname(sunaddr, alen, &hash);
910                 if (err < 0)
911                         goto out;
912                 alen = err;
913
914                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
915                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
916                         goto out;
917
918 restart:
919                 other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
920                 if (!other)
921                         goto out;
922
923                 unix_state_double_lock(sk, other);
924
925                 /* Apparently VFS overslept socket death. Retry. */
926                 if (sock_flag(other, SOCK_DEAD)) {
927                         unix_state_double_unlock(sk, other);
928                         sock_put(other);
929                         goto restart;
930                 }
931
932                 err = -EPERM;
933                 if (!unix_may_send(sk, other))
934                         goto out_unlock;
935
936                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
937                 if (err)
938                         goto out_unlock;
939
940         } else {
941                 /*
942                  *      1003.1g breaking connected state with AF_UNSPEC
943                  */
944                 other = NULL;
945                 unix_state_double_lock(sk, other);
946         }
947
948         /*
949          * If it was connected, reconnect.
950          */
951         if (unix_peer(sk)) {
952                 struct sock *old_peer = unix_peer(sk);
953                 unix_peer(sk)=other;
954                 unix_state_double_unlock(sk, other);
955
956                 if (other != old_peer)
957                         unix_dgram_disconnected(sk, old_peer);
958                 sock_put(old_peer);
959         } else {
960                 unix_peer(sk)=other;
961                 unix_state_double_unlock(sk, other);
962         }
963         return 0;
964
965 out_unlock:
966         unix_state_double_unlock(sk, other);
967         sock_put(other);
968 out:
969         return err;
970 }
971
972 static long unix_wait_for_peer(struct sock *other, long timeo)
973 {
974         struct unix_sock *u = unix_sk(other);
975         int sched;
976         DEFINE_WAIT(wait);
977
978         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
979
980         sched = !sock_flag(other, SOCK_DEAD) &&
981                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
982                 (skb_queue_len(&other->sk_receive_queue) >
983                  other->sk_max_ack_backlog);
984
985         unix_state_unlock(other);
986
987         if (sched)
988                 timeo = schedule_timeout(timeo);
989
990         finish_wait(&u->peer_wait, &wait);
991         return timeo;
992 }
993
/*
 * Connect sk to the listening socket named by uaddr.
 *
 * The sock that becomes the far end of the connection (newsk) and a
 * one-byte skb allocated against it are set up before any state lock is
 * taken.  The listener is then looked up and locked.  If its receive
 * queue is longer than sk_max_ack_backlog the caller gets -EAGAIN when
 * the send timeout is zero, otherwise it sleeps in unix_wait_for_peer()
 * and the lookup is restarted.  On success sk and newsk become each
 * other's peer and the skb is queued on the listener's receive queue.
 *
 * Returns 0 on success or a negative errno.  On failure the skb, newsk
 * and the reference on the listener are released at the "out" label.
 */
994 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
995                                int addr_len, int flags)
996 {
997         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
998         struct sock *sk = sock->sk;
999         struct net *net = sock_net(sk);
1000         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1001         struct sock *newsk = NULL;
1002         struct sock *other = NULL;
1003         struct sk_buff *skb = NULL;
1004         unsigned hash;
1005         int st;
1006         int err;
1007         long timeo;
1008
1009         err = unix_mkname(sunaddr, addr_len, &hash);
1010         if (err < 0)
1011                 goto out;
1012         addr_len = err;
1013
             /* With SOCK_PASSCRED set, an unbound socket is autobound first. */
1014         if (test_bit(SOCK_PASSCRED, &sock->flags)
1015                 && !u->addr && (err = unix_autobind(sock)) != 0)
1016                 goto out;
1017
1018         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1019
1020         /* First of all allocate resources.
1021            If we do this after the state is locked,
1022            we will have to recheck everything again in any case.
1023          */
1024
1025         err = -ENOMEM;
1026
1027         /* create new sock for complete connection */
1028         newsk = unix_create1(sock_net(sk), NULL);
1029         if (newsk == NULL)
1030                 goto out;
1031
1032         /* Allocate skb for sending to listening sock */
1033         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1034         if (skb == NULL)
1035                 goto out;
1036
1037 restart:
1038         /*  Find listening sock. */
1039         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1040         if (!other)
1041                 goto out;
1042
1043         /* Latch state of peer */
1044         unix_state_lock(other);
1045
1046         /* Apparently VFS overslept socket death. Retry. */
1047         if (sock_flag(other, SOCK_DEAD)) {
1048                 unix_state_unlock(other);
1049                 sock_put(other);
1050                 goto restart;
1051         }
1052
1053         err = -ECONNREFUSED;
1054         if (other->sk_state != TCP_LISTEN)
1055                 goto out_unlock;
1056
             /* Listener's queue is over its backlog: fail or wait, then retry. */
1057         if (skb_queue_len(&other->sk_receive_queue) >
1058             other->sk_max_ack_backlog) {
1059                 err = -EAGAIN;
1060                 if (!timeo)
1061                         goto out_unlock;
1062
                     /* unix_wait_for_peer() drops the state lock on other. */
1063                 timeo = unix_wait_for_peer(other, timeo);
1064
1065                 err = sock_intr_errno(timeo);
1066                 if (signal_pending(current))
1067                         goto out;
1068                 sock_put(other);
1069                 goto restart;
1070         }
1071
1072         /* Latch our state.
1073
1074            This is a tricky place. We need to grab the write lock and
1075            cannot drop the lock on the peer. It is dangerous because
1076            deadlock is possible. The connect-to-self case and simultaneous
1077            connect attempts are eliminated by checking the socket
1078            state. other is TCP_LISTEN; if sk is TCP_LISTEN we
1079            check this before attempting to grab the lock.
1080
1081            Well, and we have to recheck the state after the socket is locked.
1082          */
1083         st = sk->sk_state;
1084
1085         switch (st) {
1086         case TCP_CLOSE:
1087                 /* This is ok... continue with connect */
1088                 break;
1089         case TCP_ESTABLISHED:
1090                 /* Socket is already connected */
1091                 err = -EISCONN;
1092                 goto out_unlock;
1093         default:
1094                 err = -EINVAL;
1095                 goto out_unlock;
1096         }
1097
1098         unix_state_lock_nested(sk);
1099
             /* State changed between the unlocked read and the lock: start over. */
1100         if (sk->sk_state != st) {
1101                 unix_state_unlock(sk);
1102                 unix_state_unlock(other);
1103                 sock_put(other);
1104                 goto restart;
1105         }
1106
1107         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1108         if (err) {
1109                 unix_state_unlock(sk);
1110                 goto out_unlock;
1111         }
1112
1113         /* The way is open! Quickly set all the necessary fields... */
1114
             /* The peer pointer stored in newsk holds a reference on sk. */
1115         sock_hold(sk);
1116         unix_peer(newsk)        = sk;
1117         newsk->sk_state         = TCP_ESTABLISHED;
1118         newsk->sk_type          = sk->sk_type;
1119         newsk->sk_peercred.pid  = task_tgid_vnr(current);
1120         newsk->sk_peercred.uid  = current->euid;
1121         newsk->sk_peercred.gid  = current->egid;
1122         newu = unix_sk(newsk);
1123         newsk->sk_sleep         = &newu->peer_wait;
1124         otheru = unix_sk(other);
1125
1126         /* copy address information from listening to new sock*/
1127         if (otheru->addr) {
1128                 atomic_inc(&otheru->addr->refcnt);
1129                 newu->addr = otheru->addr;
1130         }
1131         if (otheru->dentry) {
1132                 newu->dentry    = dget(otheru->dentry);
1133                 newu->mnt       = mntget(otheru->mnt);
1134         }
1135
1136         /* Set credentials */
1137         sk->sk_peercred = other->sk_peercred;
1138
1139         sock->state     = SS_CONNECTED;
1140         sk->sk_state    = TCP_ESTABLISHED;
1141         sock_hold(newsk);
1142
1143         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1144         unix_peer(sk)   = newsk;
1145
1146         unix_state_unlock(sk);
1147
1148         /* take ten and send info to listening sock */
1149         spin_lock(&other->sk_receive_queue.lock);
1150         __skb_queue_tail(&other->sk_receive_queue, skb);
1151         spin_unlock(&other->sk_receive_queue.lock);
1152         unix_state_unlock(other);
1153         other->sk_data_ready(other, 0);
1154         sock_put(other);
1155         return 0;
1156
     /* Error exit with the state lock on other still held. */
1157 out_unlock:
1158         if (other)
1159                 unix_state_unlock(other);
1160
     /* Error exit without locks: free whatever was allocated or referenced. */
1161 out:
1162         if (skb)
1163                 kfree_skb(skb);
1164         if (newsk)
1165                 unix_release_sock(newsk, 0);
1166         if (other)
1167                 sock_put(other);
1168         return err;
1169 }
1170
1171 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1172 {
1173         struct sock *ska=socka->sk, *skb = sockb->sk;
1174
1175         /* Join our sockets back to back */
1176         sock_hold(ska);
1177         sock_hold(skb);
1178         unix_peer(ska)=skb;
1179         unix_peer(skb)=ska;
1180         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1181         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1182         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1183
1184         if (ska->sk_type != SOCK_DGRAM) {
1185                 ska->sk_state = TCP_ESTABLISHED;
1186                 skb->sk_state = TCP_ESTABLISHED;
1187                 socka->state  = SS_CONNECTED;
1188                 sockb->state  = SS_CONNECTED;
1189         }
1190         return 0;
1191 }
1192
1193 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1194 {
1195         struct sock *sk = sock->sk;
1196         struct sock *tsk;
1197         struct sk_buff *skb;
1198         int err;
1199
1200         err = -EOPNOTSUPP;
1201         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1202                 goto out;
1203
1204         err = -EINVAL;
1205         if (sk->sk_state != TCP_LISTEN)
1206                 goto out;
1207
1208         /* If socket state is TCP_LISTEN it cannot change (for now...),
1209          * so that no locks are necessary.
1210          */
1211
1212         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1213         if (!skb) {
1214                 /* This means receive shutdown. */
1215                 if (err == 0)
1216                         err = -EINVAL;
1217                 goto out;
1218         }
1219
1220         tsk = skb->sk;
1221         skb_free_datagram(sk, skb);
1222         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1223
1224         /* attach accepted sock to socket */
1225         unix_state_lock(tsk);
1226         newsock->state = SS_CONNECTED;
1227         sock_graft(tsk, newsock);
1228         unix_state_unlock(tsk);
1229         return 0;
1230
1231 out:
1232         return err;
1233 }
1234
1235
1236 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1237 {
1238         struct sock *sk = sock->sk;
1239         struct unix_sock *u;
1240         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1241         int err = 0;
1242
1243         if (peer) {
1244                 sk = unix_peer_get(sk);
1245
1246                 err = -ENOTCONN;
1247                 if (!sk)
1248                         goto out;
1249                 err = 0;
1250         } else {
1251                 sock_hold(sk);
1252         }
1253
1254         u = unix_sk(sk);
1255         unix_state_lock(sk);
1256         if (!u->addr) {
1257                 sunaddr->sun_family = AF_UNIX;
1258                 sunaddr->sun_path[0] = 0;
1259                 *uaddr_len = sizeof(short);
1260         } else {
1261                 struct unix_address *addr = u->addr;
1262
1263                 *uaddr_len = addr->len;
1264                 memcpy(sunaddr, addr->name, *uaddr_len);
1265         }
1266         unix_state_unlock(sk);
1267         sock_put(sk);
1268 out:
1269         return err;
1270 }
1271
1272 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1273 {
1274         int i;
1275
1276         scm->fp = UNIXCB(skb).fp;
1277         skb->destructor = sock_wfree;
1278         UNIXCB(skb).fp = NULL;
1279
1280         for (i=scm->fp->count-1; i>=0; i--)
1281                 unix_notinflight(scm->fp->fp[i]);
1282 }
1283
1284 static void unix_destruct_fds(struct sk_buff *skb)
1285 {
1286         struct scm_cookie scm;
1287         memset(&scm, 0, sizeof(scm));
1288         unix_detach_fds(&scm, skb);
1289
1290         /* Alas, it calls VFS */
1291         /* So fscking what? fput() had been SMP-safe since the last Summer */
1292         scm_destroy(&scm);
1293         sock_wfree(skb);
1294 }
1295
1296 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1297 {
1298         int i;
1299         for (i=scm->fp->count-1; i>=0; i--)
1300                 unix_inflight(scm->fp->fp[i]);
1301         UNIXCB(skb).fp = scm->fp;
1302         skb->destructor = unix_destruct_fds;
1303         scm->fp = NULL;
1304 }
1305
1306 /*
1307  *      Send AF_UNIX data.
1308  */
1309
/*
 * Send one datagram of len bytes.
 *
 * The destination is msg->msg_name when msg_namelen is non-zero,
 * otherwise the connected peer (-ENOTCONN if there is none).  MSG_OOB is
 * rejected with -EOPNOTSUPP, and a message larger than sk_sndbuf - 32
 * with -EMSGSIZE.  If the receiver's queue is over its backlog and the
 * receiver is not connected back to us, the call fails with -EAGAIN when
 * the send timeout is zero, otherwise it waits and restarts.
 *
 * Returns len on success or a negative errno.  siocb->scm is destroyed on
 * every path after scm_send() succeeded.
 */
1310 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1311                               struct msghdr *msg, size_t len)
1312 {
1313         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1314         struct sock *sk = sock->sk;
1315         struct net *net = sock_net(sk);
1316         struct unix_sock *u = unix_sk(sk);
1317         struct sockaddr_un *sunaddr=msg->msg_name;
1318         struct sock *other = NULL;
1319         int namelen = 0; /* fake GCC */
1320         int err;
1321         unsigned hash;
1322         struct sk_buff *skb;
1323         long timeo;
1324         struct scm_cookie tmp_scm;
1325
             /* Use a cookie on our stack when the caller did not supply one. */
1326         if (NULL == siocb->scm)
1327                 siocb->scm = &tmp_scm;
1328         err = scm_send(sock, msg, siocb->scm);
1329         if (err < 0)
1330                 return err;
1331
1332         err = -EOPNOTSUPP;
1333         if (msg->msg_flags&MSG_OOB)
1334                 goto out;
1335
1336         if (msg->msg_namelen) {
1337                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1338                 if (err < 0)
1339                         goto out;
1340                 namelen = err;
1341         } else {
                     /* No address given: send to the connected peer, if any. */
1342                 sunaddr = NULL;
1343                 err = -ENOTCONN;
1344                 other = unix_peer_get(sk);
1345                 if (!other)
1346                         goto out;
1347         }
1348
1349         if (test_bit(SOCK_PASSCRED, &sock->flags)
1350                 && !u->addr && (err = unix_autobind(sock)) != 0)
1351                 goto out;
1352
1353         err = -EMSGSIZE;
1354         if (len > sk->sk_sndbuf - 32)
1355                 goto out;
1356
1357         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1358         if (skb==NULL)
1359                 goto out;
1360
             /* Stamp the skb with the sender's credentials and any passed fds. */
1361         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1362         if (siocb->scm->fp)
1363                 unix_attach_fds(siocb->scm, skb);
1364         unix_get_secdata(siocb->scm, skb);
1365
1366         skb_reset_transport_header(skb);
1367         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1368         if (err)
1369                 goto out_free;
1370
1371         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1372
1373 restart:
             /* Without a receiver in hand, look one up by name (if we have one). */
1374         if (!other) {
1375                 err = -ECONNRESET;
1376                 if (sunaddr == NULL)
1377                         goto out_free;
1378
1379                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1380                                         hash, &err);
1381                 if (other==NULL)
1382                         goto out_free;
1383         }
1384
1385         unix_state_lock(other);
1386         err = -EPERM;
1387         if (!unix_may_send(sk, other))
1388                 goto out_unlock;
1389
1390         if (sock_flag(other, SOCK_DEAD)) {
1391                 /*
1392                  *      Check with 1003.1g - what should
1393                  *      datagram error
1394                  */
                     /* Drop the reference we took on the dead receiver. */
1395                 unix_state_unlock(other);
1396                 sock_put(other);
1397
                     /*
                      * If the dead socket is still our connected peer, clear
                      * the link, drop the peer reference and fail with
                      * -ECONNREFUSED; otherwise retry from the lookup.
                      */
1398                 err = 0;
1399                 unix_state_lock(sk);
1400                 if (unix_peer(sk) == other) {
1401                         unix_peer(sk)=NULL;
1402                         unix_state_unlock(sk);
1403
1404                         unix_dgram_disconnected(sk, other);
1405                         sock_put(other);
1406                         err = -ECONNREFUSED;
1407                 } else {
1408                         unix_state_unlock(sk);
1409                 }
1410
1411                 other = NULL;
1412                 if (err)
1413                         goto out_free;
1414                 goto restart;
1415         }
1416
1417         err = -EPIPE;
1418         if (other->sk_shutdown & RCV_SHUTDOWN)
1419                 goto out_unlock;
1420
1421         if (sk->sk_type != SOCK_SEQPACKET) {
1422                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1423                 if (err)
1424                         goto out_unlock;
1425         }
1426
             /* The backlog limit is skipped when the receiver is connected back to us. */
1427         if (unix_peer(other) != sk &&
1428             (skb_queue_len(&other->sk_receive_queue) >
1429              other->sk_max_ack_backlog)) {
1430                 if (!timeo) {
1431                         err = -EAGAIN;
1432                         goto out_unlock;
1433                 }
1434
                     /* unix_wait_for_peer() drops the state lock on other. */
1435                 timeo = unix_wait_for_peer(other, timeo);
1436
1437                 err = sock_intr_errno(timeo);
1438                 if (signal_pending(current))
1439                         goto out_free;
1440
1441                 goto restart;
1442         }
1443
1444         skb_queue_tail(&other->sk_receive_queue, skb);
1445         unix_state_unlock(other);
1446         other->sk_data_ready(other, len);
1447         sock_put(other);
1448         scm_destroy(siocb->scm);
1449         return len;
1450
1451 out_unlock:
1452         unix_state_unlock(other);
1453 out_free:
1454         kfree_skb(skb);
1455 out:
1456         if (other)
1457                 sock_put(other);
1458         scm_destroy(siocb->scm);
1459         return err;
1460 }
1461
1462
/*
 * Send len bytes on a connected stream socket.
 *
 * The data is cut into skbs no larger than half the send buffer minus 64
 * bytes, SKB_MAX_ALLOC, and the tailroom of the skb actually allocated;
 * each skb is queued on the peer's receive queue in turn.
 *
 * Returns the number of bytes queued if that is non-zero, otherwise a
 * negative errno.  A destination address is refused with -EISCONN or
 * -EOPNOTSUPP, MSG_OOB with -EOPNOTSUPP.  When the peer is dead or has
 * shut down receiving, or we have shut down sending, SIGPIPE is raised
 * unless MSG_NOSIGNAL is set or some data was already sent.
 */
1463 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1464                                struct msghdr *msg, size_t len)
1465 {
1466         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1467         struct sock *sk = sock->sk;
1468         struct sock *other = NULL;
1469         struct sockaddr_un *sunaddr=msg->msg_name;
1470         int err,size;
1471         struct sk_buff *skb;
1472         int sent=0;
1473         struct scm_cookie tmp_scm;
1474
             /* Use a cookie on our stack when the caller did not supply one. */
1475         if (NULL == siocb->scm)
1476                 siocb->scm = &tmp_scm;
1477         err = scm_send(sock, msg, siocb->scm);
1478         if (err < 0)
1479                 return err;
1480
1481         err = -EOPNOTSUPP;
1482         if (msg->msg_flags&MSG_OOB)
1483                 goto out_err;
1484
1485         if (msg->msg_namelen) {
1486                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1487                 goto out_err;
1488         } else {
1489                 sunaddr = NULL;
1490                 err = -ENOTCONN;
1491                 other = unix_peer(sk);
1492                 if (!other)
1493                         goto out_err;
1494         }
1495
1496         if (sk->sk_shutdown & SEND_SHUTDOWN)
1497                 goto pipe_err;
1498
1499         while(sent < len)
1500         {
1501                 /*
1502                  *      Optimisation for the fact that under 0.01% of X
1503                  *      messages typically need breaking up.
1504                  */
1505
1506                 size = len-sent;
1507
1508                 /* Keep two messages in the pipe so it schedules better */
1509                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1510                         size = (sk->sk_sndbuf >> 1) - 64;
1511
1512                 if (size > SKB_MAX_ALLOC)
1513                         size = SKB_MAX_ALLOC;
1514
1515                 /*
1516                  *      Grab a buffer
1517                  */
1518
1519                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1520
1521                 if (skb==NULL)
1522                         goto out_err;
1523
1524                 /*
1525                  *      If you pass two values to the sock_alloc_send_skb
1526                  *      it tries to grab the large buffer with GFP_NOFS
1527                  *      (which can fail easily), and if it fails grab the
1528                  *      fallback size buffer which is under a page and will
1529                  *      succeed. [Alan]
1530                  */
1531                 size = min_t(int, size, skb_tailroom(skb));
1532
                     /*
                      * unix_attach_fds() clears scm->fp, so passed fds only
                      * travel with the first skb of the message.
                      */
1533                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1534                 if (siocb->scm->fp)
1535                         unix_attach_fds(siocb->scm, skb);
1536
1537                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1538                         kfree_skb(skb);
1539                         goto out_err;
1540                 }
1541
1542                 unix_state_lock(other);
1543
1544                 if (sock_flag(other, SOCK_DEAD) ||
1545                     (other->sk_shutdown & RCV_SHUTDOWN))
1546                         goto pipe_err_free;
1547
1548                 skb_queue_tail(&other->sk_receive_queue, skb);
1549                 unix_state_unlock(other);
1550                 other->sk_data_ready(other, size);
1551                 sent+=size;
1552         }
1553
1554         scm_destroy(siocb->scm);
1555         siocb->scm = NULL;
1556
1557         return sent;
1558
     /* Broken pipe detected with other's state lock held and an skb in hand. */
1559 pipe_err_free:
1560         unix_state_unlock(other);
1561         kfree_skb(skb);
1562 pipe_err:
1563         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1564                 send_sig(SIGPIPE,current,0);
1565         err = -EPIPE;
1566 out_err:
1567         scm_destroy(siocb->scm);
1568         siocb->scm = NULL;
             /* A partial send reports the bytes queued rather than the error. */
1569         return sent ? : err;
1570 }
1571
1572 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1573                                   struct msghdr *msg, size_t len)
1574 {
1575         int err;
1576         struct sock *sk = sock->sk;
1577
1578         err = sock_error(sk);
1579         if (err)
1580                 return err;
1581
1582         if (sk->sk_state != TCP_ESTABLISHED)
1583                 return -ENOTCONN;
1584
1585         if (msg->msg_namelen)
1586                 msg->msg_namelen = 0;
1587
1588         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1589 }
1590
1591 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1592 {
1593         struct unix_sock *u = unix_sk(sk);
1594
1595         msg->msg_namelen = 0;
1596         if (u->addr) {
1597                 msg->msg_namelen = u->addr->len;
1598                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1599         }
1600 }
1601
1602 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1603                               struct msghdr *msg, size_t size,
1604                               int flags)
1605 {
1606         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1607         struct scm_cookie tmp_scm;
1608         struct sock *sk = sock->sk;
1609         struct unix_sock *u = unix_sk(sk);
1610         int noblock = flags & MSG_DONTWAIT;
1611         struct sk_buff *skb;
1612         int err;
1613
1614         err = -EOPNOTSUPP;
1615         if (flags&MSG_OOB)
1616                 goto out;
1617
1618         msg->msg_namelen = 0;
1619
1620         mutex_lock(&u->readlock);
1621
1622         skb = skb_recv_datagram(sk, flags, noblock, &err);
1623         if (!skb) {
1624                 unix_state_lock(sk);
1625                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1626                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1627                     (sk->sk_shutdown & RCV_SHUTDOWN))
1628                         err = 0;
1629                 unix_state_unlock(sk);
1630                 goto out_unlock;
1631         }
1632
1633         wake_up_interruptible_sync(&u->peer_wait);
1634
1635         if (msg->msg_name)
1636                 unix_copy_addr(msg, skb->sk);
1637
1638         if (size > skb->len)
1639                 size = skb->len;
1640         else if (size < skb->len)
1641                 msg->msg_flags |= MSG_TRUNC;
1642
1643         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1644         if (err)
1645                 goto out_free;
1646
1647         if (!siocb->scm) {
1648                 siocb->scm = &tmp_scm;
1649                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1650         }
1651         siocb->scm->creds = *UNIXCREDS(skb);
1652         unix_set_secdata(siocb->scm, skb);
1653
1654         if (!(flags & MSG_PEEK))
1655         {
1656                 if (UNIXCB(skb).fp)
1657                         unix_detach_fds(siocb->scm, skb);
1658         }
1659         else
1660         {
1661                 /* It is questionable: on PEEK we could:
1662                    - do not return fds - good, but too simple 8)
1663                    - return fds, and do not return them on read (old strategy,
1664                      apparently wrong)
1665                    - clone fds (I chose it for now, it is the most universal
1666                      solution)
1667
1668                    POSIX 1003.1g does not actually define this clearly
1669                    at all. POSIX 1003.1g doesn't define a lot of things
1670                    clearly however!
1671
1672                 */
1673                 if (UNIXCB(skb).fp)
1674                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1675         }
1676         err = size;
1677
1678         scm_recv(sock, msg, siocb->scm, flags);
1679
1680 out_free:
1681         skb_free_datagram(sk,skb);
1682 out_unlock:
1683         mutex_unlock(&u->readlock);
1684 out:
1685         return err;
1686 }
1687
1688 /*
1689  *      Sleep until data has arrive. But check for races..
1690  */
1691
1692 static long unix_stream_data_wait(struct sock * sk, long timeo)
1693 {
1694         DEFINE_WAIT(wait);
1695
1696         unix_state_lock(sk);
1697
1698         for (;;) {
1699                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1700
1701                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1702                     sk->sk_err ||
1703                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1704                     signal_pending(current) ||
1705                     !timeo)
1706                         break;
1707
1708                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1709                 unix_state_unlock(sk);
1710                 timeo = schedule_timeout(timeo);
1711                 unix_state_lock(sk);
1712                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1713         }
1714
1715         finish_wait(sk->sk_sleep, &wait);
1716         unix_state_unlock(sk);
1717         return timeo;
1718 }
1719
1720
1721
1722 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1723                                struct msghdr *msg, size_t size,
1724                                int flags)
1725 {
1726         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1727         struct scm_cookie tmp_scm;
1728         struct sock *sk = sock->sk;
1729         struct unix_sock *u = unix_sk(sk);
1730         struct sockaddr_un *sunaddr=msg->msg_name;
1731         int copied = 0;
1732         int check_creds = 0;
1733         int target;
1734         int err = 0;
1735         long timeo;
1736
1737         err = -EINVAL;
1738         if (sk->sk_state != TCP_ESTABLISHED)
1739                 goto out;
1740
1741         err = -EOPNOTSUPP;
1742         if (flags&MSG_OOB)
1743                 goto out;
1744
1745         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1746         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1747
1748         msg->msg_namelen = 0;
1749
1750         /* Lock the socket to prevent queue disordering
1751          * while sleeps in memcpy_tomsg
1752          */
1753
1754         if (!siocb->scm) {
1755                 siocb->scm = &tmp_scm;
1756                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1757         }
1758
1759         mutex_lock(&u->readlock);
1760
1761         do
1762         {
1763                 int chunk;
1764                 struct sk_buff *skb;
1765
1766                 unix_state_lock(sk);
1767                 skb = skb_dequeue(&sk->sk_receive_queue);
1768                 if (skb==NULL)
1769                 {
1770                         if (copied >= target)
1771                                 goto unlock;
1772
1773                         /*
1774                          *      POSIX 1003.1g mandates this order.
1775                          */
1776
1777                         if ((err = sock_error(sk)) != 0)
1778                                 goto unlock;
1779                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1780                                 goto unlock;
1781
1782                         unix_state_unlock(sk);
1783                         err = -EAGAIN;
1784                         if (!timeo)
1785                                 break;
1786                         mutex_unlock(&u->readlock);
1787
1788                         timeo = unix_stream_data_wait(sk, timeo);
1789
1790                         if (signal_pending(current)) {
1791                                 err = sock_intr_errno(timeo);
1792                                 goto out;
1793                         }
1794                         mutex_lock(&u->readlock);
1795                         continue;
1796  unlock:
1797                         unix_state_unlock(sk);
1798                         break;
1799                 }
1800                 unix_state_unlock(sk);
1801
1802                 if (check_creds) {
1803                         /* Never glue messages from different writers */
1804                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1805                                 skb_queue_head(&sk->sk_receive_queue, skb);
1806                                 break;
1807                         }
1808                 } else {
1809                         /* Copy credentials */
1810                         siocb->scm->creds = *UNIXCREDS(skb);
1811                         check_creds = 1;
1812                 }
1813
1814                 /* Copy address just once */
1815                 if (sunaddr)
1816                 {
1817                         unix_copy_addr(msg, skb->sk);
1818                         sunaddr = NULL;
1819                 }
1820
1821                 chunk = min_t(unsigned int, skb->len, size);
1822                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1823                         skb_queue_head(&sk->sk_receive_queue, skb);
1824                         if (copied == 0)
1825                                 copied = -EFAULT;
1826                         break;
1827                 }
1828                 copied += chunk;
1829                 size -= chunk;
1830
1831                 /* Mark read part of skb as used */
1832                 if (!(flags & MSG_PEEK))
1833                 {
1834                         skb_pull(skb, chunk);
1835
1836                         if (UNIXCB(skb).fp)
1837                                 unix_detach_fds(siocb->scm, skb);
1838
1839                         /* put the skb back if we didn't use it up.. */
1840                         if (skb->len)
1841                         {
1842                                 skb_queue_head(&sk->sk_receive_queue, skb);
1843                                 break;
1844                         }
1845
1846                         kfree_skb(skb);
1847
1848                         if (siocb->scm->fp)
1849                                 break;
1850                 }
1851                 else
1852                 {
1853                         /* It is questionable, see note in unix_dgram_recvmsg.
1854                          */
1855                         if (UNIXCB(skb).fp)
1856                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1857
1858                         /* put message back and return */
1859                         skb_queue_head(&sk->sk_receive_queue, skb);
1860                         break;
1861                 }
1862         } while (size);
1863
1864         mutex_unlock(&u->readlock);
1865         scm_recv(sock, msg, siocb->scm, flags);
1866 out:
1867         return copied ? : err;
1868 }
1869
1870 static int unix_shutdown(struct socket *sock, int mode)
1871 {
1872         struct sock *sk = sock->sk;
1873         struct sock *other;
1874
1875         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1876
1877         if (mode) {
1878                 unix_state_lock(sk);
1879                 sk->sk_shutdown |= mode;
1880                 other=unix_peer(sk);
1881                 if (other)
1882                         sock_hold(other);
1883                 unix_state_unlock(sk);
1884                 sk->sk_state_change(sk);
1885
1886                 if (other &&
1887                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1888
1889                         int peer_mode = 0;
1890
1891                         if (mode&RCV_SHUTDOWN)
1892                                 peer_mode |= SEND_SHUTDOWN;
1893                         if (mode&SEND_SHUTDOWN)
1894                                 peer_mode |= RCV_SHUTDOWN;
1895                         unix_state_lock(other);
1896                         other->sk_shutdown |= peer_mode;
1897                         unix_state_unlock(other);
1898                         other->sk_state_change(other);
1899                         read_lock(&other->sk_callback_lock);
1900                         if (peer_mode == SHUTDOWN_MASK)
1901                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1902                         else if (peer_mode & RCV_SHUTDOWN)
1903                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1904                         read_unlock(&other->sk_callback_lock);
1905                 }
1906                 if (other)
1907                         sock_put(other);
1908         }
1909         return 0;
1910 }
1911
1912 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1913 {
1914         struct sock *sk = sock->sk;
1915         long amount=0;
1916         int err;
1917
1918         switch(cmd)
1919         {
1920                 case SIOCOUTQ:
1921                         amount = atomic_read(&sk->sk_wmem_alloc);
1922                         err = put_user(amount, (int __user *)arg);
1923                         break;
1924                 case SIOCINQ:
1925                 {
1926                         struct sk_buff *skb;
1927
1928                         if (sk->sk_state == TCP_LISTEN) {
1929                                 err = -EINVAL;
1930                                 break;
1931                         }
1932
1933                         spin_lock(&sk->sk_receive_queue.lock);
1934                         if (sk->sk_type == SOCK_STREAM ||
1935                             sk->sk_type == SOCK_SEQPACKET) {
1936                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1937                                         amount += skb->len;
1938                         } else {
1939                                 skb = skb_peek(&sk->sk_receive_queue);
1940                                 if (skb)
1941                                         amount=skb->len;
1942                         }
1943                         spin_unlock(&sk->sk_receive_queue.lock);
1944                         err = put_user(amount, (int __user *)arg);
1945                         break;
1946                 }
1947
1948                 default:
1949                         err = -ENOIOCTLCMD;
1950                         break;
1951         }
1952         return err;
1953 }
1954
1955 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1956 {
1957         struct sock *sk = sock->sk;
1958         unsigned int mask;
1959
1960         poll_wait(file, sk->sk_sleep, wait);
1961         mask = 0;
1962
1963         /* exceptional events? */
1964         if (sk->sk_err)
1965                 mask |= POLLERR;
1966         if (sk->sk_shutdown == SHUTDOWN_MASK)
1967                 mask |= POLLHUP;
1968         if (sk->sk_shutdown & RCV_SHUTDOWN)
1969                 mask |= POLLRDHUP;
1970
1971         /* readable? */
1972         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1973             (sk->sk_shutdown & RCV_SHUTDOWN))
1974                 mask |= POLLIN | POLLRDNORM;
1975
1976         /* Connection-based need to check for termination and startup */
1977         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1978                 mask |= POLLHUP;
1979
1980         /*
1981          * we set writable also when the other side has shut down the
1982          * connection. This prevents stuck sockets.
1983          */
1984         if (unix_writable(sk))
1985                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1986
1987         return mask;
1988 }
1989
1990
1991 #ifdef CONFIG_PROC_FS
1992 static struct sock *first_unix_socket(int *i)
1993 {
1994         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
1995                 if (!hlist_empty(&unix_socket_table[*i]))
1996                         return __sk_head(&unix_socket_table[*i]);
1997         }
1998         return NULL;
1999 }
2000
2001 static struct sock *next_unix_socket(int *i, struct sock *s)
2002 {
2003         struct sock *next = sk_next(s);
2004         /* More in this chain? */
2005         if (next)
2006                 return next;
2007         /* Look for next non-empty chain. */
2008         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2009                 if (!hlist_empty(&unix_socket_table[*i]))
2010                         return __sk_head(&unix_socket_table[*i]);
2011         }
2012         return NULL;
2013 }
2014
/*
 * Iterator state for the seq_file walk over unix_socket_table; the seq
 * callbacks below read it from seq->private.
 */
2015 struct unix_iter_state {
/* p.net is compared with sock_net() to skip sockets of other namespaces */
2016         struct seq_net_private p;
/* index of the unix_socket_table chain the walk is currently in */
2017         int i;
2018 };
2019 static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
2020 {
2021         loff_t off = 0;
2022         struct sock *s;
2023
2024         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2025                 if (sock_net(s) != iter->p.net)
2026                         continue;
2027                 if (off == pos)
2028                         return s;
2029                 ++off;
2030         }
2031         return NULL;
2032 }
2033
2034
2035 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2036         __acquires(unix_table_lock)
2037 {
2038         struct unix_iter_state *iter = seq->private;
2039         spin_lock(&unix_table_lock);
2040         return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
2041 }
2042
2043 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2044 {
2045         struct unix_iter_state *iter = seq->private;
2046         struct sock *sk = v;
2047         ++*pos;
2048
2049         if (v == (void *)1)
2050                 sk = first_unix_socket(&iter->i);
2051         else
2052                 sk = next_unix_socket(&iter->i, sk);
2053         while (sk && (sock_net(sk) != iter->p.net))
2054                 sk = next_unix_socket(&iter->i, sk);
2055         return sk;
2056 }
2057
/*
 * seq_file ->stop callback: drops unix_table_lock, which unix_seq_start()
 * takes.  Neither argument is used.
 */
2058 static void unix_seq_stop(struct seq_file *seq, void *v)
2059         __releases(unix_table_lock)
2060 {
2061         spin_unlock(&unix_table_lock);
2062 }
2063
2064 static int unix_seq_show(struct seq_file *seq, void *v)
2065 {
2066
2067         if (v == (void *)1)
2068                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2069                          "Inode Path\n");
2070         else {
2071                 struct sock *s = v;
2072                 struct unix_sock *u = unix_sk(s);
2073                 unix_state_lock(s);
2074
2075                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2076                         s,
2077                         atomic_read(&s->sk_refcnt),
2078                         0,
2079                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2080                         s->sk_type,
2081                         s->sk_socket ?
2082                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2083                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2084                         sock_i_ino(s));
2085
2086                 if (u->addr) {
2087                         int i, len;
2088                         seq_putc(seq, ' ');
2089
2090                         i = 0;
2091                         len = u->addr->len - sizeof(short);
2092                         if (!UNIX_ABSTRACT(s))
2093                                 len--;
2094                         else {
2095                                 seq_putc(seq, '@');
2096                                 i++;
2097                         }
2098                         for ( ; i < len; i++)
2099                                 seq_putc(seq, u->addr->name->sun_path[i]);
2100                 }
2101                 unix_state_unlock(s);
2102                 seq_putc(seq, '\n');
2103         }
2104
2105         return 0;
2106 }
2107
/*
 * seq_file iterator callbacks for the socket listing; unix_seq_open()
 * passes this table to seq_open_net().
 */
2108 static const struct seq_operations unix_seq_ops = {
2109         .start  = unix_seq_start,
2110         .next   = unix_seq_next,
2111         .stop   = unix_seq_stop,
2112         .show   = unix_seq_show,
2113 };
2114
2115
/*
 * ->open handler for the listing file: hands unix_seq_ops to
 * seq_open_net() together with the size of struct unix_iter_state, and
 * returns whatever seq_open_net() returns.
 */
2116 static int unix_seq_open(struct inode *inode, struct file *file)
2117 {
2118         return seq_open_net(inode, file, &unix_seq_ops,
2119                             sizeof(struct unix_iter_state));
2120 }
2121
/*
 * File operations of the "unix" proc entry that unix_net_init() creates;
 * everything except ->open is a generic seq_file helper.
 */
2122 static const struct file_operations unix_seq_fops = {
2123         .owner          = THIS_MODULE,
2124         .open           = unix_seq_open,
2125         .read           = seq_read,
2126         .llseek         = seq_lseek,
2127         .release        = seq_release_net,
2128 };
2129
2130 #endif
2131
/*
 * Protocol family descriptor for PF_UNIX; af_unix_init() passes it to
 * sock_register(), and unix_create is its socket-creation hook.
 */
2132 static struct net_proto_family unix_family_ops = {
2133         .family = PF_UNIX,
2134         .create = unix_create,
2135         .owner  = THIS_MODULE,
2136 };
2137
2138
2139 static int unix_net_init(struct net *net)
2140 {
2141         int error = -ENOMEM;
2142
2143         net->unx.sysctl_max_dgram_qlen = 10;
2144         if (unix_sysctl_register(net))
2145                 goto out;
2146
2147 #ifdef CONFIG_PROC_FS
2148         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2149                 unix_sysctl_unregister(net);
2150                 goto out;
2151         }
2152 #endif
2153         error = 0;
2154 out:
2155         return 0;
2156 }
2157
/*
 * Per-namespace teardown: unregisters the sysctl table and removes the
 * "unix" proc entry, i.e. the two things unix_net_init() sets up.
 */
2158 static void unix_net_exit(struct net *net)
2159 {
2160         unix_sysctl_unregister(net);
2161         proc_net_remove(net, "unix");
2162 }
2163
/*
 * Per-namespace init/exit hooks; registered by af_unix_init() and
 * unregistered by af_unix_exit().
 */
2164 static struct pernet_operations unix_net_ops = {
2165         .init = unix_net_init,
2166         .exit = unix_net_exit,
2167 };
2168
2169 static int __init af_unix_init(void)
2170 {
2171         int rc = -1;
2172         struct sk_buff *dummy_skb;
2173
2174         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2175
2176         rc = proto_register(&unix_proto, 1);
2177         if (rc != 0) {
2178                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2179                        __func__);
2180                 goto out;
2181         }
2182
2183         sock_register(&unix_family_ops);
2184         register_pernet_subsys(&unix_net_ops);
2185 out:
2186         return rc;
2187 }
2188
/*
 * Module unload: unregisters the PF_UNIX family, the unix_proto protocol
 * and the per-namespace operations that af_unix_init() registered.
 */
2189 static void __exit af_unix_exit(void)
2190 {
2191         sock_unregister(PF_UNIX);
2192         proto_unregister(&unix_proto);
2193         unregister_pernet_subsys(&unix_net_ops);
2194 }
2195
/* Entry points run at module load and unload. */
2196 module_init(af_unix_init);
2197 module_exit(af_unix_exit);
2198
/* Module metadata: licence, plus the alias declared for PF_UNIX. */
2199 MODULE_LICENSE("GPL");
2200 MODULE_ALIAS_NETPROTO(PF_UNIX);