]> nv-tegra.nvidia Code Review - linux-2.6.git/blob - net/unix/af_unix.c
/spare/repo/libata-dev branch 'master'
[linux-2.6.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by the above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge amount
40  *                                      of socks hashed (this is for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/config.h>
87 #include <linux/kernel.h>
88 #include <linux/signal.h>
89 #include <linux/sched.h>
90 #include <linux/errno.h>
91 #include <linux/string.h>
92 #include <linux/stat.h>
93 #include <linux/dcache.h>
94 #include <linux/namei.h>
95 #include <linux/socket.h>
96 #include <linux/un.h>
97 #include <linux/fcntl.h>
98 #include <linux/termios.h>
99 #include <linux/sockios.h>
100 #include <linux/net.h>
101 #include <linux/in.h>
102 #include <linux/fs.h>
103 #include <linux/slab.h>
104 #include <asm/uaccess.h>
105 #include <linux/skbuff.h>
106 #include <linux/netdevice.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
112 #include <net/scm.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/smp_lock.h>
116 #include <linux/rtnetlink.h>
117 #include <linux/mount.h>
118 #include <net/checksum.h>
119 #include <linux/security.h>
120
/* Initial sk_max_ack_backlog given to every socket built by unix_create1(). */
121 int sysctl_unix_max_dgram_qlen = 10;
122
/*
 * Socket hash table.  Buckets 0..UNIX_HASH_SIZE-1 hold bound sockets; the
 * extra bucket at index UNIX_HASH_SIZE holds sockets that are not bound yet
 * (see unix_sockets_unbound below).  Guarded by unix_table_lock.
 */
123 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
124 DEFINE_RWLOCK(unix_table_lock);
/* Live socket count: bumped in unix_create1(), dropped in unix_sock_destructor(). */
125 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
126
/* Chain that newly created, still unbound sockets are inserted into. */
127 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
128
/*
 * True when the bound address is not a filesystem one: unix_bind() stores
 * UNIX_HASH_SIZE in addr->hash for pathname bindings.  Dereferences
 * addr, so only valid on a bound socket.
 */
129 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
130
131 /*
132  *  SMP locking strategy:
133  *    hash table is protected with rwlock unix_table_lock
134  *    each socket state is protected by separate rwlock.
135  */
136
137 static inline unsigned unix_hash_fold(unsigned hash)
138 {
139         hash ^= hash>>16;
140         hash ^= hash>>8;
141         return hash&(UNIX_HASH_SIZE-1);
142 }
143
144 #define unix_peer(sk) (unix_sk(sk)->peer)
145
146 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
147 {
148         return unix_peer(osk) == sk;
149 }
150
151 static inline int unix_may_send(struct sock *sk, struct sock *osk)
152 {
153         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
154 }
155
156 static struct sock *unix_peer_get(struct sock *s)
157 {
158         struct sock *peer;
159
160         unix_state_rlock(s);
161         peer = unix_peer(s);
162         if (peer)
163                 sock_hold(peer);
164         unix_state_runlock(s);
165         return peer;
166 }
167
168 static inline void unix_release_addr(struct unix_address *addr)
169 {
170         if (atomic_dec_and_test(&addr->refcnt))
171                 kfree(addr);
172 }
173
174 /*
175  *      Check unix socket name:
176  *              - should be not zero length.
177  *              - if started by not zero, should be NULL terminated (FS object)
178  *              - if started by zero, it is abstract name.
179  */
180  
181 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
182 {
183         if (len <= sizeof(short) || len > sizeof(*sunaddr))
184                 return -EINVAL;
185         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
186                 return -EINVAL;
187         if (sunaddr->sun_path[0]) {
188                 /*
189                  * This may look like an off by one error but it is a bit more
190                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
191                  * sun_path[108] doesnt as such exist.  However in kernel space
192                  * we are guaranteed that it is a valid memory location in our
193                  * kernel address buffer.
194                  */
195                 ((char *)sunaddr)[len]=0;
196                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
197                 return len;
198         }
199
200         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
201         return len;
202 }
203
/*
 * Unlink @sk from the hash chain it is on.  The callers in this file
 * invoke it with unix_table_lock write-held.
 */
static void __unix_remove_socket(struct sock *sk)
{
	(void)sk_del_node_init(sk);
}
208
/*
 * Add @sk to the chain @list.  The socket must not be hashed anywhere
 * else; that is asserted with BUG_TRAP.  The callers in this file invoke
 * it with unix_table_lock write-held.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));

	sk_add_node(sk, list);
}
214
215 static inline void unix_remove_socket(struct sock *sk)
216 {
217         write_lock(&unix_table_lock);
218         __unix_remove_socket(sk);
219         write_unlock(&unix_table_lock);
220 }
221
222 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
223 {
224         write_lock(&unix_table_lock);
225         __unix_insert_socket(list, sk);
226         write_unlock(&unix_table_lock);
227 }
228
229 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
230                                               int len, int type, unsigned hash)
231 {
232         struct sock *s;
233         struct hlist_node *node;
234
235         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
236                 struct unix_sock *u = unix_sk(s);
237
238                 if (u->addr->len == len &&
239                     !memcmp(u->addr->name, sunname, len))
240                         goto found;
241         }
242         s = NULL;
243 found:
244         return s;
245 }
246
247 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
248                                                    int len, int type,
249                                                    unsigned hash)
250 {
251         struct sock *s;
252
253         read_lock(&unix_table_lock);
254         s = __unix_find_socket_byname(sunname, len, type, hash);
255         if (s)
256                 sock_hold(s);
257         read_unlock(&unix_table_lock);
258         return s;
259 }
260
261 static struct sock *unix_find_socket_byinode(struct inode *i)
262 {
263         struct sock *s;
264         struct hlist_node *node;
265
266         read_lock(&unix_table_lock);
267         sk_for_each(s, node,
268                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
269                 struct dentry *dentry = unix_sk(s)->dentry;
270
271                 if(dentry && dentry->d_inode == i)
272                 {
273                         sock_hold(s);
274                         goto found;
275                 }
276         }
277         s = NULL;
278 found:
279         read_unlock(&unix_table_lock);
280         return s;
281 }
282
283 static inline int unix_writable(struct sock *sk)
284 {
285         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
286 }
287
288 static void unix_write_space(struct sock *sk)
289 {
290         read_lock(&sk->sk_callback_lock);
291         if (unix_writable(sk)) {
292                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
293                         wake_up_interruptible(sk->sk_sleep);
294                 sk_wake_async(sk, 2, POLL_OUT);
295         }
296         read_unlock(&sk->sk_callback_lock);
297 }
298
299 /* When dgram socket disconnects (or changes its peer), we clear its receive
300  * queue of packets arrived from previous peer. First, it allows to do
301  * flow control based only on wmem_alloc; second, sk connected to peer
302  * may receive messages only from that peer. */
303 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
304 {
305         if (!skb_queue_empty(&sk->sk_receive_queue)) {
306                 skb_queue_purge(&sk->sk_receive_queue);
307                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
308
309                 /* If one link of bidirectional dgram pipe is disconnected,
310                  * we signal error. Messages are lost. Do not make this,
311                  * when peer was not connected to us.
312                  */
313                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
314                         other->sk_err = ECONNRESET;
315                         other->sk_error_report(other);
316                 }
317         }
318 }
319
320 static void unix_sock_destructor(struct sock *sk)
321 {
322         struct unix_sock *u = unix_sk(sk);
323
324         skb_queue_purge(&sk->sk_receive_queue);
325
326         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
327         BUG_TRAP(sk_unhashed(sk));
328         BUG_TRAP(!sk->sk_socket);
329         if (!sock_flag(sk, SOCK_DEAD)) {
330                 printk("Attempt to release alive unix socket: %p\n", sk);
331                 return;
332         }
333
334         if (u->addr)
335                 unix_release_addr(u->addr);
336
337         atomic_dec(&unix_nr_socks);
338 #ifdef UNIX_REFCNT_DEBUG
339         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
340 #endif
341 }
342
343 static int unix_release_sock (struct sock *sk, int embrion)
344 {
345         struct unix_sock *u = unix_sk(sk);
346         struct dentry *dentry;
347         struct vfsmount *mnt;
348         struct sock *skpair;
349         struct sk_buff *skb;
350         int state;
351
352         unix_remove_socket(sk);
353
354         /* Clear state */
355         unix_state_wlock(sk);
356         sock_orphan(sk);
357         sk->sk_shutdown = SHUTDOWN_MASK;
358         dentry       = u->dentry;
359         u->dentry    = NULL;
360         mnt          = u->mnt;
361         u->mnt       = NULL;
362         state = sk->sk_state;
363         sk->sk_state = TCP_CLOSE;
364         unix_state_wunlock(sk);
365
366         wake_up_interruptible_all(&u->peer_wait);
367
368         skpair=unix_peer(sk);
369
370         if (skpair!=NULL) {
371                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
372                         unix_state_wlock(skpair);
373                         /* No more writes */
374                         skpair->sk_shutdown = SHUTDOWN_MASK;
375                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
376                                 skpair->sk_err = ECONNRESET;
377                         unix_state_wunlock(skpair);
378                         skpair->sk_state_change(skpair);
379                         read_lock(&skpair->sk_callback_lock);
380                         sk_wake_async(skpair,1,POLL_HUP);
381                         read_unlock(&skpair->sk_callback_lock);
382                 }
383                 sock_put(skpair); /* It may now die */
384                 unix_peer(sk) = NULL;
385         }
386
387         /* Try to flush out this socket. Throw out buffers at least */
388
389         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
390                 if (state==TCP_LISTEN)
391                         unix_release_sock(skb->sk, 1);
392                 /* passed fds are erased in the kfree_skb hook        */
393                 kfree_skb(skb);
394         }
395
396         if (dentry) {
397                 dput(dentry);
398                 mntput(mnt);
399         }
400
401         sock_put(sk);
402
403         /* ---- Socket is dead now and most probably destroyed ---- */
404
405         /*
406          * Fixme: BSD difference: In BSD all sockets connected to use get
407          *        ECONNRESET and we die on the spot. In Linux we behave
408          *        like files and pipes do and wait for the last
409          *        dereference.
410          *
411          * Can't we simply set sock->err?
412          *
413          *        What the above comment does talk about? --ANK(980817)
414          */
415
416         if (atomic_read(&unix_tot_inflight))
417                 unix_gc();              /* Garbage collect fds */       
418
419         return 0;
420 }
421
422 static int unix_listen(struct socket *sock, int backlog)
423 {
424         int err;
425         struct sock *sk = sock->sk;
426         struct unix_sock *u = unix_sk(sk);
427
428         err = -EOPNOTSUPP;
429         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
430                 goto out;                       /* Only stream/seqpacket sockets accept */
431         err = -EINVAL;
432         if (!u->addr)
433                 goto out;                       /* No listens on an unbound socket */
434         unix_state_wlock(sk);
435         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
436                 goto out_unlock;
437         if (backlog > sk->sk_max_ack_backlog)
438                 wake_up_interruptible_all(&u->peer_wait);
439         sk->sk_max_ack_backlog  = backlog;
440         sk->sk_state            = TCP_LISTEN;
441         /* set credentials so connect can copy them */
442         sk->sk_peercred.pid     = current->tgid;
443         sk->sk_peercred.uid     = current->euid;
444         sk->sk_peercred.gid     = current->egid;
445         err = 0;
446
447 out_unlock:
448         unix_state_wunlock(sk);
449 out:
450         return err;
451 }
452
/*
 * Forward declarations of the handlers referenced by the proto_ops tables
 * below; their definitions come later in the file.
 */
453 static int unix_release(struct socket *);
454 static int unix_bind(struct socket *, struct sockaddr *, int);
455 static int unix_stream_connect(struct socket *, struct sockaddr *,
456                                int addr_len, int flags);
457 static int unix_socketpair(struct socket *, struct socket *);
458 static int unix_accept(struct socket *, struct socket *, int);
459 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
460 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
461 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
462 static int unix_shutdown(struct socket *, int);
463 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
464                                struct msghdr *, size_t);
465 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
466                                struct msghdr *, size_t, int);
467 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
468                               struct msghdr *, size_t);
469 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
470                               struct msghdr *, size_t, int);
471 static int unix_dgram_connect(struct socket *, struct sockaddr *,
472                               int, int);
473 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
474                                   struct msghdr *, size_t);
475
475
/*
 * Operations table that unix_create() installs on SOCK_STREAM sockets.
 * Socket options, mmap and sendpage use the generic sock_no_* stubs.
 */
476 static struct proto_ops unix_stream_ops = {
477         .family =       PF_UNIX,
478         .owner =        THIS_MODULE,
479         .release =      unix_release,
480         .bind =         unix_bind,
481         .connect =      unix_stream_connect,
482         .socketpair =   unix_socketpair,
483         .accept =       unix_accept,
484         .getname =      unix_getname,
485         .poll =         unix_poll,
486         .ioctl =        unix_ioctl,
487         .listen =       unix_listen,
488         .shutdown =     unix_shutdown,
489         .setsockopt =   sock_no_setsockopt,
490         .getsockopt =   sock_no_getsockopt,
491         .sendmsg =      unix_stream_sendmsg,
492         .recvmsg =      unix_stream_recvmsg,
493         .mmap =         sock_no_mmap,
494         .sendpage =     sock_no_sendpage,
495 };
496
/*
 * Operations table that unix_create() installs on SOCK_DGRAM sockets
 * (SOCK_RAW requests are converted to SOCK_DGRAM there and land here too).
 * accept and listen use the generic sock_no_* stubs; poll is the generic
 * datagram_poll.
 */
497 static struct proto_ops unix_dgram_ops = {
498         .family =       PF_UNIX,
499         .owner =        THIS_MODULE,
500         .release =      unix_release,
501         .bind =         unix_bind,
502         .connect =      unix_dgram_connect,
503         .socketpair =   unix_socketpair,
504         .accept =       sock_no_accept,
505         .getname =      unix_getname,
506         .poll =         datagram_poll,
507         .ioctl =        unix_ioctl,
508         .listen =       sock_no_listen,
509         .shutdown =     unix_shutdown,
510         .setsockopt =   sock_no_setsockopt,
511         .getsockopt =   sock_no_getsockopt,
512         .sendmsg =      unix_dgram_sendmsg,
513         .recvmsg =      unix_dgram_recvmsg,
514         .mmap =         sock_no_mmap,
515         .sendpage =     sock_no_sendpage,
516 };
517
/*
 * Operations table that unix_create() installs on SOCK_SEQPACKET sockets.
 * Connection handling (connect/accept/listen) is shared with the stream
 * table, while poll and recvmsg are the datagram ones; sendmsg has its
 * own handler.
 */
518 static struct proto_ops unix_seqpacket_ops = {
519         .family =       PF_UNIX,
520         .owner =        THIS_MODULE,
521         .release =      unix_release,
522         .bind =         unix_bind,
523         .connect =      unix_stream_connect,
524         .socketpair =   unix_socketpair,
525         .accept =       unix_accept,
526         .getname =      unix_getname,
527         .poll =         datagram_poll,
528         .ioctl =        unix_ioctl,
529         .listen =       unix_listen,
530         .shutdown =     unix_shutdown,
531         .setsockopt =   sock_no_setsockopt,
532         .getsockopt =   sock_no_getsockopt,
533         .sendmsg =      unix_seqpacket_sendmsg,
534         .recvmsg =      unix_dgram_recvmsg,
535         .mmap =         sock_no_mmap,
536         .sendpage =     sock_no_sendpage,
537 };
538
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1(); obj_size
 * makes each allocated sock as large as a struct unix_sock.
 */
539 static struct proto unix_proto = {
540         .name     = "UNIX",
541         .owner    = THIS_MODULE,
542         .obj_size = sizeof(struct unix_sock),
543 };
544
545 static struct sock * unix_create1(struct socket *sock)
546 {
547         struct sock *sk = NULL;
548         struct unix_sock *u;
549
550         if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
551                 goto out;
552
553         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
554         if (!sk)
555                 goto out;
556
557         atomic_inc(&unix_nr_socks);
558
559         sock_init_data(sock,sk);
560
561         sk->sk_write_space      = unix_write_space;
562         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
563         sk->sk_destruct         = unix_sock_destructor;
564         u         = unix_sk(sk);
565         u->dentry = NULL;
566         u->mnt    = NULL;
567         rwlock_init(&u->lock);
568         atomic_set(&u->inflight, sock ? 0 : -1);
569         init_MUTEX(&u->readsem); /* single task reading lock */
570         init_waitqueue_head(&u->peer_wait);
571         unix_insert_socket(unix_sockets_unbound, sk);
572 out:
573         return sk;
574 }
575
576 static int unix_create(struct socket *sock, int protocol)
577 {
578         if (protocol && protocol != PF_UNIX)
579                 return -EPROTONOSUPPORT;
580
581         sock->state = SS_UNCONNECTED;
582
583         switch (sock->type) {
584         case SOCK_STREAM:
585                 sock->ops = &unix_stream_ops;
586                 break;
587                 /*
588                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
589                  *      nothing uses it.
590                  */
591         case SOCK_RAW:
592                 sock->type=SOCK_DGRAM;
593         case SOCK_DGRAM:
594                 sock->ops = &unix_dgram_ops;
595                 break;
596         case SOCK_SEQPACKET:
597                 sock->ops = &unix_seqpacket_ops;
598                 break;
599         default:
600                 return -ESOCKTNOSUPPORT;
601         }
602
603         return unix_create1(sock) ? 0 : -ENOMEM;
604 }
605
606 static int unix_release(struct socket *sock)
607 {
608         struct sock *sk = sock->sk;
609
610         if (!sk)
611                 return 0;
612
613         sock->sk = NULL;
614
615         return unix_release_sock (sk, 0);
616 }
617
618 static int unix_autobind(struct socket *sock)
619 {
620         struct sock *sk = sock->sk;
621         struct unix_sock *u = unix_sk(sk);
622         static u32 ordernum = 1;
623         struct unix_address * addr;
624         int err;
625
626         down(&u->readsem);
627
628         err = 0;
629         if (u->addr)
630                 goto out;
631
632         err = -ENOMEM;
633         addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
634         if (!addr)
635                 goto out;
636
637         memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
638         addr->name->sun_family = AF_UNIX;
639         atomic_set(&addr->refcnt, 1);
640
641 retry:
642         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
643         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
644
645         write_lock(&unix_table_lock);
646         ordernum = (ordernum+1)&0xFFFFF;
647
648         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
649                                       addr->hash)) {
650                 write_unlock(&unix_table_lock);
651                 /* Sanity yield. It is unusual case, but yet... */
652                 if (!(ordernum&0xFF))
653                         yield();
654                 goto retry;
655         }
656         addr->hash ^= sk->sk_type;
657
658         __unix_remove_socket(sk);
659         u->addr = addr;
660         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
661         write_unlock(&unix_table_lock);
662         err = 0;
663
664 out:    up(&u->readsem);
665         return err;
666 }
667
/*
 * Resolve the destination address @sunname to a socket of type @type.
 *
 * Filesystem names are looked up through the VFS (write permission on
 * the inode is required) and matched by inode; abstract names are looked
 * up in the hash table using @hash.  The access time of the bound dentry
 * is touched on a successful match.
 *
 * Returns the socket with a reference held.  On failure returns NULL and
 * stores the error in *error: the lookup/permission error,
 * -ECONNREFUSED when nothing is bound there, or -EPROTOTYPE when a
 * filesystem socket exists but has a different type.  *error is not
 * written on success.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *found;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		struct dentry *bound;

		/* Abstract name space. */
		found = unix_find_socket_byname(sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}

		bound = unix_sk(found)->dentry;
		if (bound)
			touch_atime(unix_sk(found)->mnt, bound);
		return found;
	}

	/* Filesystem name. */
	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;

	err = permission(nd.dentry->d_inode,MAY_WRITE, &nd);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(nd.dentry->d_inode);
	if (!found)
		goto put_fail;

	/* Only a usable match counts as an access. */
	if (found->sk_type == type)
		touch_atime(nd.mnt, nd.dentry);

	path_release(&nd);

	if (found->sk_type != type) {
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}
	return found;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
719
720
721 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
722 {
723         struct sock *sk = sock->sk;
724         struct unix_sock *u = unix_sk(sk);
725         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
726         struct dentry * dentry = NULL;
727         struct nameidata nd;
728         int err;
729         unsigned hash;
730         struct unix_address *addr;
731         struct hlist_head *list;
732
733         err = -EINVAL;
734         if (sunaddr->sun_family != AF_UNIX)
735                 goto out;
736
737         if (addr_len==sizeof(short)) {
738                 err = unix_autobind(sock);
739                 goto out;
740         }
741
742         err = unix_mkname(sunaddr, addr_len, &hash);
743         if (err < 0)
744                 goto out;
745         addr_len = err;
746
747         down(&u->readsem);
748
749         err = -EINVAL;
750         if (u->addr)
751                 goto out_up;
752
753         err = -ENOMEM;
754         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
755         if (!addr)
756                 goto out_up;
757
758         memcpy(addr->name, sunaddr, addr_len);
759         addr->len = addr_len;
760         addr->hash = hash ^ sk->sk_type;
761         atomic_set(&addr->refcnt, 1);
762
763         if (sunaddr->sun_path[0]) {
764                 unsigned int mode;
765                 err = 0;
766                 /*
767                  * Get the parent directory, calculate the hash for last
768                  * component.
769                  */
770                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
771                 if (err)
772                         goto out_mknod_parent;
773
774                 dentry = lookup_create(&nd, 0);
775                 err = PTR_ERR(dentry);
776                 if (IS_ERR(dentry))
777                         goto out_mknod_unlock;
778
779                 /*
780                  * All right, let's create it.
781                  */
782                 mode = S_IFSOCK |
783                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
784                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
785                 if (err)
786                         goto out_mknod_dput;
787                 up(&nd.dentry->d_inode->i_sem);
788                 dput(nd.dentry);
789                 nd.dentry = dentry;
790
791                 addr->hash = UNIX_HASH_SIZE;
792         }
793
794         write_lock(&unix_table_lock);
795
796         if (!sunaddr->sun_path[0]) {
797                 err = -EADDRINUSE;
798                 if (__unix_find_socket_byname(sunaddr, addr_len,
799                                               sk->sk_type, hash)) {
800                         unix_release_addr(addr);
801                         goto out_unlock;
802                 }
803
804                 list = &unix_socket_table[addr->hash];
805         } else {
806                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
807                 u->dentry = nd.dentry;
808                 u->mnt    = nd.mnt;
809         }
810
811         err = 0;
812         __unix_remove_socket(sk);
813         u->addr = addr;
814         __unix_insert_socket(list, sk);
815
816 out_unlock:
817         write_unlock(&unix_table_lock);
818 out_up:
819         up(&u->readsem);
820 out:
821         return err;
822
823 out_mknod_dput:
824         dput(dentry);
825 out_mknod_unlock:
826         up(&nd.dentry->d_inode->i_sem);
827         path_release(&nd);
828 out_mknod_parent:
829         if (err==-EEXIST)
830                 err=-EADDRINUSE;
831         unix_release_addr(addr);
832         goto out_up;
833 }
834
835 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
836                               int alen, int flags)
837 {
838         struct sock *sk = sock->sk;
839         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
840         struct sock *other;
841         unsigned hash;
842         int err;
843
844         if (addr->sa_family != AF_UNSPEC) {
845                 err = unix_mkname(sunaddr, alen, &hash);
846                 if (err < 0)
847                         goto out;
848                 alen = err;
849
850                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
851                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
852                         goto out;
853
854                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
855                 if (!other)
856                         goto out;
857
858                 unix_state_wlock(sk);
859
860                 err = -EPERM;
861                 if (!unix_may_send(sk, other))
862                         goto out_unlock;
863
864                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
865                 if (err)
866                         goto out_unlock;
867
868         } else {
869                 /*
870                  *      1003.1g breaking connected state with AF_UNSPEC
871                  */
872                 other = NULL;
873                 unix_state_wlock(sk);
874         }
875
876         /*
877          * If it was connected, reconnect.
878          */
879         if (unix_peer(sk)) {
880                 struct sock *old_peer = unix_peer(sk);
881                 unix_peer(sk)=other;
882                 unix_state_wunlock(sk);
883
884                 if (other != old_peer)
885                         unix_dgram_disconnected(sk, old_peer);
886                 sock_put(old_peer);
887         } else {
888                 unix_peer(sk)=other;
889                 unix_state_wunlock(sk);
890         }
891         return 0;
892
893 out_unlock:
894         unix_state_wunlock(sk);
895         sock_put(other);
896 out:
897         return err;
898 }
899
900 static long unix_wait_for_peer(struct sock *other, long timeo)
901 {
902         struct unix_sock *u = unix_sk(other);
903         int sched;
904         DEFINE_WAIT(wait);
905
906         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
907
908         sched = !sock_flag(other, SOCK_DEAD) &&
909                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
910                 (skb_queue_len(&other->sk_receive_queue) >
911                  other->sk_max_ack_backlog);
912
913         unix_state_runlock(other);
914
915         if (sched)
916                 timeo = schedule_timeout(timeo);
917
918         finish_wait(&u->peer_wait, &wait);
919         return timeo;
920 }
921
/*
 * unix_stream_connect - connect sock to the listening socket named by uaddr.
 *
 * All allocation happens before any state lock is taken: a new sock
 * (newsk, from unix_create1()) and a small skb obtained with
 * sock_wmalloc(newsk, 1, ...).  On success sk and newsk are made each
 * other's peer, the skb is appended to the listener's receive queue and
 * the listener's sk_data_ready() callback is invoked.
 *
 * Returns 0 on success or a negative errno.  Values set in this function:
 * -ENOMEM (allocation failed), -ECONNREFUSED (target not in TCP_LISTEN),
 * -EAGAIN (listener queue above sk_max_ack_backlog and the send timeout is
 * zero), -EISCONN / -EINVAL (sk itself is not in TCP_CLOSE).  Errors from
 * unix_mkname(), unix_autobind(), unix_find_other(), sock_intr_errno() and
 * security_unix_stream_connect() are passed through unchanged.
 */
922 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
923                                int addr_len, int flags)
924 {
925         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
926         struct sock *sk = sock->sk;
927         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
928         struct sock *newsk = NULL;
929         struct sock *other = NULL;
930         struct sk_buff *skb = NULL;
931         unsigned hash;
932         int st;
933         int err;
934         long timeo;
935
936         err = unix_mkname(sunaddr, addr_len, &hash);
937         if (err < 0)
938                 goto out;
            /* A non-negative result is used as the address length from here on. */
939         addr_len = err;
940
            /* Autobind only when SOCK_PASSCRED is set and sk has no address yet. */
941         if (test_bit(SOCK_PASSCRED, &sock->flags)
942                 && !u->addr && (err = unix_autobind(sock)) != 0)
943                 goto out;
944
            /* A zero timeout is treated below as "do not wait" (-EAGAIN). */
945         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
946
947         /* First of all allocate resources.
948            If we will make it after state is locked,
949            we will have to recheck all again in any case.
950          */
951
952         err = -ENOMEM;
953
954         /* create new sock for complete connection */
955         newsk = unix_create1(NULL);
956         if (newsk == NULL)
957                 goto out;
958
959         /* Allocate skb for sending to listening sock */
960         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
961         if (skb == NULL)
962                 goto out;
963
964 restart:
965         /*  Find listening sock. */
966         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
967         if (!other)
968                 goto out;
969
970         /* Latch state of peer */
971         unix_state_rlock(other);
972
973         /* Apparently VFS overslept socket death. Retry. */
974         if (sock_flag(other, SOCK_DEAD)) {
975                 unix_state_runlock(other);
976                 sock_put(other);
977                 goto restart;
978         }
979
980         err = -ECONNREFUSED;
981         if (other->sk_state != TCP_LISTEN)
982                 goto out_unlock;
983
984         if (skb_queue_len(&other->sk_receive_queue) >
985             other->sk_max_ack_backlog) {
986                 err = -EAGAIN;
987                 if (!timeo)
988                         goto out_unlock;
989
                    /*
                     * unix_wait_for_peer() releases the read lock on other
                     * before it may sleep, so other is unlocked from here on.
                     */
990                 timeo = unix_wait_for_peer(other, timeo);
991
992                 err = sock_intr_errno(timeo);
                    /* Not locked any more: skip out_unlock; out: drops the ref. */
993                 if (signal_pending(current))
994                         goto out;
995                 sock_put(other);
996                 goto restart;
997         }
998
999         /* Latch our state.
1000
1001            It is tricky place. We need to grab write lock and cannot
1002            drop lock on peer. It is dangerous because deadlock is
1003            possible. Connect to self case and simultaneous
1004            attempt to connect are eliminated by checking socket
1005            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1006            check this before attempt to grab lock.
1007
1008            Well, and we have to recheck the state after socket locked.
1009          */
             /* Sampled without sk's lock; compared again under the write lock. */
1010         st = sk->sk_state;
1011
1012         switch (st) {
1013         case TCP_CLOSE:
1014                 /* This is ok... continue with connect */
1015                 break;
1016         case TCP_ESTABLISHED:
1017                 /* Socket is already connected */
1018                 err = -EISCONN;
1019                 goto out_unlock;
1020         default:
1021                 err = -EINVAL;
1022                 goto out_unlock;
1023         }
1024
             /* Taken while the read lock on other is still held. */
1025         unix_state_wlock(sk);
1026
             /* Our state moved since it was sampled: drop both locks and retry. */
1027         if (sk->sk_state != st) {
1028                 unix_state_wunlock(sk);
1029                 unix_state_runlock(other);
1030                 sock_put(other);
1031                 goto restart;
1032         }
1033
1034         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1035         if (err) {
1036                 unix_state_wunlock(sk);
1037                 goto out_unlock;
1038         }
1039
1040         /* The way is open! Quickly set all the necessary fields... */
1041
             /* Presumably the reference backing unix_peer(newsk) below. */
1042         sock_hold(sk);
1043         unix_peer(newsk)        = sk;
1044         newsk->sk_state         = TCP_ESTABLISHED;
1045         newsk->sk_type          = sk->sk_type;
1046         newsk->sk_peercred.pid  = current->tgid;
1047         newsk->sk_peercred.uid  = current->euid;
1048         newsk->sk_peercred.gid  = current->egid;
1049         newu = unix_sk(newsk);
1050         newsk->sk_sleep         = &newu->peer_wait;
1051         otheru = unix_sk(other);
1052
1053         /* copy address information from listening to new sock*/
1054         if (otheru->addr) {
1055                 atomic_inc(&otheru->addr->refcnt);
1056                 newu->addr = otheru->addr;
1057         }
1058         if (otheru->dentry) {
1059                 newu->dentry    = dget(otheru->dentry);
1060                 newu->mnt       = mntget(otheru->mnt);
1061         }
1062
1063         /* Set credentials */
1064         sk->sk_peercred = other->sk_peercred;
1065
             /* Presumably the reference backing unix_peer(sk) below. */
1066         sock_hold(newsk);
1067         unix_peer(sk)   = newsk;
1068         sock->state     = SS_CONNECTED;
1069         sk->sk_state    = TCP_ESTABLISHED;
1070
1071         unix_state_wunlock(sk);
1072
1073         /* take ten and send info to listening sock */
1074         spin_lock(&other->sk_receive_queue.lock);
1075         __skb_queue_tail(&other->sk_receive_queue, skb);
1076         /* Undo artificially decreased inflight after embryo
1077          * is installed to listening socket. */
1078         atomic_inc(&newu->inflight);
1079         spin_unlock(&other->sk_receive_queue.lock);
1080         unix_state_runlock(other);
1081         other->sk_data_ready(other, 0);
1082         sock_put(other);
1083         return 0;
1084
     /* Error exit taken while other is still read-locked. */
1085 out_unlock:
1086         if (other)
1087                 unix_state_runlock(other);
1088
     /* Common error exit: free the skb and newsk if allocated, drop the ref on other. */
1089 out:
1090         if (skb)
1091                 kfree_skb(skb);
1092         if (newsk)
1093                 unix_release_sock(newsk, 0);
1094         if (other)
1095                 sock_put(other);
1096         return err;
1097 }
1098
1099 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1100 {
1101         struct sock *ska=socka->sk, *skb = sockb->sk;
1102
1103         /* Join our sockets back to back */
1104         sock_hold(ska);
1105         sock_hold(skb);
1106         unix_peer(ska)=skb;
1107         unix_peer(skb)=ska;
1108         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1109         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1110         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1111
1112         if (ska->sk_type != SOCK_DGRAM) {
1113                 ska->sk_state = TCP_ESTABLISHED;
1114                 skb->sk_state = TCP_ESTABLISHED;
1115                 socka->state  = SS_CONNECTED;
1116                 sockb->state  = SS_CONNECTED;
1117         }
1118         return 0;
1119 }
1120
1121 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1122 {
1123         struct sock *sk = sock->sk;
1124         struct sock *tsk;
1125         struct sk_buff *skb;
1126         int err;
1127
1128         err = -EOPNOTSUPP;
1129         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1130                 goto out;
1131
1132         err = -EINVAL;
1133         if (sk->sk_state != TCP_LISTEN)
1134                 goto out;
1135
1136         /* If socket state is TCP_LISTEN it cannot change (for now...),
1137          * so that no locks are necessary.
1138          */
1139
1140         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1141         if (!skb) {
1142                 /* This means receive shutdown. */
1143                 if (err == 0)
1144                         err = -EINVAL;
1145                 goto out;
1146         }
1147
1148         tsk = skb->sk;
1149         skb_free_datagram(sk, skb);
1150         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1151
1152         /* attach accepted sock to socket */
1153         unix_state_wlock(tsk);
1154         newsock->state = SS_CONNECTED;
1155         sock_graft(tsk, newsock);
1156         unix_state_wunlock(tsk);
1157         return 0;
1158
1159 out:
1160         return err;
1161 }
1162
1163
1164 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1165 {
1166         struct sock *sk = sock->sk;
1167         struct unix_sock *u;
1168         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1169         int err = 0;
1170
1171         if (peer) {
1172                 sk = unix_peer_get(sk);
1173
1174                 err = -ENOTCONN;
1175                 if (!sk)
1176                         goto out;
1177                 err = 0;
1178         } else {
1179                 sock_hold(sk);
1180         }
1181
1182         u = unix_sk(sk);
1183         unix_state_rlock(sk);
1184         if (!u->addr) {
1185                 sunaddr->sun_family = AF_UNIX;
1186                 sunaddr->sun_path[0] = 0;
1187                 *uaddr_len = sizeof(short);
1188         } else {
1189                 struct unix_address *addr = u->addr;
1190
1191                 *uaddr_len = addr->len;
1192                 memcpy(sunaddr, addr->name, *uaddr_len);
1193         }
1194         unix_state_runlock(sk);
1195         sock_put(sk);
1196 out:
1197         return err;
1198 }
1199
1200 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1201 {
1202         int i;
1203
1204         scm->fp = UNIXCB(skb).fp;
1205         skb->destructor = sock_wfree;
1206         UNIXCB(skb).fp = NULL;
1207
1208         for (i=scm->fp->count-1; i>=0; i--)
1209                 unix_notinflight(scm->fp->fp[i]);
1210 }
1211
1212 static void unix_destruct_fds(struct sk_buff *skb)
1213 {
1214         struct scm_cookie scm;
1215         memset(&scm, 0, sizeof(scm));
1216         unix_detach_fds(&scm, skb);
1217
1218         /* Alas, it calls VFS */
1219         /* So fscking what? fput() had been SMP-safe since the last Summer */
1220         scm_destroy(&scm);
1221         sock_wfree(skb);
1222 }
1223
1224 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1225 {
1226         int i;
1227         for (i=scm->fp->count-1; i>=0; i--)
1228                 unix_inflight(scm->fp->fp[i]);
1229         UNIXCB(skb).fp = scm->fp;
1230         skb->destructor = unix_destruct_fds;
1231         scm->fp = NULL;
1232 }
1233
1234 /*
1235  *      Send AF_UNIX data.
1236  */
1237
/*
 * unix_dgram_sendmsg - queue one datagram of len bytes on the destination sock.
 *
 * The destination is looked up from msg->msg_name when msg->msg_namelen is
 * non-zero; otherwise the connected peer of sk is used.
 *
 * Returns len on success or a negative errno.  Values set in this function:
 * -EOPNOTSUPP (MSG_OOB), -ENOTCONN (no name given and no peer),
 * -EMSGSIZE (len > sk->sk_sndbuf - 32), -ECONNRESET (no name given and the
 * destination sock had to be dropped on restart), -EPERM (unix_may_send()
 * refused), -ECONNREFUSED (the connected peer is SOCK_DEAD), -EPIPE
 * (destination has RCV_SHUTDOWN set), -EAGAIN (destination queue above its
 * backlog and the send timeout is zero).  Errors from the helpers called
 * here are passed through.
 */
1238 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1239                               struct msghdr *msg, size_t len)
1240 {
1241         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1242         struct sock *sk = sock->sk;
1243         struct unix_sock *u = unix_sk(sk);
1244         struct sockaddr_un *sunaddr=msg->msg_name;
1245         struct sock *other = NULL;
1246         int namelen = 0; /* fake GCC */
1247         int err;
1248         unsigned hash;
1249         struct sk_buff *skb;
1250         long timeo;
1251         struct scm_cookie tmp_scm;
1252
             /* Fall back to the on-stack cookie when the iocb carries none. */
1253         if (NULL == siocb->scm)
1254                 siocb->scm = &tmp_scm;
1255         err = scm_send(sock, msg, siocb->scm);
1256         if (err < 0)
1257                 return err;
1258
1259         err = -EOPNOTSUPP;
1260         if (msg->msg_flags&MSG_OOB)
1261                 goto out;
1262
1263         if (msg->msg_namelen) {
1264                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1265                 if (err < 0)
1266                         goto out;
1267                 namelen = err;
1268         } else {
                     /* No name supplied: send to the connected peer, if any. */
1269                 sunaddr = NULL;
1270                 err = -ENOTCONN;
1271                 other = unix_peer_get(sk);
1272                 if (!other)
1273                         goto out;
1274         }
1275
             /* Autobind only when SOCK_PASSCRED is set and sk has no address yet. */
1276         if (test_bit(SOCK_PASSCRED, &sock->flags)
1277                 && !u->addr && (err = unix_autobind(sock)) != 0)
1278                 goto out;
1279
1280         err = -EMSGSIZE;
1281         if (len > sk->sk_sndbuf - 32)
1282                 goto out;
1283
1284         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1285         if (skb==NULL)
1286                 goto out;
1287
1288         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1289         if (siocb->scm->fp)
1290                 unix_attach_fds(siocb->scm, skb);
1291
1292         skb->h.raw = skb->data;
1293         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1294         if (err)
1295                 goto out_free;
1296
             /* A zero timeout is treated below as "do not wait" (-EAGAIN). */
1297         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1298
1299 restart:
             /*
              * No destination sock in hand (first pass of a named send, or it
              * was dropped below): look it up by name.  Without a name there
              * is nothing to look up and the send fails with -ECONNRESET.
              */
1300         if (!other) {
1301                 err = -ECONNRESET;
1302                 if (sunaddr == NULL)
1303                         goto out_free;
1304
1305                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1306                                         hash, &err);
1307                 if (other==NULL)
1308                         goto out_free;
1309         }
1310
1311         unix_state_rlock(other);
1312         err = -EPERM;
1313         if (!unix_may_send(sk, other))
1314                 goto out_unlock;
1315
1316         if (sock_flag(other, SOCK_DEAD)) {
1317                 /*
1318                  *      Check with 1003.1g - what should
1319                  *      datagram error
1320                  */
1321                 unix_state_runlock(other);
1322                 sock_put(other);
1323
                     /*
                      * If the dead sock is still our connected peer, clear the
                      * association, drop the peer reference and fail with
                      * -ECONNREFUSED; otherwise err stays 0 and we restart.
                      */
1324                 err = 0;
1325                 unix_state_wlock(sk);
1326                 if (unix_peer(sk) == other) {
1327                         unix_peer(sk)=NULL;
1328                         unix_state_wunlock(sk);
1329
1330                         unix_dgram_disconnected(sk, other);
1331                         sock_put(other);
1332                         err = -ECONNREFUSED;
1333                 } else {
1334                         unix_state_wunlock(sk);
1335                 }
1336
1337                 other = NULL;
1338                 if (err)
1339                         goto out_free;
1340                 goto restart;
1341         }
1342
1343         err = -EPIPE;
1344         if (other->sk_shutdown & RCV_SHUTDOWN)
1345                 goto out_unlock;
1346
             /* The security hook is consulted for every type except SOCK_SEQPACKET. */
1347         if (sk->sk_type != SOCK_SEQPACKET) {
1348                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1349                 if (err)
1350                         goto out_unlock;
1351         }
1352
             /*
              * Backlog check: applies only when the destination is not
              * connected back to us.
              */
1353         if (unix_peer(other) != sk &&
1354             (skb_queue_len(&other->sk_receive_queue) >
1355              other->sk_max_ack_backlog)) {
1356                 if (!timeo) {
1357                         err = -EAGAIN;
1358                         goto out_unlock;
1359                 }
1360
                     /*
                      * unix_wait_for_peer() releases the read lock on other
                      * before it may sleep, hence out_free (not out_unlock)
                      * below; restart takes the lock again.
                      */
1361                 timeo = unix_wait_for_peer(other, timeo);
1362
1363                 err = sock_intr_errno(timeo);
1364                 if (signal_pending(current))
1365                         goto out_free;
1366
1367                 goto restart;
1368         }
1369
1370         skb_queue_tail(&other->sk_receive_queue, skb);
1371         unix_state_runlock(other);
1372         other->sk_data_ready(other, len);
1373         sock_put(other);
1374         scm_destroy(siocb->scm);
1375         return len;
1376
      /* Error exit taken while other is still read-locked. */
1377 out_unlock:
1378         unix_state_runlock(other);
      /* Error exit once the skb has been allocated. */
1379 out_free:
1380         kfree_skb(skb);
      /* Common error exit: drop the ref on other, if held, and destroy the cookie. */
1381 out:
1382         if (other)
1383                 sock_put(other);
1384         scm_destroy(siocb->scm);
1385         return err;
1386 }
1387
1388                 
1389 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1390                                struct msghdr *msg, size_t len)
1391 {
1392         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1393         struct sock *sk = sock->sk;
1394         struct sock *other = NULL;
1395         struct sockaddr_un *sunaddr=msg->msg_name;
1396         int err,size;
1397         struct sk_buff *skb;
1398         int sent=0;
1399         struct scm_cookie tmp_scm;
1400
1401         if (NULL == siocb->scm)
1402                 siocb->scm = &tmp_scm;
1403         err = scm_send(sock, msg, siocb->scm);
1404         if (err < 0)
1405                 return err;
1406
1407         err = -EOPNOTSUPP;
1408         if (msg->msg_flags&MSG_OOB)
1409                 goto out_err;
1410
1411         if (msg->msg_namelen) {
1412                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1413                 goto out_err;
1414         } else {
1415                 sunaddr = NULL;
1416                 err = -ENOTCONN;
1417                 other = unix_peer_get(sk);
1418                 if (!other)
1419                         goto out_err;
1420         }
1421
1422         if (sk->sk_shutdown & SEND_SHUTDOWN)
1423                 goto pipe_err;
1424
1425         while(sent < len)
1426         {
1427                 /*
1428                  *      Optimisation for the fact that under 0.01% of X messages typically
1429                  *      need breaking up.
1430                  */
1431
1432                 size=len-sent;
1433
1434                 /* Keep two messages in the pipe so it schedules better */
1435                 if (size > sk->sk_sndbuf / 2 - 64)
1436                         size = sk->sk_sndbuf / 2 - 64;
1437
1438                 if (size > SKB_MAX_ALLOC)
1439                         size = SKB_MAX_ALLOC;
1440                         
1441                 /*
1442                  *      Grab a buffer
1443                  */
1444                  
1445                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1446
1447                 if (skb==NULL)
1448                         goto out_err;
1449
1450                 /*
1451                  *      If you pass two values to the sock_alloc_send_skb
1452                  *      it tries to grab the large buffer with GFP_NOFS
1453                  *      (which can fail easily), and if it fails grab the
1454                  *      fallback size buffer which is under a page and will
1455                  *      succeed. [Alan]
1456                  */
1457                 size = min_t(int, size, skb_tailroom(skb));
1458
1459                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1460                 if (siocb->scm->fp)
1461                         unix_attach_fds(siocb->scm, skb);
1462
1463                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1464                         kfree_skb(skb);
1465                         goto out_err;
1466                 }
1467
1468                 unix_state_rlock(other);
1469
1470                 if (sock_flag(other, SOCK_DEAD) ||
1471                     (other->sk_shutdown & RCV_SHUTDOWN))
1472                         goto pipe_err_free;
1473
1474                 skb_queue_tail(&other->sk_receive_queue, skb);
1475                 unix_state_runlock(other);
1476                 other->sk_data_ready(other, size);
1477                 sent+=size;
1478         }
1479         sock_put(other);
1480
1481         scm_destroy(siocb->scm);
1482         siocb->scm = NULL;
1483
1484         return sent;
1485
1486 pipe_err_free:
1487         unix_state_runlock(other);
1488         kfree_skb(skb);
1489 pipe_err:
1490         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1491                 send_sig(SIGPIPE,current,0);
1492         err = -EPIPE;
1493 out_err:
1494         if (other)
1495                 sock_put(other);
1496         scm_destroy(siocb->scm);
1497         siocb->scm = NULL;
1498         return sent ? : err;
1499 }
1500
1501 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1502                                   struct msghdr *msg, size_t len)
1503 {
1504         int err;
1505         struct sock *sk = sock->sk;
1506         
1507         err = sock_error(sk);
1508         if (err)
1509                 return err;
1510
1511         if (sk->sk_state != TCP_ESTABLISHED)
1512                 return -ENOTCONN;
1513
1514         if (msg->msg_namelen)
1515                 msg->msg_namelen = 0;
1516
1517         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1518 }
1519                                                                                             
1520 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1521 {
1522         struct unix_sock *u = unix_sk(sk);
1523
1524         msg->msg_namelen = 0;
1525         if (u->addr) {
1526                 msg->msg_namelen = u->addr->len;
1527                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1528         }
1529 }
1530
1531 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1532                               struct msghdr *msg, size_t size,
1533                               int flags)
1534 {
1535         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1536         struct scm_cookie tmp_scm;
1537         struct sock *sk = sock->sk;
1538         struct unix_sock *u = unix_sk(sk);
1539         int noblock = flags & MSG_DONTWAIT;
1540         struct sk_buff *skb;
1541         int err;
1542
1543         err = -EOPNOTSUPP;
1544         if (flags&MSG_OOB)
1545                 goto out;
1546
1547         msg->msg_namelen = 0;
1548
1549         down(&u->readsem);
1550
1551         skb = skb_recv_datagram(sk, flags, noblock, &err);
1552         if (!skb)
1553                 goto out_unlock;
1554
1555         wake_up_interruptible(&u->peer_wait);
1556
1557         if (msg->msg_name)
1558                 unix_copy_addr(msg, skb->sk);
1559
1560         if (size > skb->len)
1561                 size = skb->len;
1562         else if (size < skb->len)
1563                 msg->msg_flags |= MSG_TRUNC;
1564
1565         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1566         if (err)
1567                 goto out_free;
1568
1569         if (!siocb->scm) {
1570                 siocb->scm = &tmp_scm;
1571                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1572         }
1573         siocb->scm->creds = *UNIXCREDS(skb);
1574
1575         if (!(flags & MSG_PEEK))
1576         {
1577                 if (UNIXCB(skb).fp)
1578                         unix_detach_fds(siocb->scm, skb);
1579         }
1580         else 
1581         {
1582                 /* It is questionable: on PEEK we could:
1583                    - do not return fds - good, but too simple 8)
1584                    - return fds, and do not return them on read (old strategy,
1585                      apparently wrong)
1586                    - clone fds (I chose it for now, it is the most universal
1587                      solution)
1588                 
1589                    POSIX 1003.1g does not actually define this clearly
1590                    at all. POSIX 1003.1g doesn't define a lot of things
1591                    clearly however!                  
1592                    
1593                 */
1594                 if (UNIXCB(skb).fp)
1595                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1596         }
1597         err = size;
1598
1599         scm_recv(sock, msg, siocb->scm, flags);
1600
1601 out_free:
1602         skb_free_datagram(sk,skb);
1603 out_unlock:
1604         up(&u->readsem);
1605 out:
1606         return err;
1607 }
1608
1609 /*
1610  *      Sleep until data has arrived, but check for races.
1611  */
1612  
1613 static long unix_stream_data_wait(struct sock * sk, long timeo)
1614 {
1615         DEFINE_WAIT(wait);
1616
1617         unix_state_rlock(sk);
1618
1619         for (;;) {
1620                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1621
1622                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1623                     sk->sk_err ||
1624                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1625                     signal_pending(current) ||
1626                     !timeo)
1627                         break;
1628
1629                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1630                 unix_state_runlock(sk);
1631                 timeo = schedule_timeout(timeo);
1632                 unix_state_rlock(sk);
1633                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1634         }
1635
1636         finish_wait(sk->sk_sleep, &wait);
1637         unix_state_runlock(sk);
1638         return timeo;
1639 }
1640
1641
1642
/*
 * unix_stream_recvmsg - copy up to size bytes from sk's receive queue into msg.
 *
 * skbs are dequeued one at a time under u->readsem.  Data from consecutive
 * skbs is merged into one read only while their credentials compare equal,
 * and the loop stops after an skb that carried passed files.  With
 * MSG_PEEK the skb is put back after one chunk has been copied.
 *
 * Returns the number of bytes copied when that is non-zero; otherwise err,
 * which is -EINVAL (sk not TCP_ESTABLISHED), -EOPNOTSUPP (MSG_OOB),
 * -EAGAIN (queue empty and the receive timeout is zero), the value of
 * sock_error() / sock_intr_errno(), or 0 when the queue is empty and
 * RCV_SHUTDOWN is set.  -EFAULT is returned when the very first copy to
 * user space fails.
 */
1643 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1644                                struct msghdr *msg, size_t size,
1645                                int flags)
1646 {
1647         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1648         struct scm_cookie tmp_scm;
1649         struct sock *sk = sock->sk;
1650         struct unix_sock *u = unix_sk(sk);
1651         struct sockaddr_un *sunaddr=msg->msg_name;
1652         int copied = 0;
1653         int check_creds = 0;
1654         int target;
1655         int err = 0;
1656         long timeo;
1657
1658         err = -EINVAL;
1659         if (sk->sk_state != TCP_ESTABLISHED)
1660                 goto out;
1661
1662         err = -EOPNOTSUPP;
1663         if (flags&MSG_OOB)
1664                 goto out;
1665
             /*
              * target: once this many bytes are copied an empty queue ends the
              * read instead of blocking.  timeo: zero means "do not wait".
              */
1666         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1667         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1668
1669         msg->msg_namelen = 0;
1670
1671         /* Lock the socket to prevent queue disordering
1672          * while sleeps in memcpy_tomsg
1673          */
1674
             /* Fall back to a zeroed on-stack cookie when the iocb carries none. */
1675         if (!siocb->scm) {
1676                 siocb->scm = &tmp_scm;
1677                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1678         }
1679
1680         down(&u->readsem);
1681
1682         do
1683         {
1684                 int chunk;
1685                 struct sk_buff *skb;
1686
1687                 skb = skb_dequeue(&sk->sk_receive_queue);
1688                 if (skb==NULL)
1689                 {
1690                         if (copied >= target)
1691                                 break;
1692
1693                         /*
1694                          *      POSIX 1003.1g mandates this order.
1695                          */
1696                          
1697                         if ((err = sock_error(sk)) != 0)
1698                                 break;
1699                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1700                                 break;
1701                         err = -EAGAIN;
1702                         if (!timeo)
1703                                 break;
                             /* readsem is not held while waiting for data. */
1704                         up(&u->readsem);
1705
1706                         timeo = unix_stream_data_wait(sk, timeo);
1707
                             /*
                              * Interrupted: readsem is not held here, so jump
                              * straight to out (this also skips scm_recv()).
                              */
1708                         if (signal_pending(current)) {
1709                                 err = sock_intr_errno(timeo);
1710                                 goto out;
1711                         }
1712                         down(&u->readsem);
1713                         continue;
1714                 }
1715
1716                 if (check_creds) {
1717                         /* Never glue messages from different writers */
1718                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1719                                 skb_queue_head(&sk->sk_receive_queue, skb);
1720                                 break;
1721                         }
1722                 } else {
1723                         /* Copy credentials */
1724                         siocb->scm->creds = *UNIXCREDS(skb);
1725                         check_creds = 1;
1726                 }
1727
1728                 /* Copy address just once */
1729                 if (sunaddr)
1730                 {
1731                         unix_copy_addr(msg, skb->sk);
1732                         sunaddr = NULL;
1733                 }
1734
1735                 chunk = min_t(unsigned int, skb->len, size);
                     /*
                      * Copy failed: requeue the skb untouched; report -EFAULT
                      * only if nothing has been copied so far.
                      */
1736                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1737                         skb_queue_head(&sk->sk_receive_queue, skb);
1738                         if (copied == 0)
1739                                 copied = -EFAULT;
1740                         break;
1741                 }
1742                 copied += chunk;
1743                 size -= chunk;
1744
1745                 /* Mark read part of skb as used */
1746                 if (!(flags & MSG_PEEK))
1747                 {
1748                         skb_pull(skb, chunk);
1749
1750                         if (UNIXCB(skb).fp)
1751                                 unix_detach_fds(siocb->scm, skb);
1752
1753                         /* put the skb back if we didn't use it up.. */
1754                         if (skb->len)
1755                         {
1756                                 skb_queue_head(&sk->sk_receive_queue, skb);
1757                                 break;
1758                         }
1759
1760                         kfree_skb(skb);
1761
                             /* Stop once the cookie holds passed files. */
1762                         if (siocb->scm->fp)
1763                                 break;
1764                 }
1765                 else
1766                 {
1767                         /* It is questionable, see note in unix_dgram_recvmsg.
1768                          */
1769                         if (UNIXCB(skb).fp)
1770                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1771
1772                         /* put message back and return */
1773                         skb_queue_head(&sk->sk_receive_queue, skb);
1774                         break;
1775                 }
1776         } while (size);
1777
1778         up(&u->readsem);
1779         scm_recv(sock, msg, siocb->scm, flags);
1780 out:
             /* GNU "?:" shorthand: copied if it is non-zero, otherwise err. */
1781         return copied ? : err;
1782 }
1783
1784 static int unix_shutdown(struct socket *sock, int mode)
1785 {
1786         struct sock *sk = sock->sk;
1787         struct sock *other;
1788
1789         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1790
1791         if (mode) {
1792                 unix_state_wlock(sk);
1793                 sk->sk_shutdown |= mode;
1794                 other=unix_peer(sk);
1795                 if (other)
1796                         sock_hold(other);
1797                 unix_state_wunlock(sk);
1798                 sk->sk_state_change(sk);
1799
1800                 if (other &&
1801                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1802
1803                         int peer_mode = 0;
1804
1805                         if (mode&RCV_SHUTDOWN)
1806                                 peer_mode |= SEND_SHUTDOWN;
1807                         if (mode&SEND_SHUTDOWN)
1808                                 peer_mode |= RCV_SHUTDOWN;
1809                         unix_state_wlock(other);
1810                         other->sk_shutdown |= peer_mode;
1811                         unix_state_wunlock(other);
1812                         other->sk_state_change(other);
1813                         read_lock(&other->sk_callback_lock);
1814                         if (peer_mode == SHUTDOWN_MASK)
1815                                 sk_wake_async(other,1,POLL_HUP);
1816                         else if (peer_mode & RCV_SHUTDOWN)
1817                                 sk_wake_async(other,1,POLL_IN);
1818                         read_unlock(&other->sk_callback_lock);
1819                 }
1820                 if (other)
1821                         sock_put(other);
1822         }
1823         return 0;
1824 }
1825
1826 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1827 {
1828         struct sock *sk = sock->sk;
1829         long amount=0;
1830         int err;
1831
1832         switch(cmd)
1833         {
1834                 case SIOCOUTQ:
1835                         amount = atomic_read(&sk->sk_wmem_alloc);
1836                         err = put_user(amount, (int __user *)arg);
1837                         break;
1838                 case SIOCINQ:
1839                 {
1840                         struct sk_buff *skb;
1841
1842                         if (sk->sk_state == TCP_LISTEN) {
1843                                 err = -EINVAL;
1844                                 break;
1845                         }
1846
1847                         spin_lock(&sk->sk_receive_queue.lock);
1848                         if (sk->sk_type == SOCK_STREAM ||
1849                             sk->sk_type == SOCK_SEQPACKET) {
1850                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1851                                         amount += skb->len;
1852                         } else {
1853                                 skb = skb_peek(&sk->sk_receive_queue);
1854                                 if (skb)
1855                                         amount=skb->len;
1856                         }
1857                         spin_unlock(&sk->sk_receive_queue.lock);
1858                         err = put_user(amount, (int __user *)arg);
1859                         break;
1860                 }
1861
1862                 default:
1863                         err = dev_ioctl(cmd, (void __user *)arg);
1864                         break;
1865         }
1866         return err;
1867 }
1868
1869 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1870 {
1871         struct sock *sk = sock->sk;
1872         unsigned int mask;
1873
1874         poll_wait(file, sk->sk_sleep, wait);
1875         mask = 0;
1876
1877         /* exceptional events? */
1878         if (sk->sk_err)
1879                 mask |= POLLERR;
1880         if (sk->sk_shutdown == SHUTDOWN_MASK)
1881                 mask |= POLLHUP;
1882
1883         /* readable? */
1884         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1885             (sk->sk_shutdown & RCV_SHUTDOWN))
1886                 mask |= POLLIN | POLLRDNORM;
1887
1888         /* Connection-based need to check for termination and startup */
1889         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1890                 mask |= POLLHUP;
1891
1892         /*
1893          * we set writable also when the other side has shut down the
1894          * connection. This prevents stuck sockets.
1895          */
1896         if (unix_writable(sk))
1897                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1898
1899         return mask;
1900 }
1901
1902
1903 #ifdef CONFIG_PROC_FS
1904 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1905 {
1906         loff_t off = 0;
1907         struct sock *s;
1908
1909         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1910                 if (off == pos) 
1911                         return s;
1912                 ++off;
1913         }
1914         return NULL;
1915 }
1916
1917
1918 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1919 {
1920         read_lock(&unix_table_lock);
1921         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1922 }
1923
1924 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1925 {
1926         ++*pos;
1927
1928         if (v == (void *)1) 
1929                 return first_unix_socket(seq->private);
1930         return next_unix_socket(seq->private, v);
1931 }
1932
1933 static void unix_seq_stop(struct seq_file *seq, void *v)
1934 {
1935         read_unlock(&unix_table_lock);
1936 }
1937
1938 static int unix_seq_show(struct seq_file *seq, void *v)
1939 {
1940         
1941         if (v == (void *)1)
1942                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
1943                          "Inode Path\n");
1944         else {
1945                 struct sock *s = v;
1946                 struct unix_sock *u = unix_sk(s);
1947                 unix_state_rlock(s);
1948
1949                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1950                         s,
1951                         atomic_read(&s->sk_refcnt),
1952                         0,
1953                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1954                         s->sk_type,
1955                         s->sk_socket ?
1956                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1957                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1958                         sock_i_ino(s));
1959
1960                 if (u->addr) {
1961                         int i, len;
1962                         seq_putc(seq, ' ');
1963
1964                         i = 0;
1965                         len = u->addr->len - sizeof(short);
1966                         if (!UNIX_ABSTRACT(s))
1967                                 len--;
1968                         else {
1969                                 seq_putc(seq, '@');
1970                                 i++;
1971                         }
1972                         for ( ; i < len; i++)
1973                                 seq_putc(seq, u->addr->name->sun_path[i]);
1974                 }
1975                 unix_state_runlock(s);
1976                 seq_putc(seq, '\n');
1977         }
1978
1979         return 0;
1980 }
1981
1982 static struct seq_operations unix_seq_ops = {
1983         .start  = unix_seq_start,
1984         .next   = unix_seq_next,
1985         .stop   = unix_seq_stop,
1986         .show   = unix_seq_show,
1987 };
1988
1989
1990 static int unix_seq_open(struct inode *inode, struct file *file)
1991 {
1992         struct seq_file *seq;
1993         int rc = -ENOMEM;
1994         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
1995
1996         if (!iter)
1997                 goto out;
1998
1999         rc = seq_open(file, &unix_seq_ops);
2000         if (rc)
2001                 goto out_kfree;
2002
2003         seq          = file->private_data;
2004         seq->private = iter;
2005         *iter = 0;
2006 out:
2007         return rc;
2008 out_kfree:
2009         kfree(iter);
2010         goto out;
2011 }
2012
2013 static struct file_operations unix_seq_fops = {
2014         .owner          = THIS_MODULE,
2015         .open           = unix_seq_open,
2016         .read           = seq_read,
2017         .llseek         = seq_lseek,
2018         .release        = seq_release_private,
2019 };
2020
2021 #endif
2022
2023 static struct net_proto_family unix_family_ops = {
2024         .family = PF_UNIX,
2025         .create = unix_create,
2026         .owner  = THIS_MODULE,
2027 };
2028
2029 static int __init af_unix_init(void)
2030 {
2031         int rc = -1;
2032         struct sk_buff *dummy_skb;
2033
2034         if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2035                 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2036                 goto out;
2037         }
2038
2039         rc = proto_register(&unix_proto, 1);
2040         if (rc != 0) {
2041                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2042                        __FUNCTION__);
2043                 goto out;
2044         }
2045
2046         sock_register(&unix_family_ops);
2047 #ifdef CONFIG_PROC_FS
2048         proc_net_fops_create("unix", 0, &unix_seq_fops);
2049 #endif
2050         unix_sysctl_register();
2051 out:
2052         return rc;
2053 }
2054
2055 static void __exit af_unix_exit(void)
2056 {
2057         sock_unregister(PF_UNIX);
2058         unix_sysctl_unregister();
2059         proc_net_remove("unix");
2060         proto_unregister(&unix_proto);
2061 }
2062
2063 module_init(af_unix_init);
2064 module_exit(af_unix_exit);
2065
2066 MODULE_LICENSE("GPL");
2067 MODULE_ALIAS_NETPROTO(PF_UNIX);