[AF_UNIX]: Fix datagram connect race causing an OOPS.
[sfrench/cifs-2.6.git] / net / unix / af_unix.c
1 /*
2  * NET4:        Implementation of BSD Unix domain sockets.
3  *
4  * Authors:     Alan Cox, <alan.cox@linux.org>
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
12  *
13  * Fixes:
14  *              Linus Torvalds  :       Assorted bug cures.
15  *              Niibe Yutaka    :       async I/O support.
16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
17  *              Alan Cox        :       Limit size of allocated blocks.
18  *              Alan Cox        :       Fixed the stupid socketpair bug.
19  *              Alan Cox        :       BSD compatibility fine tuning.
20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
21  *              Alan Cox        :       Sorted out a proper draft version of
22  *                                      file descriptor passing hacked up from
23  *                                      Mike Shaver's work.
24  *              Marty Leisner   :       Fixes to fd passing
25  *              Nick Nevin      :       recvmsg bugfix.
26  *              Alan Cox        :       Started proper garbage collector
27  *              Heiko EiBfeldt  :       Missing verify_area check
28  *              Alan Cox        :       Started POSIXisms
29  *              Andreas Schwab  :       Replace inode by dentry for proper
30  *                                      reference counting
31  *              Kirk Petersen   :       Made this a module
32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
33  *                                      Lots of bug fixes.
34  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
35  *                                      by above two patches.
36  *           Andrea Arcangeli   :       If possible we block in connect(2)
37  *                                      if the max backlog of the listen socket
38  *                                      has been reached. This won't break
39  *                                      old apps and it will avoid a huge number
40  *                                      of socks hashed (this is for unix_gc()
41  *                                      performance reasons).
42  *                                      Security fix that limits the max
43  *                                      number of socks to 2*max_files and
44  *                                      the number of skb queueable in the
45  *                                      dgram receiver.
46  *              Artur Skawina   :       Hash function optimizations
47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
48  *            Malcolm Beattie   :       Set peercred for socketpair
49  *           Michal Ostrowski   :       Module initialization cleanup.
50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
51  *                                      the core infrastructure is doing that
52  *                                      for all net proto families now (2.5.69+)
53  *
54  *
55  * Known differences from reference BSD that was tested:
56  *
57  *      [TO FIX]
58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
59  *              other the moment one end closes.
60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
62  *      [NOT TO FIX]
63  *      accept() returns a path name even if the connecting socket has closed
64  *              in the meantime (BSD loses the path and gives up).
65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
68  *      BSD af_unix apparently has connect forgetting to block properly.
69  *              (need to check this with the POSIX spec in detail)
70  *
71  * Differences from 2.0.0-11-... (ANK)
72  *      Bug fixes and improvements.
73  *              - client shutdown killed server socket.
74  *              - removed all useless cli/sti pairs.
75  *
76  *      Semantic changes/extensions.
77  *              - generic control message passing.
78  *              - SCM_CREDENTIALS control message.
79  *              - "Abstract" (not FS based) socket bindings.
80  *                Abstract names are sequences of bytes (not zero terminated)
81  *                started by 0, so that this name space does not intersect
82  *                with BSD names.
83  */
84
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
95 #include <linux/un.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/sock.h>
107 #include <net/tcp_states.h>
108 #include <net/af_unix.h>
109 #include <linux/proc_fs.h>
110 #include <linux/seq_file.h>
111 #include <net/scm.h>
112 #include <linux/init.h>
113 #include <linux/poll.h>
114 #include <linux/rtnetlink.h>
115 #include <linux/mount.h>
116 #include <net/checksum.h>
117 #include <linux/security.h>
118
119 int sysctl_unix_max_dgram_qlen __read_mostly = 10;
120
121 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
122 DEFINE_SPINLOCK(unix_table_lock);
123 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
124
125 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
126
127 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
128
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the security id held in the scm cookie into the skb (UNIXSID slot). */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Load the security id stored in the skb (UNIXSID slot) into the scm cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Without CONFIG_SECURITY_NETWORK both helpers are no-ops. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
146
147 /*
148  *  SMP locking strategy:
149  *    hash table is protected with spinlock unix_table_lock
150  *    each socket state is protected by separate spin lock.
151  */
152
153 static inline unsigned unix_hash_fold(__wsum n)
154 {
155         unsigned hash = (__force unsigned)n;
156         hash ^= hash>>16;
157         hash ^= hash>>8;
158         return hash&(UNIX_HASH_SIZE-1);
159 }
160
/* Peer pointer of a unix socket; expands to an lvalue, so it can be assigned. */
#define unix_peer(sk) (unix_sk(sk)->peer)
162
/* Non-zero when @osk currently has @sk recorded as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	struct sock *peer_of_other = unix_peer(osk);

	return peer_of_other == sk;
}
167
168 static inline int unix_may_send(struct sock *sk, struct sock *osk)
169 {
170         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
171 }
172
173 static struct sock *unix_peer_get(struct sock *s)
174 {
175         struct sock *peer;
176
177         unix_state_lock(s);
178         peer = unix_peer(s);
179         if (peer)
180                 sock_hold(peer);
181         unix_state_unlock(s);
182         return peer;
183 }
184
185 static inline void unix_release_addr(struct unix_address *addr)
186 {
187         if (atomic_dec_and_test(&addr->refcnt))
188                 kfree(addr);
189 }
190
191 /*
192  *      Check unix socket name:
193  *              - should be not zero length.
194  *              - if started by not zero, should be NULL terminated (FS object)
195  *              - if started by zero, it is abstract name.
196  */
197
198 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
199 {
200         if (len <= sizeof(short) || len > sizeof(*sunaddr))
201                 return -EINVAL;
202         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
203                 return -EINVAL;
204         if (sunaddr->sun_path[0]) {
205                 /*
206                  * This may look like an off by one error but it is a bit more
207                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
208                  * sun_path[108] doesnt as such exist.  However in kernel space
209                  * we are guaranteed that it is a valid memory location in our
210                  * kernel address buffer.
211                  */
212                 ((char *)sunaddr)[len]=0;
213                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
214                 return len;
215         }
216
217         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
218         return len;
219 }
220
/* Unhash @sk; the callers in this file hold unix_table_lock around it. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
225
/*
 * Hash @sk onto @list.  The socket is expected to be unhashed at this
 * point; the callers in this file hold unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));

	sk_add_node(sk, list);
}
231
232 static inline void unix_remove_socket(struct sock *sk)
233 {
234         spin_lock(&unix_table_lock);
235         __unix_remove_socket(sk);
236         spin_unlock(&unix_table_lock);
237 }
238
239 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
240 {
241         spin_lock(&unix_table_lock);
242         __unix_insert_socket(list, sk);
243         spin_unlock(&unix_table_lock);
244 }
245
246 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
247                                               int len, int type, unsigned hash)
248 {
249         struct sock *s;
250         struct hlist_node *node;
251
252         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
253                 struct unix_sock *u = unix_sk(s);
254
255                 if (u->addr->len == len &&
256                     !memcmp(u->addr->name, sunname, len))
257                         goto found;
258         }
259         s = NULL;
260 found:
261         return s;
262 }
263
264 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
265                                                    int len, int type,
266                                                    unsigned hash)
267 {
268         struct sock *s;
269
270         spin_lock(&unix_table_lock);
271         s = __unix_find_socket_byname(sunname, len, type, hash);
272         if (s)
273                 sock_hold(s);
274         spin_unlock(&unix_table_lock);
275         return s;
276 }
277
278 static struct sock *unix_find_socket_byinode(struct inode *i)
279 {
280         struct sock *s;
281         struct hlist_node *node;
282
283         spin_lock(&unix_table_lock);
284         sk_for_each(s, node,
285                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
286                 struct dentry *dentry = unix_sk(s)->dentry;
287
288                 if(dentry && dentry->d_inode == i)
289                 {
290                         sock_hold(s);
291                         goto found;
292                 }
293         }
294         s = NULL;
295 found:
296         spin_unlock(&unix_table_lock);
297         return s;
298 }
299
300 static inline int unix_writable(struct sock *sk)
301 {
302         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
303 }
304
305 static void unix_write_space(struct sock *sk)
306 {
307         read_lock(&sk->sk_callback_lock);
308         if (unix_writable(sk)) {
309                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
310                         wake_up_interruptible(sk->sk_sleep);
311                 sk_wake_async(sk, 2, POLL_OUT);
312         }
313         read_unlock(&sk->sk_callback_lock);
314 }
315
316 /* When dgram socket disconnects (or changes its peer), we clear its receive
317  * queue of packets arrived from previous peer. First, it allows to do
318  * flow control based only on wmem_alloc; second, sk connected to peer
319  * may receive messages only from that peer. */
320 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
321 {
322         if (!skb_queue_empty(&sk->sk_receive_queue)) {
323                 skb_queue_purge(&sk->sk_receive_queue);
324                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
325
326                 /* If one link of bidirectional dgram pipe is disconnected,
327                  * we signal error. Messages are lost. Do not make this,
328                  * when peer was not connected to us.
329                  */
330                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
331                         other->sk_err = ECONNRESET;
332                         other->sk_error_report(other);
333                 }
334         }
335 }
336
337 static void unix_sock_destructor(struct sock *sk)
338 {
339         struct unix_sock *u = unix_sk(sk);
340
341         skb_queue_purge(&sk->sk_receive_queue);
342
343         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
344         BUG_TRAP(sk_unhashed(sk));
345         BUG_TRAP(!sk->sk_socket);
346         if (!sock_flag(sk, SOCK_DEAD)) {
347                 printk("Attempt to release alive unix socket: %p\n", sk);
348                 return;
349         }
350
351         if (u->addr)
352                 unix_release_addr(u->addr);
353
354         atomic_dec(&unix_nr_socks);
355 #ifdef UNIX_REFCNT_DEBUG
356         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
357 #endif
358 }
359
360 static int unix_release_sock (struct sock *sk, int embrion)
361 {
362         struct unix_sock *u = unix_sk(sk);
363         struct dentry *dentry;
364         struct vfsmount *mnt;
365         struct sock *skpair;
366         struct sk_buff *skb;
367         int state;
368
369         unix_remove_socket(sk);
370
371         /* Clear state */
372         unix_state_lock(sk);
373         sock_orphan(sk);
374         sk->sk_shutdown = SHUTDOWN_MASK;
375         dentry       = u->dentry;
376         u->dentry    = NULL;
377         mnt          = u->mnt;
378         u->mnt       = NULL;
379         state = sk->sk_state;
380         sk->sk_state = TCP_CLOSE;
381         unix_state_unlock(sk);
382
383         wake_up_interruptible_all(&u->peer_wait);
384
385         skpair=unix_peer(sk);
386
387         if (skpair!=NULL) {
388                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
389                         unix_state_lock(skpair);
390                         /* No more writes */
391                         skpair->sk_shutdown = SHUTDOWN_MASK;
392                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
393                                 skpair->sk_err = ECONNRESET;
394                         unix_state_unlock(skpair);
395                         skpair->sk_state_change(skpair);
396                         read_lock(&skpair->sk_callback_lock);
397                         sk_wake_async(skpair,1,POLL_HUP);
398                         read_unlock(&skpair->sk_callback_lock);
399                 }
400                 sock_put(skpair); /* It may now die */
401                 unix_peer(sk) = NULL;
402         }
403
404         /* Try to flush out this socket. Throw out buffers at least */
405
406         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
407                 if (state==TCP_LISTEN)
408                         unix_release_sock(skb->sk, 1);
409                 /* passed fds are erased in the kfree_skb hook        */
410                 kfree_skb(skb);
411         }
412
413         if (dentry) {
414                 dput(dentry);
415                 mntput(mnt);
416         }
417
418         sock_put(sk);
419
420         /* ---- Socket is dead now and most probably destroyed ---- */
421
422         /*
423          * Fixme: BSD difference: In BSD all sockets connected to use get
424          *        ECONNRESET and we die on the spot. In Linux we behave
425          *        like files and pipes do and wait for the last
426          *        dereference.
427          *
428          * Can't we simply set sock->err?
429          *
430          *        What the above comment does talk about? --ANK(980817)
431          */
432
433         if (atomic_read(&unix_tot_inflight))
434                 unix_gc();              /* Garbage collect fds */
435
436         return 0;
437 }
438
439 static int unix_listen(struct socket *sock, int backlog)
440 {
441         int err;
442         struct sock *sk = sock->sk;
443         struct unix_sock *u = unix_sk(sk);
444
445         err = -EOPNOTSUPP;
446         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
447                 goto out;                       /* Only stream/seqpacket sockets accept */
448         err = -EINVAL;
449         if (!u->addr)
450                 goto out;                       /* No listens on an unbound socket */
451         unix_state_lock(sk);
452         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
453                 goto out_unlock;
454         if (backlog > sk->sk_max_ack_backlog)
455                 wake_up_interruptible_all(&u->peer_wait);
456         sk->sk_max_ack_backlog  = backlog;
457         sk->sk_state            = TCP_LISTEN;
458         /* set credentials so connect can copy them */
459         sk->sk_peercred.pid     = current->tgid;
460         sk->sk_peercred.uid     = current->euid;
461         sk->sk_peercred.gid     = current->egid;
462         err = 0;
463
464 out_unlock:
465         unix_state_unlock(sk);
466 out:
467         return err;
468 }
469
470 static int unix_release(struct socket *);
471 static int unix_bind(struct socket *, struct sockaddr *, int);
472 static int unix_stream_connect(struct socket *, struct sockaddr *,
473                                int addr_len, int flags);
474 static int unix_socketpair(struct socket *, struct socket *);
475 static int unix_accept(struct socket *, struct socket *, int);
476 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
477 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
478 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
479 static int unix_shutdown(struct socket *, int);
480 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
481                                struct msghdr *, size_t);
482 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
483                                struct msghdr *, size_t, int);
484 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
485                               struct msghdr *, size_t);
486 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
487                               struct msghdr *, size_t, int);
488 static int unix_dgram_connect(struct socket *, struct sockaddr *,
489                               int, int);
490 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
491                                   struct msghdr *, size_t);
492
493 static const struct proto_ops unix_stream_ops = {
494         .family =       PF_UNIX,
495         .owner =        THIS_MODULE,
496         .release =      unix_release,
497         .bind =         unix_bind,
498         .connect =      unix_stream_connect,
499         .socketpair =   unix_socketpair,
500         .accept =       unix_accept,
501         .getname =      unix_getname,
502         .poll =         unix_poll,
503         .ioctl =        unix_ioctl,
504         .listen =       unix_listen,
505         .shutdown =     unix_shutdown,
506         .setsockopt =   sock_no_setsockopt,
507         .getsockopt =   sock_no_getsockopt,
508         .sendmsg =      unix_stream_sendmsg,
509         .recvmsg =      unix_stream_recvmsg,
510         .mmap =         sock_no_mmap,
511         .sendpage =     sock_no_sendpage,
512 };
513
514 static const struct proto_ops unix_dgram_ops = {
515         .family =       PF_UNIX,
516         .owner =        THIS_MODULE,
517         .release =      unix_release,
518         .bind =         unix_bind,
519         .connect =      unix_dgram_connect,
520         .socketpair =   unix_socketpair,
521         .accept =       sock_no_accept,
522         .getname =      unix_getname,
523         .poll =         datagram_poll,
524         .ioctl =        unix_ioctl,
525         .listen =       sock_no_listen,
526         .shutdown =     unix_shutdown,
527         .setsockopt =   sock_no_setsockopt,
528         .getsockopt =   sock_no_getsockopt,
529         .sendmsg =      unix_dgram_sendmsg,
530         .recvmsg =      unix_dgram_recvmsg,
531         .mmap =         sock_no_mmap,
532         .sendpage =     sock_no_sendpage,
533 };
534
535 static const struct proto_ops unix_seqpacket_ops = {
536         .family =       PF_UNIX,
537         .owner =        THIS_MODULE,
538         .release =      unix_release,
539         .bind =         unix_bind,
540         .connect =      unix_stream_connect,
541         .socketpair =   unix_socketpair,
542         .accept =       unix_accept,
543         .getname =      unix_getname,
544         .poll =         datagram_poll,
545         .ioctl =        unix_ioctl,
546         .listen =       unix_listen,
547         .shutdown =     unix_shutdown,
548         .setsockopt =   sock_no_setsockopt,
549         .getsockopt =   sock_no_getsockopt,
550         .sendmsg =      unix_seqpacket_sendmsg,
551         .recvmsg =      unix_dgram_recvmsg,
552         .mmap =         sock_no_mmap,
553         .sendpage =     sock_no_sendpage,
554 };
555
556 static struct proto unix_proto = {
557         .name     = "UNIX",
558         .owner    = THIS_MODULE,
559         .obj_size = sizeof(struct unix_sock),
560 };
561
562 /*
563  * AF_UNIX sockets do not interact with hardware, hence they
564  * dont trigger interrupts - so it's safe for them to have
565  * bh-unsafe locking for their sk_receive_queue.lock. Split off
566  * this special lock-class by reinitializing the spinlock key:
567  */
568 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
569
570 static struct sock * unix_create1(struct socket *sock)
571 {
572         struct sock *sk = NULL;
573         struct unix_sock *u;
574
575         if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
576                 goto out;
577
578         sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
579         if (!sk)
580                 goto out;
581
582         atomic_inc(&unix_nr_socks);
583
584         sock_init_data(sock,sk);
585         lockdep_set_class(&sk->sk_receive_queue.lock,
586                                 &af_unix_sk_receive_queue_lock_key);
587
588         sk->sk_write_space      = unix_write_space;
589         sk->sk_max_ack_backlog  = sysctl_unix_max_dgram_qlen;
590         sk->sk_destruct         = unix_sock_destructor;
591         u         = unix_sk(sk);
592         u->dentry = NULL;
593         u->mnt    = NULL;
594         spin_lock_init(&u->lock);
595         atomic_set(&u->inflight, sock ? 0 : -1);
596         mutex_init(&u->readlock); /* single task reading lock */
597         init_waitqueue_head(&u->peer_wait);
598         unix_insert_socket(unix_sockets_unbound, sk);
599 out:
600         return sk;
601 }
602
603 static int unix_create(struct socket *sock, int protocol)
604 {
605         if (protocol && protocol != PF_UNIX)
606                 return -EPROTONOSUPPORT;
607
608         sock->state = SS_UNCONNECTED;
609
610         switch (sock->type) {
611         case SOCK_STREAM:
612                 sock->ops = &unix_stream_ops;
613                 break;
614                 /*
615                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
616                  *      nothing uses it.
617                  */
618         case SOCK_RAW:
619                 sock->type=SOCK_DGRAM;
620         case SOCK_DGRAM:
621                 sock->ops = &unix_dgram_ops;
622                 break;
623         case SOCK_SEQPACKET:
624                 sock->ops = &unix_seqpacket_ops;
625                 break;
626         default:
627                 return -ESOCKTNOSUPPORT;
628         }
629
630         return unix_create1(sock) ? 0 : -ENOMEM;
631 }
632
633 static int unix_release(struct socket *sock)
634 {
635         struct sock *sk = sock->sk;
636
637         if (!sk)
638                 return 0;
639
640         sock->sk = NULL;
641
642         return unix_release_sock (sk, 0);
643 }
644
645 static int unix_autobind(struct socket *sock)
646 {
647         struct sock *sk = sock->sk;
648         struct unix_sock *u = unix_sk(sk);
649         static u32 ordernum = 1;
650         struct unix_address * addr;
651         int err;
652
653         mutex_lock(&u->readlock);
654
655         err = 0;
656         if (u->addr)
657                 goto out;
658
659         err = -ENOMEM;
660         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
661         if (!addr)
662                 goto out;
663
664         addr->name->sun_family = AF_UNIX;
665         atomic_set(&addr->refcnt, 1);
666
667 retry:
668         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
669         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
670
671         spin_lock(&unix_table_lock);
672         ordernum = (ordernum+1)&0xFFFFF;
673
674         if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
675                                       addr->hash)) {
676                 spin_unlock(&unix_table_lock);
677                 /* Sanity yield. It is unusual case, but yet... */
678                 if (!(ordernum&0xFF))
679                         yield();
680                 goto retry;
681         }
682         addr->hash ^= sk->sk_type;
683
684         __unix_remove_socket(sk);
685         u->addr = addr;
686         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
687         spin_unlock(&unix_table_lock);
688         err = 0;
689
690 out:    mutex_unlock(&u->readlock);
691         return err;
692 }
693
/*
 * Find the socket a name refers to.
 *
 * A name whose first path byte is non-zero is resolved through the
 * filesystem: the path must be writable for the caller and must be a
 * socket inode with a unix socket bound to it.  Any other name is looked
 * up in the hash table of abstract names.
 *
 * Returns the socket with a reference held.  On failure returns NULL and
 * stores the error in *error: the lookup or permission error,
 * -ECONNREFUSED when nothing is bound there, or -EPROTOTYPE when the
 * socket found is not of the requested @type.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct nameidata nd;
	struct sock *found;
	int err;

	if (!sunname->sun_path[0]) {
		struct dentry *dentry;

		found = unix_find_socket_byname(sunname, len, type, hash);
		if (!found) {
			err = -ECONNREFUSED;
			goto fail;
		}

		dentry = unix_sk(found)->dentry;
		if (dentry)
			touch_atime(unix_sk(found)->mnt, dentry);
		return found;
	}

	err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
	if (err)
		goto fail;

	err = vfs_permission(&nd, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;

	found = unix_find_socket_byinode(nd.dentry->d_inode);
	if (!found)
		goto put_fail;

	/* The access time is only touched for a socket of the right type. */
	if (found->sk_type == type)
		touch_atime(nd.mnt, nd.dentry);

	path_release(&nd);

	if (found->sk_type != type) {
		sock_put(found);
		err = -EPROTOTYPE;
		goto fail;
	}
	return found;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
745
746
747 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
748 {
749         struct sock *sk = sock->sk;
750         struct unix_sock *u = unix_sk(sk);
751         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
752         struct dentry * dentry = NULL;
753         struct nameidata nd;
754         int err;
755         unsigned hash;
756         struct unix_address *addr;
757         struct hlist_head *list;
758
759         err = -EINVAL;
760         if (sunaddr->sun_family != AF_UNIX)
761                 goto out;
762
763         if (addr_len==sizeof(short)) {
764                 err = unix_autobind(sock);
765                 goto out;
766         }
767
768         err = unix_mkname(sunaddr, addr_len, &hash);
769         if (err < 0)
770                 goto out;
771         addr_len = err;
772
773         mutex_lock(&u->readlock);
774
775         err = -EINVAL;
776         if (u->addr)
777                 goto out_up;
778
779         err = -ENOMEM;
780         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
781         if (!addr)
782                 goto out_up;
783
784         memcpy(addr->name, sunaddr, addr_len);
785         addr->len = addr_len;
786         addr->hash = hash ^ sk->sk_type;
787         atomic_set(&addr->refcnt, 1);
788
789         if (sunaddr->sun_path[0]) {
790                 unsigned int mode;
791                 err = 0;
792                 /*
793                  * Get the parent directory, calculate the hash for last
794                  * component.
795                  */
796                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
797                 if (err)
798                         goto out_mknod_parent;
799
800                 dentry = lookup_create(&nd, 0);
801                 err = PTR_ERR(dentry);
802                 if (IS_ERR(dentry))
803                         goto out_mknod_unlock;
804
805                 /*
806                  * All right, let's create it.
807                  */
808                 mode = S_IFSOCK |
809                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
810                 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
811                 if (err)
812                         goto out_mknod_dput;
813                 mutex_unlock(&nd.dentry->d_inode->i_mutex);
814                 dput(nd.dentry);
815                 nd.dentry = dentry;
816
817                 addr->hash = UNIX_HASH_SIZE;
818         }
819
820         spin_lock(&unix_table_lock);
821
822         if (!sunaddr->sun_path[0]) {
823                 err = -EADDRINUSE;
824                 if (__unix_find_socket_byname(sunaddr, addr_len,
825                                               sk->sk_type, hash)) {
826                         unix_release_addr(addr);
827                         goto out_unlock;
828                 }
829
830                 list = &unix_socket_table[addr->hash];
831         } else {
832                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
833                 u->dentry = nd.dentry;
834                 u->mnt    = nd.mnt;
835         }
836
837         err = 0;
838         __unix_remove_socket(sk);
839         u->addr = addr;
840         __unix_insert_socket(list, sk);
841
842 out_unlock:
843         spin_unlock(&unix_table_lock);
844 out_up:
845         mutex_unlock(&u->readlock);
846 out:
847         return err;
848
849 out_mknod_dput:
850         dput(dentry);
851 out_mknod_unlock:
852         mutex_unlock(&nd.dentry->d_inode->i_mutex);
853         path_release(&nd);
854 out_mknod_parent:
855         if (err==-EEXIST)
856                 err=-EADDRINUSE;
857         unix_release_addr(addr);
858         goto out_up;
859 }
860
/*
 *      Take the state lock of sk1 and, when sk2 is a second, distinct
 *      socket, of sk2 as well.
 *
 *      With sk2 == NULL or sk2 == sk1 only sk1 is locked.  Otherwise the
 *      socket with the lower address is locked first and the other one is
 *      taken with unix_state_lock_nested(), so two callers passing the
 *      same pair in opposite order acquire the locks in the same order.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        /* Only one distinct socket to lock. */
        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        /* Order the pair by address. */
        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
875
/*
 *      Release what unix_state_double_lock(sk1, sk2) acquired: always the
 *      state lock of sk1, and that of sk2 too when sk2 is a second,
 *      distinct socket.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        /* Nothing more to release when there is no second socket. */
        if (!sk2 || unlikely(sk1 == sk2))
                return;

        unix_state_unlock(sk2);
}
885
/*
 *      Connect a datagram socket to a peer, or break an existing
 *      association when addr->sa_family is AF_UNSPEC.
 *
 *      For any other family the address is passed through unix_mkname(),
 *      the socket is autobound if SOCK_PASSCRED is set and it has no
 *      address yet, and the target is looked up with unix_find_other().
 *      With both state locks held the target is rejected if it is
 *      SOCK_DEAD (lookup is retried), if unix_may_send() refuses (-EPERM)
 *      or if security_unix_may_send() returns an error.
 *
 *      On success unix_peer(sk) is replaced by the new target (or NULL),
 *      a previous peer is notified through unix_dgram_disconnected() if it
 *      differs from the new one, and the reference on the previous peer is
 *      dropped.  The "flags" argument is not used.
 *
 *      Returns 0 on success or a negative errno.
 */
886 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
887                               int alen, int flags)
888 {
889         struct sock *sk = sock->sk;
890         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
891         struct sock *other;
892         unsigned hash;
893         int err;
894
895         if (addr->sa_family != AF_UNSPEC) {
896                 err = unix_mkname(sunaddr, alen, &hash);
897                 if (err < 0)
898                         goto out;
                    /* A non-negative result replaces the caller's length. */
899                 alen = err;
900
901                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
902                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
903                         goto out;
904
905 restart:
906                 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
907                 if (!other)
908                         goto out;
909
                    /*
                     * Hold both state locks from here until the peer
                     * pointer has been updated below, so the SOCK_DEAD
                     * test, the permission checks and the assignment are
                     * all done under the same lock hold.
                     */
910                 unix_state_double_lock(sk, other);
911
912                 /* Apparently VFS overslept socket death. Retry. */
913                 if (sock_flag(other, SOCK_DEAD)) {
914                         unix_state_double_unlock(sk, other);
915                         sock_put(other);
916                         goto restart;
917                 }
918
919                 err = -EPERM;
920                 if (!unix_may_send(sk, other))
921                         goto out_unlock;
922
923                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
924                 if (err)
925                         goto out_unlock;
926
927         } else {
928                 /*
929                  *      1003.1g breaking connected state with AF_UNSPEC
930                  */
931                 other = NULL;
                    /* With other == NULL only sk's state lock is taken. */
932                 unix_state_double_lock(sk, other);
933         }
934
935         /*
936          * If it was connected, reconnect.
937          */
938         if (unix_peer(sk)) {
939                 struct sock *old_peer = unix_peer(sk);
940                 unix_peer(sk)=other;
941                 unix_state_double_unlock(sk, other);
942
                    /* Notification and the final put run without the locks. */
943                 if (other != old_peer)
944                         unix_dgram_disconnected(sk, old_peer);
945                 sock_put(old_peer);
946         } else {
947                 unix_peer(sk)=other;
948                 unix_state_double_unlock(sk, other);
949         }
            /*
             * Success: no sock_put(other) here, the reference obtained by
             * the lookup stays with unix_peer(sk).
             */
950         return 0;
951
952 out_unlock:
953         unix_state_double_unlock(sk, other);
954         sock_put(other);
955 out:
956         return err;
957 }
958
/*
 *      Wait for room in the receive queue of "other".
 *
 *      The callers in this file invoke this with other's state lock held;
 *      the lock is released here (unix_state_unlock()) before sleeping and
 *      is NOT re-acquired on return.
 *
 *      The task is queued exclusively on other's peer_wait queue first and
 *      only then is the condition tested, still under the lock: a sleep is
 *      scheduled only if other is not SOCK_DEAD, has not shut down its
 *      receive side and its receive queue is longer than
 *      sk_max_ack_backlog.
 *
 *      Returns the value of schedule_timeout() if it slept, otherwise
 *      "timeo" unchanged.
 */
959 static long unix_wait_for_peer(struct sock *other, long timeo)
960 {
961         struct unix_sock *u = unix_sk(other);
962         int sched;
963         DEFINE_WAIT(wait);
964
965         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
966
            /* Decide whether sleeping is still warranted. */
967         sched = !sock_flag(other, SOCK_DEAD) &&
968                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
969                 (skb_queue_len(&other->sk_receive_queue) >
970                  other->sk_max_ack_backlog);
971
972         unix_state_unlock(other);
973
974         if (sched)
975                 timeo = schedule_timeout(timeo);
976
977         finish_wait(&u->peer_wait, &wait);
978         return timeo;
979 }
980
/*
 *      Connect a socket to a listening socket.
 *
 *      Outline:
 *       - the address is passed through unix_mkname() and the socket is
 *         autobound if SOCK_PASSCRED is set and it has no address;
 *       - a new sock (newsk) and a one-byte skb charged to it are
 *         allocated up front, before any state lock is taken;
 *       - the listener is looked up and its state lock taken; a SOCK_DEAD
 *         listener causes a new lookup, a non-TCP_LISTEN one fails with
 *         -ECONNREFUSED;
 *       - if the listener's receive queue is longer than
 *         sk_max_ack_backlog the call fails with -EAGAIN when the send
 *         timeout is zero, otherwise it sleeps in unix_wait_for_peer()
 *         and starts over;
 *       - sk itself must be in TCP_CLOSE (-EISCONN if TCP_ESTABLISHED,
 *         -EINVAL otherwise); its lock is taken nested inside the
 *         listener's and the state is re-checked;
 *       - after security_unix_stream_connect() agrees, newsk and sk are
 *         made peers of each other, newsk inherits the listener's address
 *         and dentry/mnt, and the skb is queued on the listener's receive
 *         queue, followed by sk_data_ready().
 *
 *      Returns 0 on success or a negative errno.  On failure the
 *      preallocated skb and newsk are released.
 */
981 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
982                                int addr_len, int flags)
983 {
984         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
985         struct sock *sk = sock->sk;
986         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
987         struct sock *newsk = NULL;
988         struct sock *other = NULL;
989         struct sk_buff *skb = NULL;
990         unsigned hash;
991         int st;
992         int err;
993         long timeo;
994
995         err = unix_mkname(sunaddr, addr_len, &hash);
996         if (err < 0)
997                 goto out;
998         addr_len = err;
999
1000         if (test_bit(SOCK_PASSCRED, &sock->flags)
1001                 && !u->addr && (err = unix_autobind(sock)) != 0)
1002                 goto out;
1003
1004         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1005
1006         /* First of all allocate resources.
1007            If we will make it after state is locked,
1008            we will have to recheck all again in any case.
1009          */
1010
1011         err = -ENOMEM;
1012
1013         /* create new sock for complete connection */
1014         newsk = unix_create1(NULL);
1015         if (newsk == NULL)
1016                 goto out;
1017
1018         /* Allocate skb for sending to listening sock */
1019         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1020         if (skb == NULL)
1021                 goto out;
1022
1023 restart:
1024         /*  Find listening sock. */
1025         other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
1026         if (!other)
1027                 goto out;
1028
1029         /* Latch state of peer */
1030         unix_state_lock(other);
1031
1032         /* Apparently VFS overslept socket death. Retry. */
1033         if (sock_flag(other, SOCK_DEAD)) {
1034                 unix_state_unlock(other);
1035                 sock_put(other);
1036                 goto restart;
1037         }
1038
1039         err = -ECONNREFUSED;
1040         if (other->sk_state != TCP_LISTEN)
1041                 goto out_unlock;
1042
1043         if (skb_queue_len(&other->sk_receive_queue) >
1044             other->sk_max_ack_backlog) {
1045                 err = -EAGAIN;
1046                 if (!timeo)
1047                         goto out_unlock;
1048
                     /*
                      * unix_wait_for_peer() releases other's state lock, so
                      * from here on only the reference is held: the signal
                      * path goes to "out" (not "out_unlock") and the retry
                      * path drops the reference before looking up again.
                      */
1049                 timeo = unix_wait_for_peer(other, timeo);
1050
1051                 err = sock_intr_errno(timeo);
1052                 if (signal_pending(current))
1053                         goto out;
1054                 sock_put(other);
1055                 goto restart;
1056         }
1057
1058         /* Latch our state.
1059
1060            It is tricky place. We need to grab write lock and cannot
1061            drop lock on peer. It is dangerous because deadlock is
1062            possible. Connect to self case and simultaneous
1063            attempt to connect are eliminated by checking socket
1064            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1065            check this before attempt to grab lock.
1066
1067            Well, and we have to recheck the state after socket locked.
1068          */
1069         st = sk->sk_state;
1070
1071         switch (st) {
1072         case TCP_CLOSE:
1073                 /* This is ok... continue with connect */
1074                 break;
1075         case TCP_ESTABLISHED:
1076                 /* Socket is already connected */
1077                 err = -EISCONN;
1078                 goto out_unlock;
1079         default:
1080                 err = -EINVAL;
1081                 goto out_unlock;
1082         }
1083
1084         unix_state_lock_nested(sk);
1085
             /* sk's state changed between the unlocked read and the lock. */
1086         if (sk->sk_state != st) {
1087                 unix_state_unlock(sk);
1088                 unix_state_unlock(other);
1089                 sock_put(other);
1090                 goto restart;
1091         }
1092
1093         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1094         if (err) {
1095                 unix_state_unlock(sk);
1096                 goto out_unlock;
1097         }
1098
1099         /* The way is open! Quickly set all the necessary fields... */
1100
1101         sock_hold(sk);
1102         unix_peer(newsk)        = sk;
1103         newsk->sk_state         = TCP_ESTABLISHED;
1104         newsk->sk_type          = sk->sk_type;
1105         newsk->sk_peercred.pid  = current->tgid;
1106         newsk->sk_peercred.uid  = current->euid;
1107         newsk->sk_peercred.gid  = current->egid;
1108         newu = unix_sk(newsk);
1109         newsk->sk_sleep         = &newu->peer_wait;
1110         otheru = unix_sk(other);
1111
1112         /* copy address information from listening to new sock*/
1113         if (otheru->addr) {
1114                 atomic_inc(&otheru->addr->refcnt);
1115                 newu->addr = otheru->addr;
1116         }
1117         if (otheru->dentry) {
1118                 newu->dentry    = dget(otheru->dentry);
1119                 newu->mnt       = mntget(otheru->mnt);
1120         }
1121
1122         /* Set credentials */
1123         sk->sk_peercred = other->sk_peercred;
1124
1125         sock->state     = SS_CONNECTED;
1126         sk->sk_state    = TCP_ESTABLISHED;
1127         sock_hold(newsk);
1128
1129         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1130         unix_peer(sk)   = newsk;
1131
1132         unix_state_unlock(sk);
1133
1134         /* take ten and send info to listening sock */
1135         spin_lock(&other->sk_receive_queue.lock);
1136         __skb_queue_tail(&other->sk_receive_queue, skb);
1137         /* Undo artificially decreased inflight after embryo
1138          * is installed to listening socket. */
1139         atomic_inc(&newu->inflight);
1140         spin_unlock(&other->sk_receive_queue.lock);
1141         unix_state_unlock(other);
1142         other->sk_data_ready(other, 0);
1143         sock_put(other);
1144         return 0;
1145
1146 out_unlock:
1147         if (other)
1148                 unix_state_unlock(other);
1149
             /* Failure only: undo the up-front allocations and the lookup. */
1150 out:
1151         if (skb)
1152                 kfree_skb(skb);
1153         if (newsk)
1154                 unix_release_sock(newsk, 0);
1155         if (other)
1156                 sock_put(other);
1157         return err;
1158 }
1159
1160 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1161 {
1162         struct sock *ska=socka->sk, *skb = sockb->sk;
1163
1164         /* Join our sockets back to back */
1165         sock_hold(ska);
1166         sock_hold(skb);
1167         unix_peer(ska)=skb;
1168         unix_peer(skb)=ska;
1169         ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1170         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1171         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1172
1173         if (ska->sk_type != SOCK_DGRAM) {
1174                 ska->sk_state = TCP_ESTABLISHED;
1175                 skb->sk_state = TCP_ESTABLISHED;
1176                 socka->state  = SS_CONNECTED;
1177                 sockb->state  = SS_CONNECTED;
1178         }
1179         return 0;
1180 }
1181
1182 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1183 {
1184         struct sock *sk = sock->sk;
1185         struct sock *tsk;
1186         struct sk_buff *skb;
1187         int err;
1188
1189         err = -EOPNOTSUPP;
1190         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1191                 goto out;
1192
1193         err = -EINVAL;
1194         if (sk->sk_state != TCP_LISTEN)
1195                 goto out;
1196
1197         /* If socket state is TCP_LISTEN it cannot change (for now...),
1198          * so that no locks are necessary.
1199          */
1200
1201         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1202         if (!skb) {
1203                 /* This means receive shutdown. */
1204                 if (err == 0)
1205                         err = -EINVAL;
1206                 goto out;
1207         }
1208
1209         tsk = skb->sk;
1210         skb_free_datagram(sk, skb);
1211         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1212
1213         /* attach accepted sock to socket */
1214         unix_state_lock(tsk);
1215         newsock->state = SS_CONNECTED;
1216         sock_graft(tsk, newsock);
1217         unix_state_unlock(tsk);
1218         return 0;
1219
1220 out:
1221         return err;
1222 }
1223
1224
1225 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1226 {
1227         struct sock *sk = sock->sk;
1228         struct unix_sock *u;
1229         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1230         int err = 0;
1231
1232         if (peer) {
1233                 sk = unix_peer_get(sk);
1234
1235                 err = -ENOTCONN;
1236                 if (!sk)
1237                         goto out;
1238                 err = 0;
1239         } else {
1240                 sock_hold(sk);
1241         }
1242
1243         u = unix_sk(sk);
1244         unix_state_lock(sk);
1245         if (!u->addr) {
1246                 sunaddr->sun_family = AF_UNIX;
1247                 sunaddr->sun_path[0] = 0;
1248                 *uaddr_len = sizeof(short);
1249         } else {
1250                 struct unix_address *addr = u->addr;
1251
1252                 *uaddr_len = addr->len;
1253                 memcpy(sunaddr, addr->name, *uaddr_len);
1254         }
1255         unix_state_unlock(sk);
1256         sock_put(sk);
1257 out:
1258         return err;
1259 }
1260
1261 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1262 {
1263         int i;
1264
1265         scm->fp = UNIXCB(skb).fp;
1266         skb->destructor = sock_wfree;
1267         UNIXCB(skb).fp = NULL;
1268
1269         for (i=scm->fp->count-1; i>=0; i--)
1270                 unix_notinflight(scm->fp->fp[i]);
1271 }
1272
1273 static void unix_destruct_fds(struct sk_buff *skb)
1274 {
1275         struct scm_cookie scm;
1276         memset(&scm, 0, sizeof(scm));
1277         unix_detach_fds(&scm, skb);
1278
1279         /* Alas, it calls VFS */
1280         /* So fscking what? fput() had been SMP-safe since the last Summer */
1281         scm_destroy(&scm);
1282         sock_wfree(skb);
1283 }
1284
1285 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1286 {
1287         int i;
1288         for (i=scm->fp->count-1; i>=0; i--)
1289                 unix_inflight(scm->fp->fp[i]);
1290         UNIXCB(skb).fp = scm->fp;
1291         skb->destructor = unix_destruct_fds;
1292         scm->fp = NULL;
1293 }
1294
1295 /*
1296  *      Send AF_UNIX data.
1297  */
1298
/*
 *      Send one datagram of "len" bytes.
 *
 *      The destination is msg->msg_name when msg->msg_namelen is set,
 *      otherwise the connected peer (-ENOTCONN if there is none).
 *      MSG_OOB is refused with -EOPNOTSUPP and a payload larger than
 *      sk->sk_sndbuf - 32 with -EMSGSIZE.  The skb is fully built
 *      (credentials, passed files, security data, payload) before the
 *      receiver is locked.
 *
 *      Under the receiver's state lock the send is refused with -EPERM
 *      if unix_may_send() says no, with -EPIPE if the receiver has shut
 *      down its receive side, or with the error returned by
 *      security_unix_may_send() (not consulted for SOCK_SEQPACKET).  If
 *      the receiver's queue is over sk_max_ack_backlog and it is not
 *      connected back to us, the call fails with -EAGAIN when the send
 *      timeout is zero, otherwise it waits and retries.
 *
 *      Returns "len" on success or a negative errno; the scm cookie is
 *      destroyed on every path after scm_send() succeeded.
 */
1299 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1300                               struct msghdr *msg, size_t len)
1301 {
1302         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1303         struct sock *sk = sock->sk;
1304         struct unix_sock *u = unix_sk(sk);
1305         struct sockaddr_un *sunaddr=msg->msg_name;
1306         struct sock *other = NULL;
1307         int namelen = 0; /* fake GCC */
1308         int err;
1309         unsigned hash;
1310         struct sk_buff *skb;
1311         long timeo;
1312         struct scm_cookie tmp_scm;
1313
1314         if (NULL == siocb->scm)
1315                 siocb->scm = &tmp_scm;
1316         err = scm_send(sock, msg, siocb->scm);
1317         if (err < 0)
1318                 return err;
1319
1320         err = -EOPNOTSUPP;
1321         if (msg->msg_flags&MSG_OOB)
1322                 goto out;
1323
1324         if (msg->msg_namelen) {
1325                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1326                 if (err < 0)
1327                         goto out;
1328                 namelen = err;
1329         } else {
                     /* No address given: use the connected peer. */
1330                 sunaddr = NULL;
1331                 err = -ENOTCONN;
1332                 other = unix_peer_get(sk);
1333                 if (!other)
1334                         goto out;
1335         }
1336
1337         if (test_bit(SOCK_PASSCRED, &sock->flags)
1338                 && !u->addr && (err = unix_autobind(sock)) != 0)
1339                 goto out;
1340
1341         err = -EMSGSIZE;
1342         if (len > sk->sk_sndbuf - 32)
1343                 goto out;
1344
1345         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1346         if (skb==NULL)
1347                 goto out;
1348
1349         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1350         if (siocb->scm->fp)
1351                 unix_attach_fds(siocb->scm, skb);
1352         unix_get_secdata(siocb->scm, skb);
1353
1354         skb_reset_transport_header(skb);
1355         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1356         if (err)
1357                 goto out_free;
1358
1359         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1360
1361 restart:
             /*
              * "other" is NULL here only when an explicit address was given
              * and has not been resolved yet, or when a dead non-peer
              * target was dropped below and must be looked up again.
              */
1362         if (!other) {
1363                 err = -ECONNRESET;
1364                 if (sunaddr == NULL)
1365                         goto out_free;
1366
1367                 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1368                                         hash, &err);
1369                 if (other==NULL)
1370                         goto out_free;
1371         }
1372
1373         unix_state_lock(other);
1374         err = -EPERM;
1375         if (!unix_may_send(sk, other))
1376                 goto out_unlock;
1377
1378         if (sock_flag(other, SOCK_DEAD)) {
1379                 /*
1380                  *      Check with 1003.1g - what should
1381                  *      datagram error
1382                  */
1383                 unix_state_unlock(other);
1384                 sock_put(other);
1385
                     /*
                      * If the dead socket is still our connected peer,
                      * disconnect from it, drop the peer link's reference
                      * and fail with -ECONNREFUSED; otherwise look the
                      * address up again.
                      */
1386                 err = 0;
1387                 unix_state_lock(sk);
1388                 if (unix_peer(sk) == other) {
1389                         unix_peer(sk)=NULL;
1390                         unix_state_unlock(sk);
1391
1392                         unix_dgram_disconnected(sk, other);
1393                         sock_put(other);
1394                         err = -ECONNREFUSED;
1395                 } else {
1396                         unix_state_unlock(sk);
1397                 }
1398
1399                 other = NULL;
1400                 if (err)
1401                         goto out_free;
1402                 goto restart;
1403         }
1404
1405         err = -EPIPE;
1406         if (other->sk_shutdown & RCV_SHUTDOWN)
1407                 goto out_unlock;
1408
1409         if (sk->sk_type != SOCK_SEQPACKET) {
1410                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1411                 if (err)
1412                         goto out_unlock;
1413         }
1414
             /* The backlog limit is skipped when other's peer is this socket. */
1415         if (unix_peer(other) != sk &&
1416             (skb_queue_len(&other->sk_receive_queue) >
1417              other->sk_max_ack_backlog)) {
1418                 if (!timeo) {
1419                         err = -EAGAIN;
1420                         goto out_unlock;
1421                 }
1422
                     /*
                      * unix_wait_for_peer() releases other's state lock; the
                      * reference on other is kept, so the retry re-locks the
                      * same socket without a new lookup.
                      */
1423                 timeo = unix_wait_for_peer(other, timeo);
1424
1425                 err = sock_intr_errno(timeo);
1426                 if (signal_pending(current))
1427                         goto out_free;
1428
1429                 goto restart;
1430         }
1431
1432         skb_queue_tail(&other->sk_receive_queue, skb);
1433         unix_state_unlock(other);
1434         other->sk_data_ready(other, len);
1435         sock_put(other);
1436         scm_destroy(siocb->scm);
1437         return len;
1438
1439 out_unlock:
1440         unix_state_unlock(other);
1441 out_free:
1442         kfree_skb(skb);
1443 out:
1444         if (other)
1445                 sock_put(other);
1446         scm_destroy(siocb->scm);
1447         return err;
1448 }
1449
1450
/*
 *      Send "len" bytes on a connected stream socket.
 *
 *      MSG_OOB is refused with -EOPNOTSUPP.  A destination address is
 *      never accepted: -EISCONN if the socket is TCP_ESTABLISHED,
 *      -EOPNOTSUPP otherwise.  Without a peer the result is -ENOTCONN.
 *
 *      The payload is cut into skbs of at most (sk_sndbuf / 2) - 64 bytes
 *      (and at most SKB_MAX_ALLOC); each one is stamped with the sender's
 *      credentials and queued on the peer's receive queue under the
 *      peer's state lock.  Passed files travel with the first skb only,
 *      because unix_attach_fds() clears the cookie's list.
 *
 *      If our side is shut down for sending, or the peer is dead or has
 *      shut down receiving, the result is -EPIPE and, when nothing has
 *      been sent yet and MSG_NOSIGNAL is not set, SIGPIPE is raised.
 *
 *      Returns the number of bytes queued if that is non-zero, otherwise
 *      a negative errno.
 */
1451 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1452                                struct msghdr *msg, size_t len)
1453 {
1454         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1455         struct sock *sk = sock->sk;
1456         struct sock *other = NULL;
1457         struct sockaddr_un *sunaddr=msg->msg_name;
1458         int err,size;
1459         struct sk_buff *skb;
1460         int sent=0;
1461         struct scm_cookie tmp_scm;
1462
1463         if (NULL == siocb->scm)
1464                 siocb->scm = &tmp_scm;
1465         err = scm_send(sock, msg, siocb->scm);
1466         if (err < 0)
1467                 return err;
1468
1469         err = -EOPNOTSUPP;
1470         if (msg->msg_flags&MSG_OOB)
1471                 goto out_err;
1472
1473         if (msg->msg_namelen) {
1474                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1475                 goto out_err;
1476         } else {
1477                 sunaddr = NULL;
1478                 err = -ENOTCONN;
                     /*
                      * The peer pointer is read directly; unlike
                      * unix_dgram_sendmsg() this function never calls
                      * sock_put() on "other".
                      */
1479                 other = unix_peer(sk);
1480                 if (!other)
1481                         goto out_err;
1482         }
1483
1484         if (sk->sk_shutdown & SEND_SHUTDOWN)
1485                 goto pipe_err;
1486
1487         while(sent < len)
1488         {
1489                 /*
1490                  *      Optimisation for the fact that under 0.01% of X
1491                  *      messages typically need breaking up.
1492                  */
1493
1494                 size = len-sent;
1495
1496                 /* Keep two messages in the pipe so it schedules better */
1497                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1498                         size = (sk->sk_sndbuf >> 1) - 64;
1499
1500                 if (size > SKB_MAX_ALLOC)
1501                         size = SKB_MAX_ALLOC;
1502
1503                 /*
1504                  *      Grab a buffer
1505                  */
1506
1507                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1508
1509                 if (skb==NULL)
1510                         goto out_err;
1511
1512                 /*
1513                  *      If you pass two values to the sock_alloc_send_skb
1514                  *      it tries to grab the large buffer with GFP_NOFS
1515                  *      (which can fail easily), and if it fails grab the
1516                  *      fallback size buffer which is under a page and will
1517                  *      succeed. [Alan]
1518                  */
1519                 size = min_t(int, size, skb_tailroom(skb));
1520
1521                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1522                 if (siocb->scm->fp)
1523                         unix_attach_fds(siocb->scm, skb);
1524
1525                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1526                         kfree_skb(skb);
1527                         goto out_err;
1528                 }
1529
1530                 unix_state_lock(other);
1531
1532                 if (sock_flag(other, SOCK_DEAD) ||
1533                     (other->sk_shutdown & RCV_SHUTDOWN))
1534                         goto pipe_err_free;
1535
1536                 skb_queue_tail(&other->sk_receive_queue, skb);
1537                 unix_state_unlock(other);
1538                 other->sk_data_ready(other, size);
1539                 sent+=size;
1540         }
1541
1542         scm_destroy(siocb->scm);
1543         siocb->scm = NULL;
1544
1545         return sent;
1546
1547 pipe_err_free:
1548         unix_state_unlock(other);
1549         kfree_skb(skb);
1550 pipe_err:
1551         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1552                 send_sig(SIGPIPE,current,0);
1553         err = -EPIPE;
1554 out_err:
1555         scm_destroy(siocb->scm);
1556         siocb->scm = NULL;
             /* A partial send reports the byte count, not the error. */
1557         return sent ? : err;
1558 }
1559
1560 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1561                                   struct msghdr *msg, size_t len)
1562 {
1563         int err;
1564         struct sock *sk = sock->sk;
1565
1566         err = sock_error(sk);
1567         if (err)
1568                 return err;
1569
1570         if (sk->sk_state != TCP_ESTABLISHED)
1571                 return -ENOTCONN;
1572
1573         if (msg->msg_namelen)
1574                 msg->msg_namelen = 0;
1575
1576         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1577 }
1578
1579 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1580 {
1581         struct unix_sock *u = unix_sk(sk);
1582
1583         msg->msg_namelen = 0;
1584         if (u->addr) {
1585                 msg->msg_namelen = u->addr->len;
1586                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1587         }
1588 }
1589
/*
 *      Receive one datagram.
 *
 *      MSG_OOB is refused with -EOPNOTSUPP.  With the socket's readlock
 *      mutex held, one skb is obtained from skb_recv_datagram()
 *      (non-blocking when MSG_DONTWAIT is set).  The sender's address is
 *      copied out when msg->msg_name is set, at most "size" bytes of
 *      payload are copied and MSG_TRUNC is flagged when the datagram is
 *      longer than that.  The sender's credentials and security data go
 *      into the scm cookie; passed files are detached from the skb, or
 *      duplicated with scm_fp_dup() when MSG_PEEK is set.
 *
 *      Returns the number of bytes copied or a negative errno.
 */
1590 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1591                               struct msghdr *msg, size_t size,
1592                               int flags)
1593 {
1594         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1595         struct scm_cookie tmp_scm;
1596         struct sock *sk = sock->sk;
1597         struct unix_sock *u = unix_sk(sk);
1598         int noblock = flags & MSG_DONTWAIT;
1599         struct sk_buff *skb;
1600         int err;
1601
1602         err = -EOPNOTSUPP;
1603         if (flags&MSG_OOB)
1604                 goto out;
1605
1606         msg->msg_namelen = 0;
1607
1608         mutex_lock(&u->readlock);
1609
1610         skb = skb_recv_datagram(sk, flags, noblock, &err);
1611         if (!skb)
1612                 goto out_unlock;
1613
             /* Wake senders sleeping on our peer_wait queue. */
1614         wake_up_interruptible(&u->peer_wait);
1615
1616         if (msg->msg_name)
1617                 unix_copy_addr(msg, skb->sk);
1618
1619         if (size > skb->len)
1620                 size = skb->len;
1621         else if (size < skb->len)
1622                 msg->msg_flags |= MSG_TRUNC;
1623
1624         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1625         if (err)
1626                 goto out_free;
1627
1628         if (!siocb->scm) {
1629                 siocb->scm = &tmp_scm;
1630                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1631         }
1632         siocb->scm->creds = *UNIXCREDS(skb);
1633         unix_set_secdata(siocb->scm, skb);
1634
1635         if (!(flags & MSG_PEEK))
1636         {
1637                 if (UNIXCB(skb).fp)
1638                         unix_detach_fds(siocb->scm, skb);
1639         }
1640         else
1641         {
1642                 /* It is questionable: on PEEK we could:
1643                    - do not return fds - good, but too simple 8)
1644                    - return fds, and do not return them on read (old strategy,
1645                      apparently wrong)
1646                    - clone fds (I chose it for now, it is the most universal
1647                      solution)
1648
1649                    POSIX 1003.1g does not actually define this clearly
1650                    at all. POSIX 1003.1g doesn't define a lot of things
1651                    clearly however!
1652
1653                 */
1654                 if (UNIXCB(skb).fp)
1655                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1656         }
             /* Success: the return value is the number of bytes copied. */
1657         err = size;
1658
1659         scm_recv(sock, msg, siocb->scm, flags);
1660
1661 out_free:
1662         skb_free_datagram(sk,skb);
1663 out_unlock:
1664         mutex_unlock(&u->readlock);
1665 out:
1666         return err;
1667 }
1668
1669 /*
1670  *      Sleep until data has arrived. But check for races..
1671  */
1672
/*
 *      Block the caller on sk->sk_sleep until there is a reason to stop
 *      waiting for stream data.
 *
 *      The wait ends when the receive queue is non-empty, sk_err is set,
 *      the receive side is shut down, a signal is pending or the timeout
 *      has run out.  The condition is tested under the socket's state
 *      lock after the task has been put on the wait queue; the lock is
 *      dropped only around schedule_timeout().  SOCK_ASYNC_WAITDATA is
 *      set on the socket for the duration of each sleep.
 *
 *      Returns the remaining timeout.
 */
1673 static long unix_stream_data_wait(struct sock * sk, long timeo)
1674 {
1675         DEFINE_WAIT(wait);
1676
1677         unix_state_lock(sk);
1678
1679         for (;;) {
1680                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1681
1682                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1683                     sk->sk_err ||
1684                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1685                     signal_pending(current) ||
1686                     !timeo)
1687                         break;
1688
1689                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1690                 unix_state_unlock(sk);
1691                 timeo = schedule_timeout(timeo);
1692                 unix_state_lock(sk);
1693                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1694         }
1695
1696         finish_wait(sk->sk_sleep, &wait);
1697         unix_state_unlock(sk);
1698         return timeo;
1699 }
1700
1701
1702
1703 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1704                                struct msghdr *msg, size_t size,
1705                                int flags)
1706 {
1707         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1708         struct scm_cookie tmp_scm;
1709         struct sock *sk = sock->sk;
1710         struct unix_sock *u = unix_sk(sk);
1711         struct sockaddr_un *sunaddr=msg->msg_name;
1712         int copied = 0;
1713         int check_creds = 0;
1714         int target;
1715         int err = 0;
1716         long timeo;
1717
1718         err = -EINVAL;
1719         if (sk->sk_state != TCP_ESTABLISHED)
1720                 goto out;
1721
1722         err = -EOPNOTSUPP;
1723         if (flags&MSG_OOB)
1724                 goto out;
1725
1726         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1727         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1728
1729         msg->msg_namelen = 0;
1730
1731         /* Lock the socket to prevent queue disordering
1732          * while sleeps in memcpy_tomsg
1733          */
1734
1735         if (!siocb->scm) {
1736                 siocb->scm = &tmp_scm;
1737                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1738         }
1739
1740         mutex_lock(&u->readlock);
1741
1742         do
1743         {
1744                 int chunk;
1745                 struct sk_buff *skb;
1746
1747                 skb = skb_dequeue(&sk->sk_receive_queue);
1748                 if (skb==NULL)
1749                 {
1750                         if (copied >= target)
1751                                 break;
1752
1753                         /*
1754                          *      POSIX 1003.1g mandates this order.
1755                          */
1756
1757                         if ((err = sock_error(sk)) != 0)
1758                                 break;
1759                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1760                                 break;
1761                         err = -EAGAIN;
1762                         if (!timeo)
1763                                 break;
1764                         mutex_unlock(&u->readlock);
1765
1766                         timeo = unix_stream_data_wait(sk, timeo);
1767
1768                         if (signal_pending(current)) {
1769                                 err = sock_intr_errno(timeo);
1770                                 goto out;
1771                         }
1772                         mutex_lock(&u->readlock);
1773                         continue;
1774                 }
1775
1776                 if (check_creds) {
1777                         /* Never glue messages from different writers */
1778                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1779                                 skb_queue_head(&sk->sk_receive_queue, skb);
1780                                 break;
1781                         }
1782                 } else {
1783                         /* Copy credentials */
1784                         siocb->scm->creds = *UNIXCREDS(skb);
1785                         check_creds = 1;
1786                 }
1787
1788                 /* Copy address just once */
1789                 if (sunaddr)
1790                 {
1791                         unix_copy_addr(msg, skb->sk);
1792                         sunaddr = NULL;
1793                 }
1794
1795                 chunk = min_t(unsigned int, skb->len, size);
1796                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1797                         skb_queue_head(&sk->sk_receive_queue, skb);
1798                         if (copied == 0)
1799                                 copied = -EFAULT;
1800                         break;
1801                 }
1802                 copied += chunk;
1803                 size -= chunk;
1804
1805                 /* Mark read part of skb as used */
1806                 if (!(flags & MSG_PEEK))
1807                 {
1808                         skb_pull(skb, chunk);
1809
1810                         if (UNIXCB(skb).fp)
1811                                 unix_detach_fds(siocb->scm, skb);
1812
1813                         /* put the skb back if we didn't use it up.. */
1814                         if (skb->len)
1815                         {
1816                                 skb_queue_head(&sk->sk_receive_queue, skb);
1817                                 break;
1818                         }
1819
1820                         kfree_skb(skb);
1821
1822                         if (siocb->scm->fp)
1823                                 break;
1824                 }
1825                 else
1826                 {
1827                         /* It is questionable, see note in unix_dgram_recvmsg.
1828                          */
1829                         if (UNIXCB(skb).fp)
1830                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1831
1832                         /* put message back and return */
1833                         skb_queue_head(&sk->sk_receive_queue, skb);
1834                         break;
1835                 }
1836         } while (size);
1837
1838         mutex_unlock(&u->readlock);
1839         scm_recv(sock, msg, siocb->scm, flags);
1840 out:
1841         return copied ? : err;
1842 }
1843
1844 static int unix_shutdown(struct socket *sock, int mode)
1845 {
1846         struct sock *sk = sock->sk;
1847         struct sock *other;
1848
1849         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1850
1851         if (mode) {
1852                 unix_state_lock(sk);
1853                 sk->sk_shutdown |= mode;
1854                 other=unix_peer(sk);
1855                 if (other)
1856                         sock_hold(other);
1857                 unix_state_unlock(sk);
1858                 sk->sk_state_change(sk);
1859
1860                 if (other &&
1861                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1862
1863                         int peer_mode = 0;
1864
1865                         if (mode&RCV_SHUTDOWN)
1866                                 peer_mode |= SEND_SHUTDOWN;
1867                         if (mode&SEND_SHUTDOWN)
1868                                 peer_mode |= RCV_SHUTDOWN;
1869                         unix_state_lock(other);
1870                         other->sk_shutdown |= peer_mode;
1871                         unix_state_unlock(other);
1872                         other->sk_state_change(other);
1873                         read_lock(&other->sk_callback_lock);
1874                         if (peer_mode == SHUTDOWN_MASK)
1875                                 sk_wake_async(other,1,POLL_HUP);
1876                         else if (peer_mode & RCV_SHUTDOWN)
1877                                 sk_wake_async(other,1,POLL_IN);
1878                         read_unlock(&other->sk_callback_lock);
1879                 }
1880                 if (other)
1881                         sock_put(other);
1882         }
1883         return 0;
1884 }
1885
1886 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1887 {
1888         struct sock *sk = sock->sk;
1889         long amount=0;
1890         int err;
1891
1892         switch(cmd)
1893         {
1894                 case SIOCOUTQ:
1895                         amount = atomic_read(&sk->sk_wmem_alloc);
1896                         err = put_user(amount, (int __user *)arg);
1897                         break;
1898                 case SIOCINQ:
1899                 {
1900                         struct sk_buff *skb;
1901
1902                         if (sk->sk_state == TCP_LISTEN) {
1903                                 err = -EINVAL;
1904                                 break;
1905                         }
1906
1907                         spin_lock(&sk->sk_receive_queue.lock);
1908                         if (sk->sk_type == SOCK_STREAM ||
1909                             sk->sk_type == SOCK_SEQPACKET) {
1910                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1911                                         amount += skb->len;
1912                         } else {
1913                                 skb = skb_peek(&sk->sk_receive_queue);
1914                                 if (skb)
1915                                         amount=skb->len;
1916                         }
1917                         spin_unlock(&sk->sk_receive_queue.lock);
1918                         err = put_user(amount, (int __user *)arg);
1919                         break;
1920                 }
1921
1922                 default:
1923                         err = -ENOIOCTLCMD;
1924                         break;
1925         }
1926         return err;
1927 }
1928
1929 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1930 {
1931         struct sock *sk = sock->sk;
1932         unsigned int mask;
1933
1934         poll_wait(file, sk->sk_sleep, wait);
1935         mask = 0;
1936
1937         /* exceptional events? */
1938         if (sk->sk_err)
1939                 mask |= POLLERR;
1940         if (sk->sk_shutdown == SHUTDOWN_MASK)
1941                 mask |= POLLHUP;
1942         if (sk->sk_shutdown & RCV_SHUTDOWN)
1943                 mask |= POLLRDHUP;
1944
1945         /* readable? */
1946         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1947             (sk->sk_shutdown & RCV_SHUTDOWN))
1948                 mask |= POLLIN | POLLRDNORM;
1949
1950         /* Connection-based need to check for termination and startup */
1951         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1952                 mask |= POLLHUP;
1953
1954         /*
1955          * we set writable also when the other side has shut down the
1956          * connection. This prevents stuck sockets.
1957          */
1958         if (unix_writable(sk))
1959                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1960
1961         return mask;
1962 }
1963
1964
1965 #ifdef CONFIG_PROC_FS
1966 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1967 {
1968         loff_t off = 0;
1969         struct sock *s;
1970
1971         for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1972                 if (off == pos)
1973                         return s;
1974                 ++off;
1975         }
1976         return NULL;
1977 }
1978
1979
1980 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1981 {
1982         spin_lock(&unix_table_lock);
1983         return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1984 }
1985
1986 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1987 {
1988         ++*pos;
1989
1990         if (v == (void *)1)
1991                 return first_unix_socket(seq->private);
1992         return next_unix_socket(seq->private, v);
1993 }
1994
1995 static void unix_seq_stop(struct seq_file *seq, void *v)
1996 {
1997         spin_unlock(&unix_table_lock);
1998 }
1999
2000 static int unix_seq_show(struct seq_file *seq, void *v)
2001 {
2002
2003         if (v == (void *)1)
2004                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2005                          "Inode Path\n");
2006         else {
2007                 struct sock *s = v;
2008                 struct unix_sock *u = unix_sk(s);
2009                 unix_state_lock(s);
2010
2011                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2012                         s,
2013                         atomic_read(&s->sk_refcnt),
2014                         0,
2015                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2016                         s->sk_type,
2017                         s->sk_socket ?
2018                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2019                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2020                         sock_i_ino(s));
2021
2022                 if (u->addr) {
2023                         int i, len;
2024                         seq_putc(seq, ' ');
2025
2026                         i = 0;
2027                         len = u->addr->len - sizeof(short);
2028                         if (!UNIX_ABSTRACT(s))
2029                                 len--;
2030                         else {
2031                                 seq_putc(seq, '@');
2032                                 i++;
2033                         }
2034                         for ( ; i < len; i++)
2035                                 seq_putc(seq, u->addr->name->sun_path[i]);
2036                 }
2037                 unix_state_unlock(s);
2038                 seq_putc(seq, '\n');
2039         }
2040
2041         return 0;
2042 }
2043
2044 static struct seq_operations unix_seq_ops = {
2045         .start  = unix_seq_start,
2046         .next   = unix_seq_next,
2047         .stop   = unix_seq_stop,
2048         .show   = unix_seq_show,
2049 };
2050
2051
2052 static int unix_seq_open(struct inode *inode, struct file *file)
2053 {
2054         struct seq_file *seq;
2055         int rc = -ENOMEM;
2056         int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2057
2058         if (!iter)
2059                 goto out;
2060
2061         rc = seq_open(file, &unix_seq_ops);
2062         if (rc)
2063                 goto out_kfree;
2064
2065         seq          = file->private_data;
2066         seq->private = iter;
2067         *iter = 0;
2068 out:
2069         return rc;
2070 out_kfree:
2071         kfree(iter);
2072         goto out;
2073 }
2074
2075 static const struct file_operations unix_seq_fops = {
2076         .owner          = THIS_MODULE,
2077         .open           = unix_seq_open,
2078         .read           = seq_read,
2079         .llseek         = seq_lseek,
2080         .release        = seq_release_private,
2081 };
2082
2083 #endif
2084
2085 static struct net_proto_family unix_family_ops = {
2086         .family = PF_UNIX,
2087         .create = unix_create,
2088         .owner  = THIS_MODULE,
2089 };
2090
2091 static int __init af_unix_init(void)
2092 {
2093         int rc = -1;
2094         struct sk_buff *dummy_skb;
2095
2096         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2097
2098         rc = proto_register(&unix_proto, 1);
2099         if (rc != 0) {
2100                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2101                        __FUNCTION__);
2102                 goto out;
2103         }
2104
2105         sock_register(&unix_family_ops);
2106 #ifdef CONFIG_PROC_FS
2107         proc_net_fops_create("unix", 0, &unix_seq_fops);
2108 #endif
2109         unix_sysctl_register();
2110 out:
2111         return rc;
2112 }
2113
2114 static void __exit af_unix_exit(void)
2115 {
2116         sock_unregister(PF_UNIX);
2117         unix_sysctl_unregister();
2118         proc_net_remove("unix");
2119         proto_unregister(&unix_proto);
2120 }
2121
2122 module_init(af_unix_init);
2123 module_exit(af_unix_exit);
2124
2125 MODULE_LICENSE("GPL");
2126 MODULE_ALIAS_NETPROTO(PF_UNIX);