net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
   32  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
   33  *                                      by the above two patches.
  34  *           Andrea Arcangeli   :       If possible we block in connect(2)
  35  *                                      if the max backlog of the listen socket
   36  *                                      has been reached. This won't break
   37  *                                      old apps and it will avoid a huge number
   38  *                                      of hashed socks (for unix_gc()
   39  *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched/signal.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <linux/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120 #include <linux/file.h>
 121
 122 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
 123 EXPORT_SYMBOL_GPL(unix_socket_table);
 124 DEFINE_SPINLOCK(unix_table_lock);
 125 EXPORT_SYMBOL_GPL(unix_table_lock);
 126 static atomic_long_t unix_nr_socks;
 127
 128
 129 static struct hlist_head *unix_sockets_unbound(void *addr)
 130 {
 131         unsigned long hash = (unsigned long)addr;
 132
 133         hash ^= hash >> 16;
 134         hash ^= hash >> 8;
 135         hash %= UNIX_HASH_SIZE;
 136         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 137 }
 138
 139 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 140
 141 #ifdef CONFIG_SECURITY_NETWORK
 142 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 {
 144         UNIXCB(skb).secid = scm->secid;
 145 }
 146
 147 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 148 {
 149         scm->secid = UNIXCB(skb).secid;
 150 }
 151
 152 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 153 {
 154         return (scm->secid == UNIXCB(skb).secid);
 155 }
 156 #else
 157 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 158 { }
 159
 160 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 161 { }
 162
 163 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
 164 {
 165         return true;
 166 }
 167 #endif /* CONFIG_SECURITY_NETWORK */
 168
 169 /*
 170  *  SMP locking strategy:
 171  *    hash table is protected with spinlock unix_table_lock
 172  *    each socket state is protected by separate spin lock.
 173  */
 174
 175 static inline unsigned int unix_hash_fold(__wsum n)
 176 {
 177         unsigned int hash = (__force unsigned int)csum_fold(n);
 178
 179         hash ^= hash>>8;
 180         return hash&(UNIX_HASH_SIZE-1);
 181 }
 182
 183 #define unix_peer(sk) (unix_sk(sk)->peer)
 184
 185 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 186 {
 187         return unix_peer(osk) == sk;
 188 }
 189
 190 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 191 {
 192         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 193 }
 194
 195 static inline int unix_recvq_full(struct sock const *sk)
 196 {
 197         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 198 }
 199
 200 struct sock *unix_peer_get(struct sock *s)
 201 {
 202         struct sock *peer;
 203
 204         unix_state_lock(s);
 205         peer = unix_peer(s);
 206         if (peer)
 207                 sock_hold(peer);
 208         unix_state_unlock(s);
 209         return peer;
 210 }
 211 EXPORT_SYMBOL_GPL(unix_peer_get);
 212
 213 static inline void unix_release_addr(struct unix_address *addr)
 214 {
 215         if (refcount_dec_and_test(&addr->refcnt))
 216                 kfree(addr);
 217 }
 218
 219 /*
 220  *      Check unix socket name:
 221  *              - should be not zero length.
 222  *              - if started by not zero, should be NULL terminated (FS object)
 223  *              - if started by zero, it is abstract name.
 224  */
 225
 226 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 227 {
 228         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 229                 return -EINVAL;
 230         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 231                 return -EINVAL;
 232         if (sunaddr->sun_path[0]) {
 233                 /*
 234                  * This may look like an off by one error but it is a bit more
 235                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 236                  * sun_path[108] doesn't as such exist.  However in kernel space
 237                  * we are guaranteed that it is a valid memory location in our
 238                  * kernel address buffer.
 239                  */
 240                 ((char *)sunaddr)[len] = 0;
 241                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 242                 return len;
 243         }
 244
 245         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 246         return len;
 247 }
 248
 249 static void __unix_remove_socket(struct sock *sk)
 250 {
 251         sk_del_node_init(sk);
 252 }
 253
 254 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 255 {
 256         WARN_ON(!sk_unhashed(sk));
 257         sk_add_node(sk, list);
 258 }
 259
 260 static inline void unix_remove_socket(struct sock *sk)
 261 {
 262         spin_lock(&unix_table_lock);
 263         __unix_remove_socket(sk);
 264         spin_unlock(&unix_table_lock);
 265 }
 266
 267 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 268 {
 269         spin_lock(&unix_table_lock);
 270         __unix_insert_socket(list, sk);
 271         spin_unlock(&unix_table_lock);
 272 }
 273
 274 static struct sock *__unix_find_socket_byname(struct net *net,
 275                                               struct sockaddr_un *sunname,
 276                                               int len, int type, unsigned int hash)
 277 {
 278         struct sock *s;
 279
 280         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 281                 struct unix_sock *u = unix_sk(s);
 282
 283                 if (!net_eq(sock_net(s), net))
 284                         continue;
 285
 286                 if (u->addr->len == len &&
 287                     !memcmp(u->addr->name, sunname, len))
 288                         goto found;
 289         }
 290         s = NULL;
 291 found:
 292         return s;
 293 }
 294
 295 static inline struct sock *unix_find_socket_byname(struct net *net,
 296                                                    struct sockaddr_un *sunname,
 297                                                    int len, int type,
 298                                                    unsigned int hash)
 299 {
 300         struct sock *s;
 301
 302         spin_lock(&unix_table_lock);
 303         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 304         if (s)
 305                 sock_hold(s);
 306         spin_unlock(&unix_table_lock);
 307         return s;
 308 }
 309
 310 static struct sock *unix_find_socket_byinode(struct inode *i)
 311 {
 312         struct sock *s;
 313
 314         spin_lock(&unix_table_lock);
 315         sk_for_each(s,
 316                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 317                 struct dentry *dentry = unix_sk(s)->path.dentry;
 318
 319                 if (dentry && d_backing_inode(dentry) == i) {
 320                         sock_hold(s);
 321                         goto found;
 322                 }
 323         }
 324         s = NULL;
 325 found:
 326         spin_unlock(&unix_table_lock);
 327         return s;
 328 }
 329
 330 /* Support code for asymmetrically connected dgram sockets
 331  *
 332  * If a datagram socket is connected to a socket not itself connected
 333  * to the first socket (eg, /dev/log), clients may only enqueue more
 334  * messages if the present receive queue of the server socket is not
 335  * "too large". This means there's a second writeability condition
 336  * poll and sendmsg need to test. The dgram recv code will do a wake
 337  * up on the peer_wait wait queue of a socket upon reception of a
 338  * datagram which needs to be propagated to sleeping would-be writers
 339  * since these might not have sent anything so far. This can't be
 340  * accomplished via poll_wait because the lifetime of the server
 341  * socket might be less than that of its clients if these break their
 342  * association with it or if the server socket is closed while clients
 343  * are still connected to it and there's no way to inform "a polling
 344  * implementation" that it should let go of a certain wait queue
 345  *
 346  * In order to propagate a wake up, a wait_queue_entry_t of the client
 347  * socket is enqueued on the peer_wait queue of the server socket
 348  * whose wake function does a wake_up on the ordinary client socket
 349  * wait queue. This connection is established whenever a write (or
 350  * poll for write) hit the flow control condition and broken when the
 351  * association to the server socket is dissolved or after a wake up
 352  * was relayed.
 353  */
 354
 355 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
 356                                       void *key)
 357 {
 358         struct unix_sock *u;
 359         wait_queue_head_t *u_sleep;
 360
 361         u = container_of(q, struct unix_sock, peer_wake);
 362
 363         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 364                             q);
 365         u->peer_wake.private = NULL;
 366
 367         /* relaying can only happen while the wq still exists */
 368         u_sleep = sk_sleep(&u->sk);
 369         if (u_sleep)
 370                 wake_up_interruptible_poll(u_sleep, key);
 371
 372         return 0;
 373 }
 374
 375 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 376 {
 377         struct unix_sock *u, *u_other;
 378         int rc;
 379
 380         u = unix_sk(sk);
 381         u_other = unix_sk(other);
 382         rc = 0;
 383         spin_lock(&u_other->peer_wait.lock);
 384
 385         if (!u->peer_wake.private) {
 386                 u->peer_wake.private = other;
 387                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 388
 389                 rc = 1;
 390         }
 391
 392         spin_unlock(&u_other->peer_wait.lock);
 393         return rc;
 394 }
 395
 396 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 397                                             struct sock *other)
 398 {
 399         struct unix_sock *u, *u_other;
 400
 401         u = unix_sk(sk);
 402         u_other = unix_sk(other);
 403         spin_lock(&u_other->peer_wait.lock);
 404
 405         if (u->peer_wake.private == other) {
 406                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 407                 u->peer_wake.private = NULL;
 408         }
 409
 410         spin_unlock(&u_other->peer_wait.lock);
 411 }
 412
 413 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 414                                                    struct sock *other)
 415 {
 416         unix_dgram_peer_wake_disconnect(sk, other);
 417         wake_up_interruptible_poll(sk_sleep(sk),
 418                                    POLLOUT |
 419                                    POLLWRNORM |
 420                                    POLLWRBAND);
 421 }
 422
 423 /* preconditions:
 424  *      - unix_peer(sk) == other
 425  *      - association is stable
 426  */
 427 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 428 {
 429         int connected;
 430
 431         connected = unix_dgram_peer_wake_connect(sk, other);
 432
 433         if (unix_recvq_full(other))
 434                 return 1;
 435
 436         if (connected)
 437                 unix_dgram_peer_wake_disconnect(sk, other);
 438
 439         return 0;
 440 }
 441
 442 static int unix_writable(const struct sock *sk)
 443 {
 444         return sk->sk_state != TCP_LISTEN &&
 445                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 446 }
 447
 448 static void unix_write_space(struct sock *sk)
 449 {
 450         struct socket_wq *wq;
 451
 452         rcu_read_lock();
 453         if (unix_writable(sk)) {
 454                 wq = rcu_dereference(sk->sk_wq);
 455                 if (skwq_has_sleeper(wq))
 456                         wake_up_interruptible_sync_poll(&wq->wait,
 457                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 458                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 459         }
 460         rcu_read_unlock();
 461 }
 462
 463 /* When dgram socket disconnects (or changes its peer), we clear its receive
 464  * queue of packets arrived from previous peer. First, it allows to do
 465  * flow control based only on wmem_alloc; second, sk connected to peer
 466  * may receive messages only from that peer. */
 467 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 468 {
 469         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 470                 skb_queue_purge(&sk->sk_receive_queue);
 471                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 472
 473                 /* If one link of bidirectional dgram pipe is disconnected,
 474                  * we signal error. Messages are lost. Do not make this,
 475                  * when peer was not connected to us.
 476                  */
 477                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 478                         other->sk_err = ECONNRESET;
 479                         other->sk_error_report(other);
 480                 }
 481         }
 482 }
 483
 484 static void unix_sock_destructor(struct sock *sk)
 485 {
 486         struct unix_sock *u = unix_sk(sk);
 487
 488         skb_queue_purge(&sk->sk_receive_queue);
 489
 490         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
 491         WARN_ON(!sk_unhashed(sk));
 492         WARN_ON(sk->sk_socket);
 493         if (!sock_flag(sk, SOCK_DEAD)) {
 494                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 495                 return;
 496         }
 497
 498         if (u->addr)
 499                 unix_release_addr(u->addr);
 500
 501         atomic_long_dec(&unix_nr_socks);
 502         local_bh_disable();
 503         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 504         local_bh_enable();
 505 #ifdef UNIX_REFCNT_DEBUG
 506         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 507                 atomic_long_read(&unix_nr_socks));
 508 #endif
 509 }
 510
 511 static void unix_release_sock(struct sock *sk, int embrion)
 512 {
 513         struct unix_sock *u = unix_sk(sk);
 514         struct path path;
 515         struct sock *skpair;
 516         struct sk_buff *skb;
 517         int state;
 518
 519         unix_remove_socket(sk);
 520
 521         /* Clear state */
 522         unix_state_lock(sk);
 523         sock_orphan(sk);
 524         sk->sk_shutdown = SHUTDOWN_MASK;
 525         path         = u->path;
 526         u->path.dentry = NULL;
 527         u->path.mnt = NULL;
 528         state = sk->sk_state;
 529         sk->sk_state = TCP_CLOSE;
 530         unix_state_unlock(sk);
 531
 532         wake_up_interruptible_all(&u->peer_wait);
 533
 534         skpair = unix_peer(sk);
 535
 536         if (skpair != NULL) {
 537                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 538                         unix_state_lock(skpair);
 539                         /* No more writes */
 540                         skpair->sk_shutdown = SHUTDOWN_MASK;
 541                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 542                                 skpair->sk_err = ECONNRESET;
 543                         unix_state_unlock(skpair);
 544                         skpair->sk_state_change(skpair);
 545                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 546                 }
 547
 548                 unix_dgram_peer_wake_disconnect(sk, skpair);
 549                 sock_put(skpair); /* It may now die */
 550                 unix_peer(sk) = NULL;
 551         }
 552
 553         /* Try to flush out this socket. Throw out buffers at least */
 554
 555         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 556                 if (state == TCP_LISTEN)
 557                         unix_release_sock(skb->sk, 1);
 558                 /* passed fds are erased in the kfree_skb hook        */
 559                 UNIXCB(skb).consumed = skb->len;
 560                 kfree_skb(skb);
 561         }
 562
 563         if (path.dentry)
 564                 path_put(&path);
 565
 566         sock_put(sk);
 567
 568         /* ---- Socket is dead now and most probably destroyed ---- */
 569
 570         /*
 571          * Fixme: BSD difference: In BSD all sockets connected to us get
 572          *        ECONNRESET and we die on the spot. In Linux we behave
 573          *        like files and pipes do and wait for the last
 574          *        dereference.
 575          *
 576          * Can't we simply set sock->err?
 577          *
 578          *        What the above comment does talk about? --ANK(980817)
 579          */
 580
 581         if (unix_tot_inflight)
 582                 unix_gc();              /* Garbage collect fds */
 583 }
 584
 585 static void init_peercred(struct sock *sk)
 586 {
 587         put_pid(sk->sk_peer_pid);
 588         if (sk->sk_peer_cred)
 589                 put_cred(sk->sk_peer_cred);
 590         sk->sk_peer_pid  = get_pid(task_tgid(current));
 591         sk->sk_peer_cred = get_current_cred();
 592 }
 593
 594 static void copy_peercred(struct sock *sk, struct sock *peersk)
 595 {
 596         put_pid(sk->sk_peer_pid);
 597         if (sk->sk_peer_cred)
 598                 put_cred(sk->sk_peer_cred);
 599         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 600         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 601 }
 602
 603 static int unix_listen(struct socket *sock, int backlog)
 604 {
 605         int err;
 606         struct sock *sk = sock->sk;
 607         struct unix_sock *u = unix_sk(sk);
 608         struct pid *old_pid = NULL;
 609
 610         err = -EOPNOTSUPP;
 611         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 612                 goto out;       /* Only stream/seqpacket sockets accept */
 613         err = -EINVAL;
 614         if (!u->addr)
 615                 goto out;       /* No listens on an unbound socket */
 616         unix_state_lock(sk);
 617         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 618                 goto out_unlock;
 619         if (backlog > sk->sk_max_ack_backlog)
 620                 wake_up_interruptible_all(&u->peer_wait);
 621         sk->sk_max_ack_backlog  = backlog;
 622         sk->sk_state            = TCP_LISTEN;
 623         /* set credentials so connect can copy them */
 624         init_peercred(sk);
 625         err = 0;
 626
 627 out_unlock:
 628         unix_state_unlock(sk);
 629         put_pid(old_pid);
 630 out:
 631         return err;
 632 }
 633
 634 static int unix_release(struct socket *);
 635 static int unix_bind(struct socket *, struct sockaddr *, int);
 636 static int unix_stream_connect(struct socket *, struct sockaddr *,
 637                                int addr_len, int flags);
 638 static int unix_socketpair(struct socket *, struct socket *);
 639 static int unix_accept(struct socket *, struct socket *, int, bool);
 640 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 641 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 642 static unsigned int unix_dgram_poll(struct file *, struct socket *,
 643                                     poll_table *);
 644 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 645 static int unix_shutdown(struct socket *, int);
 646 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
 647 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
 648 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
 649                                     size_t size, int flags);
 650 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
 651                                        struct pipe_inode_info *, size_t size,
 652                                        unsigned int flags);
 653 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
 654 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
 655 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 656                               int, int);
 657 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
 658 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
 659                                   int);
 660
 661 static int unix_set_peek_off(struct sock *sk, int val)
 662 {
 663         struct unix_sock *u = unix_sk(sk);
 664
 665         if (mutex_lock_interruptible(&u->iolock))
 666                 return -EINTR;
 667
 668         sk->sk_peek_off = val;
 669         mutex_unlock(&u->iolock);
 670
 671         return 0;
 672 }
 673
 674
 675 static const struct proto_ops unix_stream_ops = {
 676         .family =       PF_UNIX,
 677         .owner =        THIS_MODULE,
 678         .release =      unix_release,
 679         .bind =         unix_bind,
 680         .connect =      unix_stream_connect,
 681         .socketpair =   unix_socketpair,
 682         .accept =       unix_accept,
 683         .getname =      unix_getname,
 684         .poll =         unix_poll,
 685         .ioctl =        unix_ioctl,
 686         .listen =       unix_listen,
 687         .shutdown =     unix_shutdown,
 688         .setsockopt =   sock_no_setsockopt,
 689         .getsockopt =   sock_no_getsockopt,
 690         .sendmsg =      unix_stream_sendmsg,
 691         .recvmsg =      unix_stream_recvmsg,
 692         .mmap =         sock_no_mmap,
 693         .sendpage =     unix_stream_sendpage,
 694         .splice_read =  unix_stream_splice_read,
 695         .set_peek_off = unix_set_peek_off,
 696 };
 697
 698 static const struct proto_ops unix_dgram_ops = {
 699         .family =       PF_UNIX,
 700         .owner =        THIS_MODULE,
 701         .release =      unix_release,
 702         .bind =         unix_bind,
 703         .connect =      unix_dgram_connect,
 704         .socketpair =   unix_socketpair,
 705         .accept =       sock_no_accept,
 706         .getname =      unix_getname,
 707         .poll =         unix_dgram_poll,
 708         .ioctl =        unix_ioctl,
 709         .listen =       sock_no_listen,
 710         .shutdown =     unix_shutdown,
 711         .setsockopt =   sock_no_setsockopt,
 712         .getsockopt =   sock_no_getsockopt,
 713         .sendmsg =      unix_dgram_sendmsg,
 714         .recvmsg =      unix_dgram_recvmsg,
 715         .mmap =         sock_no_mmap,
 716         .sendpage =     sock_no_sendpage,
 717         .set_peek_off = unix_set_peek_off,
 718 };
 719
 720 static const struct proto_ops unix_seqpacket_ops = {
 721         .family =       PF_UNIX,
 722         .owner =        THIS_MODULE,
 723         .release =      unix_release,
 724         .bind =         unix_bind,
 725         .connect =      unix_stream_connect,
 726         .socketpair =   unix_socketpair,
 727         .accept =       unix_accept,
 728         .getname =      unix_getname,
 729         .poll =         unix_dgram_poll,
 730         .ioctl =        unix_ioctl,
 731         .listen =       unix_listen,
 732         .shutdown =     unix_shutdown,
 733         .setsockopt =   sock_no_setsockopt,
 734         .getsockopt =   sock_no_getsockopt,
 735         .sendmsg =      unix_seqpacket_sendmsg,
 736         .recvmsg =      unix_seqpacket_recvmsg,
 737         .mmap =         sock_no_mmap,
 738         .sendpage =     sock_no_sendpage,
 739         .set_peek_off = unix_set_peek_off,
 740 };
 741
 742 static struct proto unix_proto = {
 743         .name                   = "UNIX",
 744         .owner                  = THIS_MODULE,
 745         .obj_size               = sizeof(struct unix_sock),
 746 };
 747
 748 /*
 749  * AF_UNIX sockets do not interact with hardware, hence they
 750  * dont trigger interrupts - so it's safe for them to have
 751  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 752  * this special lock-class by reinitializing the spinlock key:
 753  */
 754 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 755
 756 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 757 {
 758         struct sock *sk = NULL;
 759         struct unix_sock *u;
 760
 761         atomic_long_inc(&unix_nr_socks);
 762         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 763                 goto out;
 764
 765         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 766         if (!sk)
 767                 goto out;
 768
 769         sock_init_data(sock, sk);
 770         lockdep_set_class(&sk->sk_receive_queue.lock,
 771                                 &af_unix_sk_receive_queue_lock_key);
 772
 773         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
 774         sk->sk_write_space      = unix_write_space;
 775         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 776         sk->sk_destruct         = unix_sock_destructor;
 777         u         = unix_sk(sk);
 778         u->path.dentry = NULL;
 779         u->path.mnt = NULL;
 780         spin_lock_init(&u->lock);
 781         atomic_long_set(&u->inflight, 0);
 782         INIT_LIST_HEAD(&u->link);
 783         mutex_init(&u->iolock); /* single task reading lock */
 784         mutex_init(&u->bindlock); /* single task binding lock */
 785         init_waitqueue_head(&u->peer_wait);
 786         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 787         unix_insert_socket(unix_sockets_unbound(sk), sk);
 788 out:
 789         if (sk == NULL)
 790                 atomic_long_dec(&unix_nr_socks);
 791         else {
 792                 local_bh_disable();
 793                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 794                 local_bh_enable();
 795         }
 796         return sk;
 797 }
 798
 799 static int unix_create(struct net *net, struct socket *sock, int protocol,
 800                        int kern)
 801 {
 802         if (protocol && protocol != PF_UNIX)
 803                 return -EPROTONOSUPPORT;
 804
 805         sock->state = SS_UNCONNECTED;
 806
 807         switch (sock->type) {
 808         case SOCK_STREAM:
 809                 sock->ops = &unix_stream_ops;
 810                 break;
 811                 /*
 812                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 813                  *      nothing uses it.
 814                  */
 815         case SOCK_RAW:
 816                 sock->type = SOCK_DGRAM;
 817         case SOCK_DGRAM:
 818                 sock->ops = &unix_dgram_ops;
 819                 break;
 820         case SOCK_SEQPACKET:
 821                 sock->ops = &unix_seqpacket_ops;
 822                 break;
 823         default:
 824                 return -ESOCKTNOSUPPORT;
 825         }
 826
 827         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 828 }
 829
 830 static int unix_release(struct socket *sock)
 831 {
 832         struct sock *sk = sock->sk;
 833
 834         if (!sk)
 835                 return 0;
 836
 837         unix_release_sock(sk, 0);
 838         sock->sk = NULL;
 839
 840         return 0;
 841 }
 842
 843 static int unix_autobind(struct socket *sock)
 844 {
 845         struct sock *sk = sock->sk;
 846         struct net *net = sock_net(sk);
 847         struct unix_sock *u = unix_sk(sk);
 848         static u32 ordernum = 1;
 849         struct unix_address *addr;
 850         int err;
 851         unsigned int retries = 0;
 852
 853         err = mutex_lock_interruptible(&u->bindlock);
 854         if (err)
 855                 return err;
 856
 857         err = 0;
 858         if (u->addr)
 859                 goto out;
 860
 861         err = -ENOMEM;
 862         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 863         if (!addr)
 864                 goto out;
 865
 866         addr->name->sun_family = AF_UNIX;
 867         refcount_set(&addr->refcnt, 1);
 868
 869 retry:
 870         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 871         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 872
 873         spin_lock(&unix_table_lock);
 874         ordernum = (ordernum+1)&0xFFFFF;
 875
 876         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 877                                       addr->hash)) {
 878                 spin_unlock(&unix_table_lock);
 879                 /*
 880                  * __unix_find_socket_byname() may take long time if many names
 881                  * are already in use.
 882                  */
 883                 cond_resched();
 884                 /* Give up if all names seems to be in use. */
 885                 if (retries++ == 0xFFFFF) {
 886                         err = -ENOSPC;
 887                         kfree(addr);
 888                         goto out;
 889                 }
 890                 goto retry;
 891         }
 892         addr->hash ^= sk->sk_type;
 893
 894         __unix_remove_socket(sk);
 895         u->addr = addr;
 896         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 897         spin_unlock(&unix_table_lock);
 898         err = 0;
 899
 900 out:    mutex_unlock(&u->bindlock);
 901         return err;
 902 }
 903
 /*
  * Look up the socket addressed by @sunname.
  *
  * If sun_path[0] is NUL the lookup goes through
  * unix_find_socket_byname(); otherwise the path is resolved with
  * kern_path(), checked for write permission and for being a socket
  * inode, and looked up with unix_find_socket_byinode().
  *
  * Returns the socket found, or NULL with *@error set to the reason
  * (-ECONNREFUSED, -EPROTOTYPE, or the error from kern_path() /
  * inode_permission()).  *@error is not written on success.  Callers in
  * this file release the returned socket with sock_put().
  */
 static struct sock *unix_find_other(struct net *net,
                                     struct sockaddr_un *sunname, int len,
                                     int type, unsigned int hash, int *error)
 {
         struct sock *peer;
         struct inode *inode;
         struct path path;
         int err = 0;

         if (!sunname->sun_path[0]) {
                 /* Name-based lookup. */
                 peer = unix_find_socket_byname(net, sunname, len, type, hash);
                 if (!peer) {
                         err = -ECONNREFUSED;
                         goto fail;
                 }
                 if (unix_sk(peer)->path.dentry)
                         touch_atime(&unix_sk(peer)->path);
                 return peer;
         }

         /* Filesystem-based lookup. */
         err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
         if (err)
                 goto fail;

         inode = d_backing_inode(path.dentry);
         err = inode_permission(inode, MAY_WRITE);
         if (err)
                 goto put_fail;

         err = -ECONNREFUSED;
         if (!S_ISSOCK(inode->i_mode))
                 goto put_fail;

         peer = unix_find_socket_byinode(inode);
         if (!peer)
                 goto put_fail;

         if (peer->sk_type != type) {
                 /* Wrong socket type: no atime update, drop everything. */
                 path_put(&path);
                 sock_put(peer);
                 err = -EPROTOTYPE;
                 goto fail;
         }

         touch_atime(&path);
         path_put(&path);
         return peer;

 put_fail:
         path_put(&path);
 fail:
         *error = err;
         return NULL;
 }
 958
 959 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 960 {
 961         struct dentry *dentry;
 962         struct path path;
 963         int err = 0;
 964         /*
 965          * Get the parent directory, calculate the hash for last
 966          * component.
 967          */
 968         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 969         err = PTR_ERR(dentry);
 970         if (IS_ERR(dentry))
 971                 return err;
 972
 973         /*
 974          * All right, let's create it.
 975          */
 976         err = security_path_mknod(&path, dentry, mode, 0);
 977         if (!err) {
 978                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 979                 if (!err) {
 980                         res->mnt = mntget(path.mnt);
 981                         res->dentry = dget(dentry);
 982                 }
 983         }
 984         done_path_create(&path, dentry);
 985         return err;
 986 }
 987
 988 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 989 {
 990         struct sock *sk = sock->sk;
 991         struct net *net = sock_net(sk);
 992         struct unix_sock *u = unix_sk(sk);
 993         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
 994         char *sun_path = sunaddr->sun_path;
 995         int err;
 996         unsigned int hash;
 997         struct unix_address *addr;
 998         struct hlist_head *list;
 999         struct path path = { };
1000
1001         err = -EINVAL;
1002         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003             sunaddr->sun_family != AF_UNIX)
1004                 goto out;
1005
1006         if (addr_len == sizeof(short)) {
1007                 err = unix_autobind(sock);
1008                 goto out;
1009         }
1010
1011         err = unix_mkname(sunaddr, addr_len, &hash);
1012         if (err < 0)
1013                 goto out;
1014         addr_len = err;
1015
1016         if (sun_path[0]) {
1017                 umode_t mode = S_IFSOCK |
1018                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1019                 err = unix_mknod(sun_path, mode, &path);
1020                 if (err) {
1021                         if (err == -EEXIST)
1022                                 err = -EADDRINUSE;
1023                         goto out;
1024                 }
1025         }
1026
1027         err = mutex_lock_interruptible(&u->bindlock);
1028         if (err)
1029                 goto out_put;
1030
1031         err = -EINVAL;
1032         if (u->addr)
1033                 goto out_up;
1034
1035         err = -ENOMEM;
1036         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1037         if (!addr)
1038                 goto out_up;
1039
1040         memcpy(addr->name, sunaddr, addr_len);
1041         addr->len = addr_len;
1042         addr->hash = hash ^ sk->sk_type;
1043         refcount_set(&addr->refcnt, 1);
1044
1045         if (sun_path[0]) {
1046                 addr->hash = UNIX_HASH_SIZE;
1047                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048                 spin_lock(&unix_table_lock);
1049                 u->path = path;
1050                 list = &unix_socket_table[hash];
1051         } else {
1052                 spin_lock(&unix_table_lock);
1053                 err = -EADDRINUSE;
1054                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055                                               sk->sk_type, hash)) {
1056                         unix_release_addr(addr);
1057                         goto out_unlock;
1058                 }
1059
1060                 list = &unix_socket_table[addr->hash];
1061         }
1062
1063         err = 0;
1064         __unix_remove_socket(sk);
1065         u->addr = addr;
1066         __unix_insert_socket(list, sk);
1067
1068 out_unlock:
1069         spin_unlock(&unix_table_lock);
1070 out_up:
1071         mutex_unlock(&u->bindlock);
1072 out_put:
1073         if (err)
1074                 path_put(&path);
1075 out:
1076         return err;
1077 }
1078
/*
 * Take the state lock of @sk1 and, when @sk2 is a different non-NULL
 * socket, of @sk2 as well.  With two sockets the one at the lower address
 * is locked first and the other is taken with unix_state_lock_nested().
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        if (unlikely(sk1 == sk2) || !sk2) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1093
/*
 * Counterpart of unix_state_double_lock(): release @sk1's state lock and,
 * when @sk2 is a different non-NULL socket, @sk2's state lock after it.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (unlikely(sk1 == sk2) || !sk2)
                return;

        unix_state_unlock(sk2);
}
1103
1104 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105                               int alen, int flags)
1106 {
1107         struct sock *sk = sock->sk;
1108         struct net *net = sock_net(sk);
1109         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1110         struct sock *other;
1111         unsigned int hash;
1112         int err;
1113
1114         err = -EINVAL;
1115         if (alen < offsetofend(struct sockaddr, sa_family))
1116                 goto out;
1117
1118         if (addr->sa_family != AF_UNSPEC) {
1119                 err = unix_mkname(sunaddr, alen, &hash);
1120                 if (err < 0)
1121                         goto out;
1122                 alen = err;
1123
1124                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1126                         goto out;
1127
1128 restart:
1129                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1130                 if (!other)
1131                         goto out;
1132
1133                 unix_state_double_lock(sk, other);
1134
1135                 /* Apparently VFS overslept socket death. Retry. */
1136                 if (sock_flag(other, SOCK_DEAD)) {
1137                         unix_state_double_unlock(sk, other);
1138                         sock_put(other);
1139                         goto restart;
1140                 }
1141
1142                 err = -EPERM;
1143                 if (!unix_may_send(sk, other))
1144                         goto out_unlock;
1145
1146                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1147                 if (err)
1148                         goto out_unlock;
1149
1150         } else {
1151                 /*
1152                  *      1003.1g breaking connected state with AF_UNSPEC
1153                  */
1154                 other = NULL;
1155                 unix_state_double_lock(sk, other);
1156         }
1157
1158         /*
1159          * If it was connected, reconnect.
1160          */
1161         if (unix_peer(sk)) {
1162                 struct sock *old_peer = unix_peer(sk);
1163                 unix_peer(sk) = other;
1164                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1165
1166                 unix_state_double_unlock(sk, other);
1167
1168                 if (other != old_peer)
1169                         unix_dgram_disconnected(sk, old_peer);
1170                 sock_put(old_peer);
1171         } else {
1172                 unix_peer(sk) = other;
1173                 unix_state_double_unlock(sk, other);
1174         }
1175         return 0;
1176
1177 out_unlock:
1178         unix_state_double_unlock(sk, other);
1179         sock_put(other);
1180 out:
1181         return err;
1182 }
1183
1184 static long unix_wait_for_peer(struct sock *other, long timeo)
1185 {
1186         struct unix_sock *u = unix_sk(other);
1187         int sched;
1188         DEFINE_WAIT(wait);
1189
1190         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1191
1192         sched = !sock_flag(other, SOCK_DEAD) &&
1193                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194                 unix_recvq_full(other);
1195
1196         unix_state_unlock(other);
1197
1198         if (sched)
1199                 timeo = schedule_timeout(timeo);
1200
1201         finish_wait(&u->peer_wait, &wait);
1202         return timeo;
1203 }
1204
/*
 * Connect @sock to the listening socket named by @uaddr.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  Once the
 * listener has been found and both state locks are held, newsk is wired
 * up as the peer of @sock, and the skb is queued on the listener's
 * receive queue.
 *
 * Returns 0 on success.  Errors: the result of unix_mkname(),
 * unix_autobind(), unix_find_other() or security_unix_stream_connect();
 * -ENOMEM; -ECONNREFUSED when the target is not in TCP_LISTEN or has shut
 * down receiving; -EAGAIN when its queue is full and the send timeout is
 * zero; sock_intr_errno() when a signal is pending after waiting;
 * -EISCONN when @sock is already TCP_ESTABLISHED; -EINVAL for any other
 * state than TCP_CLOSE.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk);
        struct unix_sock *newu;
        struct unix_sock *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        long timeo;
        int state;
        int err;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
                err = unix_autobind(sock);
                if (err != 0)
                        goto out;
        }

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /*
         * First of all allocate resources.  If we did it after the state
         * is locked, we would have to recheck everything again anyway.
         */
        err = -ENOMEM;

        /* The sock that will represent the completed connection. */
        newsk = unix_create1(net, NULL, 0);
        if (newsk == NULL)
                goto out;

        /* The skb that is queued on the listening sock. */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /* Find the listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer. */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN ||
            (other->sk_shutdown & RCV_SHUTDOWN))
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* Releases other's state lock before returning. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /*
         * Latch our state.
         *
         * This is a tricky place.  We need to grab our own state lock
         * without dropping the peer's, which is dangerous because a
         * deadlock is possible.  Connect-to-self and simultaneous connect
         * attempts are eliminated by checking the socket state: other is
         * TCP_LISTEN, and if sk were TCP_LISTEN we bail out here, before
         * trying to grab the lock.
         *
         * The state has to be rechecked once the lock is held.
         */
        state = sk->sk_state;

        if (state == TCP_ESTABLISHED) {
                /* Socket is already connected. */
                err = -EISCONN;
                goto out_unlock;
        }
        if (state != TCP_CLOSE) {
                err = -EINVAL;
                goto out_unlock;
        }
        /* TCP_CLOSE: ok, continue with connect. */

        unix_state_lock_nested(sk);

        if (sk->sk_state != state) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open!  Quickly set all the necessary fields... */

        sock_hold(sk);
        unix_peer(newsk) = sk;
        newsk->sk_state = TCP_ESTABLISHED;
        newsk->sk_type = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* Copy address information from the listening to the new sock. */
        if (otheru->addr) {
                refcount_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials. */
        copy_peercred(sk, other);

        sock->state = SS_CONNECTED;
        sk->sk_state = TCP_ESTABLISHED;
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk) = newsk;

        unix_state_unlock(sk);

        /* Queue the skb on the listening sock and notify it. */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1379
1380 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1381 {
1382         struct sock *ska = socka->sk, *skb = sockb->sk;
1383
1384         /* Join our sockets back to back */
1385         sock_hold(ska);
1386         sock_hold(skb);
1387         unix_peer(ska) = skb;
1388         unix_peer(skb) = ska;
1389         init_peercred(ska);
1390         init_peercred(skb);
1391
1392         if (ska->sk_type != SOCK_DGRAM) {
1393                 ska->sk_state = TCP_ESTABLISHED;
1394                 skb->sk_state = TCP_ESTABLISHED;
1395                 socka->state  = SS_CONNECTED;
1396                 sockb->state  = SS_CONNECTED;
1397         }
1398         return 0;
1399 }
1400
1401 static void unix_sock_inherit_flags(const struct socket *old,
1402                                     struct socket *new)
1403 {
1404         if (test_bit(SOCK_PASSCRED, &old->flags))
1405                 set_bit(SOCK_PASSCRED, &new->flags);
1406         if (test_bit(SOCK_PASSSEC, &old->flags))
1407                 set_bit(SOCK_PASSSEC, &new->flags);
1408 }
1409
1410 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1411                        bool kern)
1412 {
1413         struct sock *sk = sock->sk;
1414         struct sock *tsk;
1415         struct sk_buff *skb;
1416         int err;
1417
1418         err = -EOPNOTSUPP;
1419         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1420                 goto out;
1421
1422         err = -EINVAL;
1423         if (sk->sk_state != TCP_LISTEN)
1424                 goto out;
1425
1426         /* If socket state is TCP_LISTEN it cannot change (for now...),
1427          * so that no locks are necessary.
1428          */
1429
1430         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1431         if (!skb) {
1432                 /* This means receive shutdown. */
1433                 if (err == 0)
1434                         err = -EINVAL;
1435                 goto out;
1436         }
1437
1438         tsk = skb->sk;
1439         skb_free_datagram(sk, skb);
1440         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1441
1442         /* attach accepted sock to socket */
1443         unix_state_lock(tsk);
1444         newsock->state = SS_CONNECTED;
1445         unix_sock_inherit_flags(sock, newsock);
1446         sock_graft(tsk, newsock);
1447         unix_state_unlock(tsk);
1448         return 0;
1449
1450 out:
1451         return err;
1452 }
1453
1454
1455 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1456 {
1457         struct sock *sk = sock->sk;
1458         struct unix_sock *u;
1459         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1460         int err = 0;
1461
1462         if (peer) {
1463                 sk = unix_peer_get(sk);
1464
1465                 err = -ENOTCONN;
1466                 if (!sk)
1467                         goto out;
1468                 err = 0;
1469         } else {
1470                 sock_hold(sk);
1471         }
1472
1473         u = unix_sk(sk);
1474         unix_state_lock(sk);
1475         if (!u->addr) {
1476                 sunaddr->sun_family = AF_UNIX;
1477                 sunaddr->sun_path[0] = 0;
1478                 *uaddr_len = sizeof(short);
1479         } else {
1480                 struct unix_address *addr = u->addr;
1481
1482                 *uaddr_len = addr->len;
1483                 memcpy(sunaddr, addr->name, *uaddr_len);
1484         }
1485         unix_state_unlock(sk);
1486         sock_put(sk);
1487 out:
1488         return err;
1489 }
1490
1491 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1492 {
1493         int i;
1494
1495         scm->fp = UNIXCB(skb).fp;
1496         UNIXCB(skb).fp = NULL;
1497
1498         for (i = scm->fp->count-1; i >= 0; i--)
1499                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1500 }
1501
1502 static void unix_destruct_scm(struct sk_buff *skb)
1503 {
1504         struct scm_cookie scm;
1505         memset(&scm, 0, sizeof(scm));
1506         scm.pid  = UNIXCB(skb).pid;
1507         if (UNIXCB(skb).fp)
1508                 unix_detach_fds(&scm, skb);
1509
1510         /* Alas, it calls VFS */
1511         /* So fscking what? fput() had been SMP-safe since the last Summer */
1512         scm_destroy(&scm);
1513         sock_wfree(skb);
1514 }
1515
1516 /*
1517  * The "user->unix_inflight" variable is protected by the garbage
1518  * collection lock, and we just read it locklessly here. If you go
1519  * over the limit, there might be a tiny race in actually noticing
1520  * it across threads. Tough.
1521  */
1522 static inline bool too_many_unix_fds(struct task_struct *p)
1523 {
1524         struct user_struct *user = current_user();
1525
1526         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1527                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1528         return false;
1529 }
1530
1531 #define MAX_RECURSION_LEVEL 4
1532
1533 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1534 {
1535         int i;
1536         unsigned char max_level = 0;
1537
1538         if (too_many_unix_fds(current))
1539                 return -ETOOMANYREFS;
1540
1541         for (i = scm->fp->count - 1; i >= 0; i--) {
1542                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1543
1544                 if (sk)
1545                         max_level = max(max_level,
1546                                         unix_sk(sk)->recursion_level);
1547         }
1548         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1549                 return -ETOOMANYREFS;
1550
1551         /*
1552          * Need to duplicate file references for the sake of garbage
1553          * collection.  Otherwise a socket in the fps might become a
1554          * candidate for GC while the skb is not yet queued.
1555          */
1556         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1557         if (!UNIXCB(skb).fp)
1558                 return -ENOMEM;
1559
1560         for (i = scm->fp->count - 1; i >= 0; i--)
1561                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1562         return max_level;
1563 }
1564
1565 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1566 {
1567         int err = 0;
1568
1569         UNIXCB(skb).pid  = get_pid(scm->pid);
1570         UNIXCB(skb).uid = scm->creds.uid;
1571         UNIXCB(skb).gid = scm->creds.gid;
1572         UNIXCB(skb).fp = NULL;
1573         unix_get_secdata(scm, skb);
1574         if (scm->fp && send_fds)
1575                 err = unix_attach_fds(scm, skb);
1576
1577         skb->destructor = unix_destruct_scm;
1578         return err;
1579 }
1580
1581 static bool unix_passcred_enabled(const struct socket *sock,
1582                                   const struct sock *other)
1583 {
1584         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1585                !other->sk_socket ||
1586                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1587 }
1588
1589 /*
1590  * Some apps rely on write() giving SCM_CREDENTIALS
1591  * We include credentials if source or destination socket
1592  * asserted SOCK_PASSCRED.
1593  */
1594 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1595                             const struct sock *other)
1596 {
1597         if (UNIXCB(skb).pid)
1598                 return;
1599         if (unix_passcred_enabled(sock, other)) {
1600                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1601                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1602         }
1603 }
1604
1605 static int maybe_init_creds(struct scm_cookie *scm,
1606                             struct socket *socket,
1607                             const struct sock *other)
1608 {
1609         int err;
1610         struct msghdr msg = { .msg_controllen = 0 };
1611
1612         err = scm_send(socket, &msg, scm, false);
1613         if (err)
1614                 return err;
1615
1616         if (unix_passcred_enabled(socket, other)) {
1617                 scm->pid = get_pid(task_tgid(current));
1618                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1619         }
1620         return err;
1621 }
1622
1623 static bool unix_skb_scm_eq(struct sk_buff *skb,
1624                             struct scm_cookie *scm)
1625 {
1626         const struct unix_skb_parms *u = &UNIXCB(skb);
1627
1628         return u->pid == scm->pid &&
1629                uid_eq(u->uid, scm->creds.uid) &&
1630                gid_eq(u->gid, scm->creds.gid) &&
1631                unix_secdata_eq(scm, skb);
1632 }
1633
1634 /*
1635  *      Send AF_UNIX data.
1636  */
1637
1638 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1639                               size_t len)
1640 {
1641         struct sock *sk = sock->sk;
1642         struct net *net = sock_net(sk);
1643         struct unix_sock *u = unix_sk(sk);
1644         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1645         struct sock *other = NULL;
1646         int namelen = 0; /* fake GCC */
1647         int err;
1648         unsigned int hash;
1649         struct sk_buff *skb;
1650         long timeo;
1651         struct scm_cookie scm;
1652         int max_level;
1653         int data_len = 0;
1654         int sk_locked;
1655
1656         wait_for_unix_gc();
1657         err = scm_send(sock, msg, &scm, false);
1658         if (err < 0)
1659                 return err;
1660
1661         err = -EOPNOTSUPP;
1662         if (msg->msg_flags&MSG_OOB)
1663                 goto out;
1664
1665         if (msg->msg_namelen) {
1666                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1667                 if (err < 0)
1668                         goto out;
1669                 namelen = err;
1670         } else {
1671                 sunaddr = NULL;
1672                 err = -ENOTCONN;
1673                 other = unix_peer_get(sk);
1674                 if (!other)
1675                         goto out;
1676         }
1677
1678         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1679             && (err = unix_autobind(sock)) != 0)
1680                 goto out;
1681
1682         err = -EMSGSIZE;
1683         if (len > sk->sk_sndbuf - 32)
1684                 goto out;
1685
1686         if (len > SKB_MAX_ALLOC) {
1687                 data_len = min_t(size_t,
1688                                  len - SKB_MAX_ALLOC,
1689                                  MAX_SKB_FRAGS * PAGE_SIZE);
1690                 data_len = PAGE_ALIGN(data_len);
1691
1692                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1693         }
1694
1695         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1696                                    msg->msg_flags & MSG_DONTWAIT, &err,
1697                                    PAGE_ALLOC_COSTLY_ORDER);
1698         if (skb == NULL)
1699                 goto out;
1700
1701         err = unix_scm_to_skb(&scm, skb, true);
1702         if (err < 0)
1703                 goto out_free;
1704         max_level = err + 1;
1705
1706         skb_put(skb, len - data_len);
1707         skb->data_len = data_len;
1708         skb->len = len;
1709         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1710         if (err)
1711                 goto out_free;
1712
1713         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1714
1715 restart:
1716         if (!other) {
1717                 err = -ECONNRESET;
1718                 if (sunaddr == NULL)
1719                         goto out_free;
1720
1721                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1722                                         hash, &err);
1723                 if (other == NULL)
1724                         goto out_free;
1725         }
1726
1727         if (sk_filter(other, skb) < 0) {
1728                 /* Toss the packet but do not return any error to the sender */
1729                 err = len;
1730                 goto out_free;
1731         }
1732
1733         sk_locked = 0;
1734         unix_state_lock(other);
1735 restart_locked:
1736         err = -EPERM;
1737         if (!unix_may_send(sk, other))
1738                 goto out_unlock;
1739
1740         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1741                 /*
1742                  *      Check with 1003.1g - what should
1743                  *      datagram error
1744                  */
1745                 unix_state_unlock(other);
1746                 sock_put(other);
1747
1748                 if (!sk_locked)
1749                         unix_state_lock(sk);
1750
1751                 err = 0;
1752                 if (unix_peer(sk) == other) {
1753                         unix_peer(sk) = NULL;
1754                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1755
1756                         unix_state_unlock(sk);
1757
1758                         unix_dgram_disconnected(sk, other);
1759                         sock_put(other);
1760                         err = -ECONNREFUSED;
1761                 } else {
1762                         unix_state_unlock(sk);
1763                 }
1764
1765                 other = NULL;
1766                 if (err)
1767                         goto out_free;
1768                 goto restart;
1769         }
1770
1771         err = -EPIPE;
1772         if (other->sk_shutdown & RCV_SHUTDOWN)
1773                 goto out_unlock;
1774
1775         if (sk->sk_type != SOCK_SEQPACKET) {
1776                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1777                 if (err)
1778                         goto out_unlock;
1779         }
1780
1781         /* other == sk && unix_peer(other) != sk if
1782          * - unix_peer(sk) == NULL, destination address bound to sk
1783          * - unix_peer(sk) == sk by time of get but disconnected before lock
1784          */
1785         if (other != sk &&
1786             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1787                 if (timeo) {
1788                         timeo = unix_wait_for_peer(other, timeo);
1789
1790                         err = sock_intr_errno(timeo);
1791                         if (signal_pending(current))
1792                                 goto out_free;
1793
1794                         goto restart;
1795                 }
1796
1797                 if (!sk_locked) {
1798                         unix_state_unlock(other);
1799                         unix_state_double_lock(sk, other);
1800                 }
1801
1802                 if (unix_peer(sk) != other ||
1803                     unix_dgram_peer_wake_me(sk, other)) {
1804                         err = -EAGAIN;
1805                         sk_locked = 1;
1806                         goto out_unlock;
1807                 }
1808
1809                 if (!sk_locked) {
1810                         sk_locked = 1;
1811                         goto restart_locked;
1812                 }
1813         }
1814
1815         if (unlikely(sk_locked))
1816                 unix_state_unlock(sk);
1817
1818         if (sock_flag(other, SOCK_RCVTSTAMP))
1819                 __net_timestamp(skb);
1820         maybe_add_creds(skb, sock, other);
1821         skb_queue_tail(&other->sk_receive_queue, skb);
1822         if (max_level > unix_sk(other)->recursion_level)
1823                 unix_sk(other)->recursion_level = max_level;
1824         unix_state_unlock(other);
1825         other->sk_data_ready(other);
1826         sock_put(other);
1827         scm_destroy(&scm);
1828         return len;
1829
1830 out_unlock:
1831         if (sk_locked)
1832                 unix_state_unlock(sk);
1833         unix_state_unlock(other);
1834 out_free:
1835         kfree_skb(skb);
1836 out:
1837         if (other)
1838                 sock_put(other);
1839         scm_destroy(&scm);
1840         return err;
1841 }
1842
1843 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1844  * bytes, and a minimum of a full page.
      *
      * unix_stream_sendmsg() adds this to SKB_MAX_HEAD(0) to cap the size of
      * each skb it builds, and passes get_order() of it as the last argument
      * of sock_alloc_send_pskb().
1845  */
1846 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1847
     /*
      * sendmsg() for connected SOCK_STREAM sockets.
      *
      * The payload is cut into skbs that are appended one by one to the
      * peer's receive queue.  Each skb is limited to half of sk_sndbuf minus
      * 64 bytes and to SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ; the part that
      * exceeds SKB_MAX_HEAD(0) is requested as paged data (data_len).
      *
      * Errors reported when nothing has been queued yet:
      *   -EOPNOTSUPP  MSG_OOB was requested, or an address was supplied on
      *                a socket that is not TCP_ESTABLISHED
      *   -EISCONN     an address was supplied on an established socket
      *   -ENOTCONN    the socket has no peer
      *   -EPIPE       this side is shut down for sending, or the peer is
      *                SOCK_DEAD or shut down for receiving; SIGPIPE is sent
      *                as well unless MSG_NOSIGNAL is set
      * Failures of scm_send(), sock_alloc_send_pskb(), unix_scm_to_skb() and
      * skb_copy_datagram_from_iter() are passed through.
      *
      * Returns the number of bytes queued when that is non-zero (even if a
      * later chunk failed), otherwise the error code.
      */
1848 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1849                                size_t len)
1850 {
1851         struct sock *sk = sock->sk;
1852         struct sock *other = NULL;
1853         int err, size;
1854         struct sk_buff *skb;
1855         int sent = 0;
1856         struct scm_cookie scm;
1857         bool fds_sent = false;
1858         int max_level;
1859         int data_len;
1860
1861         wait_for_unix_gc();
1862         err = scm_send(sock, msg, &scm, false);
1863         if (err < 0)
1864                 return err;
1865
1866         err = -EOPNOTSUPP;
1867         if (msg->msg_flags&MSG_OOB)
1868                 goto out_err;
1869
             /* A per-message destination address is never accepted here. */
1870         if (msg->msg_namelen) {
1871                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1872                 goto out_err;
1873         } else {
1874                 err = -ENOTCONN;
1875                 other = unix_peer(sk);
1876                 if (!other)
1877                         goto out_err;
1878         }
1879
1880         if (sk->sk_shutdown & SEND_SHUTDOWN)
1881                 goto pipe_err;
1882
1883         while (sent < len) {
1884                 size = len - sent;
1885
1886                 /* Keep two messages in the pipe so it schedules better */
1887                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1888
1889                 /* allow fallback to order-0 allocations */
1890                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1891
                     /* Bytes beyond SKB_MAX_HEAD(0) go into paged data. */
1892                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1893
                     /* Page-align the paged part, but never beyond size. */
1894                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1895
1896                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1897                                            msg->msg_flags & MSG_DONTWAIT, &err,
1898                                            get_order(UNIX_SKB_FRAGS_SZ));
1899                 if (!skb)
1900                         goto out_err;
1901
1902                 /* Only send the fds in the first buffer */
1903                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1904                 if (err < 0) {
1905                         kfree_skb(skb);
1906                         goto out_err;
1907                 }
1908                 max_level = err + 1;
1909                 fds_sent = true;
1910
1911                 skb_put(skb, size - data_len);
1912                 skb->data_len = data_len;
1913                 skb->len = size;
1914                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1915                 if (err) {
1916                         kfree_skb(skb);
1917                         goto out_err;
1918                 }
1919
1920                 unix_state_lock(other);
1921
                     /* Re-checked for every chunk with the peer's state lock held. */
1922                 if (sock_flag(other, SOCK_DEAD) ||
1923                     (other->sk_shutdown & RCV_SHUTDOWN))
1924                         goto pipe_err_free;
1925
1926                 maybe_add_creds(skb, sock, other);
1927                 skb_queue_tail(&other->sk_receive_queue, skb);
1928                 if (max_level > unix_sk(other)->recursion_level)
1929                         unix_sk(other)->recursion_level = max_level;
1930                 unix_state_unlock(other);
1931                 other->sk_data_ready(other);
1932                 sent += size;
1933         }
1934
1935         scm_destroy(&scm);
1936
1937         return sent;
1938
1939 pipe_err_free:
1940         unix_state_unlock(other);
1941         kfree_skb(skb);
1942 pipe_err:
             /* SIGPIPE is only raised when no byte of this call was queued. */
1943         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1944                 send_sig(SIGPIPE, current, 0);
1945         err = -EPIPE;
1946 out_err:
1947         scm_destroy(&scm);
             /* A partial send is reported as success; err only if sent == 0. */
1948         return sent ? : err;
1949 }
1950
     /*
      * sendpage() for connected SOCK_STREAM sockets: attach @size bytes of
      * @page, starting at @offset, to the peer's receive queue as a page
      * fragment.
      *
      * The fragment is appended to the skb at the tail of the peer's queue
      * when unix_skb_scm_eq() accepts that skb for this sender's scm data.
      * Otherwise (empty queue, mismatch, or skb_append_pagefrags() failing)
      * both locks are dropped, an empty skb is allocated and the attempt is
      * repeated from the iolock acquisition.  'tail' remembers the queue tail
      * seen before the locks were dropped; if that skb is still the tail
      * afterwards, the freshly allocated skb is used.
      *
      * Returns @size on success.  Errors: -EOPNOTSUPP for MSG_OOB, -ENOTCONN
      * when there is no peer or the socket is not TCP_ESTABLISHED, -EAGAIN or
      * -ERESTARTSYS when taking the peer's iolock is interrupted, -EPIPE
      * (plus SIGPIPE unless MSG_NOSIGNAL) when this side is shut down for
      * sending or the peer is SOCK_DEAD or shut down for receiving, or
      * whatever sock_alloc_send_pskb(), maybe_init_creds() or
      * unix_scm_to_skb() report.
      */
1951 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1952                                     int offset, size_t size, int flags)
1953 {
1954         int err;
1955         bool send_sigpipe = false;
1956         bool init_scm = true;
1957         struct scm_cookie scm;
1958         struct sock *other, *sk = socket->sk;
1959         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1960
1961         if (flags & MSG_OOB)
1962                 return -EOPNOTSUPP;
1963
1964         other = unix_peer(sk);
1965         if (!other || sk->sk_state != TCP_ESTABLISHED)
1966                 return -ENOTCONN;
1967
             /* Never entered by falling through: the body is reached only via
              * 'goto alloc_skb', which is taken with the peer's state lock and
              * iolock held.  Both are released before allocating.
              */
1968         if (false) {
1969 alloc_skb:
1970                 unix_state_unlock(other);
1971                 mutex_unlock(&unix_sk(other)->iolock);
1972                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1973                                               &err, 0);
1974                 if (!newskb)
1975                         goto err;
1976         }
1977
1978         /* we must acquire iolock as we modify already present
1979          * skbs in the sk_receive_queue and mess with skb->len
1980          */
1981         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1982         if (err) {
1983                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1984                 goto err;
1985         }
1986
1987         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1988                 err = -EPIPE;
1989                 send_sigpipe = true;
1990                 goto err_unlock;
1991         }
1992
1993         unix_state_lock(other);
1994
1995         if (sock_flag(other, SOCK_DEAD) ||
1996             other->sk_shutdown & RCV_SHUTDOWN) {
1997                 err = -EPIPE;
1998                 send_sigpipe = true;
1999                 goto err_state_unlock;
2000         }
2001
             /* Done once per call, even if the alloc_skb path loops back. */
2002         if (init_scm) {
2003                 err = maybe_init_creds(&scm, socket, other);
2004                 if (err)
2005                         goto err_state_unlock;
2006                 init_scm = false;
2007         }
2008
             /* Pick the skb to extend: the current queue tail, or newskb. */
2009         skb = skb_peek_tail(&other->sk_receive_queue);
2010         if (tail && tail == skb) {
2011                 skb = newskb;
2012         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2013                 if (newskb) {
2014                         skb = newskb;
2015                 } else {
2016                         tail = skb;
2017                         goto alloc_skb;
2018                 }
2019         } else if (newskb) {
2020                 /* this is fast path, we don't necessarily need to
2021                  * call to kfree_skb even though with newskb == NULL
2022                  * this - does no harm
2023                  */
2024                 consume_skb(newskb);
2025                 newskb = NULL;
2026         }
2027
2028         if (skb_append_pagefrags(skb, page, offset, size)) {
2029                 tail = skb;
2030                 goto alloc_skb;
2031         }
2032
             /* Account the appended bytes in the skb and against the sender. */
2033         skb->len += size;
2034         skb->data_len += size;
2035         skb->truesize += size;
2036         refcount_add(size, &sk->sk_wmem_alloc);
2037
             /* A newly allocated skb still has to be set up and queued. */
2038         if (newskb) {
2039                 err = unix_scm_to_skb(&scm, skb, false);
2040                 if (err)
2041                         goto err_state_unlock;
2042                 spin_lock(&other->sk_receive_queue.lock);
2043                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2044                 spin_unlock(&other->sk_receive_queue.lock);
2045         }
2046
2047         unix_state_unlock(other);
2048         mutex_unlock(&unix_sk(other)->iolock);
2049
2050         other->sk_data_ready(other);
2051         scm_destroy(&scm);
2052         return size;
2053
2054 err_state_unlock:
2055         unix_state_unlock(other);
2056 err_unlock:
2057         mutex_unlock(&unix_sk(other)->iolock);
2058 err:
2059         kfree_skb(newskb);
2060         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2061                 send_sig(SIGPIPE, current, 0);
             /* scm is only torn down if maybe_init_creds() succeeded. */
2062         if (!init_scm)
2063                 scm_destroy(&scm);
2064         return err;
2065 }
2066
     /*
      * sendmsg() for SOCK_SEQPACKET sockets.
      *
      * A pending socket error is reported first; after that the socket must
      * be in TCP_ESTABLISHED state or -ENOTCONN is returned.  Any address
      * length supplied by the caller is cleared before the message is handed
      * to unix_dgram_sendmsg(), whose result is returned unchanged.
      */
     static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
                                       size_t len)
     {
             struct sock *sk = sock->sk;
             int pending = sock_error(sk);

             if (pending)
                     return pending;
             if (sk->sk_state != TCP_ESTABLISHED)
                     return -ENOTCONN;

             /* Discard any caller-supplied destination address length. */
             if (msg->msg_namelen != 0)
                     msg->msg_namelen = 0;

             return unix_dgram_sendmsg(sock, msg, len);
     }
2085
     /*
      * recvmsg() for SOCK_SEQPACKET sockets: return -ENOTCONN unless the
      * socket is in TCP_ESTABLISHED state, otherwise defer to
      * unix_dgram_recvmsg() and return its result.
      */
     static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
                                       size_t size, int flags)
     {
             if (sock->sk->sk_state == TCP_ESTABLISHED)
                     return unix_dgram_recvmsg(sock, msg, size, flags);

             return -ENOTCONN;
     }
2096
     /*
      * Copy the address attached to @sk (unix_sk(sk)->addr), if there is one,
      * into @msg's name buffer and store its length in msg_namelen.  @msg is
      * left untouched when @sk has no address.
      */
     static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
     {
             struct unix_sock *usk = unix_sk(sk);

             if (!usk->addr)
                     return;

             msg->msg_namelen = usk->addr->len;
             memcpy(msg->msg_name, usk->addr->name, usk->addr->len);
     }
2106
     /*
      * Receive one datagram from @sock into @msg (also used for
      * SOCK_SEQPACKET via unix_seqpacket_recvmsg()).
      *
      * The receive queue is polled under u->iolock; while the lookup fails
      * with -EAGAIN and the timeout allows, the task waits for more packets
      * and retries.  On success the sender's address is copied when the
      * caller asked for one, at most @size bytes are copied (MSG_TRUNC is set
      * in msg_flags if the datagram was longer) and the sender's credentials
      * and passed files are delivered through scm_recv().  With MSG_PEEK the
      * files are duplicated and the peek offset advanced; without it the
      * files are detached from the skb and the peek offset rewound.
      *
      * Returns the number of bytes copied, or the remaining length of the
      * datagram when MSG_TRUNC is set in @flags.  Errors: -EOPNOTSUPP for
      * MSG_OOB, the error from the queue lookup or the copy, or 0 instead of
      * -EAGAIN on a SOCK_SEQPACKET socket that is shut down for receiving.
      */
2107 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2108                               size_t size, int flags)
2109 {
2110         struct scm_cookie scm;
2111         struct sock *sk = sock->sk;
2112         struct unix_sock *u = unix_sk(sk);
2113         struct sk_buff *skb, *last;
2114         long timeo;
2115         int err;
2116         int peeked, skip;
2117
2118         err = -EOPNOTSUPP;
2119         if (flags&MSG_OOB)
2120                 goto out;
2121
2122         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2123
             /* Leaves the loop with iolock held only when an skb was found. */
2124         do {
2125                 mutex_lock(&u->iolock);
2126
2127                 skip = sk_peek_offset(sk, flags);
2128                 skb = __skb_try_recv_datagram(sk, flags, NULL, &peeked, &skip,
2129                                               &err, &last);
2130                 if (skb)
2131                         break;
2132
2133                 mutex_unlock(&u->iolock);
2134
2135                 if (err != -EAGAIN)
2136                         break;
2137         } while (timeo &&
2138                  !__skb_wait_for_more_packets(sk, &err, &timeo, last));
2139
2140         if (!skb) { /* implies iolock unlocked */
2141                 unix_state_lock(sk);
2142                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2143                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2144                     (sk->sk_shutdown & RCV_SHUTDOWN))
2145                         err = 0;
2146                 unix_state_unlock(sk);
2147                 goto out;
2148         }
2149
             /* Wake tasks sleeping on peer_wait with a writable poll event. */
2150         if (wq_has_sleeper(&u->peer_wait))
2151                 wake_up_interruptible_sync_poll(&u->peer_wait,
2152                                                 POLLOUT | POLLWRNORM |
2153                                                 POLLWRBAND);
2154
2155         if (msg->msg_name)
2156                 unix_copy_addr(msg, skb->sk);
2157
             /* Clamp to what is left after the peek offset; flag truncation. */
2158         if (size > skb->len - skip)
2159                 size = skb->len - skip;
2160         else if (size < skb->len - skip)
2161                 msg->msg_flags |= MSG_TRUNC;
2162
2163         err = skb_copy_datagram_msg(skb, skip, msg, size);
2164         if (err)
2165                 goto out_free;
2166
2167         if (sock_flag(sk, SOCK_RCVTSTAMP))
2168                 __sock_recv_timestamp(msg, sk, skb);
2169
2170         memset(&scm, 0, sizeof(scm));
2171
2172         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2173         unix_set_secdata(&scm, skb);
2174
2175         if (!(flags & MSG_PEEK)) {
2176                 if (UNIXCB(skb).fp)
2177                         unix_detach_fds(&scm, skb);
2178
2179                 sk_peek_offset_bwd(sk, skb->len);
2180         } else {
2181                 /* It is questionable: on PEEK we could:
2182                    - do not return fds - good, but too simple 8)
2183                    - return fds, and do not return them on read (old strategy,
2184                      apparently wrong)
2185                    - clone fds (I chose it for now, it is the most universal
2186                      solution)
2187
2188                    POSIX 1003.1g does not actually define this clearly
2189                    at all. POSIX 1003.1g doesn't define a lot of things
2190                    clearly however!
2191
2192                 */
2193
2194                 sk_peek_offset_fwd(sk, size);
2195
2196                 if (UNIXCB(skb).fp)
2197                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2198         }
             /* With MSG_TRUNC in @flags report the datagram's remaining length
              * rather than the number of bytes copied.
              */
2199         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2200
2201         scm_recv(sock, msg, &scm, flags);
2202
2203 out_free:
2204         skb_free_datagram(sk, skb);
2205         mutex_unlock(&u->iolock);
2206 out:
2207         return err;
2208 }
2209
2210 /*
2211  *      Sleep until more data has arrived. But check for races..
      *
      *      @last and @last_len are the queue tail and its length as seen by
      *      the caller.  The wait ends as soon as the tail skb or its length
      *      differs from that snapshot, the socket has an error or is shut
      *      down for receiving, a signal is pending, or @timeo is zero; after
      *      a sleep it also ends if the socket has become SOCK_DEAD.
      *      @freezable selects freezable_schedule_timeout() instead of
      *      schedule_timeout().
      *
      *      Returns @timeo as updated by the sleep calls.
2212  */
2213 static long unix_stream_data_wait(struct sock *sk, long timeo,
2214                                   struct sk_buff *last, unsigned int last_len,
2215                                   bool freezable)
2216 {
2217         struct sk_buff *tail;
2218         DEFINE_WAIT(wait);
2219
2220         unix_state_lock(sk);
2221
2222         for (;;) {
2223                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2224
                     /* All wake-up conditions are tested under the state lock. */
2225                 tail = skb_peek_tail(&sk->sk_receive_queue);
2226                 if (tail != last ||
2227                     (tail && tail->len != last_len) ||
2228                     sk->sk_err ||
2229                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2230                     signal_pending(current) ||
2231                     !timeo)
2232                         break;
2233
2234                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
                     /* The state lock is dropped for the duration of the sleep. */
2235                 unix_state_unlock(sk);
2236                 if (freezable)
2237                         timeo = freezable_schedule_timeout(timeo);
2238                 else
2239                         timeo = schedule_timeout(timeo);
2240                 unix_state_lock(sk);
2241
                     /* Note: this exit leaves SOCKWQ_ASYNC_WAITDATA set. */
2242                 if (sock_flag(sk, SOCK_DEAD))
2243                         break;
2244
2245                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2246         }
2247
2248         finish_wait(sk_sleep(sk), &wait);
2249         unix_state_unlock(sk);
2250         return timeo;
2251 }
2252
     /* Bytes of @skb not yet marked consumed: skb->len - UNIXCB(skb).consumed. */
     static unsigned int unix_skb_len(const struct sk_buff *skb)
     {
             unsigned int total = skb->len;

             return total - UNIXCB(skb).consumed;
     }
2257
     /*
      * Per-call parameters for unix_stream_read_generic(), filled in by
      * unix_stream_recvmsg() and unix_stream_splice_read().
      */
2258 struct unix_stream_read_state {
             /* Called as recv_actor(skb, skip, chunk, state) for each skb; a
              * negative return stops the read, any other return value is
              * counted as the number of bytes transferred.
              */
2259         int (*recv_actor)(struct sk_buff *, int, int,
2260                           struct unix_stream_read_state *);
             /* Socket being read from. */
2261         struct socket *socket;
             /* Destination msghdr; set by the recvmsg path only. */
2262         struct msghdr *msg;
             /* Destination pipe; set by the splice path only. */
2263         struct pipe_inode_info *pipe;
             /* Maximum number of bytes to transfer. */
2264         size_t size;
             /* MSG_* receive flags. */
2265         int flags;
             /* Flags handed to skb_splice_bits() by the splice actor. */
2266         unsigned int splice_flags;
2267 };
2268
     /*
      * Common receive loop behind unix_stream_recvmsg() and
      * unix_stream_splice_read().
      *
      * Walks the socket's receive queue under u->iolock and hands each skb
      * (or the unread part of it) to state->recv_actor until state->size
      * bytes were transferred, the queue ran dry with the low-water target
      * met, or one of the break conditions in the loop was hit.  With
      * MSG_PEEK the data stays queued and the peek offset is advanced;
      * otherwise bytes are marked consumed and fully consumed skbs are
      * unlinked.
      *
      * @freezable is forwarded to unix_stream_data_wait().
      *
      * Returns the number of bytes transferred when that is non-zero,
      * otherwise err: -EINVAL if the socket is not TCP_ESTABLISHED,
      * -EOPNOTSUPP for MSG_OOB, -ECONNRESET if the socket is SOCK_DEAD,
      * -EAGAIN when nothing is queued and the timeout is zero, the
      * sock_intr_errno() value on a signal, a pending socket error, or 0.
      * If the actor fails before anything was transferred, -EFAULT is
      * returned.
      */
2269 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2270                                     bool freezable)
2271 {
2272         struct scm_cookie scm;
2273         struct socket *sock = state->socket;
2274         struct sock *sk = sock->sk;
2275         struct unix_sock *u = unix_sk(sk);
2276         int copied = 0;
2277         int flags = state->flags;
2278         int noblock = flags & MSG_DONTWAIT;
2279         bool check_creds = false;
2280         int target;
2281         int err = 0;
2282         long timeo;
2283         int skip;
2284         size_t size = state->size;
2285         unsigned int last_len;
2286
2287         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2288                 err = -EINVAL;
2289                 goto out;
2290         }
2291
2292         if (unlikely(flags & MSG_OOB)) {
2293                 err = -EOPNOTSUPP;
2294                 goto out;
2295         }
2296
2297         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2298         timeo = sock_rcvtimeo(sk, noblock);
2299
2300         memset(&scm, 0, sizeof(scm));
2301
2302         /* Lock the socket to prevent queue disordering
2303          * while sleeps in memcpy_tomsg
2304          */
2305         mutex_lock(&u->iolock);
2306
2307         skip = max(sk_peek_offset(sk, flags), 0);
2308
2309         do {
2310                 int chunk;
2311                 bool drop_skb;
2312                 struct sk_buff *skb, *last;
2313
2314 redo:
2315                 unix_state_lock(sk);
2316                 if (sock_flag(sk, SOCK_DEAD)) {
2317                         err = -ECONNRESET;
2318                         goto unlock;
2319                 }
2320                 last = skb = skb_peek(&sk->sk_receive_queue);
2321                 last_len = last ? last->len : 0;
2322 again:
                     /* Nothing (more) to read in the queue; entered with the
                      * state lock held.
                      */
2323                 if (skb == NULL) {
2324                         unix_sk(sk)->recursion_level = 0;
2325                         if (copied >= target)
2326                                 goto unlock;
2327
2328                         /*
2329                          *      POSIX 1003.1g mandates this order.
2330                          */
2331
2332                         err = sock_error(sk);
2333                         if (err)
2334                                 goto unlock;
2335                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2336                                 goto unlock;
2337
2338                         unix_state_unlock(sk);
2339                         if (!timeo) {
2340                                 err = -EAGAIN;
2341                                 break;
2342                         }
2343
                             /* iolock is released while sleeping for data. */
2344                         mutex_unlock(&u->iolock);
2345
2346                         timeo = unix_stream_data_wait(sk, timeo, last,
2347                                                       last_len, freezable);
2348
                             /* iolock is not held here, hence the direct exit. */
2349                         if (signal_pending(current)) {
2350                                 err = sock_intr_errno(timeo);
2351                                 scm_destroy(&scm);
2352                                 goto out;
2353                         }
2354
2355                         mutex_lock(&u->iolock);
2356                         goto redo;
                             /* Shared exit for the 'goto unlock' paths: drop
                              * the state lock and leave the read loop.
                              */
2357 unlock:
2358                         unix_state_unlock(sk);
2359                         break;
2360                 }
2361
                     /* Step over skbs that lie entirely before the peek offset. */
2362                 while (skip >= unix_skb_len(skb)) {
2363                         skip -= unix_skb_len(skb);
2364                         last = skb;
2365                         last_len = skb->len;
2366                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2367                         if (!skb)
2368                                 goto again;
2369                 }
2370
2371                 unix_state_unlock(sk);
2372
2373                 if (check_creds) {
2374                         /* Never glue messages from different writers */
2375                         if (!unix_skb_scm_eq(skb, &scm))
2376                                 break;
2377                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2378                         /* Copy credentials */
2379                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2380                         unix_set_secdata(&scm, skb);
2381                         check_creds = true;
2382                 }
2383
2384                 /* Copy address just once */
                     /* NOTE(review): sunaddr appears to be declared local to
                      * this block by DECLARE_SOCKADDR, so the store of NULL
                      * below would have no effect outside it and the copy
                      * would run for every skb that reaches here - confirm.
                      */
2385                 if (state->msg && state->msg->msg_name) {
2386                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2387                                          state->msg->msg_name);
2388                         unix_copy_addr(state->msg, skb->sk);
2389                         sunaddr = NULL;
2390                 }
2391
2392                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* Extra reference for the actor call; released by the
                      * consume_skb() that follows it.
                      */
2393                 skb_get(skb);
2394                 chunk = state->recv_actor(skb, skip, chunk, state);
2395                 drop_skb = !unix_skb_len(skb);
2396                 /* skb is only safe to use if !drop_skb */
2397                 consume_skb(skb);
2398                 if (chunk < 0) {
2399                         if (copied == 0)
2400                                 copied = -EFAULT;
2401                         break;
2402                 }
2403                 copied += chunk;
2404                 size -= chunk;
2405
2406                 if (drop_skb) {
2407                         /* the skb was touched by a concurrent reader;
2408                          * we should not expect anything from this skb
2409                          * anymore and assume it invalid - we can be
2410                          * sure it was dropped from the socket queue
2411                          *
2412                          * let's report a short read
2413                          */
2414                         err = 0;
2415                         break;
2416                 }
2417
2418                 /* Mark read part of skb as used */
2419                 if (!(flags & MSG_PEEK)) {
2420                         UNIXCB(skb).consumed += chunk;
2421
2422                         sk_peek_offset_bwd(sk, chunk);
2423
2424                         if (UNIXCB(skb).fp)
2425                                 unix_detach_fds(&scm, skb);
2426
                             /* skb not fully read yet: the request is done. */
2427                         if (unix_skb_len(skb))
2428                                 break;
2429
2430                         skb_unlink(skb, &sk->sk_receive_queue);
2431                         consume_skb(skb);
2432
                             /* Stop once scm carries passed files. */
2433                         if (scm.fp)
2434                                 break;
2435                 } else {
2436                         /* It is questionable, see note in unix_dgram_recvmsg.
2437                          */
2438                         if (UNIXCB(skb).fp)
2439                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2440
2441                         sk_peek_offset_fwd(sk, chunk);
2442
2443                         if (UNIXCB(skb).fp)
2444                                 break;
2445
                             /* Peek continues with the next skb, if any; the
                              * state lock is taken for the jump to 'again'.
                              */
2446                         skip = 0;
2447                         last = skb;
2448                         last_len = skb->len;
2449                         unix_state_lock(sk);
2450                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2451                         if (skb)
2452                                 goto again;
2453                         unix_state_unlock(sk);
2454                         break;
2455                 }
2456         } while (size);
2457
2458         mutex_unlock(&u->iolock);
             /* Only the recvmsg path has a msghdr to deliver scm data to. */
2459         if (state->msg)
2460                 scm_recv(sock, state->msg, &scm, flags);
2461         else
2462                 scm_destroy(&scm);
2463 out:
2464         return copied ? : err;
2465 }
2466
     /*
      * recv_actor used by unix_stream_recvmsg(): copy @chunk bytes of @skb,
      * starting @skip bytes past its already consumed part, into state->msg.
      * Returns @chunk on success or the non-zero result of
      * skb_copy_datagram_msg().
      */
     static int unix_stream_read_actor(struct sk_buff *skb,
                                       int skip, int chunk,
                                       struct unix_stream_read_state *state)
     {
             int err = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
                                             state->msg, chunk);

             if (err)
                     return err;

             return chunk;
     }
2477
     /*
      * recvmsg() for SOCK_STREAM sockets: package the arguments into a
      * unix_stream_read_state that copies into @msg and run the generic
      * stream read loop with freezable sleeping enabled.
      */
     static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
                                    size_t size, int flags)
     {
             struct unix_stream_read_state state = {
                     .socket = sock,
                     .msg = msg,
                     .pipe = NULL,
                     .size = size,
                     .flags = flags,
                     .splice_flags = 0,
                     .recv_actor = unix_stream_read_actor,
             };

             return unix_stream_read_generic(&state, true);
     }
2491
     /*
      * recv_actor used by unix_stream_splice_read(): splice @chunk bytes of
      * @skb, starting @skip bytes past its already consumed part, into
      * state->pipe and return whatever skb_splice_bits() returns.
      */
     static int unix_stream_splice_actor(struct sk_buff *skb,
                                         int skip, int chunk,
                                         struct unix_stream_read_state *state)
     {
             struct sock *sk = state->socket->sk;

             return skb_splice_bits(skb, sk, UNIXCB(skb).consumed + skip,
                                    state->pipe, chunk, state->splice_flags);
     }
2500
     /*
      * splice_read() for SOCK_STREAM sockets.
      *
      * A non-zero *@ppos is rejected with -ESPIPE.  The read is made
      * non-blocking (MSG_DONTWAIT) when the socket's file has O_NONBLOCK set
      * or the caller passed SPLICE_F_NONBLOCK.  The data is moved into @pipe
      * by the generic stream read loop, run without freezable sleeping.
      */
     static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
                                            struct pipe_inode_info *pipe,
                                            size_t size, unsigned int flags)
     {
             struct unix_stream_read_state state = {
                     .socket = sock,
                     .pipe = pipe,
                     .size = size,
                     .splice_flags = flags,
                     .recv_actor = unix_stream_splice_actor,
             };
             bool nonblock;

             if (unlikely(*ppos))
                     return -ESPIPE;

             nonblock = (sock->file->f_flags & O_NONBLOCK) ||
                        (flags & SPLICE_F_NONBLOCK);
             if (nonblock)
                     state.flags = MSG_DONTWAIT;

             return unix_stream_read_generic(&state, false);
     }
2522
     /*
      * shutdown() for AF_UNIX sockets.
      *
      * @mode must be SHUT_RD, SHUT_WR or SHUT_RDWR, otherwise -EINVAL is
      * returned.  The matching bits are set in sk_shutdown under the state
      * lock and sk_state_change() is called.  For SOCK_STREAM and
      * SOCK_SEQPACKET sockets with a peer, the mirrored bits are set on the
      * peer as well, its sk_state_change() is called and async waiters are
      * notified with POLL_HUP (both directions closed) or POLL_IN (peer can
      * no longer receive).  Returns 0 on success.
      */
     static int unix_shutdown(struct socket *sock, int mode)
     {
             struct sock *sk = sock->sk;
             struct sock *peer;
             int peer_mode;

             if (mode < SHUT_RD || mode > SHUT_RDWR)
                     return -EINVAL;

             /* Adding one turns SHUT_RD (0), SHUT_WR (1) and SHUT_RDWR (2)
              * into RCV_SHUTDOWN (1), SEND_SHUTDOWN (2) and SHUTDOWN_MASK (3).
              */
             mode += 1;

             unix_state_lock(sk);
             sk->sk_shutdown |= mode;
             peer = unix_peer(sk);
             if (peer)
                     sock_hold(peer);
             unix_state_unlock(sk);
             sk->sk_state_change(sk);

             if (!peer)
                     return 0;

             if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                     /* Our receive side maps to the peer's send side and
                      * vice versa.
                      */
                     peer_mode = ((mode & RCV_SHUTDOWN) ? SEND_SHUTDOWN : 0) |
                                 ((mode & SEND_SHUTDOWN) ? RCV_SHUTDOWN : 0);

                     unix_state_lock(peer);
                     peer->sk_shutdown |= peer_mode;
                     unix_state_unlock(peer);
                     peer->sk_state_change(peer);

                     if (peer_mode == SHUTDOWN_MASK)
                             sk_wake_async(peer, SOCK_WAKE_WAITD, POLL_HUP);
                     else if (peer_mode & RCV_SHUTDOWN)
                             sk_wake_async(peer, SOCK_WAKE_WAITD, POLL_IN);
             }

             /* Drop the reference taken under the state lock above. */
             sock_put(peer);

             return 0;
     }
2568
     /*
      * Number of bytes available for reading on @sk (used for SIOCINQ).
      *
      * For SOCK_STREAM and SOCK_SEQPACKET this is the sum of the unread bytes
      * of every skb in the receive queue; for other socket types it is the
      * length of the first queued skb, or 0 when the queue is empty.  A
      * socket in TCP_LISTEN state yields -EINVAL.  The queue is inspected
      * with its spinlock held.
      */
     long unix_inq_len(struct sock *sk)
     {
             struct sk_buff *skb;
             long total = 0;

             if (sk->sk_state == TCP_LISTEN)
                     return -EINVAL;

             spin_lock(&sk->sk_receive_queue.lock);
             switch (sk->sk_type) {
             case SOCK_STREAM:
             case SOCK_SEQPACKET:
                     skb_queue_walk(&sk->sk_receive_queue, skb)
                             total += unix_skb_len(skb);
                     break;
             default:
                     skb = skb_peek(&sk->sk_receive_queue);
                     if (skb)
                             total = skb->len;
                     break;
             }
             spin_unlock(&sk->sk_receive_queue.lock);

             return total;
     }
     EXPORT_SYMBOL_GPL(unix_inq_len);
2592
     /* Value reported for SIOCOUTQ: the result of sk_wmem_alloc_get() on @sk. */
     long unix_outq_len(struct sock *sk)
     {
             long queued = sk_wmem_alloc_get(sk);

             return queued;
     }
     EXPORT_SYMBOL_GPL(unix_outq_len);
2598
2599 static int unix_open_file(struct sock *sk)
2600 {
2601         struct path path;
2602         struct file *f;
2603         int fd;
2604
2605         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2606                 return -EPERM;
2607
2608         unix_state_lock(sk);
2609         path = unix_sk(sk)->path;
2610         if (!path.dentry) {
2611                 unix_state_unlock(sk);
2612                 return -ENOENT;
2613         }
2614
2615         path_get(&path);
2616         unix_state_unlock(sk);
2617
2618         fd = get_unused_fd_flags(O_CLOEXEC);
2619         if (fd < 0)
2620                 goto out;
2621
2622         f = dentry_open(&path, O_PATH, current_cred());
2623         if (IS_ERR(f)) {
2624                 put_unused_fd(fd);
2625                 fd = PTR_ERR(f);
2626                 goto out;
2627         }
2628
2629         fd_install(fd, f);
2630 out:
2631         path_put(&path);
2632
2633         return fd;
2634 }
2635
2636 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2637 {
2638         struct sock *sk = sock->sk;
2639         long amount = 0;
2640         int err;
2641
2642         switch (cmd) {
2643         case SIOCOUTQ:
2644                 amount = unix_outq_len(sk);
2645                 err = put_user(amount, (int __user *)arg);
2646                 break;
2647         case SIOCINQ:
2648                 amount = unix_inq_len(sk);
2649                 if (amount < 0)
2650                         err = amount;
2651                 else
2652                         err = put_user(amount, (int __user *)arg);
2653                 break;
2654         case SIOCUNIXFILE:
2655                 err = unix_open_file(sk);
2656                 break;
2657         default:
2658                 err = -ENOIOCTLCMD;
2659                 break;
2660         }
2661         return err;
2662 }
2663
2664 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2665 {
2666         struct sock *sk = sock->sk;
2667         unsigned int mask;
2668
2669         sock_poll_wait(file, sk_sleep(sk), wait);
2670         mask = 0;
2671
2672         /* exceptional events? */
2673         if (sk->sk_err)
2674                 mask |= POLLERR;
2675         if (sk->sk_shutdown == SHUTDOWN_MASK)
2676                 mask |= POLLHUP;
2677         if (sk->sk_shutdown & RCV_SHUTDOWN)
2678                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2679
2680         /* readable? */
2681         if (!skb_queue_empty(&sk->sk_receive_queue))
2682                 mask |= POLLIN | POLLRDNORM;
2683
2684         /* Connection-based need to check for termination and startup */
2685         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2686             sk->sk_state == TCP_CLOSE)
2687                 mask |= POLLHUP;
2688
2689         /*
2690          * we set writable also when the other side has shut down the
2691          * connection. This prevents stuck sockets.
2692          */
2693         if (unix_writable(sk))
2694                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2695
2696         return mask;
2697 }
2698
2699 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2700                                     poll_table *wait)
2701 {
2702         struct sock *sk = sock->sk, *other;
2703         unsigned int mask, writable;
2704
2705         sock_poll_wait(file, sk_sleep(sk), wait);
2706         mask = 0;
2707
2708         /* exceptional events? */
2709         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2710                 mask |= POLLERR |
2711                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2712
2713         if (sk->sk_shutdown & RCV_SHUTDOWN)
2714                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2715         if (sk->sk_shutdown == SHUTDOWN_MASK)
2716                 mask |= POLLHUP;
2717
2718         /* readable? */
2719         if (!skb_queue_empty(&sk->sk_receive_queue))
2720                 mask |= POLLIN | POLLRDNORM;
2721
2722         /* Connection-based need to check for termination and startup */
2723         if (sk->sk_type == SOCK_SEQPACKET) {
2724                 if (sk->sk_state == TCP_CLOSE)
2725                         mask |= POLLHUP;
2726                 /* connection hasn't started yet? */
2727                 if (sk->sk_state == TCP_SYN_SENT)
2728                         return mask;
2729         }
2730
2731         /* No write status requested, avoid expensive OUT tests. */
2732         if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2733                 return mask;
2734
2735         writable = unix_writable(sk);
2736         if (writable) {
2737                 unix_state_lock(sk);
2738
2739                 other = unix_peer(sk);
2740                 if (other && unix_peer(other) != sk &&
2741                     unix_recvq_full(other) &&
2742                     unix_dgram_peer_wake_me(sk, other))
2743                         writable = 0;
2744
2745                 unix_state_unlock(sk);
2746         }
2747
2748         if (writable)
2749                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2750         else
2751                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2752
2753         return mask;
2754 }
2755
2756 #ifdef CONFIG_PROC_FS
2757
/*
 * A seq_file position packs a hash-bucket index into the high bits and an
 * offset within that bucket into the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket index from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the in-bucket offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Build a packed position from a bucket index and an in-bucket offset. */
#define set_bucket_offset(bkt, off) (((bkt) << BUCKET_SPACE) | (off))
2763
2764 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2765 {
2766         unsigned long offset = get_offset(*pos);
2767         unsigned long bucket = get_bucket(*pos);
2768         struct sock *sk;
2769         unsigned long count = 0;
2770
2771         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2772                 if (sock_net(sk) != seq_file_net(seq))
2773                         continue;
2774                 if (++count == offset)
2775                         break;
2776         }
2777
2778         return sk;
2779 }
2780
2781 static struct sock *unix_next_socket(struct seq_file *seq,
2782                                      struct sock *sk,
2783                                      loff_t *pos)
2784 {
2785         unsigned long bucket;
2786
2787         while (sk > (struct sock *)SEQ_START_TOKEN) {
2788                 sk = sk_next(sk);
2789                 if (!sk)
2790                         goto next_bucket;
2791                 if (sock_net(sk) == seq_file_net(seq))
2792                         return sk;
2793         }
2794
2795         do {
2796                 sk = unix_from_bucket(seq, pos);
2797                 if (sk)
2798                         return sk;
2799
2800 next_bucket:
2801                 bucket = get_bucket(*pos) + 1;
2802                 *pos = set_bucket_offset(bucket, 1);
2803         } while (bucket < ARRAY_SIZE(unix_socket_table));
2804
2805         return NULL;
2806 }
2807
2808 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2809         __acquires(unix_table_lock)
2810 {
2811         spin_lock(&unix_table_lock);
2812
2813         if (!*pos)
2814                 return SEQ_START_TOKEN;
2815
2816         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2817                 return NULL;
2818
2819         return unix_next_socket(seq, NULL, pos);
2820 }
2821
2822 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2823 {
2824         ++*pos;
2825         return unix_next_socket(seq, v, pos);
2826 }
2827
2828 static void unix_seq_stop(struct seq_file *seq, void *v)
2829         __releases(unix_table_lock)
2830 {
2831         spin_unlock(&unix_table_lock);
2832 }
2833
2834 static int unix_seq_show(struct seq_file *seq, void *v)
2835 {
2836
2837         if (v == SEQ_START_TOKEN)
2838                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2839                          "Inode Path\n");
2840         else {
2841                 struct sock *s = v;
2842                 struct unix_sock *u = unix_sk(s);
2843                 unix_state_lock(s);
2844
2845                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2846                         s,
2847                         refcount_read(&s->sk_refcnt),
2848                         0,
2849                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2850                         s->sk_type,
2851                         s->sk_socket ?
2852                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2853                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2854                         sock_i_ino(s));
2855
2856                 if (u->addr) {
2857                         int i, len;
2858                         seq_putc(seq, ' ');
2859
2860                         i = 0;
2861                         len = u->addr->len - sizeof(short);
2862                         if (!UNIX_ABSTRACT(s))
2863                                 len--;
2864                         else {
2865                                 seq_putc(seq, '@');
2866                                 i++;
2867                         }
2868                         for ( ; i < len; i++)
2869                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2870                                          '@');
2871                 }
2872                 unix_state_unlock(s);
2873                 seq_putc(seq, '\n');
2874         }
2875
2876         return 0;
2877 }
2878
2879 static const struct seq_operations unix_seq_ops = {
2880         .start  = unix_seq_start,
2881         .next   = unix_seq_next,
2882         .stop   = unix_seq_stop,
2883         .show   = unix_seq_show,
2884 };
2885
2886 static int unix_seq_open(struct inode *inode, struct file *file)
2887 {
2888         return seq_open_net(inode, file, &unix_seq_ops,
2889                             sizeof(struct seq_net_private));
2890 }
2891
2892 static const struct file_operations unix_seq_fops = {
2893         .owner          = THIS_MODULE,
2894         .open           = unix_seq_open,
2895         .read           = seq_read,
2896         .llseek         = seq_lseek,
2897         .release        = seq_release_net,
2898 };
2899
2900 #endif
2901
2902 static const struct net_proto_family unix_family_ops = {
2903         .family = PF_UNIX,
2904         .create = unix_create,
2905         .owner  = THIS_MODULE,
2906 };
2907
2908
2909 static int __net_init unix_net_init(struct net *net)
2910 {
2911         int error = -ENOMEM;
2912
2913         net->unx.sysctl_max_dgram_qlen = 10;
2914         if (unix_sysctl_register(net))
2915                 goto out;
2916
2917 #ifdef CONFIG_PROC_FS
2918         if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2919                 unix_sysctl_unregister(net);
2920                 goto out;
2921         }
2922 #endif
2923         error = 0;
2924 out:
2925         return error;
2926 }
2927
2928 static void __net_exit unix_net_exit(struct net *net)
2929 {
2930         unix_sysctl_unregister(net);
2931         remove_proc_entry("unix", net->proc_net);
2932 }
2933
2934 static struct pernet_operations unix_net_ops = {
2935         .init = unix_net_init,
2936         .exit = unix_net_exit,
2937 };
2938
2939 static int __init af_unix_init(void)
2940 {
2941         int rc = -1;
2942
2943         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2944
2945         rc = proto_register(&unix_proto, 1);
2946         if (rc != 0) {
2947                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2948                 goto out;
2949         }
2950
2951         sock_register(&unix_family_ops);
2952         register_pernet_subsys(&unix_net_ops);
2953 out:
2954         return rc;
2955 }
2956
2957 static void __exit af_unix_exit(void)
2958 {
2959         sock_unregister(PF_UNIX);
2960         proto_unregister(&unix_proto);
2961         unregister_pernet_subsys(&unix_net_ops);
2962 }
2963
2964 /* Earlier than device_initcall() so that other drivers invoking
2965    request_module() don't end up in a loop when modprobe tries
2966    to use a UNIX socket. But later than subsys_initcall() because
2967    we depend on stuff initialised there */
2968 fs_initcall(af_unix_init);
2969 module_exit(af_unix_exit);
2970
2971 MODULE_LICENSE("GPL");
2972 MODULE_ALIAS_NETPROTO(PF_UNIX);