Merge branch 'devel' into for-linus
author Trond Myklebust <Trond.Myklebust@netapp.com>
Wed, 1 Apr 2009 17:28:15 +0000 (13:28 -0400)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Wed, 1 Apr 2009 17:28:15 +0000 (13:28 -0400)
1  2 
fs/nfs/client.c
fs/nfs/dir.c
fs/nfs/file.c
include/linux/nfs_fs.h
include/linux/nfs_xdr.h
net/sunrpc/svc.c
net/sunrpc/xprtsock.c

diff --combined fs/nfs/client.c
index 2277421656e757a5b0132f2174c9a0a470dbf088,855daac0f246ad56759bede61000d6e29d1fb857..aba38017bdefc9773cc498090cca4766dcf2d863
@@@ -224,38 -224,6 +224,6 @@@ void nfs_put_client(struct nfs_client *
  }
  
  #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
- static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
- {
-       switch (sa->sa_family) {
-               default:
-                       return NULL;
-               case AF_INET6:
-                       return &((const struct sockaddr_in6 *)sa)->sin6_addr;
-                       break;
-               case AF_INET:
-                       ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
-                                       addr_mapped);
-                       return addr_mapped;
-       }
- }
- static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
-               const struct sockaddr *sa2)
- {
-       const struct in6_addr *addr1;
-       const struct in6_addr *addr2;
-       struct in6_addr addr1_mapped;
-       struct in6_addr addr2_mapped;
-       addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
-       if (likely(addr1 != NULL)) {
-               addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
-               if (likely(addr2 != NULL))
-                       return ipv6_addr_equal(addr1, addr2);
-       }
-       return 0;
- }
  /*
   * Test if two ip6 socket addresses refer to the same socket by
   * comparing relevant fields. The padding bytes specifically, are not
   *
   * The caller should ensure both socket addresses are AF_INET6.
   */
- static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
-                               const struct sockaddr *sa2)
+ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+                                     const struct sockaddr *sa2)
  {
-       const struct sockaddr_in6 *saddr1 = (const struct sockaddr_in6 *)sa1;
-       const struct sockaddr_in6 *saddr2 = (const struct sockaddr_in6 *)sa2;
+       const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
+       const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
  
-       if (!ipv6_addr_equal(&saddr1->sin6_addr,
-                            &saddr1->sin6_addr))
-               return 0;
-       if (ipv6_addr_scope(&saddr1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
-           saddr1->sin6_scope_id != saddr2->sin6_scope_id)
+       if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
+           sin1->sin6_scope_id != sin2->sin6_scope_id)
                return 0;
-       return saddr1->sin6_port == saddr2->sin6_port;
- }
- #else
- static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
-                                const struct sockaddr_in *sa2)
- {
-       return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
- }
  
- static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
-                                const struct sockaddr *sa2)
- {
-       if (unlikely(sa1->sa_family != AF_INET || sa2->sa_family != AF_INET))
-               return 0;
-       return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
-                       (const struct sockaddr_in *)sa2);
+       return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
  }
- static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
-                               const struct sockaddr * sa2)
+ #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
+ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+                                     const struct sockaddr *sa2)
  {
        return 0;
  }
   *
   * The caller should ensure both socket addresses are AF_INET.
   */
+ static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
+                                     const struct sockaddr *sa2)
+ {
+       const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
+       const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
+       return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
+ }
+ static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
+                               const struct sockaddr *sa2)
+ {
+       const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
+       const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;
+       return nfs_sockaddr_match_ipaddr6(sa1, sa2) &&
+               (sin1->sin6_port == sin2->sin6_port);
+ }
  static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
                                const struct sockaddr *sa2)
  {
-       const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1;
-       const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2;
+       const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
+       const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
  
-       if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr)
+       return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
+               (sin1->sin_port == sin2->sin_port);
+ }
+ /*
+  * Test if two socket addresses represent the same actual socket,
+  * by comparing (only) relevant fields, excluding the port number.
+  */
+ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+                                    const struct sockaddr *sa2)
+ {
+       if (sa1->sa_family != sa2->sa_family)
                return 0;
-       return saddr1->sin_port == saddr2->sin_port;
+       switch (sa1->sa_family) {
+       case AF_INET:
+               return nfs_sockaddr_match_ipaddr4(sa1, sa2);
+       case AF_INET6:
+               return nfs_sockaddr_match_ipaddr6(sa1, sa2);
+       }
+       return 0;
  }
  
  /*
   * Test if two socket addresses represent the same actual socket,
-  * by comparing (only) relevant fields.
+  * by comparing (only) relevant fields, including the port number.
   */
  static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
                            const struct sockaddr *sa2)
@@@ -1606,6 -1594,8 +1594,6 @@@ int __init nfs_fs_proc_init(void
        if (!proc_fs_nfs)
                goto error_0;
  
 -      proc_fs_nfs->owner = THIS_MODULE;
 -
        /* a file of servers with which we're dealing */
        p = proc_create("servers", S_IFREG|S_IRUGO,
                        proc_fs_nfs, &nfs_server_list_fops);
diff --combined fs/nfs/dir.c
index 78bf72fc1db3a79fe08ae65c77960eff1326443f,3b2f6973e7c50cbfdd579351ae99ba5e746bc64d..370b190a09d1d7ac7d03fe1cd45457ec866749a4
@@@ -899,7 -899,7 +899,7 @@@ static void nfs_dentry_iput(struct dent
        iput(inode);
  }
  
 -struct dentry_operations nfs_dentry_operations = {
 +const struct dentry_operations nfs_dentry_operations = {
        .d_revalidate   = nfs_lookup_revalidate,
        .d_delete       = nfs_dentry_delete,
        .d_iput         = nfs_dentry_iput,
@@@ -967,7 -967,7 +967,7 @@@ out
  #ifdef CONFIG_NFS_V4
  static int nfs_open_revalidate(struct dentry *, struct nameidata *);
  
 -struct dentry_operations nfs4_dentry_operations = {
 +const struct dentry_operations nfs4_dentry_operations = {
        .d_revalidate   = nfs_open_revalidate,
        .d_delete       = nfs_dentry_delete,
        .d_iput         = nfs_dentry_iput,
@@@ -1624,8 -1624,7 +1624,7 @@@ static int nfs_rename(struct inode *old
                } else if (atomic_read(&new_dentry->d_count) > 1)
                        /* dentry still busy? */
                        goto out;
-       } else
-               nfs_drop_nlink(new_inode);
+       }
  
  go_ahead:
        /*
        }
        nfs_inode_return_delegation(old_inode);
  
-       if (new_inode != NULL) {
+       if (new_inode != NULL)
                nfs_inode_return_delegation(new_inode);
-               d_delete(new_dentry);
-       }
  
        error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
                                           new_dir, &new_dentry->d_name);
@@@ -1650,6 -1647,8 +1647,8 @@@ out
        if (rehash)
                d_rehash(rehash);
        if (!error) {
+               if (new_inode != NULL)
+                       nfs_drop_nlink(new_inode);
                d_move(old_dentry, new_dentry);
                nfs_set_verifier(new_dentry,
                                        nfs_save_change_attribute(new_dir));
diff --combined fs/nfs/file.c
index cec79392e4ba3f1b746166928b073379e2622ab4,d451073c49474390685b3ca97304c664f93d93fd..0abf3f331f56c6a20249a00bd4719cb1cd3b9112
@@@ -64,11 -64,7 +64,7 @@@ const struct file_operations nfs_file_o
        .write          = do_sync_write,
        .aio_read       = nfs_file_read,
        .aio_write      = nfs_file_write,
- #ifdef CONFIG_MMU
        .mmap           = nfs_file_mmap,
- #else
-       .mmap           = generic_file_mmap,
- #endif
        .open           = nfs_file_open,
        .flush          = nfs_file_flush,
        .release        = nfs_file_release,
@@@ -141,9 -137,6 +137,6 @@@ nfs_file_release(struct inode *inode, s
                        dentry->d_parent->d_name.name,
                        dentry->d_name.name);
  
-       /* Ensure that dirty pages are flushed out with the right creds */
-       if (filp->f_mode & FMODE_WRITE)
-               nfs_wb_all(dentry->d_inode);
        nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
        return nfs_release(inode, filp);
  }
@@@ -235,7 -228,6 +228,6 @@@ nfs_file_flush(struct file *file, fl_ow
        struct nfs_open_context *ctx = nfs_file_open_context(file);
        struct dentry   *dentry = file->f_path.dentry;
        struct inode    *inode = dentry->d_inode;
-       int             status;
  
        dprintk("NFS: flush(%s/%s)\n",
                        dentry->d_parent->d_name.name,
                return 0;
        nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
  
-       /* Ensure that data+attribute caches are up to date after close() */
-       status = nfs_do_fsync(ctx, inode);
-       if (!status)
-               nfs_revalidate_inode(NFS_SERVER(inode), inode);
-       return status;
+       /* Flush writes to the server and return any errors */
+       return nfs_do_fsync(ctx, inode);
  }
  
  static ssize_t
@@@ -304,11 -293,13 +293,13 @@@ nfs_file_mmap(struct file * file, struc
        dprintk("NFS: mmap(%s/%s)\n",
                dentry->d_parent->d_name.name, dentry->d_name.name);
  
-       status = nfs_revalidate_mapping(inode, file->f_mapping);
+       /* Note: generic_file_mmap() returns ENOSYS on nommu systems
+        *       so we call that before revalidating the mapping
+        */
+       status = generic_file_mmap(file, vma);
        if (!status) {
                vma->vm_ops = &nfs_file_vm_ops;
-               vma->vm_flags |= VM_CAN_NONLINEAR;
-               file_accessed(file);
+               status = nfs_revalidate_mapping(inode, file->f_mapping);
        }
        return status;
  }
@@@ -354,6 -345,15 +345,15 @@@ static int nfs_write_begin(struct file 
                file->f_path.dentry->d_name.name,
                mapping->host->i_ino, len, (long long) pos);
  
+       /*
+        * Prevent starvation issues if someone is doing a consistency
+        * sync-to-disk
+        */
+       ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
+                       nfs_wait_bit_killable, TASK_KILLABLE);
+       if (ret)
+               return ret;
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page)
                return -ENOMEM;
@@@ -451,9 -451,8 +451,9 @@@ const struct address_space_operations n
        .launder_page = nfs_launder_page,
  };
  
 -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
  {
 +      struct page *page = vmf->page;
        struct file *filp = vma->vm_file;
        struct dentry *dentry = filp->f_path.dentry;
        unsigned pagelen;
                ret = pagelen;
  out_unlock:
        unlock_page(page);
 +      if (ret)
 +              ret = VM_FAULT_SIGBUS;
        return ret;
  }
  
diff --combined include/linux/nfs_fs.h
index 8cc8807f77d6da286c6225ccb4a8c69642b826e3,933bc261c0dfbde37f926deb56e14e76cf1c19fe..bde2557c2a9cec10613328369aebe64bbf3150c4
@@@ -166,8 -166,7 +166,7 @@@ struct nfs_inode 
         */
        struct radix_tree_root  nfs_page_tree;
  
-       unsigned long           ncommit,
-                               npages;
+       unsigned long           npages;
  
        /* Open contexts for shared mmap writes */
        struct list_head        open_files;
  #define NFS_INO_STALE         (1)             /* possible stale inode */
  #define NFS_INO_ACL_LRU_SET   (2)             /* Inode is on the LRU list */
  #define NFS_INO_MOUNTPOINT    (3)             /* inode is remote mountpoint */
+ #define NFS_INO_FLUSHING      (4)             /* inode is flushing out data */
  
  static inline struct nfs_inode *NFS_I(const struct inode *inode)
  {
@@@ -415,7 -415,7 +415,7 @@@ extern const struct inode_operations nf
  extern const struct inode_operations nfs3_dir_inode_operations;
  #endif /* CONFIG_NFS_V3 */
  extern const struct file_operations nfs_dir_operations;
 -extern struct dentry_operations nfs_dentry_operations;
 +extern const struct dentry_operations nfs_dentry_operations;
  
  extern void nfs_force_lookup_revalidate(struct inode *dir);
  extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
diff --combined include/linux/nfs_xdr.h
index 43a713fce11cbfbd25571424e7201758f15b4bf5,9708e78a4d49a5215e985c3254968d51e60e28ad..b89c34e40bc2b23e5d34485f8640a28197b7a3e5
@@@ -27,12 -27,8 +27,8 @@@ static inline int nfs_fsid_equal(const 
  }
  
  struct nfs_fattr {
-       unsigned short          valid;          /* which fields are valid */
-       __u64                   pre_size;       /* pre_op_attr.size       */
-       struct timespec         pre_mtime;      /* pre_op_attr.mtime      */
-       struct timespec         pre_ctime;      /* pre_op_attr.ctime      */
-       enum nfs_ftype          type;           /* always use NFSv2 types */
-       __u32                   mode;
+       unsigned int            valid;          /* which fields are valid */
+       umode_t                 mode;
        __u32                   nlink;
        __u32                   uid;
        __u32                   gid;
        struct timespec         atime;
        struct timespec         mtime;
        struct timespec         ctime;
-       __u32                   bitmap[2];      /* NFSv4 returned attribute bitmap */
        __u64                   change_attr;    /* NFSv4 change attribute */
        __u64                   pre_change_attr;/* pre-op NFSv4 change attribute */
+       __u64                   pre_size;       /* pre_op_attr.size       */
+       struct timespec         pre_mtime;      /* pre_op_attr.mtime      */
+       struct timespec         pre_ctime;      /* pre_op_attr.ctime      */
        unsigned long           time_start;
        unsigned long           gencount;
  };
  
- #define NFS_ATTR_WCC          0x0001          /* pre-op WCC data    */
- #define NFS_ATTR_FATTR                0x0002          /* post-op attributes */
- #define NFS_ATTR_FATTR_V3     0x0004          /* NFSv3 attributes */
- #define NFS_ATTR_FATTR_V4     0x0008          /* NFSv4 change attribute */
- #define NFS_ATTR_WCC_V4               0x0010          /* pre-op change attribute */
- #define NFS_ATTR_FATTR_V4_REFERRAL    0x0020          /* NFSv4 referral */
+ #define NFS_ATTR_FATTR_TYPE           (1U << 0)
+ #define NFS_ATTR_FATTR_MODE           (1U << 1)
+ #define NFS_ATTR_FATTR_NLINK          (1U << 2)
+ #define NFS_ATTR_FATTR_OWNER          (1U << 3)
+ #define NFS_ATTR_FATTR_GROUP          (1U << 4)
+ #define NFS_ATTR_FATTR_RDEV           (1U << 5)
+ #define NFS_ATTR_FATTR_SIZE           (1U << 6)
+ #define NFS_ATTR_FATTR_PRESIZE                (1U << 7)
+ #define NFS_ATTR_FATTR_BLOCKS_USED    (1U << 8)
+ #define NFS_ATTR_FATTR_SPACE_USED     (1U << 9)
+ #define NFS_ATTR_FATTR_FSID           (1U << 10)
+ #define NFS_ATTR_FATTR_FILEID         (1U << 11)
+ #define NFS_ATTR_FATTR_ATIME          (1U << 12)
+ #define NFS_ATTR_FATTR_MTIME          (1U << 13)
+ #define NFS_ATTR_FATTR_CTIME          (1U << 14)
+ #define NFS_ATTR_FATTR_PREMTIME               (1U << 15)
+ #define NFS_ATTR_FATTR_PRECTIME               (1U << 16)
+ #define NFS_ATTR_FATTR_CHANGE         (1U << 17)
+ #define NFS_ATTR_FATTR_PRECHANGE      (1U << 18)
+ #define NFS_ATTR_FATTR_V4_REFERRAL    (1U << 19)      /* NFSv4 referral */
+ #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
+               | NFS_ATTR_FATTR_MODE \
+               | NFS_ATTR_FATTR_NLINK \
+               | NFS_ATTR_FATTR_OWNER \
+               | NFS_ATTR_FATTR_GROUP \
+               | NFS_ATTR_FATTR_RDEV \
+               | NFS_ATTR_FATTR_SIZE \
+               | NFS_ATTR_FATTR_FSID \
+               | NFS_ATTR_FATTR_FILEID \
+               | NFS_ATTR_FATTR_ATIME \
+               | NFS_ATTR_FATTR_MTIME \
+               | NFS_ATTR_FATTR_CTIME)
+ #define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
+               | NFS_ATTR_FATTR_BLOCKS_USED)
+ #define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
+               | NFS_ATTR_FATTR_SPACE_USED)
+ #define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
+               | NFS_ATTR_FATTR_SPACE_USED \
+               | NFS_ATTR_FATTR_CHANGE)
  
  /*
   * Info on the file system
@@@ -785,7 -817,7 +817,7 @@@ struct nfs_access_entry
   */
  struct nfs_rpc_ops {
        u32     version;                /* Protocol version */
 -      struct dentry_operations *dentry_ops;
 +      const struct dentry_operations *dentry_ops;
        const struct inode_operations *dir_inode_ops;
        const struct inode_operations *file_inode_ops;
  
        int     (*lock)(struct file *, int, struct file_lock *);
        int     (*lock_check_bounds)(const struct file_lock *);
        void    (*clear_acl_cache)(struct inode *);
+       void    (*close_context)(struct nfs_open_context *ctx, int);
  };
  
  /*
diff --combined net/sunrpc/svc.c
index bb507e2bb94d6fa4657508f29df558a54208928b,8ba654bdd6081c376a887955848893bb6ad5945c..9f2f2412a2f35245056f5174cd5bbbb956cabd13
@@@ -312,7 -312,7 +312,7 @@@ svc_pool_map_set_cpumask(struct task_st
        switch (m->mode) {
        case SVC_POOL_PERCPU:
        {
 -              set_cpus_allowed_ptr(task, &cpumask_of_cpu(node));
 +              set_cpus_allowed_ptr(task, cpumask_of(node));
                break;
        }
        case SVC_POOL_PERNODE:
@@@ -359,7 -359,7 +359,7 @@@ svc_pool_for_cpu(struct svc_serv *serv
   */
  static struct svc_serv *
  __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
-          sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+            void (*shutdown)(struct svc_serv *serv))
  {
        struct svc_serv *serv;
        unsigned int vers;
  
        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
                return NULL;
-       serv->sv_family    = family;
        serv->sv_name      = prog->pg_name;
        serv->sv_program   = prog;
        serv->sv_nrthreads = 1;
  
  struct svc_serv *
  svc_create(struct svc_program *prog, unsigned int bufsize,
-               sa_family_t family, void (*shutdown)(struct svc_serv *serv))
+          void (*shutdown)(struct svc_serv *serv))
  {
-       return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
+       return __svc_create(prog, bufsize, /*npools*/1, shutdown);
  }
  EXPORT_SYMBOL_GPL(svc_create);
  
  struct svc_serv *
  svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
-                 sa_family_t family, void (*shutdown)(struct svc_serv *serv),
+                 void (*shutdown)(struct svc_serv *serv),
                  svc_thread_fn func, struct module *mod)
  {
        struct svc_serv *serv;
        unsigned int npools = svc_pool_map_get();
  
-       serv = __svc_create(prog, bufsize, npools, family, shutdown);
+       serv = __svc_create(prog, bufsize, npools, shutdown);
  
        if (serv != NULL) {
                serv->sv_function = func;
@@@ -719,8 -718,6 +718,6 @@@ svc_exit_thread(struct svc_rqst *rqstp
  }
  EXPORT_SYMBOL_GPL(svc_exit_thread);
  
- #ifdef CONFIG_SUNRPC_REGISTER_V4
  /*
   * Register an "inet" protocol family netid with the local
   * rpcbind daemon via an rpcbind v4 SET request.
@@@ -735,12 -732,13 +732,13 @@@ static int __svc_rpcb_register4(const u
                                const unsigned short protocol,
                                const unsigned short port)
  {
-       struct sockaddr_in sin = {
+       const struct sockaddr_in sin = {
                .sin_family             = AF_INET,
                .sin_addr.s_addr        = htonl(INADDR_ANY),
                .sin_port               = htons(port),
        };
-       char *netid;
+       const char *netid;
+       int error;
  
        switch (protocol) {
        case IPPROTO_UDP:
                netid = RPCBIND_NETID_TCP;
                break;
        default:
-               return -EPROTONOSUPPORT;
+               return -ENOPROTOOPT;
        }
  
-       return rpcb_v4_register(program, version,
-                               (struct sockaddr *)&sin, netid);
+       error = rpcb_v4_register(program, version,
+                                       (const struct sockaddr *)&sin, netid);
+       /*
+        * User space didn't support rpcbind v4, so retry this
+        * registration request with the legacy rpcbind v2 protocol.
+        */
+       if (error == -EPROTONOSUPPORT)
+               error = rpcb_register(program, version, protocol, port);
+       return error;
  }
  
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
  /*
   * Register an "inet6" protocol family netid with the local
   * rpcbind daemon via an rpcbind v4 SET request.
@@@ -771,12 -779,13 +779,13 @@@ static int __svc_rpcb_register6(const u
                                const unsigned short protocol,
                                const unsigned short port)
  {
-       struct sockaddr_in6 sin6 = {
+       const struct sockaddr_in6 sin6 = {
                .sin6_family            = AF_INET6,
                .sin6_addr              = IN6ADDR_ANY_INIT,
                .sin6_port              = htons(port),
        };
-       char *netid;
+       const char *netid;
+       int error;
  
        switch (protocol) {
        case IPPROTO_UDP:
                netid = RPCBIND_NETID_TCP6;
                break;
        default:
-               return -EPROTONOSUPPORT;
+               return -ENOPROTOOPT;
        }
  
-       return rpcb_v4_register(program, version,
-                               (struct sockaddr *)&sin6, netid);
+       error = rpcb_v4_register(program, version,
+                                       (const struct sockaddr *)&sin6, netid);
+       /*
+        * User space didn't support rpcbind version 4, so we won't
+        * use a PF_INET6 listener.
+        */
+       if (error == -EPROTONOSUPPORT)
+               error = -EAFNOSUPPORT;
+       return error;
  }
+ #endif        /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
  
  /*
   * Register a kernel RPC service via rpcbind version 4.
   * Returns zero on success; a negative errno value is returned
   * if any error occurs.
   */
- static int __svc_register(const u32 program, const u32 version,
-                         const sa_family_t family,
+ static int __svc_register(const char *progname,
+                         const u32 program, const u32 version,
+                         const int family,
                          const unsigned short protocol,
                          const unsigned short port)
  {
-       int error;
+       int error = -EAFNOSUPPORT;
  
        switch (family) {
-       case AF_INET:
-               return __svc_rpcb_register4(program, version,
+       case PF_INET:
+               error = __svc_rpcb_register4(program, version,
                                                protocol, port);
-       case AF_INET6:
+               break;
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+       case PF_INET6:
                error = __svc_rpcb_register6(program, version,
                                                protocol, port);
-               if (error < 0)
-                       return error;
-               /*
-                * Work around bug in some versions of Linux rpcbind
-                * which don't allow registration of both inet and
-                * inet6 netids.
-                *
-                * Error return ignored for now.
-                */
-               __svc_rpcb_register4(program, version,
-                                               protocol, port);
-               return 0;
+ #endif        /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
        }
  
-       return -EAFNOSUPPORT;
- }
- #else /* CONFIG_SUNRPC_REGISTER_V4 */
- /*
-  * Register a kernel RPC service via rpcbind version 2.
-  *
-  * Returns zero on success; a negative errno value is returned
-  * if any error occurs.
-  */
- static int __svc_register(const u32 program, const u32 version,
-                         sa_family_t family,
-                         const unsigned short protocol,
-                         const unsigned short port)
- {
-       if (family != AF_INET)
-               return -EAFNOSUPPORT;
-       return rpcb_register(program, version, protocol, port);
+       if (error < 0)
+               printk(KERN_WARNING "svc: failed to register %sv%u RPC "
+                       "service (errno %d).\n", progname, version, -error);
+       return error;
  }
  
- #endif /* CONFIG_SUNRPC_REGISTER_V4 */
  /**
   * svc_register - register an RPC service with the local portmapper
   * @serv: svc_serv struct for the service to register
+  * @family: protocol family of service's listener socket
   * @proto: transport protocol number to advertise
   * @port: port to advertise
   *
-  * Service is registered for any address in serv's address family
+  * Service is registered for any address in the passed-in protocol family
   */
- int svc_register(const struct svc_serv *serv, const unsigned short proto,
-                const unsigned short port)
+ int svc_register(const struct svc_serv *serv, const int family,
+                const unsigned short proto, const unsigned short port)
  {
        struct svc_program      *progp;
        unsigned int            i;
                                        i,
                                        proto == IPPROTO_UDP?  "udp" : "tcp",
                                        port,
-                                       serv->sv_family,
+                                       family,
                                        progp->pg_vers[i]->vs_hidden?
                                                " (but not telling portmap)" : "");
  
                        if (progp->pg_vers[i]->vs_hidden)
                                continue;
  
-                       error = __svc_register(progp->pg_prog, i,
-                                               serv->sv_family, proto, port);
+                       error = __svc_register(progp->pg_name, progp->pg_prog,
+                                               i, family, proto, port);
                        if (error < 0)
                                break;
                }
        return error;
  }
  
- #ifdef CONFIG_SUNRPC_REGISTER_V4
+ /*
+  * If user space is running rpcbind, it should take the v4 UNSET
+  * and clear everything for this [program, version].  If user space
+  * is running portmap, it will reject the v4 UNSET, but won't have
+  * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
+  * in this case to clear all existing entries for [program, version].
+  */
  static void __svc_unregister(const u32 program, const u32 version,
                             const char *progname)
  {
-       struct sockaddr_in6 sin6 = {
-               .sin6_family            = AF_INET6,
-               .sin6_addr              = IN6ADDR_ANY_INIT,
-               .sin6_port              = 0,
-       };
        int error;
  
-       error = rpcb_v4_register(program, version,
-                               (struct sockaddr *)&sin6, "");
-       dprintk("svc: %s(%sv%u), error %d\n",
-                       __func__, progname, version, error);
- }
- #else /* CONFIG_SUNRPC_REGISTER_V4 */
+       error = rpcb_v4_register(program, version, NULL, "");
  
- static void __svc_unregister(const u32 program, const u32 version,
-                            const char *progname)
- {
-       int error;
+       /*
+        * User space didn't support rpcbind v4, so retry this
+        * request with the legacy rpcbind v2 protocol.
+        */
+       if (error == -EPROTONOSUPPORT)
+               error = rpcb_register(program, version, 0, 0);
  
-       error = rpcb_register(program, version, 0, 0);
        dprintk("svc: %s(%sv%u), error %d\n",
                        __func__, progname, version, error);
  }
  
- #endif        /* CONFIG_SUNRPC_REGISTER_V4 */
  /*
   * All netids, bind addresses and ports registered for [program, version]
   * are removed from the local rpcbind database (if the service is not
diff --combined net/sunrpc/xprtsock.c
index 568330eebbfeb68e469ead8f23f8f059093850b4,fbc8725c20cbcfaed6d458e79e470393bb980ffc..d40ff50887aa468544c500de540d7b6a29108b67
@@@ -49,6 -49,9 +49,9 @@@ unsigned int xprt_tcp_slot_table_entrie
  unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
  unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
  
+ #define XS_TCP_LINGER_TO      (15U * HZ)
+ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
  /*
   * We can register our own files under /proc/sys/sunrpc by
   * calling register_sysctl_table() again.  The files in that
@@@ -116,6 -119,14 +119,14 @@@ static ctl_table xs_tunables_table[] = 
                .extra1         = &xprt_min_resvport_limit,
                .extra2         = &xprt_max_resvport_limit
        },
+       {
+               .procname       = "tcp_fin_timeout",
+               .data           = &xs_tcp_fin_timeout,
+               .maxlen         = sizeof(xs_tcp_fin_timeout),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_jiffies,
+               .strategy       = sysctl_jiffies
+       },
        {
                .ctl_name = 0,
        },
@@@ -521,11 -532,12 +532,12 @@@ static void xs_nospace_callback(struct 
   * @task: task to put to sleep
   *
   */
- static void xs_nospace(struct rpc_task *task)
+ static int xs_nospace(struct rpc_task *task)
  {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       int ret = 0;
  
        dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
                        task->tk_pid, req->rq_slen - req->rq_bytes_sent,
        /* Don't race with disconnect */
        if (xprt_connected(xprt)) {
                if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+                       ret = -EAGAIN;
                        /*
                         * Notify TCP that we're limited by the application
                         * window size
                }
        } else {
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               task->tk_status = -ENOTCONN;
+               ret = -ENOTCONN;
        }
  
        spin_unlock_bh(&xprt->transport_lock);
+       return ret;
  }
  
  /**
@@@ -594,6 -608,8 +608,8 @@@ static int xs_udp_send_request(struct r
                /* Still some bytes left; set up for a retry later. */
                status = -EAGAIN;
        }
+       if (!transport->sock)
+               goto out;
  
        switch (status) {
        case -ENOTSOCK:
                /* Should we call xs_close() here? */
                break;
        case -EAGAIN:
-               xs_nospace(task);
+               status = xs_nospace(task);
                break;
+       default:
+               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
+                       -status);
        case -ENETUNREACH:
        case -EPIPE:
        case -ECONNREFUSED:
                /* When the server has died, an ICMP port unreachable message
                 * prompts ECONNREFUSED. */
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               break;
-       default:
-               clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
-                       -status);
        }
+ out:
        return status;
  }
  
@@@ -697,6 -711,8 +711,8 @@@ static int xs_tcp_send_request(struct r
                status = -EAGAIN;
                break;
        }
+       if (!transport->sock)
+               goto out;
  
        switch (status) {
        case -ENOTSOCK:
                /* Should we call xs_close() here? */
                break;
        case -EAGAIN:
-               xs_nospace(task);
+               status = xs_nospace(task);
                break;
+       default:
+               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
+                       -status);
        case -ECONNRESET:
+       case -EPIPE:
                xs_tcp_shutdown(xprt);
        case -ECONNREFUSED:
        case -ENOTCONN:
-       case -EPIPE:
-               status = -ENOTCONN;
                clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               break;
-       default:
-               dprintk("RPC:       sendmsg returned unrecognized error %d\n",
-                       -status);
-               clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
-               xs_tcp_shutdown(xprt);
        }
+ out:
        return status;
  }
  
@@@ -767,23 -779,13 +779,13 @@@ static void xs_restore_old_callbacks(st
        sk->sk_error_report = transport->old_error_report;
  }
  
- /**
-  * xs_close - close a socket
-  * @xprt: transport
-  *
-  * This is used when all requests are complete; ie, no DRC state remains
-  * on the server we want to save.
-  */
- static void xs_close(struct rpc_xprt *xprt)
+ static void xs_reset_transport(struct sock_xprt *transport)
  {
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct socket *sock = transport->sock;
        struct sock *sk = transport->inet;
  
-       if (!sk)
-               goto clear_close_wait;
-       dprintk("RPC:       xs_close xprt %p\n", xprt);
+       if (sk == NULL)
+               return;
  
        write_lock_bh(&sk->sk_callback_lock);
        transport->inet = NULL;
        sk->sk_no_check = 0;
  
        sock_release(sock);
- clear_close_wait:
+ }
+ /**
+  * xs_close - close a socket
+  * @xprt: transport
+  *
+  * This is used when all requests are complete; ie, no DRC state remains
+  * on the server we want to save.
+  */
+ static void xs_close(struct rpc_xprt *xprt)
+ {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       dprintk("RPC:       xs_close xprt %p\n", xprt);
+       xs_reset_transport(transport);
        smp_mb__before_clear_bit();
+       clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
        clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
        clear_bit(XPRT_CLOSING, &xprt->state);
        smp_mb__after_clear_bit();
        read_unlock(&sk->sk_callback_lock);
  }
  
+ /*
+  * Do the equivalent of linger/linger2 handling for dealing with
+  * broken servers that don't close the socket in a timely
+  * fashion
+  *
+  * @xprt: transport whose connect_worker doubles as the linger timer
+  * @timeout: delay passed unchanged to queue_delayed_work()
+  *
+  * Flags XPRT_CONNECTION_ABORT and queues transport->connect_worker on
+  * rpciod_workqueue.  Does nothing when xprt_test_and_set_connecting()
+  * returns non-zero (presumably: a connect attempt is already pending
+  * -- confirm).
+  */
+ static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
+               unsigned long timeout)
+ {
+       struct sock_xprt *transport;
+       if (xprt_test_and_set_connecting(xprt))
+               return;
+       /* xs_tcp_setup_socket() tests this bit and bails out with -EAGAIN */
+       set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+       transport = container_of(xprt, struct sock_xprt, xprt);
+       queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
+                          timeout);
+ }
+ /*
+  * xs_tcp_cancel_linger_timeout - undo xs_tcp_schedule_linger_timeout()
+  * @xprt: transport whose linger timer should be cancelled
+  *
+  * Only acts when XPRT_CONNECTION_ABORT is set and cancel_delayed_work()
+  * returns non-zero for connect_worker; in that case the abort flag and
+  * the connecting state are both cleared.  Otherwise all state is left
+  * untouched (presumably because the worker is no longer pending and
+  * will clean up itself -- confirm against cancel_delayed_work()).
+  */
+ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
+ {
+       struct sock_xprt *transport;
+       transport = container_of(xprt, struct sock_xprt, xprt);
+       if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
+           !cancel_delayed_work(&transport->connect_worker))
+               return;
+       clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+       xprt_clear_connecting(xprt);
+ }
+ /*
+  * xs_sock_mark_closed - record that the socket has finished closing
+  * @xprt: transport to update
+  *
+  * Clears XPRT_CLOSE_WAIT and XPRT_CLOSING between a pair of memory
+  * barriers, then calls xprt_disconnect_done().  Called from the
+  * TCP_CLOSE case of xs_tcp_state_change() and from
+  * xs_abort_connection().
+  */
+ static void xs_sock_mark_closed(struct rpc_xprt *xprt)
+ {
+       smp_mb__before_clear_bit();
+       clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+       clear_bit(XPRT_CLOSING, &xprt->state);
+       smp_mb__after_clear_bit();
+       /* Mark transport as closed and wake up all pending tasks */
+       xprt_disconnect_done(xprt);
+ }
  /**
   * xs_tcp_state_change - callback to handle TCP socket state changes
   * @sk: socket whose state has changed
@@@ -1158,7 -1218,7 +1218,7 @@@ static void xs_tcp_state_change(struct 
                        transport->tcp_flags =
                                TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
  
-                       xprt_wake_pending_tasks(xprt, 0);
+                       xprt_wake_pending_tasks(xprt, -EAGAIN);
                }
                spin_unlock_bh(&xprt->transport_lock);
                break;
                clear_bit(XPRT_CONNECTED, &xprt->state);
                clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
                smp_mb__after_clear_bit();
+               xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
                break;
        case TCP_CLOSE_WAIT:
                /* The server initiated a shutdown of the socket */
-               set_bit(XPRT_CLOSING, &xprt->state);
                xprt_force_disconnect(xprt);
        case TCP_SYN_SENT:
                xprt->connect_cookie++;
                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
                break;
        case TCP_LAST_ACK:
+               set_bit(XPRT_CLOSING, &xprt->state);
+               xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
                smp_mb__before_clear_bit();
                clear_bit(XPRT_CONNECTED, &xprt->state);
                smp_mb__after_clear_bit();
                break;
        case TCP_CLOSE:
-               smp_mb__before_clear_bit();
-               clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
-               clear_bit(XPRT_CLOSING, &xprt->state);
-               smp_mb__after_clear_bit();
-               /* Mark transport as closed and wake up all pending tasks */
-               xprt_disconnect_done(xprt);
+               xs_tcp_cancel_linger_timeout(xprt);
+               xs_sock_mark_closed(xprt);
        }
   out:
        read_unlock(&sk->sk_callback_lock);
  }
  
  /**
-  * xs_tcp_error_report - callback mainly for catching RST events
+  * xs_error_report - callback mainly for catching socket errors
   * @sk: socket
+  *
+  * Installed as sk->sk_error_report by both xs_udp_finish_connecting()
+  * and xs_tcp_finish_connecting().  Runs under a read lock on
+  * sk->sk_callback_lock, logs sk->sk_err and wakes all pending tasks
+  * with -EAGAIN.  Unlike the removed xs_tcp_error_report() it no longer
+  * filters on ECONNRESET/TCP_ESTABLISHED and no longer forces a
+  * disconnect.
   */
- static void xs_tcp_error_report(struct sock *sk)
+ static void xs_error_report(struct sock *sk)
  {
        struct rpc_xprt *xprt;
  
        read_lock(&sk->sk_callback_lock);
-       if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
-               goto out;
        if (!(xprt = xprt_from_sock(sk)))
                goto out;
        dprintk("RPC:       %s client %p...\n"
                        "RPC:       error %d\n",
                        __func__, xprt, sk->sk_err);
-       xprt_force_disconnect(xprt);
+       xprt_wake_pending_tasks(xprt, -EAGAIN);
  out:
        read_unlock(&sk->sk_callback_lock);
  }
  
 +/*
 + * xs_write_space - common tail of the UDP and TCP write-space callbacks
 + * @sk: socket that has buffer space available again
 + *
 + * Called by xs_udp_write_space() and xs_tcp_write_space(), both of which
 + * hold a read lock on sk->sk_callback_lock around the call.  Clears
 + * SOCK_NOSPACE, and calls xprt_write_space() only if SOCK_ASYNC_NOSPACE
 + * was set (the bit is cleared by the test).  Returns early when the sock
 + * has no struct socket or xprt_from_sock() yields no transport.
 + */
 +static void xs_write_space(struct sock *sk)
 +{
 +      struct socket *sock;
 +      struct rpc_xprt *xprt;
 +
 +      if (unlikely(!(sock = sk->sk_socket)))
 +              return;
 +      clear_bit(SOCK_NOSPACE, &sock->flags);
 +
 +      if (unlikely(!(xprt = xprt_from_sock(sk))))
 +              return;
 +      if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
 +              return;
 +
 +      xprt_write_space(xprt);
 +}
 +
  /**
   * xs_udp_write_space - callback invoked when socket buffer space
   *                             becomes available
@@@ -1257,9 -1295,23 +1312,9 @@@ static void xs_udp_write_space(struct s
        read_lock(&sk->sk_callback_lock);
  
        /* from net/core/sock.c:sock_def_write_space */
 -      if (sock_writeable(sk)) {
 -              struct socket *sock;
 -              struct rpc_xprt *xprt;
 -
 -              if (unlikely(!(sock = sk->sk_socket)))
 -                      goto out;
 -              clear_bit(SOCK_NOSPACE, &sock->flags);
 -
 -              if (unlikely(!(xprt = xprt_from_sock(sk))))
 -                      goto out;
 -              if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
 -                      goto out;
 -
 -              xprt_write_space(xprt);
 -      }
 +      if (sock_writeable(sk))
 +              xs_write_space(sk);
  
 - out:
        read_unlock(&sk->sk_callback_lock);
  }
  
@@@ -1278,9 -1330,23 +1333,9 @@@ static void xs_tcp_write_space(struct s
        read_lock(&sk->sk_callback_lock);
  
        /* from net/core/stream.c:sk_stream_write_space */
 -      if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
 -              struct socket *sock;
 -              struct rpc_xprt *xprt;
 -
 -              if (unlikely(!(sock = sk->sk_socket)))
 -                      goto out;
 -              clear_bit(SOCK_NOSPACE, &sock->flags);
 +      if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
 +              xs_write_space(sk);
  
 -              if (unlikely(!(xprt = xprt_from_sock(sk))))
 -                      goto out;
 -              if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
 -                      goto out;
 -
 -              xprt_write_space(xprt);
 -      }
 -
 - out:
        read_unlock(&sk->sk_callback_lock);
  }
  
@@@ -1494,6 -1560,7 +1549,7 @@@ static void xs_udp_finish_connecting(st
                sk->sk_user_data = xprt;
                sk->sk_data_ready = xs_udp_data_ready;
                sk->sk_write_space = xs_udp_write_space;
+               sk->sk_error_report = xs_error_report;
                sk->sk_no_check = UDP_CSUM_NORCV;
                sk->sk_allocation = GFP_ATOMIC;
  
@@@ -1526,9 -1593,10 +1582,10 @@@ static void xs_udp_connect_worker4(stru
                goto out;
  
        /* Start by resetting any existing state */
-       xs_close(xprt);
+       xs_reset_transport(transport);
  
-       if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+       err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
+       if (err < 0) {
                dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
                goto out;
        }
        xs_udp_finish_connecting(xprt, sock);
        status = 0;
  out:
-       xprt_wake_pending_tasks(xprt, status);
        xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, status);
  }
  
  /**
@@@ -1567,9 -1635,10 +1624,10 @@@ static void xs_udp_connect_worker6(stru
                goto out;
  
        /* Start by resetting any existing state */
-       xs_close(xprt);
+       xs_reset_transport(transport);
  
-       if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
+       err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
+       if (err < 0) {
                dprintk("RPC:       can't create UDP transport socket (%d).\n", -err);
                goto out;
        }
        xs_udp_finish_connecting(xprt, sock);
        status = 0;
  out:
-       xprt_wake_pending_tasks(xprt, status);
        xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, status);
  }
  
  /*
   * We need to preserve the port number so the reply cache on the server can
   * find our cached RPC replies when we get around to reconnecting.
+  *
+  * @xprt: transport being disconnected
+  * @transport: its socket transport; transport->sock must be non-NULL
+  *
+  * Disconnects by issuing kernel_connect() with an AF_UNSPEC address on
+  * the existing socket.  A zero return marks the transport closed via
+  * xs_sock_mark_closed(); any other return code is only logged.
   */
- static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
+ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
  {
        int result;
-       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct sockaddr any;
  
        dprintk("RPC:       disconnecting xprt %p to reuse port\n", xprt);
        memset(&any, 0, sizeof(any));
        any.sa_family = AF_UNSPEC;
        result = kernel_connect(transport->sock, &any, sizeof(any), 0);
-       if (result)
+       if (!result)
+               xs_sock_mark_closed(xprt);
+       else
                dprintk("RPC:       AF_UNSPEC connect return code %d\n",
                                result);
  }
  
+ /*
+  * xs_tcp_reuse_connection - abort the old connection only if needed
+  * @xprt: transport being reconnected
+  * @transport: its socket transport
+  *
+  * Leaves the socket alone when it is already closed and unconnected
+  * (TCP_CLOSE with SS_UNCONNECTED), or when the TCP state is
+  * ESTABLISHED or SYN_SENT; in every other state it calls
+  * xs_abort_connection().  Dereferences transport->inet and
+  * transport->sock without a NULL check; the visible caller,
+  * xs_tcp_setup_socket(), only calls this when transport->sock is set
+  * (assumes transport->inet is set alongside it -- confirm).
+  */
+ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
+ {
+       unsigned int state = transport->inet->sk_state;
+       if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
+               return;
+       if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
+               return;
+       xs_abort_connection(xprt, transport);
+ }
  static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
  {
        struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
                sk->sk_data_ready = xs_tcp_data_ready;
                sk->sk_state_change = xs_tcp_state_change;
                sk->sk_write_space = xs_tcp_write_space;
-               sk->sk_error_report = xs_tcp_error_report;
+               sk->sk_error_report = xs_error_report;
                sk->sk_allocation = GFP_ATOMIC;
  
                /* socket options */
  }
  
  /**
-  * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
-  * @work: RPC transport to connect
+  * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
+  * @xprt: RPC transport to connect
+  * @transport: socket transport to connect
+  * @create_sock: function to create a socket of the correct type
   *
   * Invoked by a work queue tasklet.
   */
- static void xs_tcp_connect_worker4(struct work_struct *work)
+ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
+               struct sock_xprt *transport,
+               struct socket *(*create_sock)(struct rpc_xprt *,
+                       struct sock_xprt *))
  {
-       struct sock_xprt *transport =
-               container_of(work, struct sock_xprt, connect_worker.work);
-       struct rpc_xprt *xprt = &transport->xprt;
        struct socket *sock = transport->sock;
-       int err, status = -EIO;
+       int status = -EIO;
  
        if (xprt->shutdown)
                goto out;
  
        if (!sock) {
-               /* start from scratch */
-               if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
-                       dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
+               clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+               sock = create_sock(xprt, transport);
+               if (IS_ERR(sock)) {
+                       status = PTR_ERR(sock);
                        goto out;
                }
-               xs_reclassify_socket4(sock);
+       } else {
+               int abort_and_exit;
  
-               if (xs_bind4(transport, sock) < 0) {
-                       sock_release(sock);
-                       goto out;
-               }
-       } else
+               abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
+                               &xprt->state);
                /* "close" the socket, preserving the local port */
-               xs_tcp_reuse_connection(xprt);
+               xs_tcp_reuse_connection(xprt, transport);
+               if (abort_and_exit)
+                       goto out_eagain;
+       }
  
        dprintk("RPC:       worker connecting xprt %p to address: %s\n",
                        xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
        dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
                        xprt, -status, xprt_connected(xprt),
                        sock->sk->sk_state);
-       if (status < 0) {
-               switch (status) {
-                       case -EINPROGRESS:
-                       case -EALREADY:
-                               goto out_clear;
-                       case -ECONNREFUSED:
-                       case -ECONNRESET:
-                               /* retry with existing socket, after a delay */
-                               break;
-                       default:
-                               /* get rid of existing socket, and retry */
-                               xs_tcp_shutdown(xprt);
-               }
+       switch (status) {
+       case -ECONNREFUSED:
+       case -ECONNRESET:
+       case -ENETUNREACH:
+               /* retry with existing socket, after a delay */
+       case 0:
+       case -EINPROGRESS:
+       case -EALREADY:
+               xprt_clear_connecting(xprt);
+               return;
        }
+       /* get rid of existing socket, and retry */
+       xs_tcp_shutdown(xprt);
+       printk("%s: connect returned unhandled error %d\n",
+                       __func__, status);
+ out_eagain:
+       status = -EAGAIN;
  out:
-       xprt_wake_pending_tasks(xprt, status);
- out_clear:
        xprt_clear_connecting(xprt);
+       xprt_wake_pending_tasks(xprt, status);
+ }
+ /*
+  * xs_create_tcp_sock4 - create and bind a fresh PF_INET TCP socket
+  * @xprt: RPC transport (not used in this function body)
+  * @transport: socket transport passed to xs_bind4()
+  *
+  * Passed to xs_tcp_setup_socket() as its create_sock callback.
+  * Returns the new socket, or ERR_PTR(-EIO) on any failure: the error
+  * code from sock_create_kern() is only logged, not propagated, and a
+  * socket that fails xs_bind4() is released before returning.
+  */
+ static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
+               struct sock_xprt *transport)
+ {
+       struct socket *sock;
+       int err;
+       /* start from scratch */
+       err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+       if (err < 0) {
+               dprintk("RPC:       can't create TCP transport socket (%d).\n",
+                               -err);
+               goto out_err;
+       }
+       xs_reclassify_socket4(sock);
+       if (xs_bind4(transport, sock) < 0) {
+               sock_release(sock);
+               goto out_err;
+       }
+       return sock;
+ out_err:
+       return ERR_PTR(-EIO);
  }
  
  /**
-  * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+  * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
   * @work: RPC transport to connect
   *
   * Invoked by a work queue tasklet.
+  *
+  * Thin PF_INET wrapper: recovers the sock_xprt that embeds
+  * connect_worker and hands off to xs_tcp_setup_socket() with
+  * xs_create_tcp_sock4() as the socket-creation callback.
   */
- static void xs_tcp_connect_worker6(struct work_struct *work)
+ static void xs_tcp_connect_worker4(struct work_struct *work)
  {
        struct sock_xprt *transport =
                container_of(work, struct sock_xprt, connect_worker.work);
        struct rpc_xprt *xprt = &transport->xprt;
-       struct socket *sock = transport->sock;
-       int err, status = -EIO;
  
-       if (xprt->shutdown)
-               goto out;
+       xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
+ }
  
-       if (!sock) {
-               /* start from scratch */
-               if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
-                       dprintk("RPC:       can't create TCP transport socket (%d).\n", -err);
-                       goto out;
-               }
-               xs_reclassify_socket6(sock);
+ /*
+  * xs_create_tcp_sock6 - create and bind a fresh PF_INET6 TCP socket
+  * @xprt: RPC transport (not used in this function body)
+  * @transport: socket transport passed to xs_bind6()
+  *
+  * Passed to xs_tcp_setup_socket() as its create_sock callback.
+  * Returns the new socket, or ERR_PTR(-EIO) on any failure: the error
+  * code from sock_create_kern() is only logged, not propagated, and a
+  * socket that fails xs_bind6() is released before returning.
+  */
+ static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
+               struct sock_xprt *transport)
+ {
+       struct socket *sock;
+       int err;
+       /* start from scratch */
+       err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
+       if (err < 0) {
+               dprintk("RPC:       can't create TCP transport socket (%d).\n",
+                               -err);
+               goto out_err;
+       }
+       xs_reclassify_socket6(sock);
  
-               if (xs_bind6(transport, sock) < 0) {
-                       sock_release(sock);
-                       goto out;
-               }
-       } else
-               /* "close" the socket, preserving the local port */
-               xs_tcp_reuse_connection(xprt);
+       if (xs_bind6(transport, sock) < 0) {
+               sock_release(sock);
+               goto out_err;
+       }
+       return sock;
+ out_err:
+       return ERR_PTR(-EIO);
+ }
  
-       dprintk("RPC:       worker connecting xprt %p to address: %s\n",
-                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
+ /**
+  * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
+  * @work: RPC transport to connect
+  *
+  * Invoked by a work queue tasklet.
+  *
+  * Thin PF_INET6 wrapper: recovers the sock_xprt that embeds
+  * connect_worker and hands off to xs_tcp_setup_socket() with
+  * xs_create_tcp_sock6() as the socket-creation callback.
+  */
+ static void xs_tcp_connect_worker6(struct work_struct *work)
+ {
+       struct sock_xprt *transport =
+               container_of(work, struct sock_xprt, connect_worker.work);
+       struct rpc_xprt *xprt = &transport->xprt;
  
-       status = xs_tcp_finish_connecting(xprt, sock);
-       dprintk("RPC:       %p connect status %d connected %d sock state %d\n",
-                       xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
-       if (status < 0) {
-               switch (status) {
-                       case -EINPROGRESS:
-                       case -EALREADY:
-                               goto out_clear;
-                       case -ECONNREFUSED:
-                       case -ECONNRESET:
-                               /* retry with existing socket, after a delay */
-                               break;
-                       default:
-                               /* get rid of existing socket, and retry */
-                               xs_tcp_shutdown(xprt);
-               }
-       }
- out:
-       xprt_wake_pending_tasks(xprt, status);
- out_clear:
-       xprt_clear_connecting(xprt);
+       xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
  }
  
  /**
@@@ -1817,9 -1924,6 +1913,6 @@@ static void xs_tcp_connect(struct rpc_t
  {
        struct rpc_xprt *xprt = task->tk_xprt;
  
-       /* Initiate graceful shutdown of the socket if not already done */
-       if (test_bit(XPRT_CONNECTED, &xprt->state))
-               xs_tcp_shutdown(xprt);
        /* Exit if we need to wait for socket shutdown to complete */
        if (test_bit(XPRT_CLOSING, &xprt->state))
                return;