Merge branch 'task_killable' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Feb 2008 00:45:47 +0000 (11:45 +1100)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 1 Feb 2008 00:45:47 +0000 (11:45 +1100)
* 'task_killable' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc: (22 commits)
  Remove commented-out code copied from NFS
  NFS: Switch from intr mount option to TASK_KILLABLE
  Add wait_for_completion_killable
  Add wait_event_killable
  Add schedule_timeout_killable
  Use mutex_lock_killable in vfs_readdir
  Add mutex_lock_killable
  Use lock_page_killable
  Add lock_page_killable
  Add fatal_signal_pending
  Add TASK_WAKEKILL
  exit: Use task_is_*
  signal: Use task_is_*
  sched: Use task_contributes_to_load, TASK_ALL and TASK_NORMAL
  ptrace: Use task_is_*
  power: Use task_is_*
  wait: Use TASK_NORMAL
  proc/base.c: Use task_is_*
  proc/array.c: Use TASK_REPORT
  perfmon: Use task_is_*
  ...

Fixed up conflicts in NFS/sunrpc manually..

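The series above replaces interruptible sleeps in NFS and the VFS with the new TASK_KILLABLE state, so that a blocked task ignores ordinary signals but can still be terminated by a fatal one. A minimal sketch of the calling pattern for the new primitives follows; it is not taken from the diff below, and the wait queue, mutex, flag and helper names (my_wq, my_mutex, my_event_done, my_operation_ready) are hypothetical.

    /*
     * Illustrative sketch only: using the killable primitives added by
     * this merge in place of their interruptible counterparts.
     */
    #include <linux/sched.h>
    #include <linux/wait.h>
    #include <linux/mutex.h>
    #include <linux/jiffies.h>
    #include <linux/types.h>
    #include <linux/errno.h>

    static DECLARE_WAIT_QUEUE_HEAD(my_wq);
    static DEFINE_MUTEX(my_mutex);
    static int my_event_done;

    static int my_wait_for_event(void)
    {
    	int err;

    	/*
    	 * Sleep in TASK_KILLABLE: ordinary signals are ignored, but a
    	 * fatal signal (e.g. SIGKILL) wakes the task and the wait
    	 * returns -ERESTARTSYS, so the process remains killable.
    	 */
    	err = wait_event_killable(my_wq, my_event_done);
    	if (err)
    		return err;

    	/* Same idea for mutexes: only a fatal signal aborts the wait. */
    	if (mutex_lock_killable(&my_mutex))
    		return -EINTR;

    	/* ... critical section ... */

    	mutex_unlock(&my_mutex);
    	return 0;
    }

    /* Hypothetical stand-in for a driver-specific readiness check. */
    static bool my_operation_ready(void)
    {
    	return my_event_done != 0;
    }

    /* Retry loop pattern: back off killably, give up on a fatal signal. */
    static int my_poll_for_event(void)
    {
    	while (!my_operation_ready()) {
    		schedule_timeout_killable(HZ);
    		if (fatal_signal_pending(current))
    			return -ERESTARTSYS;
    	}
    	return 0;
    }

This is the same shape the merge gives to NFS itself, for example the EJUKEBOX retry loop in fs/nfs/nfs3proc.c shown further down.
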
23 files changed:
fs/nfs/client.c
fs/nfs/direct.c
fs/nfs/inode.c
fs/nfs/nfs3proc.c
fs/nfs/nfs4proc.c
fs/nfs/pagelist.c
fs/nfs/super.c
fs/nfs/write.c
fs/proc/array.c
fs/proc/base.c
include/linux/nfs_fs.h
include/linux/sched.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/sched.h
kernel/ptrace.c
kernel/sched.c
kernel/signal.c
kernel/timer.c
mm/filemap.c
net/sunrpc/auth.c
net/sunrpc/clnt.c
net/sunrpc/rpcb_clnt.c
net/sunrpc/sched.c

diff --combined fs/nfs/client.c
index 685c43f810c10a476ada529d4e0a92075674ca9a,310fa2f4cbb837132a57ffea845ae66d31669de3..c5c0175898f68311f7a19c8c87e641de27acb30a
@@@ -34,8 -34,6 +34,8 @@@
  #include <linux/nfs_idmap.h>
  #include <linux/vfs.h>
  #include <linux/inet.h>
 +#include <linux/in6.h>
 +#include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
  
  #include <asm/system.h>
@@@ -95,30 -93,22 +95,30 @@@ struct rpc_program         nfsacl_program = 
  };
  #endif  /* CONFIG_NFS_V3_ACL */
  
 +struct nfs_client_initdata {
 +      const char *hostname;
 +      const struct sockaddr *addr;
 +      size_t addrlen;
 +      const struct nfs_rpc_ops *rpc_ops;
 +      int proto;
 +};
 +
  /*
   * Allocate a shared client record
   *
   * Since these are allocated/deallocated very rarely, we don't
   * bother putting them in a slab cache...
   */
 -static struct nfs_client *nfs_alloc_client(const char *hostname,
 -                                         const struct sockaddr_in *addr,
 -                                         int nfsversion)
 +static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
  {
        struct nfs_client *clp;
  
        if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
                goto error_0;
  
 -      if (nfsversion == 4) {
 +      clp->rpc_ops = cl_init->rpc_ops;
 +
 +      if (cl_init->rpc_ops->version == 4) {
                if (nfs_callback_up() < 0)
                        goto error_2;
                __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
        atomic_set(&clp->cl_count, 1);
        clp->cl_cons_state = NFS_CS_INITING;
  
 -      clp->cl_nfsversion = nfsversion;
 -      memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
 +      memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
 +      clp->cl_addrlen = cl_init->addrlen;
  
 -      if (hostname) {
 -              clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
 +      if (cl_init->hostname) {
 +              clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
                if (!clp->cl_hostname)
                        goto error_3;
        }
        INIT_LIST_HEAD(&clp->cl_superblocks);
        clp->cl_rpcclient = ERR_PTR(-EINVAL);
  
 +      clp->cl_proto = cl_init->proto;
 +
  #ifdef CONFIG_NFS_V4
        init_rwsem(&clp->cl_sem);
        INIT_LIST_HEAD(&clp->cl_delegations);
@@@ -178,7 -166,7 +178,7 @@@ static void nfs4_shutdown_client(struc
   */
  static void nfs_free_client(struct nfs_client *clp)
  {
 -      dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
 +      dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
  
        nfs4_shutdown_client(clp);
  
@@@ -215,148 -203,76 +215,148 @@@ void nfs_put_client(struct nfs_client *
        }
  }
  
 +static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
 +                               const struct sockaddr_in *sa2)
 +{
 +      return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
 +}
 +
 +static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1,
 +                               const struct sockaddr_in6 *sa2)
 +{
 +      return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr);
 +}
 +
 +static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
 +                               const struct sockaddr *sa2)
 +{
 +      switch (sa1->sa_family) {
 +      case AF_INET:
 +              return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
 +                              (const struct sockaddr_in *)sa2);
 +      case AF_INET6:
 +              return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1,
 +                              (const struct sockaddr_in6 *)sa2);
 +      }
 +      BUG();
 +}
 +
  /*
 - * Find a client by address
 - * - caller must hold nfs_client_lock
 + * Find a client by IP address and protocol version
 + * - returns NULL if no such client
   */
 -static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port)
 +struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
  {
        struct nfs_client *clp;
  
 +      spin_lock(&nfs_client_lock);
        list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
 +              struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 +
                /* Don't match clients that failed to initialise properly */
 -              if (clp->cl_cons_state < 0)
 +              if (clp->cl_cons_state != NFS_CS_READY)
                        continue;
  
                /* Different NFS versions cannot share the same nfs_client */
 -              if (clp->cl_nfsversion != nfsversion)
 +              if (clp->rpc_ops->version != nfsversion)
                        continue;
  
 -              if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
 -                         sizeof(clp->cl_addr.sin_addr)) != 0)
 +              if (addr->sa_family != clap->sa_family)
 +                      continue;
 +              /* Match only the IP address, not the port number */
 +              if (!nfs_sockaddr_match_ipaddr(addr, clap))
                        continue;
  
 -              if (!match_port || clp->cl_addr.sin_port == addr->sin_port)
 -                      goto found;
 +              atomic_inc(&clp->cl_count);
 +              spin_unlock(&nfs_client_lock);
 +              return clp;
        }
 -
 +      spin_unlock(&nfs_client_lock);
        return NULL;
 -
 -found:
 -      atomic_inc(&clp->cl_count);
 -      return clp;
  }
  
  /*
   * Find a client by IP address and protocol version
   * - returns NULL if no such client
   */
 -struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
 +struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
  {
 -      struct nfs_client *clp;
 +      struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
 +      u32 nfsvers = clp->rpc_ops->version;
  
        spin_lock(&nfs_client_lock);
 -      clp = __nfs_find_client(addr, nfsversion, 0);
 +      list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
 +              struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
 +
 +              /* Don't match clients that failed to initialise properly */
 +              if (clp->cl_cons_state != NFS_CS_READY)
 +                      continue;
 +
 +              /* Different NFS versions cannot share the same nfs_client */
 +              if (clp->rpc_ops->version != nfsvers)
 +                      continue;
 +
 +              if (sap->sa_family != clap->sa_family)
 +                      continue;
 +              /* Match only the IP address, not the port number */
 +              if (!nfs_sockaddr_match_ipaddr(sap, clap))
 +                      continue;
 +
 +              atomic_inc(&clp->cl_count);
 +              spin_unlock(&nfs_client_lock);
 +              return clp;
 +      }
        spin_unlock(&nfs_client_lock);
 -      if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) {
 -              nfs_put_client(clp);
 -              clp = NULL;
 +      return NULL;
 +}
 +
 +/*
 + * Find an nfs_client on the list that matches the initialisation data
 + * that is supplied.
 + */
 +static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
 +{
 +      struct nfs_client *clp;
 +
 +      list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
 +              /* Don't match clients that failed to initialise properly */
 +              if (clp->cl_cons_state < 0)
 +                      continue;
 +
 +              /* Different NFS versions cannot share the same nfs_client */
 +              if (clp->rpc_ops != data->rpc_ops)
 +                      continue;
 +
 +              if (clp->cl_proto != data->proto)
 +                      continue;
 +
 +              /* Match the full socket address */
 +              if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
 +                      continue;
 +
 +              atomic_inc(&clp->cl_count);
 +              return clp;
        }
 -      return clp;
 +      return NULL;
  }
  
  /*
   * Look up a client by IP address and protocol version
   * - creates a new record if one doesn't yet exist
   */
 -static struct nfs_client *nfs_get_client(const char *hostname,
 -                                       const struct sockaddr_in *addr,
 -                                       int nfsversion)
 +static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
  {
        struct nfs_client *clp, *new = NULL;
        int error;
  
 -      dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
 -              hostname ?: "", NIPQUAD(addr->sin_addr),
 -              addr->sin_port, nfsversion);
 +      dprintk("--> nfs_get_client(%s,v%u)\n",
 +              cl_init->hostname ?: "", cl_init->rpc_ops->version);
  
        /* see if the client already exists */
        do {
                spin_lock(&nfs_client_lock);
  
 -              clp = __nfs_find_client(addr, nfsversion, 1);
 +              clp = nfs_match_client(cl_init);
                if (clp)
                        goto found_client;
                if (new)
  
                spin_unlock(&nfs_client_lock);
  
 -              new = nfs_alloc_client(hostname, addr, nfsversion);
 +              new = nfs_alloc_client(cl_init);
        } while (new);
  
        return ERR_PTR(-ENOMEM);
@@@ -386,7 -302,7 +386,7 @@@ found_client
        if (new)
                nfs_free_client(new);
  
-       error = wait_event_interruptible(nfs_client_active_wq,
+       error = wait_event_killable(nfs_client_active_wq,
                                clp->cl_cons_state != NFS_CS_INITING);
        if (error < 0) {
                nfs_put_client(clp);
@@@ -428,16 -344,12 +428,16 @@@ static void nfs_init_timeout_values(str
        switch (proto) {
        case XPRT_TRANSPORT_TCP:
        case XPRT_TRANSPORT_RDMA:
 -              if (!to->to_initval)
 +              if (to->to_initval == 0)
                        to->to_initval = 60 * HZ;
                if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
                        to->to_initval = NFS_MAX_TCP_TIMEOUT;
                to->to_increment = to->to_initval;
                to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
 +              if (to->to_maxval > NFS_MAX_TCP_TIMEOUT)
 +                      to->to_maxval = NFS_MAX_TCP_TIMEOUT;
 +              if (to->to_maxval < to->to_initval)
 +                      to->to_maxval = to->to_initval;
                to->to_exponential = 0;
                break;
        case XPRT_TRANSPORT_UDP:
  /*
   * Create an RPC client handle
   */
 -static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 -                                              unsigned int timeo,
 -                                              unsigned int retrans,
 -                                              rpc_authflavor_t flavor,
 -                                              int flags)
 +static int nfs_create_rpc_client(struct nfs_client *clp,
 +                               const struct rpc_timeout *timeparms,
 +                               rpc_authflavor_t flavor,
 +                               int flags)
  {
 -      struct rpc_timeout      timeparms;
        struct rpc_clnt         *clnt = NULL;
        struct rpc_create_args args = {
 -              .protocol       = proto,
 +              .protocol       = clp->cl_proto,
                .address        = (struct sockaddr *)&clp->cl_addr,
 -              .addrsize       = sizeof(clp->cl_addr),
 -              .timeout        = &timeparms,
 +              .addrsize       = clp->cl_addrlen,
 +              .timeout        = timeparms,
                .servername     = clp->cl_hostname,
                .program        = &nfs_program,
                .version        = clp->rpc_ops->version,
        if (!IS_ERR(clp->cl_rpcclient))
                return 0;
  
 -      nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
 -      clp->retrans_timeo = timeparms.to_initval;
 -      clp->retrans_count = timeparms.to_retries;
 -
        clnt = rpc_create(&args);
        if (IS_ERR(clnt)) {
                dprintk("%s: cannot create RPC client. Error = %ld\n",
   */
  static void nfs_destroy_server(struct nfs_server *server)
  {
 -      if (!IS_ERR(server->client_acl))
 -              rpc_shutdown_client(server->client_acl);
 -
        if (!(server->flags & NFS_MOUNT_NONLM))
 -              lockd_down();   /* release rpc.lockd */
 +              nlmclnt_done(server->nlm_host);
  }
  
  /*
   */
  static int nfs_start_lockd(struct nfs_server *server)
  {
 -      int error = 0;
 +      struct nlm_host *host;
 +      struct nfs_client *clp = server->nfs_client;
 +      struct nlmclnt_initdata nlm_init = {
 +              .hostname       = clp->cl_hostname,
 +              .address        = (struct sockaddr *)&clp->cl_addr,
 +              .addrlen        = clp->cl_addrlen,
 +              .protocol       = server->flags & NFS_MOUNT_TCP ?
 +                                              IPPROTO_TCP : IPPROTO_UDP,
 +              .nfs_version    = clp->rpc_ops->version,
 +      };
  
 -      if (server->nfs_client->cl_nfsversion > 3)
 -              goto out;
 +      if (nlm_init.nfs_version > 3)
 +              return 0;
        if (server->flags & NFS_MOUNT_NONLM)
 -              goto out;
 -      error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
 -                      IPPROTO_TCP : IPPROTO_UDP);
 -      if (error < 0)
 -              server->flags |= NFS_MOUNT_NONLM;
 -      else
 -              server->destroy = nfs_destroy_server;
 -out:
 -      return error;
 +              return 0;
 +
 +      host = nlmclnt_init(&nlm_init);
 +      if (IS_ERR(host))
 +              return PTR_ERR(host);
 +
 +      server->nlm_host = host;
 +      server->destroy = nfs_destroy_server;
 +      return 0;
  }
  
  /*
  #ifdef CONFIG_NFS_V3_ACL
  static void nfs_init_server_aclclient(struct nfs_server *server)
  {
 -      if (server->nfs_client->cl_nfsversion != 3)
 +      if (server->nfs_client->rpc_ops->version != 3)
                goto out_noacl;
        if (server->flags & NFS_MOUNT_NOACL)
                goto out_noacl;
@@@ -559,9 -471,7 +559,9 @@@ static inline void nfs_init_server_aclc
  /*
   * Create a general RPC client
   */
 -static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
 +static int nfs_init_server_rpcclient(struct nfs_server *server,
 +              const struct rpc_timeout *timeo,
 +              rpc_authflavor_t pseudoflavour)
  {
        struct nfs_client *clp = server->nfs_client;
  
                return PTR_ERR(server->client);
        }
  
 +      memcpy(&server->client->cl_timeout_default,
 +                      timeo,
 +                      sizeof(server->client->cl_timeout_default));
 +      server->client->cl_timeout = &server->client->cl_timeout_default;
 +
        if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
                struct rpc_auth *auth;
  
        if (server->flags & NFS_MOUNT_SOFT)
                server->client->cl_softrtry = 1;
  
-       server->client->cl_intr = 0;
-       if (server->flags & NFS4_MOUNT_INTR)
-               server->client->cl_intr = 1;
        return 0;
  }
  
   * Initialise an NFS2 or NFS3 client
   */
  static int nfs_init_client(struct nfs_client *clp,
 +                         const struct rpc_timeout *timeparms,
                           const struct nfs_parsed_mount_data *data)
  {
        int error;
                return 0;
        }
  
 -      /* Check NFS protocol revision and initialize RPC op vector */
 -      clp->rpc_ops = &nfs_v2_clientops;
 -#ifdef CONFIG_NFS_V3
 -      if (clp->cl_nfsversion == 3)
 -              clp->rpc_ops = &nfs_v3_clientops;
 -#endif
        /*
         * Create a client RPC handle for doing FSSTAT with UNIX auth only
         * - RFC 2623, sec 2.3.2
         */
 -      error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
 -                              data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
 +      error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0);
        if (error < 0)
                goto error;
        nfs_mark_client_ready(clp, NFS_CS_READY);
@@@ -633,34 -540,25 +629,34 @@@ error
  static int nfs_init_server(struct nfs_server *server,
                           const struct nfs_parsed_mount_data *data)
  {
 +      struct nfs_client_initdata cl_init = {
 +              .hostname = data->nfs_server.hostname,
 +              .addr = (const struct sockaddr *)&data->nfs_server.address,
 +              .addrlen = data->nfs_server.addrlen,
 +              .rpc_ops = &nfs_v2_clientops,
 +              .proto = data->nfs_server.protocol,
 +      };
 +      struct rpc_timeout timeparms;
        struct nfs_client *clp;
 -      int error, nfsvers = 2;
 +      int error;
  
        dprintk("--> nfs_init_server()\n");
  
  #ifdef CONFIG_NFS_V3
        if (data->flags & NFS_MOUNT_VER3)
 -              nfsvers = 3;
 +              cl_init.rpc_ops = &nfs_v3_clientops;
  #endif
  
        /* Allocate or find a client reference we can use */
 -      clp = nfs_get_client(data->nfs_server.hostname,
 -                              &data->nfs_server.address, nfsvers);
 +      clp = nfs_get_client(&cl_init);
        if (IS_ERR(clp)) {
                dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
                return PTR_ERR(clp);
        }
  
 -      error = nfs_init_client(clp, data);
 +      nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 +                      data->timeo, data->retrans);
 +      error = nfs_init_client(clp, &timeparms, data);
        if (error < 0)
                goto error;
  
        if (error < 0)
                goto error;
  
 -      error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
 +      error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
        if (error < 0)
                goto error;
  
@@@ -830,9 -728,6 +826,9 @@@ static struct nfs_server *nfs_alloc_ser
        INIT_LIST_HEAD(&server->client_link);
        INIT_LIST_HEAD(&server->master_link);
  
 +      init_waitqueue_head(&server->active_wq);
 +      atomic_set(&server->active, 0);
 +
        server->io_stats = nfs_alloc_iostats();
        if (!server->io_stats) {
                kfree(server);
@@@ -856,9 -751,6 +852,9 @@@ void nfs_free_server(struct nfs_server 
  
        if (server->destroy != NULL)
                server->destroy(server);
 +
 +      if (!IS_ERR(server->client_acl))
 +              rpc_shutdown_client(server->client_acl);
        if (!IS_ERR(server->client))
                rpc_shutdown_client(server->client);
  
@@@ -944,7 -836,7 +940,7 @@@ error
   * Initialise an NFS4 client record
   */
  static int nfs4_init_client(struct nfs_client *clp,
 -              int proto, int timeo, int retrans,
 +              const struct rpc_timeout *timeparms,
                const char *ip_addr,
                rpc_authflavor_t authflavour)
  {
        /* Check NFS protocol revision and initialize RPC op vector */
        clp->rpc_ops = &nfs_v4_clientops;
  
 -      error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour,
 +      error = nfs_create_rpc_client(clp, timeparms, authflavour,
                                        RPC_CLNT_CREATE_DISCRTRY);
        if (error < 0)
                goto error;
@@@ -986,32 -878,23 +982,32 @@@ error
   * Set up an NFS4 client
   */
  static int nfs4_set_client(struct nfs_server *server,
 -              const char *hostname, const struct sockaddr_in *addr,
 +              const char *hostname,
 +              const struct sockaddr *addr,
 +              const size_t addrlen,
                const char *ip_addr,
                rpc_authflavor_t authflavour,
 -              int proto, int timeo, int retrans)
 +              int proto, const struct rpc_timeout *timeparms)
  {
 +      struct nfs_client_initdata cl_init = {
 +              .hostname = hostname,
 +              .addr = addr,
 +              .addrlen = addrlen,
 +              .rpc_ops = &nfs_v4_clientops,
 +              .proto = proto,
 +      };
        struct nfs_client *clp;
        int error;
  
        dprintk("--> nfs4_set_client()\n");
  
        /* Allocate or find a client reference we can use */
 -      clp = nfs_get_client(hostname, addr, 4);
 +      clp = nfs_get_client(&cl_init);
        if (IS_ERR(clp)) {
                error = PTR_ERR(clp);
                goto error;
        }
 -      error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour);
 +      error = nfs4_init_client(clp, timeparms, ip_addr, authflavour);
        if (error < 0)
                goto error_put;
  
@@@ -1032,26 -915,10 +1028,26 @@@ error
  static int nfs4_init_server(struct nfs_server *server,
                const struct nfs_parsed_mount_data *data)
  {
 +      struct rpc_timeout timeparms;
        int error;
  
        dprintk("--> nfs4_init_server()\n");
  
 +      nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
 +                      data->timeo, data->retrans);
 +
 +      /* Get a client record */
 +      error = nfs4_set_client(server,
 +                      data->nfs_server.hostname,
 +                      (const struct sockaddr *)&data->nfs_server.address,
 +                      data->nfs_server.addrlen,
 +                      data->client_address,
 +                      data->auth_flavors[0],
 +                      data->nfs_server.protocol,
 +                      &timeparms);
 +      if (error < 0)
 +              goto error;
 +
        /* Initialise the client representation from the mount data */
        server->flags = data->flags & NFS_MOUNT_FLAGMASK;
        server->caps |= NFS_CAP_ATOMIC_OPEN;
        server->acdirmin = data->acdirmin * HZ;
        server->acdirmax = data->acdirmax * HZ;
  
 -      error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
 +      error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
  
 +error:
        /* Done */
        dprintk("<-- nfs4_init_server() = %d\n", error);
        return error;
@@@ -1091,6 -957,17 +1087,6 @@@ struct nfs_server *nfs4_create_server(c
        if (!server)
                return ERR_PTR(-ENOMEM);
  
 -      /* Get a client record */
 -      error = nfs4_set_client(server,
 -                      data->nfs_server.hostname,
 -                      &data->nfs_server.address,
 -                      data->client_address,
 -                      data->auth_flavors[0],
 -                      data->nfs_server.protocol,
 -                      data->timeo, data->retrans);
 -      if (error < 0)
 -              goto error;
 -
        /* set up the general RPC client */
        error = nfs4_init_server(server, data);
        if (error < 0)
@@@ -1158,13 -1035,12 +1154,13 @@@ struct nfs_server *nfs4_create_referral
  
        /* Get a client representation.
         * Note: NFSv4 always uses TCP, */
 -      error = nfs4_set_client(server, data->hostname, data->addr,
 -                      parent_client->cl_ipaddr,
 -                      data->authflavor,
 -                      parent_server->client->cl_xprt->prot,
 -                      parent_client->retrans_timeo,
 -                      parent_client->retrans_count);
 +      error = nfs4_set_client(server, data->hostname,
 +                              data->addr,
 +                              data->addrlen,
 +                              parent_client->cl_ipaddr,
 +                              data->authflavor,
 +                              parent_server->client->cl_xprt->prot,
 +                              parent_server->client->cl_timeout);
        if (error < 0)
                goto error;
  
        nfs_server_copy_userdata(server, parent_server);
        server->caps |= NFS_CAP_ATOMIC_OPEN;
  
 -      error = nfs_init_server_rpcclient(server, data->authflavor);
 +      error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
        if (error < 0)
                goto error;
  
@@@ -1241,9 -1117,7 +1237,9 @@@ struct nfs_server *nfs_clone_server(str
  
        server->fsid = fattr->fsid;
  
 -      error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
 +      error = nfs_init_server_rpcclient(server,
 +                      source->client->cl_timeout,
 +                      source->client->cl_auth->au_flavor);
        if (error < 0)
                goto out_free_server;
        if (!IS_ERR(source->client_acl))
@@@ -1385,10 -1259,10 +1381,10 @@@ static int nfs_server_list_show(struct 
        /* display one transport per line on subsequent lines */
        clp = list_entry(v, struct nfs_client, cl_share_link);
  
 -      seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
 -                 clp->cl_nfsversion,
 -                 NIPQUAD(clp->cl_addr.sin_addr),
 -                 ntohs(clp->cl_addr.sin_port),
 +      seq_printf(m, "v%u %s %s %3d %s\n",
 +                 clp->rpc_ops->version,
 +                 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
 +                 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
                   atomic_read(&clp->cl_count),
                   clp->cl_hostname);
  
@@@ -1464,10 -1338,10 +1460,10 @@@ static int nfs_volume_list_show(struct 
                 (unsigned long long) server->fsid.major,
                 (unsigned long long) server->fsid.minor);
  
 -      seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
 -                 clp->cl_nfsversion,
 -                 NIPQUAD(clp->cl_addr.sin_addr),
 -                 ntohs(clp->cl_addr.sin_port),
 +      seq_printf(m, "v%u %s %s %-7s %-17s\n",
 +                 clp->rpc_ops->version,
 +                 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
 +                 rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
                   dev,
                   fsid);
  
diff --combined fs/nfs/direct.c
index f8e165c7d5a637de762e619e1ddd1a7db4436fb7,7b994b2fa593f6a50c8a21fbb5c2bc35fda7d852..16844f98f50e4b6f6ad24eea74470fb6da263adf
@@@ -188,17 -188,12 +188,12 @@@ static void nfs_direct_req_release(stru
  static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
  {
        ssize_t result = -EIOCBQUEUED;
-       struct rpc_clnt *clnt;
-       sigset_t oldset;
  
        /* Async requests don't wait here */
        if (dreq->iocb)
                goto out;
  
-       clnt = NFS_CLIENT(dreq->inode);
-       rpc_clnt_sigmask(clnt, &oldset);
-       result = wait_for_completion_interruptible(&dreq->completion);
-       rpc_clnt_sigunmask(clnt, &oldset);
+       result = wait_for_completion_killable(&dreq->completion);
  
        if (!result)
                result = dreq->error;
@@@ -277,16 -272,6 +272,16 @@@ static ssize_t nfs_direct_read_schedule
        unsigned long user_addr = (unsigned long)iov->iov_base;
        size_t count = iov->iov_len;
        size_t rsize = NFS_SERVER(inode)->rsize;
 +      struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_cred = ctx->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = NFS_CLIENT(inode),
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs_read_direct_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
        unsigned int pgbase;
        int result;
        ssize_t started = 0;
  
                data->req = (struct nfs_page *) dreq;
                data->inode = inode;
 -              data->cred = ctx->cred;
 +              data->cred = msg.rpc_cred;
                data->args.fh = NFS_FH(inode);
                data->args.context = ctx;
                data->args.offset = pos;
                data->res.fattr = &data->fattr;
                data->res.eof = 0;
                data->res.count = bytes;
 +              msg.rpc_argp = &data->args;
 +              msg.rpc_resp = &data->res;
  
 -              rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
 -                              &nfs_read_direct_ops, data);
 -              NFS_PROTO(inode)->read_setup(data);
 +              task_setup_data.task = &data->task;
 +              task_setup_data.callback_data = data;
 +              NFS_PROTO(inode)->read_setup(data, &msg);
  
 -              data->task.tk_cookie = (unsigned long) inode;
 -
 -              rpc_execute(&data->task);
 +              task = rpc_run_task(&task_setup_data);
 +              if (!IS_ERR(task))
 +                      rpc_put_task(task);
  
                dprintk("NFS: %5u initiated direct read call "
                        "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
@@@ -444,15 -427,6 +439,15 @@@ static void nfs_direct_write_reschedule
        struct inode *inode = dreq->inode;
        struct list_head *p;
        struct nfs_write_data *data;
 +      struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_cred = dreq->ctx->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = NFS_CLIENT(inode),
 +              .callback_ops = &nfs_write_direct_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
  
        dreq->count = 0;
        get_dreq(dreq);
  
                get_dreq(dreq);
  
 +              /* Use stable writes */
 +              data->args.stable = NFS_FILE_SYNC;
 +
                /*
                 * Reset data->res.
                 */
                 * Reuse data->task; data->args should not have changed
                 * since the original request was sent.
                 */
 -              rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
 -                              &nfs_write_direct_ops, data);
 -              NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
 -
 -              data->task.tk_priority = RPC_PRIORITY_NORMAL;
 -              data->task.tk_cookie = (unsigned long) inode;
 +              task_setup_data.task = &data->task;
 +              task_setup_data.callback_data = data;
 +              msg.rpc_argp = &data->args;
 +              msg.rpc_resp = &data->res;
 +              NFS_PROTO(inode)->write_setup(data, &msg);
  
                /*
                 * We're called via an RPC callback, so BKL is already held.
                 */
 -              rpc_execute(&data->task);
 +              task = rpc_run_task(&task_setup_data);
 +              if (!IS_ERR(task))
 +                      rpc_put_task(task);
  
                dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                                data->task.tk_pid,
@@@ -530,23 -500,9 +525,23 @@@ static const struct rpc_call_ops nfs_co
  static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
  {
        struct nfs_write_data *data = dreq->commit_data;
 +      struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_argp = &data->args,
 +              .rpc_resp = &data->res,
 +              .rpc_cred = dreq->ctx->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .task = &data->task,
 +              .rpc_client = NFS_CLIENT(dreq->inode),
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs_commit_direct_ops,
 +              .callback_data = data,
 +              .flags = RPC_TASK_ASYNC,
 +      };
  
        data->inode = dreq->inode;
 -      data->cred = dreq->ctx->cred;
 +      data->cred = msg.rpc_cred;
  
        data->args.fh = NFS_FH(data->inode);
        data->args.offset = 0;
        data->res.fattr = &data->fattr;
        data->res.verf = &data->verf;
  
 -      rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
 -                              &nfs_commit_direct_ops, data);
 -      NFS_PROTO(data->inode)->commit_setup(data, 0);
 +      NFS_PROTO(data->inode)->commit_setup(data, &msg);
  
 -      data->task.tk_priority = RPC_PRIORITY_NORMAL;
 -      data->task.tk_cookie = (unsigned long)data->inode;
        /* Note: task.tk_ops->rpc_release will free dreq->commit_data */
        dreq->commit_data = NULL;
  
        dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
  
 -      rpc_execute(&data->task);
 +      task = rpc_run_task(&task_setup_data);
 +      if (!IS_ERR(task))
 +              rpc_put_task(task);
  }
  
  static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
@@@ -679,16 -637,6 +674,16 @@@ static ssize_t nfs_direct_write_schedul
        struct inode *inode = ctx->path.dentry->d_inode;
        unsigned long user_addr = (unsigned long)iov->iov_base;
        size_t count = iov->iov_len;
 +      struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_cred = ctx->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = NFS_CLIENT(inode),
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs_write_direct_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
        size_t wsize = NFS_SERVER(inode)->wsize;
        unsigned int pgbase;
        int result;
  
                data->req = (struct nfs_page *) dreq;
                data->inode = inode;
 -              data->cred = ctx->cred;
 +              data->cred = msg.rpc_cred;
                data->args.fh = NFS_FH(inode);
                data->args.context = ctx;
                data->args.offset = pos;
                data->args.pgbase = pgbase;
                data->args.pages = data->pagevec;
                data->args.count = bytes;
 +              data->args.stable = sync;
                data->res.fattr = &data->fattr;
                data->res.count = bytes;
                data->res.verf = &data->verf;
  
 -              rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
 -                              &nfs_write_direct_ops, data);
 -              NFS_PROTO(inode)->write_setup(data, sync);
 +              task_setup_data.task = &data->task;
 +              task_setup_data.callback_data = data;
 +              msg.rpc_argp = &data->args;
 +              msg.rpc_resp = &data->res;
 +              NFS_PROTO(inode)->write_setup(data, &msg);
  
 -              data->task.tk_priority = RPC_PRIORITY_NORMAL;
 -              data->task.tk_cookie = (unsigned long) inode;
 -
 -              rpc_execute(&data->task);
 +              task = rpc_run_task(&task_setup_data);
 +              if (!IS_ERR(task))
 +                      rpc_put_task(task);
  
                dprintk("NFS: %5u initiated direct write call "
                        "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
@@@ -820,7 -766,7 +815,7 @@@ static ssize_t nfs_direct_write(struct 
        struct inode *inode = iocb->ki_filp->f_mapping->host;
        struct nfs_direct_req *dreq;
        size_t wsize = NFS_SERVER(inode)->wsize;
 -      int sync = 0;
 +      int sync = NFS_UNSTABLE;
  
        dreq = nfs_direct_req_alloc();
        if (!dreq)
        nfs_alloc_commit_data(dreq);
  
        if (dreq->commit_data == NULL || count < wsize)
 -              sync = FLUSH_STABLE;
 +              sync = NFS_FILE_SYNC;
  
        dreq->inode = inode;
        dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
@@@ -940,6 -886,8 +935,6 @@@ ssize_t nfs_file_direct_write(struct ki
        retval = generic_write_checks(file, &pos, &count, 0);
        if (retval)
                goto out;
 -      if (!count)
 -              goto out;       /* return 0 */
  
        retval = -EINVAL;
        if ((ssize_t) count < 0)
diff --combined fs/nfs/inode.c
index 3f332e54e760ad2056154c01491c8db9521b8319,f68c22215b14fb13667ddacf9e29ca134702f5f7..966a8850aa30be5330a524699069718cd00932ce
@@@ -192,7 -192,7 +192,7 @@@ void nfs_invalidate_atime(struct inode 
   */
  static void nfs_invalidate_inode(struct inode *inode)
  {
 -      set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
 +      set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
        nfs_zap_caches_locked(inode);
  }
  
@@@ -229,7 -229,7 +229,7 @@@ nfs_init_locked(struct inode *inode, vo
        struct nfs_find_desc    *desc = (struct nfs_find_desc *)opaque;
        struct nfs_fattr        *fattr = desc->fattr;
  
 -      NFS_FILEID(inode) = fattr->fileid;
 +      set_nfs_fileid(inode, fattr->fileid);
        nfs_copy_fh(NFS_FH(inode), desc->fh);
        return 0;
  }
@@@ -291,7 -291,7 +291,7 @@@ nfs_fhget(struct super_block *sb, struc
                        inode->i_fop = &nfs_dir_operations;
                        if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
                            && fattr->size <= NFS_LIMIT_READDIRPLUS)
 -                              set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
 +                              set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
                        /* Deal with crossing mountpoints */
                        if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
                                if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
@@@ -433,15 -433,11 +433,11 @@@ static int nfs_wait_schedule(void *word
   */
  static int nfs_wait_on_inode(struct inode *inode)
  {
-       struct rpc_clnt *clnt = NFS_CLIENT(inode);
        struct nfs_inode *nfsi = NFS_I(inode);
-       sigset_t oldmask;
        int error;
  
-       rpc_clnt_sigmask(clnt, &oldmask);
        error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
-                                       nfs_wait_schedule, TASK_INTERRUPTIBLE);
-       rpc_clnt_sigunmask(clnt, &oldmask);
+                                       nfs_wait_schedule, TASK_KILLABLE);
  
        return error;
  }
@@@ -461,18 -457,9 +457,18 @@@ int nfs_getattr(struct vfsmount *mnt, s
        int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
        int err;
  
 -      /* Flush out writes to the server in order to update c/mtime */
 -      if (S_ISREG(inode->i_mode))
 +      /*
 +       * Flush out writes to the server in order to update c/mtime.
 +       *
 +       * Hold the i_mutex to suspend application writes temporarily;
 +       * this prevents long-running writing applications from blocking
 +       * nfs_wb_nocommit.
 +       */
 +      if (S_ISREG(inode->i_mode)) {
 +              mutex_lock(&inode->i_mutex);
                nfs_wb_nocommit(inode);
 +              mutex_unlock(&inode->i_mutex);
 +      }
  
        /*
         * We may force a getattr if the user cares about atime.
@@@ -668,7 -655,7 +664,7 @@@ __nfs_revalidate_inode(struct nfs_serve
                if (status == -ESTALE) {
                        nfs_zap_caches(inode);
                        if (!S_ISDIR(inode->i_mode))
 -                              set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
 +                              set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
                }
                goto out;
        }
@@@ -823,9 -810,8 +819,9 @@@ static void nfs_wcc_update_inode(struc
                        if (S_ISDIR(inode->i_mode))
                                nfsi->cache_validity |= NFS_INO_INVALID_DATA;
                }
 -              if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
 -                      inode->i_size = fattr->size;
 +              if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
 +                  nfsi->npages == 0)
 +                      inode->i_size = nfs_size_to_loff_t(fattr->size);
        }
  }
  
@@@ -1029,8 -1015,7 +1025,8 @@@ static int nfs_update_inode(struct inod
                        dprintk("NFS: mtime change on server for file %s/%ld\n",
                                        inode->i_sb->s_id, inode->i_ino);
                        invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 -                      nfsi->cache_change_attribute = now;
 +                      if (S_ISDIR(inode->i_mode))
 +                              nfs_force_lookup_revalidate(inode);
                }
                /* If ctime has changed we should definitely clear access+acl caches */
                if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
                dprintk("NFS: change_attr change on server for file %s/%ld\n",
                                inode->i_sb->s_id, inode->i_ino);
                invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 -              nfsi->cache_change_attribute = now;
 +              if (S_ISDIR(inode->i_mode))
 +                      nfs_force_lookup_revalidate(inode);
        }
  
        /* Check if our cached file size is stale */
  void nfs4_clear_inode(struct inode *inode)
  {
        /* If we are holding a delegation, return it! */
 -      nfs_inode_return_delegation(inode);
 +      nfs_inode_return_delegation_noreclaim(inode);
        /* First call standard NFS clear_inode() code */
        nfs_clear_inode(inode);
  }
diff --combined fs/nfs/nfs3proc.c
index b353c1a05bfda77c7074fcc31d4dbac00b4c09c9,5ae96340f2c2a36851b8e85cc424faef089bd899..549dbce714a4bde33c55386ed18357ddf911eab4
  static int
  nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
  {
-       sigset_t oldset;
        int res;
-       rpc_clnt_sigmask(clnt, &oldset);
        do {
                res = rpc_call_sync(clnt, msg, flags);
                if (res != -EJUKEBOX)
                        break;
-               schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
+               schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
                res = -ERESTARTSYS;
-       } while (!signalled());
-       rpc_clnt_sigunmask(clnt, &oldset);
+       } while (!fatal_signal_pending(current));
        return res;
  }
  
@@@ -732,9 -729,16 +729,9 @@@ static int nfs3_read_done(struct rpc_ta
        return 0;
  }
  
 -static void nfs3_proc_read_setup(struct nfs_read_data *data)
 +static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
  {
 -      struct rpc_message      msg = {
 -              .rpc_proc       = &nfs3_procedures[NFS3PROC_READ],
 -              .rpc_argp       = &data->args,
 -              .rpc_resp       = &data->res,
 -              .rpc_cred       = data->cred,
 -      };
 -
 -      rpc_call_setup(&data->task, &msg, 0);
 +      msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
  }
  
  static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
        return 0;
  }
  
 -static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 +static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
  {
 -      struct rpc_message      msg = {
 -              .rpc_proc       = &nfs3_procedures[NFS3PROC_WRITE],
 -              .rpc_argp       = &data->args,
 -              .rpc_resp       = &data->res,
 -              .rpc_cred       = data->cred,
 -      };
 -
 -      data->args.stable = NFS_UNSTABLE;
 -      if (how & FLUSH_STABLE) {
 -              data->args.stable = NFS_FILE_SYNC;
 -              if (NFS_I(data->inode)->ncommit)
 -                      data->args.stable = NFS_DATA_SYNC;
 -      }
 -
 -      /* Finalize the task. */
 -      rpc_call_setup(&data->task, &msg, 0);
 +      msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
  }
  
  static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
        return 0;
  }
  
 -static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 +static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
  {
 -      struct rpc_message      msg = {
 -              .rpc_proc       = &nfs3_procedures[NFS3PROC_COMMIT],
 -              .rpc_argp       = &data->args,
 -              .rpc_resp       = &data->res,
 -              .rpc_cred       = data->cred,
 -      };
 -
 -      rpc_call_setup(&data->task, &msg, 0);
 +      msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
  }
  
  static int
  nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
  {
 -      return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
 +      struct inode *inode = filp->f_path.dentry->d_inode;
 +
 +      return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
  }
  
  const struct nfs_rpc_ops nfs_v3_clientops = {
diff --combined fs/nfs/nfs4proc.c
index 5c189bd57eb2b6ea88bcc837bb8f821f04089fe4,c4faa43b36de1d5b5bba79a2e90c9f4fa96c530a..027e1095256ebe09cb001d1736489a7216620d0d
@@@ -210,7 -210,7 +210,7 @@@ static void update_changeattr(struct in
        spin_lock(&dir->i_lock);
        nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
        if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
 -              nfsi->cache_change_attribute = jiffies;
 +              nfs_force_lookup_revalidate(dir);
        nfsi->change_attr = cinfo->after;
        spin_unlock(&dir->i_lock);
  }
@@@ -316,12 -316,9 +316,9 @@@ static void nfs4_opendata_put(struct nf
  
  static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
  {
-       sigset_t oldset;
        int ret;
  
-       rpc_clnt_sigmask(task->tk_client, &oldset);
        ret = rpc_wait_for_completion_task(task);
-       rpc_clnt_sigunmask(task->tk_client, &oldset);
        return ret;
  }
  
@@@ -718,6 -715,19 +715,6 @@@ int nfs4_open_delegation_recall(struct 
        return err;
  }
  
 -static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
 -{
 -      struct nfs4_opendata *data = calldata;
 -      struct  rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
 -              .rpc_argp = &data->c_arg,
 -              .rpc_resp = &data->c_res,
 -              .rpc_cred = data->owner->so_cred,
 -      };
 -      data->timestamp = jiffies;
 -      rpc_call_setup(task, &msg, 0);
 -}
 -
  static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
  {
        struct nfs4_opendata *data = calldata;
        if (data->rpc_status == 0) {
                memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
                                sizeof(data->o_res.stateid.data));
 +              nfs_confirm_seqid(&data->owner->so_seqid, 0);
                renew_lease(data->o_res.server, data->timestamp);
                data->rpc_done = 1;
        }
 -      nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
        nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
  }
  
@@@ -746,6 -756,7 +743,6 @@@ static void nfs4_open_confirm_release(v
        /* In case of error, no cleanup! */
        if (!data->rpc_done)
                goto out_free;
 -      nfs_confirm_seqid(&data->owner->so_seqid, 0);
        state = nfs4_opendata_to_nfs4_state(data);
        if (!IS_ERR(state))
                nfs4_close_state(&data->path, state, data->o_arg.open_flags);
@@@ -754,6 -765,7 +751,6 @@@ out_free
  }
  
  static const struct rpc_call_ops nfs4_open_confirm_ops = {
 -      .rpc_call_prepare = nfs4_open_confirm_prepare,
        .rpc_call_done = nfs4_open_confirm_done,
        .rpc_release = nfs4_open_confirm_release,
  };
@@@ -765,26 -777,12 +762,26 @@@ static int _nfs4_proc_open_confirm(stru
  {
        struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
        struct rpc_task *task;
 +      struct  rpc_message msg = {
 +              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
 +              .rpc_argp = &data->c_arg,
 +              .rpc_resp = &data->c_res,
 +              .rpc_cred = data->owner->so_cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = server->client,
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs4_open_confirm_ops,
 +              .callback_data = data,
 +              .flags = RPC_TASK_ASYNC,
 +      };
        int status;
  
        kref_get(&data->kref);
        data->rpc_done = 0;
        data->rpc_status = 0;
 -      task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
 +      data->timestamp = jiffies;
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        status = nfs4_wait_for_completion_rpc_task(task);
@@@ -801,7 -799,13 +798,7 @@@ static void nfs4_open_prepare(struct rp
  {
        struct nfs4_opendata *data = calldata;
        struct nfs4_state_owner *sp = data->owner;
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
 -              .rpc_argp = &data->o_arg,
 -              .rpc_resp = &data->o_res,
 -              .rpc_cred = sp->so_cred,
 -      };
 -      
 +
        if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
                return;
        /*
        data->o_arg.id = sp->so_owner_id.id;
        data->o_arg.clientid = sp->so_client->cl_clientid;
        if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
 -              msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
 +              task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
                nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
        }
        data->timestamp = jiffies;
 -      rpc_call_setup(task, &msg, 0);
 +      rpc_call_start(task);
        return;
  out_no_action:
        task->tk_action = NULL;
@@@ -879,6 -883,7 +876,6 @@@ static void nfs4_open_release(void *cal
        /* In case we need an open_confirm, no cleanup! */
        if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
                goto out_free;
 -      nfs_confirm_seqid(&data->owner->so_seqid, 0);
        state = nfs4_opendata_to_nfs4_state(data);
        if (!IS_ERR(state))
                nfs4_close_state(&data->path, state, data->o_arg.open_flags);
@@@ -902,26 -907,13 +899,26 @@@ static int _nfs4_proc_open(struct nfs4_
        struct nfs_openargs *o_arg = &data->o_arg;
        struct nfs_openres *o_res = &data->o_res;
        struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
 +              .rpc_argp = o_arg,
 +              .rpc_resp = o_res,
 +              .rpc_cred = data->owner->so_cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = server->client,
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs4_open_ops,
 +              .callback_data = data,
 +              .flags = RPC_TASK_ASYNC,
 +      };
        int status;
  
        kref_get(&data->kref);
        data->rpc_done = 0;
        data->rpc_status = 0;
        data->cancelled = 0;
 -      task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        status = nfs4_wait_for_completion_rpc_task(task);
@@@ -1251,6 -1243,12 +1248,6 @@@ static void nfs4_close_prepare(struct r
  {
        struct nfs4_closedata *calldata = data;
        struct nfs4_state *state = calldata->state;
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
 -              .rpc_argp = &calldata->arg,
 -              .rpc_resp = &calldata->res,
 -              .rpc_cred = state->owner->so_cred,
 -      };
        int clear_rd, clear_wr, clear_rdwr;
  
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
        }
        nfs_fattr_init(calldata->res.fattr);
        if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
 -              msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 +              task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
                calldata->arg.open_flags = FMODE_READ;
        } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
 -              msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 +              task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
                calldata->arg.open_flags = FMODE_WRITE;
        }
        calldata->timestamp = jiffies;
 -      rpc_call_setup(task, &msg, 0);
 +      rpc_call_start(task);
  }
  
  static const struct rpc_call_ops nfs4_close_ops = {
@@@ -1310,16 -1308,6 +1307,16 @@@ int nfs4_do_close(struct path *path, st
        struct nfs4_closedata *calldata;
        struct nfs4_state_owner *sp = state->owner;
        struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
 +              .rpc_cred = state->owner->so_cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = server->client,
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs4_close_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
        int status = -ENOMEM;
  
        calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
        calldata->path.mnt = mntget(path->mnt);
        calldata->path.dentry = dget(path->dentry);
  
 -      task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
 +      msg.rpc_argp = &calldata->arg,
 +      msg.rpc_resp = &calldata->res,
 +      task_setup_data.callback_data = calldata;
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        status = 0;
@@@ -2428,10 -2413,18 +2425,10 @@@ static int nfs4_read_done(struct rpc_ta
        return 0;
  }
  
 -static void nfs4_proc_read_setup(struct nfs_read_data *data)
 +static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
  {
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
 -              .rpc_argp = &data->args,
 -              .rpc_resp = &data->res,
 -              .rpc_cred = data->cred,
 -      };
 -
        data->timestamp   = jiffies;
 -
 -      rpc_call_setup(&data->task, &msg, 0);
 +      msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
  }
  
  static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
        return 0;
  }
  
 -static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 +static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
  {
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
 -              .rpc_argp = &data->args,
 -              .rpc_resp = &data->res,
 -              .rpc_cred = data->cred,
 -      };
 -      struct inode *inode = data->inode;
 -      struct nfs_server *server = NFS_SERVER(inode);
 -      int stable;
 -      
 -      if (how & FLUSH_STABLE) {
 -              if (!NFS_I(inode)->ncommit)
 -                      stable = NFS_FILE_SYNC;
 -              else
 -                      stable = NFS_DATA_SYNC;
 -      } else
 -              stable = NFS_UNSTABLE;
 -      data->args.stable = stable;
 +      struct nfs_server *server = NFS_SERVER(data->inode);
 +
        data->args.bitmask = server->attr_bitmask;
        data->res.server = server;
 -
        data->timestamp   = jiffies;
  
 -      /* Finalize the task. */
 -      rpc_call_setup(&data->task, &msg, 0);
 +      msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
  }
  
  static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
        return 0;
  }
  
 -static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 +static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
  {
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
 -              .rpc_argp = &data->args,
 -              .rpc_resp = &data->res,
 -              .rpc_cred = data->cred,
 -      };      
        struct nfs_server *server = NFS_SERVER(data->inode);
        
        data->args.bitmask = server->attr_bitmask;
        data->res.server = server;
 -
 -      rpc_call_setup(&data->task, &msg, 0);
 +      msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
  }
  
  /*
@@@ -2785,9 -2803,9 +2782,9 @@@ nfs4_async_handle_error(struct rpc_tas
        return 0;
  }
  
- static int nfs4_wait_bit_interruptible(void *word)
+ static int nfs4_wait_bit_killable(void *word)
  {
-       if (signal_pending(current))
+       if (fatal_signal_pending(current))
                return -ERESTARTSYS;
        schedule();
        return 0;
  
  static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
  {
-       sigset_t oldset;
        int res;
  
        might_sleep();
  
        rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
  
-       rpc_clnt_sigmask(clnt, &oldset);
        res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
-                       nfs4_wait_bit_interruptible,
-                       TASK_INTERRUPTIBLE);
-       rpc_clnt_sigunmask(clnt, &oldset);
+                       nfs4_wait_bit_killable, TASK_KILLABLE);
  
        rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
        return res;
  
  static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
  {
-       sigset_t oldset;
        int res = 0;
  
        might_sleep();
                *timeout = NFS4_POLL_RETRY_MIN;
        if (*timeout > NFS4_POLL_RETRY_MAX)
                *timeout = NFS4_POLL_RETRY_MAX;
-       rpc_clnt_sigmask(clnt, &oldset);
-       if (clnt->cl_intr) {
-               schedule_timeout_interruptible(*timeout);
-               if (signalled())
-                       res = -ERESTARTSYS;
-       } else
-               schedule_timeout_uninterruptible(*timeout);
-       rpc_clnt_sigunmask(clnt, &oldset);
+       schedule_timeout_killable(*timeout);
+       if (fatal_signal_pending(current))
+               res = -ERESTARTSYS;
        *timeout <<= 1;
        return res;
  }
@@@ -2891,20 -2899,14 +2878,20 @@@ int nfs4_proc_setclientid(struct nfs_cl
  
        for(;;) {
                setclientid.sc_name_len = scnprintf(setclientid.sc_name,
 -                              sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
 -                              clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
 +                              sizeof(setclientid.sc_name), "%s/%s %s %s %u",
 +                              clp->cl_ipaddr,
 +                              rpc_peeraddr2str(clp->cl_rpcclient,
 +                                                      RPC_DISPLAY_ADDR),
 +                              rpc_peeraddr2str(clp->cl_rpcclient,
 +                                                      RPC_DISPLAY_PROTO),
                                cred->cr_ops->cr_name,
                                clp->cl_id_uniquifier);
                setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
 -                              sizeof(setclientid.sc_netid), "tcp");
 +                              sizeof(setclientid.sc_netid),
 +                              rpc_peeraddr2str(clp->cl_rpcclient,
 +                                                      RPC_DISPLAY_NETID));
                setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
 -                              sizeof(setclientid.sc_uaddr), "%s.%d.%d",
 +                              sizeof(setclientid.sc_uaddr), "%s.%u.%u",
                                clp->cl_ipaddr, port >> 8, port & 255);
  
                status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
@@@ -2968,11 -2970,25 +2955,11 @@@ struct nfs4_delegreturndata 
        struct nfs4_delegreturnres res;
        struct nfs_fh fh;
        nfs4_stateid stateid;
 -      struct rpc_cred *cred;
        unsigned long timestamp;
        struct nfs_fattr fattr;
        int rpc_status;
  };
  
 -static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata)
 -{
 -      struct nfs4_delegreturndata *data = calldata;
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
 -              .rpc_argp = &data->args,
 -              .rpc_resp = &data->res,
 -              .rpc_cred = data->cred,
 -      };
 -      nfs_fattr_init(data->res.fattr);
 -      rpc_call_setup(task, &msg, 0);
 -}
 -
  static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
  {
        struct nfs4_delegreturndata *data = calldata;
  
  static void nfs4_delegreturn_release(void *calldata)
  {
 -      struct nfs4_delegreturndata *data = calldata;
 -
 -      put_rpccred(data->cred);
        kfree(calldata);
  }
  
  static const struct rpc_call_ops nfs4_delegreturn_ops = {
 -      .rpc_call_prepare = nfs4_delegreturn_prepare,
        .rpc_call_done = nfs4_delegreturn_done,
        .rpc_release = nfs4_delegreturn_release,
  };
  
 -static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
 +static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
  {
        struct nfs4_delegreturndata *data;
        struct nfs_server *server = NFS_SERVER(inode);
        struct rpc_task *task;
 -      int status;
 +      struct rpc_message msg = {
 +              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
 +              .rpc_cred = cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = server->client,
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs4_delegreturn_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
 +      int status = 0;
  
        data = kmalloc(sizeof(*data), GFP_KERNEL);
        if (data == NULL)
        memcpy(&data->stateid, stateid, sizeof(data->stateid));
        data->res.fattr = &data->fattr;
        data->res.server = server;
 -      data->cred = get_rpccred(cred);
 +      nfs_fattr_init(data->res.fattr);
        data->timestamp = jiffies;
        data->rpc_status = 0;
  
 -      task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
 +      task_setup_data.callback_data = data;
 +      msg.rpc_argp = &data->args;
 +      msg.rpc_resp = &data->res;
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
 +      if (!issync)
 +              goto out;
        status = nfs4_wait_for_completion_rpc_task(task);
 -      if (status == 0) {
 -              status = data->rpc_status;
 -              if (status == 0)
 -                      nfs_refresh_inode(inode, &data->fattr);
 -      }
 +      if (status != 0)
 +              goto out;
 +      status = data->rpc_status;
 +      if (status != 0)
 +              goto out;
 +      nfs_refresh_inode(inode, &data->fattr);
 +out:
        rpc_put_task(task);
        return status;
  }
  
 -int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
 +int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
  {
        struct nfs_server *server = NFS_SERVER(inode);
        struct nfs4_exception exception = { };
        int err;
        do {
 -              err = _nfs4_proc_delegreturn(inode, cred, stateid);
 +              err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
                switch (err) {
                        case -NFS4ERR_STALE_STATEID:
                        case -NFS4ERR_EXPIRED:
  static unsigned long
  nfs4_set_lock_task_retry(unsigned long timeout)
  {
-       schedule_timeout_interruptible(timeout);
+       schedule_timeout_killable(timeout);
        timeout <<= 1;
        if (timeout > NFS4_LOCK_MAXTIMEOUT)
                return NFS4_LOCK_MAXTIMEOUT;
@@@ -3216,6 -3219,12 +3203,6 @@@ static void nfs4_locku_done(struct rpc_
  static void nfs4_locku_prepare(struct rpc_task *task, void *data)
  {
        struct nfs4_unlockdata *calldata = data;
 -      struct rpc_message msg = {
 -              .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
 -              .rpc_argp       = &calldata->arg,
 -              .rpc_resp       = &calldata->res,
 -              .rpc_cred       = calldata->lsp->ls_state->owner->so_cred,
 -      };
  
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
                return;
                return;
        }
        calldata->timestamp = jiffies;
 -      rpc_call_setup(task, &msg, 0);
 +      rpc_call_start(task);
  }
  
  static const struct rpc_call_ops nfs4_locku_ops = {
@@@ -3240,16 -3249,6 +3227,16 @@@ static struct rpc_task *nfs4_do_unlck(s
                struct nfs_seqid *seqid)
  {
        struct nfs4_unlockdata *data;
 +      struct rpc_message msg = {
 +              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
 +              .rpc_cred = ctx->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = NFS_CLIENT(lsp->ls_state->inode),
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs4_locku_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
  
        /* Ensure this is an unlock - when canceling a lock, the
         * canceled lock is passed in, and it won't be an unlock.
                return ERR_PTR(-ENOMEM);
        }
  
 -      return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
 +      msg.rpc_argp = &data->arg;
 +      msg.rpc_resp = &data->res;
 +      task_setup_data.callback_data = data;
 +      return rpc_run_task(&task_setup_data);
  }
  
  static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@@ -3324,12 -3320,9 +3311,12 @@@ static struct nfs4_lockdata *nfs4_alloc
  
        p->arg.fh = NFS_FH(inode);
        p->arg.fl = &p->fl;
 +      p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
 +      if (p->arg.open_seqid == NULL)
 +              goto out_free;
        p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
        if (p->arg.lock_seqid == NULL)
 -              goto out_free;
 +              goto out_free_seqid;
        p->arg.lock_stateid = &lsp->ls_stateid;
        p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
        p->arg.lock_owner.id = lsp->ls_id.id;
        p->ctx = get_nfs_open_context(ctx);
        memcpy(&p->fl, fl, sizeof(p->fl));
        return p;
 +out_free_seqid:
 +      nfs_free_seqid(p->arg.open_seqid);
  out_free:
        kfree(p);
        return NULL;
@@@ -3349,20 -3340,31 +3336,20 @@@ static void nfs4_lock_prepare(struct rp
  {
        struct nfs4_lockdata *data = calldata;
        struct nfs4_state *state = data->lsp->ls_state;
 -      struct nfs4_state_owner *sp = state->owner;
 -      struct rpc_message msg = {
 -              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
 -              .rpc_argp = &data->arg,
 -              .rpc_resp = &data->res,
 -              .rpc_cred = sp->so_cred,
 -      };
  
 +      dprintk("%s: begin!\n", __FUNCTION__);
        if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
                return;
 -      dprintk("%s: begin!\n", __FUNCTION__);
        /* Do we need to do an open_to_lock_owner? */
        if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
 -              data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid);
 -              if (data->arg.open_seqid == NULL) {
 -                      data->rpc_status = -ENOMEM;
 -                      task->tk_action = NULL;
 -                      goto out;
 -              }
 +              if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
 +                      return;
                data->arg.open_stateid = &state->stateid;
                data->arg.new_lock_owner = 1;
 -      }
 +      } else
 +              data->arg.new_lock_owner = 0;
        data->timestamp = jiffies;
 -      rpc_call_setup(task, &msg, 0);
 -out:
 +      rpc_call_start(task);
        dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
  }
  
@@@ -3398,7 -3400,8 +3385,7 @@@ static void nfs4_lock_release(void *cal
        struct nfs4_lockdata *data = calldata;
  
        dprintk("%s: begin!\n", __FUNCTION__);
 -      if (data->arg.open_seqid != NULL)
 -              nfs_free_seqid(data->arg.open_seqid);
 +      nfs_free_seqid(data->arg.open_seqid);
        if (data->cancelled != 0) {
                struct rpc_task *task;
                task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
@@@ -3424,16 -3427,6 +3411,16 @@@ static int _nfs4_do_setlk(struct nfs4_s
  {
        struct nfs4_lockdata *data;
        struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
 +              .rpc_cred = state->owner->so_cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = NFS_CLIENT(state->inode),
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs4_lock_ops,
 +              .flags = RPC_TASK_ASYNC,
 +      };
        int ret;
  
        dprintk("%s: begin!\n", __FUNCTION__);
                data->arg.block = 1;
        if (reclaim != 0)
                data->arg.reclaim = 1;
 -      task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
 -                      &nfs4_lock_ops, data);
 +      msg.rpc_argp = &data->arg;
 +      msg.rpc_resp = &data->res;
 +      task_setup_data.callback_data = data;
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        ret = nfs4_wait_for_completion_rpc_task(task);
@@@ -3621,6 -3612,10 +3608,6 @@@ int nfs4_setxattr(struct dentry *dentry
        if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
                return -EOPNOTSUPP;
  
 -      if (!S_ISREG(inode->i_mode) &&
 -          (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
 -              return -EPERM;
 -
        return nfs4_proc_set_acl(inode, buf, buflen);
  }
  
diff --combined fs/nfs/pagelist.c
index 3b3dbb94393de116c4bd8923aba8efa58bb3debb,2dff469f04fe396823e00d587ec7dc5f597b6e54..7f079209d70a91ff79f036dbec62ace856023913
@@@ -58,7 -58,7 +58,6 @@@ nfs_create_request(struct nfs_open_cont
                   struct page *page,
                   unsigned int offset, unsigned int count)
  {
--      struct nfs_server *server = NFS_SERVER(inode);
        struct nfs_page         *req;
  
        for (;;) {
@@@ -67,7 -67,7 +66,7 @@@
                if (req != NULL)
                        break;
  
-               if (signalled() && (server->flags & NFS_MOUNT_INTR))
+               if (fatal_signal_pending(current))
                        return ERR_PTR(-ERESTARTSYS);
                yield();
        }
@@@ -111,14 -111,13 +110,14 @@@ void nfs_unlock_request(struct nfs_pag
   * nfs_set_page_tag_locked - Tag a request as locked
   * @req:
   */
 -static int nfs_set_page_tag_locked(struct nfs_page *req)
 +int nfs_set_page_tag_locked(struct nfs_page *req)
  {
        struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
  
 -      if (!nfs_lock_request(req))
 +      if (!nfs_lock_request_dontget(req))
                return 0;
 -      radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 +      if (req->wb_page != NULL)
 +              radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
        return 1;
  }
  
@@@ -133,10 -132,9 +132,10 @@@ void nfs_clear_page_tag_locked(struct n
        if (req->wb_page != NULL) {
                spin_lock(&inode->i_lock);
                radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 +              nfs_unlock_request(req);
                spin_unlock(&inode->i_lock);
 -      }
 -      nfs_unlock_request(req);
 +      } else
 +              nfs_unlock_request(req);
  }
  
  /**
@@@ -177,11 -175,11 +176,11 @@@ void nfs_release_request(struct nfs_pag
        kref_put(&req->wb_kref, nfs_free_request);
  }
  
- static int nfs_wait_bit_interruptible(void *word)
+ static int nfs_wait_bit_killable(void *word)
  {
        int ret = 0;
  
-       if (signal_pending(current))
+       if (fatal_signal_pending(current))
                ret = -ERESTARTSYS;
        else
                schedule();
   * nfs_wait_on_request - Wait for a request to complete.
   * @req: request to wait upon.
   *
-  * Interruptible by signals only if mounted with intr flag.
+  * Interruptible by fatal signals only.
   * The user is responsible for holding a count on the request.
   */
  int
  nfs_wait_on_request(struct nfs_page *req)
  {
-       struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
-       sigset_t oldmask;
        int ret = 0;
  
        if (!test_bit(PG_BUSY, &req->wb_flags))
                goto out;
-       /*
-        * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
-        *       are not interrupted if intr flag is not set
-        */
-       rpc_clnt_sigmask(clnt, &oldmask);
        ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
-                       nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
-       rpc_clnt_sigunmask(clnt, &oldmask);
+                       nfs_wait_bit_killable, TASK_KILLABLE);
  out:
        return ret;
  }
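
Both the nfs4proc.c and pagelist.c hunks above replace the old intr-mount
sigmask handling with the TASK_KILLABLE machinery introduced by this series:
the wait action function only bails out on fatal_signal_pending(), and the
sleep itself happens in TASK_KILLABLE.  A condensed sketch of the pattern,
assuming a hypothetical flags word and EXAMPLE_BIT (placeholders, not code
from this commit):

        /* Illustrative sketch only -- not part of this commit. */
        #define EXAMPLE_BIT 0

        static int example_wait_bit_killable(void *word)
        {
                /* only a fatal signal may interrupt the sleep */
                if (fatal_signal_pending(current))
                        return -ERESTARTSYS;
                schedule();
                return 0;
        }

        static int example_wait_for_bit(unsigned long *flags)
        {
                might_sleep();
                return out_of_line_wait_on_bit(flags, EXAMPLE_BIT,
                                example_wait_bit_killable, TASK_KILLABLE);
        }
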
@@@ -423,7 -413,6 +414,7 @@@ int nfs_scan_list(struct nfs_inode *nfs
                                goto out;
                        idx_start = req->wb_index + 1;
                        if (nfs_set_page_tag_locked(req)) {
 +                              kref_get(&req->wb_kref);
                                nfs_list_remove_request(req);
                                radix_tree_tag_clear(&nfsi->nfs_page_tree,
                                                req->wb_index, tag);
diff --combined fs/nfs/super.c
index 22c49c02897d3244c8f53ffd9bb9537e90064238,5b6339f70a4cfb65f9b6d17850392b20a93f2706..7f4505f6ac6f55fd49c1a8c07bbb65ce63d41388
@@@ -45,8 -45,6 +45,8 @@@
  #include <linux/nfs_idmap.h>
  #include <linux/vfs.h>
  #include <linux/inet.h>
 +#include <linux/in6.h>
 +#include <net/ipv6.h>
  #include <linux/nfs_xdr.h>
  #include <linux/magic.h>
  #include <linux/parser.h>
@@@ -85,11 -83,11 +85,11 @@@ enum 
        Opt_actimeo,
        Opt_namelen,
        Opt_mountport,
 -      Opt_mountprog, Opt_mountvers,
 -      Opt_nfsprog, Opt_nfsvers,
 +      Opt_mountvers,
 +      Opt_nfsvers,
  
        /* Mount options that take string arguments */
 -      Opt_sec, Opt_proto, Opt_mountproto,
 +      Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
        Opt_addr, Opt_mountaddr, Opt_clientaddr,
  
        /* Mount options that are ignored */
@@@ -139,7 -137,9 +139,7 @@@ static match_table_t nfs_mount_option_t
        { Opt_userspace, "retry=%u" },
        { Opt_namelen, "namlen=%u" },
        { Opt_mountport, "mountport=%u" },
 -      { Opt_mountprog, "mountprog=%u" },
        { Opt_mountvers, "mountvers=%u" },
 -      { Opt_nfsprog, "nfsprog=%u" },
        { Opt_nfsvers, "nfsvers=%u" },
        { Opt_nfsvers, "vers=%u" },
  
        { Opt_mountproto, "mountproto=%s" },
        { Opt_addr, "addr=%s" },
        { Opt_clientaddr, "clientaddr=%s" },
 -      { Opt_userspace, "mounthost=%s" },
 +      { Opt_mounthost, "mounthost=%s" },
        { Opt_mountaddr, "mountaddr=%s" },
  
        { Opt_err, NULL }
@@@ -202,7 -202,6 +202,7 @@@ static int nfs_get_sb(struct file_syste
  static int nfs_xdev_get_sb(struct file_system_type *fs_type,
                int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
  static void nfs_kill_super(struct super_block *);
 +static void nfs_put_super(struct super_block *);
  
  static struct file_system_type nfs_fs_type = {
        .owner          = THIS_MODULE,
@@@ -224,7 -223,6 +224,7 @@@ static const struct super_operations nf
        .alloc_inode    = nfs_alloc_inode,
        .destroy_inode  = nfs_destroy_inode,
        .write_inode    = nfs_write_inode,
 +      .put_super      = nfs_put_super,
        .statfs         = nfs_statfs,
        .clear_inode    = nfs_clear_inode,
        .umount_begin   = nfs_umount_begin,
@@@ -327,28 -325,6 +327,28 @@@ void __exit unregister_nfs_fs(void
        unregister_filesystem(&nfs_fs_type);
  }
  
 +void nfs_sb_active(struct nfs_server *server)
 +{
 +      atomic_inc(&server->active);
 +}
 +
 +void nfs_sb_deactive(struct nfs_server *server)
 +{
 +      if (atomic_dec_and_test(&server->active))
 +              wake_up(&server->active_wq);
 +}
 +
 +static void nfs_put_super(struct super_block *sb)
 +{
 +      struct nfs_server *server = NFS_SB(sb);
 +      /*
 +       * Make sure there are no outstanding ops to this server.
 +       * If there are, wait for them to finish before allowing the
 +       * unmount to continue.
 +       */
 +      wait_event(server->active_wq, atomic_read(&server->active) == 0);
 +}
 +
  /*
   * Deliver file system statistics to userspace
   */
@@@ -448,7 -424,6 +448,6 @@@ static void nfs_show_mount_options(stru
                const char *nostr;
        } nfs_info[] = {
                { NFS_MOUNT_SOFT, ",soft", ",hard" },
-               { NFS_MOUNT_INTR, ",intr", ",nointr" },
                { NFS_MOUNT_NOCTO, ",nocto", "" },
                { NFS_MOUNT_NOAC, ",noac", "" },
                { NFS_MOUNT_NONLM, ",nolock", "" },
        }
        seq_printf(m, ",proto=%s",
                   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
 -      seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
 -      seq_printf(m, ",retrans=%u", clp->retrans_count);
 +      seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
 +      seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
        seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
  }
  
@@@ -493,9 -468,8 +492,9 @@@ static int nfs_show_options(struct seq_
  
        nfs_show_mount_options(m, nfss, 0);
  
 -      seq_printf(m, ",addr="NIPQUAD_FMT,
 -              NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
 +      seq_printf(m, ",addr=%s",
 +                      rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
 +                                                      RPC_DISPLAY_ADDR));
  
        return 0;
  }
@@@ -532,7 -506,7 +531,7 @@@ static int nfs_show_stats(struct seq_fi
        seq_printf(m, ",namelen=%d", nfss->namelen);
  
  #ifdef CONFIG_NFS_V4
 -      if (nfss->nfs_client->cl_nfsversion == 4) {
 +      if (nfss->nfs_client->rpc_ops->version == 4) {
                seq_printf(m, "\n\tnfsv4:\t");
                seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
                seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
@@@ -600,80 -574,22 +599,80 @@@ static void nfs_umount_begin(struct vfs
  }
  
  /*
 - * Sanity-check a server address provided by the mount command
 + * Set the port number in an address.  Be agnostic about the address family.
 + */
 +static void nfs_set_port(struct sockaddr *sap, unsigned short port)
 +{
 +      switch (sap->sa_family) {
 +      case AF_INET: {
 +              struct sockaddr_in *ap = (struct sockaddr_in *)sap;
 +              ap->sin_port = htons(port);
 +              break;
 +      }
 +      case AF_INET6: {
 +              struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
 +              ap->sin6_port = htons(port);
 +              break;
 +      }
 +      }
 +}
 +
 +/*
 + * Sanity-check a server address provided by the mount command.
 + *
 + * Address family must be initialized, and address must not be
 + * the ANY address for that family.
   */
  static int nfs_verify_server_address(struct sockaddr *addr)
  {
        switch (addr->sa_family) {
        case AF_INET: {
 -              struct sockaddr_in *sa = (struct sockaddr_in *) addr;
 -              if (sa->sin_addr.s_addr != INADDR_ANY)
 -                      return 1;
 -              break;
 +              struct sockaddr_in *sa = (struct sockaddr_in *)addr;
 +              return sa->sin_addr.s_addr != INADDR_ANY;
 +      }
 +      case AF_INET6: {
 +              struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
 +              return !ipv6_addr_any(sa);
        }
        }
  
        return 0;
  }
  
 +/*
 + * Parse string addresses passed in via a mount option,
 + * and construct a sockaddr based on the result.
 + *
 + * If address parsing fails, set the sockaddr's address
 + * family to AF_UNSPEC to force nfs_verify_server_address()
 + * to punt the mount.
 + */
 +static void nfs_parse_server_address(char *value,
 +                                   struct sockaddr *sap,
 +                                   size_t *len)
 +{
 +      if (strchr(value, ':')) {
 +              struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
 +              u8 *addr = (u8 *)&ap->sin6_addr.in6_u;
 +
 +              ap->sin6_family = AF_INET6;
 +              *len = sizeof(*ap);
 +              if (in6_pton(value, -1, addr, '\0', NULL))
 +                      return;
 +      } else {
 +              struct sockaddr_in *ap = (struct sockaddr_in *)sap;
 +              u8 *addr = (u8 *)&ap->sin_addr.s_addr;
 +
 +              ap->sin_family = AF_INET;
 +              *len = sizeof(*ap);
 +              if (in4_pton(value, -1, addr, '\0', NULL))
 +                      return;
 +      }
 +
 +      sap->sa_family = AF_UNSPEC;
 +      *len = 0;
 +}
 +
  /*
   * Error-check and convert a string of mount options from user space into
   * a data structure
@@@ -682,7 -598,6 +681,7 @@@ static int nfs_parse_mount_options(cha
                                   struct nfs_parsed_mount_data *mnt)
  {
        char *p, *string;
 +      unsigned short port = 0;
  
        if (!raw) {
                dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
                        mnt->flags &= ~NFS_MOUNT_SOFT;
                        break;
                case Opt_intr:
-                       mnt->flags |= NFS_MOUNT_INTR;
-                       break;
                case Opt_nointr:
-                       mnt->flags &= ~NFS_MOUNT_INTR;
                        break;
                case Opt_posix:
                        mnt->flags |= NFS_MOUNT_POSIX;
                                return 0;
                        if (option < 0 || option > 65535)
                                return 0;
 -                      mnt->nfs_server.address.sin_port = htons(option);
 +                      port = option;
                        break;
                case Opt_rsize:
                        if (match_int(args, &mnt->rsize))
                                return 0;
                        mnt->mount_server.port = option;
                        break;
 -              case Opt_mountprog:
 -                      if (match_int(args, &option))
 -                              return 0;
 -                      if (option < 0)
 -                              return 0;
 -                      mnt->mount_server.program = option;
 -                      break;
                case Opt_mountvers:
                        if (match_int(args, &option))
                                return 0;
                                return 0;
                        mnt->mount_server.version = option;
                        break;
 -              case Opt_nfsprog:
 -                      if (match_int(args, &option))
 -                              return 0;
 -                      if (option < 0)
 -                              return 0;
 -                      mnt->nfs_server.program = option;
 -                      break;
                case Opt_nfsvers:
                        if (match_int(args, &option))
                                return 0;
                        string = match_strdup(args);
                        if (string == NULL)
                                goto out_nomem;
 -                      mnt->nfs_server.address.sin_family = AF_INET;
 -                      mnt->nfs_server.address.sin_addr.s_addr =
 -                                                      in_aton(string);
 +                      nfs_parse_server_address(string, (struct sockaddr *)
 +                                               &mnt->nfs_server.address,
 +                                               &mnt->nfs_server.addrlen);
                        kfree(string);
                        break;
                case Opt_clientaddr:
                        string = match_strdup(args);
                        if (string == NULL)
                                goto out_nomem;
 +                      kfree(mnt->client_address);
                        mnt->client_address = string;
                        break;
 +              case Opt_mounthost:
 +                      string = match_strdup(args);
 +                      if (string == NULL)
 +                              goto out_nomem;
 +                      kfree(mnt->mount_server.hostname);
 +                      mnt->mount_server.hostname = string;
 +                      break;
                case Opt_mountaddr:
                        string = match_strdup(args);
                        if (string == NULL)
                                goto out_nomem;
 -                      mnt->mount_server.address.sin_family = AF_INET;
 -                      mnt->mount_server.address.sin_addr.s_addr =
 -                                                      in_aton(string);
 +                      nfs_parse_server_address(string, (struct sockaddr *)
 +                                               &mnt->mount_server.address,
 +                                               &mnt->mount_server.addrlen);
                        kfree(string);
                        break;
  
                }
        }
  
 +      nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port);
 +
        return 1;
  
  out_nomem:
@@@ -1067,8 -983,7 +1063,8 @@@ out_unknown
  static int nfs_try_mount(struct nfs_parsed_mount_data *args,
                         struct nfs_fh *root_fh)
  {
 -      struct sockaddr_in sin;
 +      struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address;
 +      char *hostname;
        int status;
  
        if (args->mount_server.version == 0) {
                        args->mount_server.version = NFS_MNT_VERSION;
        }
  
 +      if (args->mount_server.hostname)
 +              hostname = args->mount_server.hostname;
 +      else
 +              hostname = args->nfs_server.hostname;
 +
        /*
         * Construct the mount server's address.
         */
 -      if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
 -              sin = args->mount_server.address;
 -      else
 -              sin = args->nfs_server.address;
 +      if (args->mount_server.address.ss_family == AF_UNSPEC) {
 +              memcpy(sap, &args->nfs_server.address,
 +                     args->nfs_server.addrlen);
 +              args->mount_server.addrlen = args->nfs_server.addrlen;
 +      }
 +
        /*
         * autobind will be used if mount_server.port == 0
         */
 -      sin.sin_port = htons(args->mount_server.port);
 +      nfs_set_port(sap, args->mount_server.port);
  
        /*
         * Now ask the mount server to map our export path
         * to a file handle.
         */
 -      status = nfs_mount((struct sockaddr *) &sin,
 -                         sizeof(sin),
 -                         args->nfs_server.hostname,
 +      status = nfs_mount(sap,
 +                         args->mount_server.addrlen,
 +                         hostname,
                           args->nfs_server.export_path,
                           args->mount_server.version,
                           args->mount_server.protocol,
        if (status == 0)
                return 0;
  
 -      dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
 -                      ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
 +      dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
 +                      hostname, status);
        return status;
  }
  
   *
   * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
   *   mountproto=tcp after mountproto=udp, and so on
 - *
 - * XXX: as far as I can tell, changing the NFS program number is not
 - *      supported in the NFS client.
   */
  static int nfs_validate_mount_data(void *options,
                                   struct nfs_parsed_mount_data *args,
        args->acdirmin          = 30;
        args->acdirmax          = 60;
        args->mount_server.protocol = XPRT_TRANSPORT_UDP;
 -      args->mount_server.program = NFS_MNT_PROGRAM;
        args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 -      args->nfs_server.program = NFS_PROGRAM;
  
        switch (data->version) {
        case 1:
                        memset(mntfh->data + mntfh->size, 0,
                               sizeof(mntfh->data) - mntfh->size);
  
 -              if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
 -                      goto out_no_address;
 -
                /*
                 * Translate to nfs_parsed_mount_data, which nfs_fill_super
                 * can deal with.
                args->acregmax          = data->acregmax;
                args->acdirmin          = data->acdirmin;
                args->acdirmax          = data->acdirmax;
 -              args->nfs_server.address = data->addr;
 +
 +              memcpy(&args->nfs_server.address, &data->addr,
 +                     sizeof(data->addr));
 +              args->nfs_server.addrlen = sizeof(data->addr);
 +              if (!nfs_verify_server_address((struct sockaddr *)
 +                                              &args->nfs_server.address))
 +                      goto out_no_address;
 +
                if (!(data->flags & NFS_MOUNT_TCP))
                        args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
                /* N.B. caller will free nfs_server.hostname in all cases */
@@@ -1409,50 -1318,15 +1405,50 @@@ static int nfs_set_super(struct super_b
        return ret;
  }
  
 +static int nfs_compare_super_address(struct nfs_server *server1,
 +                                   struct nfs_server *server2)
 +{
 +      struct sockaddr *sap1, *sap2;
 +
 +      sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr;
 +      sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr;
 +
 +      if (sap1->sa_family != sap2->sa_family)
 +              return 0;
 +
 +      switch (sap1->sa_family) {
 +      case AF_INET: {
 +              struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1;
 +              struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2;
 +              if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr)
 +                      return 0;
 +              if (sin1->sin_port != sin2->sin_port)
 +                      return 0;
 +              break;
 +      }
 +      case AF_INET6: {
 +              struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1;
 +              struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2;
 +              if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
 +                      return 0;
 +              if (sin1->sin6_port != sin2->sin6_port)
 +                      return 0;
 +              break;
 +      }
 +      default:
 +              return 0;
 +      }
 +
 +      return 1;
 +}
 +
  static int nfs_compare_super(struct super_block *sb, void *data)
  {
        struct nfs_sb_mountdata *sb_mntdata = data;
        struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
        int mntflags = sb_mntdata->mntflags;
  
 -      if (memcmp(&old->nfs_client->cl_addr,
 -                              &server->nfs_client->cl_addr,
 -                              sizeof(old->nfs_client->cl_addr)) != 0)
 +      if (!nfs_compare_super_address(old, server))
                return 0;
        /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
        if (old->flags & NFS_MOUNT_UNSHARED)
@@@ -1522,7 -1396,6 +1518,7 @@@ static int nfs_get_sb(struct file_syste
  
  out:
        kfree(data.nfs_server.hostname);
 +      kfree(data.mount_server.hostname);
        return error;
  
  out_err_nosb:
@@@ -1598,7 -1471,7 +1594,7 @@@ static int nfs_xdev_get_sb(struct file_
                error = PTR_ERR(mntroot);
                goto error_splat_super;
        }
 -      if (mntroot->d_inode->i_op != &nfs_dir_inode_operations) {
 +      if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
                dput(mntroot);
                error = -ESTALE;
                goto error_splat_super;
@@@ -1650,28 -1523,6 +1646,28 @@@ static void nfs4_fill_super(struct supe
        nfs_initialise_sb(sb);
  }
  
 +/*
 + * If the user didn't specify a port, set the port number to
 + * the NFS version 4 default port.
 + */
 +static void nfs4_default_port(struct sockaddr *sap)
 +{
 +      switch (sap->sa_family) {
 +      case AF_INET: {
 +              struct sockaddr_in *ap = (struct sockaddr_in *)sap;
 +              if (ap->sin_port == 0)
 +                      ap->sin_port = htons(NFS_PORT);
 +              break;
 +      }
 +      case AF_INET6: {
 +              struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
 +              if (ap->sin6_port == 0)
 +                      ap->sin6_port = htons(NFS_PORT);
 +              break;
 +      }
 +      }
 +}
 +
  /*
   * Validate NFSv4 mount options
   */
@@@ -1679,7 -1530,6 +1675,7 @@@ static int nfs4_validate_mount_data(voi
                                    struct nfs_parsed_mount_data *args,
                                    const char *dev_name)
  {
 +      struct sockaddr_in *ap;
        struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
        char *c;
  
  
        switch (data->version) {
        case 1:
 -              if (data->host_addrlen != sizeof(args->nfs_server.address))
 +              ap = (struct sockaddr_in *)&args->nfs_server.address;
 +              if (data->host_addrlen > sizeof(args->nfs_server.address))
 +                      goto out_no_address;
 +              if (data->host_addrlen == 0)
                        goto out_no_address;
 -              if (copy_from_user(&args->nfs_server.address,
 -                                 data->host_addr,
 -                                 sizeof(args->nfs_server.address)))
 +              args->nfs_server.addrlen = data->host_addrlen;
 +              if (copy_from_user(ap, data->host_addr, data->host_addrlen))
                        return -EFAULT;
 -              if (args->nfs_server.address.sin_port == 0)
 -                      args->nfs_server.address.sin_port = htons(NFS_PORT);
                if (!nfs_verify_server_address((struct sockaddr *)
                                                &args->nfs_server.address))
                        goto out_no_address;
  
 +              nfs4_default_port((struct sockaddr *)
 +                                &args->nfs_server.address);
 +
                switch (data->auth_flavourlen) {
                case 0:
                        args->auth_flavors[0] = RPC_AUTH_UNIX;
                                                &args->nfs_server.address))
                        return -EINVAL;
  
 +              nfs4_default_port((struct sockaddr *)
 +                                &args->nfs_server.address);
 +
                switch (args->auth_flavor_len) {
                case 0:
                        args->auth_flavors[0] = RPC_AUTH_UNIX;
                len = c - dev_name;
                if (len > NFS4_MAXNAMLEN)
                        return -ENAMETOOLONG;
 -              args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
 -              if (args->nfs_server.hostname == NULL)
 -                      return -ENOMEM;
 -              strncpy(args->nfs_server.hostname, dev_name, len - 1);
 +              /* N.B. caller will free nfs_server.hostname in all cases */
 +              args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
  
                c++;                    /* step over the ':' */
                len = strlen(c);
                if (len > NFS4_MAXPATHLEN)
                        return -ENAMETOOLONG;
 -              args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
 -              if (args->nfs_server.export_path == NULL)
 -                      return -ENOMEM;
 -              strncpy(args->nfs_server.export_path, c, len);
 +              args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
  
 -              dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
 +              dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
  
                if (args->client_address == NULL)
                        goto out_no_client_address;
@@@ -1973,11 -1822,6 +1969,11 @@@ static int nfs4_xdev_get_sb(struct file
                error = PTR_ERR(mntroot);
                goto error_splat_super;
        }
 +      if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
 +              dput(mntroot);
 +              error = -ESTALE;
 +              goto error_splat_super;
 +      }
  
        s->s_flags |= MS_ACTIVE;
        mnt->mnt_sb = s;
@@@ -2052,11 -1896,6 +2048,11 @@@ static int nfs4_referral_get_sb(struct 
                error = PTR_ERR(mntroot);
                goto error_splat_super;
        }
 +      if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
 +              dput(mntroot);
 +              error = -ESTALE;
 +              goto error_splat_super;
 +      }
  
        s->s_flags |= MS_ACTIVE;
        mnt->mnt_sb = s;
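
The super.c hunks above replace the IPv4-only sockaddr_in plumbing with
family-agnostic helpers: a mount option string is parsed into a struct
sockaddr_storage, sanity-checked, and only then given a port number.  A
condensed sketch of how the three new helpers fit together; the function and
variable names below are placeholders, not code from this commit.

        /* Illustrative sketch only -- not part of this commit. */
        static int example_parse_addr_option(char *option_string,
                                             unsigned short port,
                                             struct sockaddr_storage *address,
                                             size_t *addrlen)
        {
                /* fills in AF_INET or AF_INET6; AF_UNSPEC on parse failure */
                nfs_parse_server_address(option_string,
                                         (struct sockaddr *)address, addrlen);
                /* reject unparsable addresses and the ANY address */
                if (!nfs_verify_server_address((struct sockaddr *)address))
                        return 0;
                /* apply the port= option; 0 keeps the default behaviour */
                nfs_set_port((struct sockaddr *)address, port);
                return 1;
        }
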
diff --combined fs/nfs/write.c
index 5ac5b27b639a85decb94b5d49c72aeb46361a9ff,60e3e870ada46f2fb7282fe78df8d0f593de83a9..522efff3e2c51b67befab23c1b4d6c2673d54aeb
@@@ -196,7 -196,7 +196,7 @@@ static int nfs_writepage_setup(struct n
        }
        /* Update file length */
        nfs_grow_file(page, offset, count);
 -      nfs_unlock_request(req);
 +      nfs_clear_page_tag_locked(req);
        return 0;
  }
  
@@@ -252,6 -252,7 +252,6 @@@ static int nfs_page_async_flush(struct 
                                struct page *page)
  {
        struct inode *inode = page->mapping->host;
 -      struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page *req;
        int ret;
  
                        spin_unlock(&inode->i_lock);
                        return 0;
                }
 -              if (nfs_lock_request_dontget(req))
 +              if (nfs_set_page_tag_locked(req))
                        break;
                /* Note: If we hold the page lock, as is the case in nfs_writepage,
 -               *       then the call to nfs_lock_request_dontget() will always
 +               *       then the call to nfs_set_page_tag_locked() will always
                 *       succeed provided that someone hasn't already marked the
                 *       request as dirty (in which case we don't care).
                 */
        if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
                /* This request is marked for commit */
                spin_unlock(&inode->i_lock);
 -              nfs_unlock_request(req);
 +              nfs_clear_page_tag_locked(req);
                nfs_pageio_complete(pgio);
                return 0;
        }
                spin_unlock(&inode->i_lock);
                BUG();
        }
 -      radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
 -                      NFS_PAGE_TAG_LOCKED);
        spin_unlock(&inode->i_lock);
        nfs_pageio_add_request(pgio, req);
        return 0;
@@@ -378,7 -381,6 +378,7 @@@ static int nfs_inode_add_request(struc
        set_page_private(req->wb_page, (unsigned long)req);
        nfsi->npages++;
        kref_get(&req->wb_kref);
 +      radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
        return 0;
  }
  
@@@ -488,7 -490,7 +488,7 @@@ int nfs_reschedule_unstable_write(struc
  /*
   * Wait for a request to complete.
   *
-  * Interruptible by signals only if mounted with intr flag.
+  * Interruptible by fatal signals only.
   */
  static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
  {
@@@ -594,7 -596,7 +594,7 @@@ static struct nfs_page * nfs_update_req
                spin_lock(&inode->i_lock);
                req = nfs_page_find_request_locked(page);
                if (req) {
 -                      if (!nfs_lock_request_dontget(req)) {
 +                      if (!nfs_set_page_tag_locked(req)) {
                                int error;
  
                                spin_unlock(&inode->i_lock);
            || req->wb_page != page
            || !nfs_dirty_request(req)
            || offset > rqend || end < req->wb_offset) {
 -              nfs_unlock_request(req);
 +              nfs_clear_page_tag_locked(req);
                return ERR_PTR(-EBUSY);
        }
  
@@@ -753,7 -755,7 +753,7 @@@ static void nfs_writepage_release(struc
        nfs_clear_page_tag_locked(req);
  }
  
 -static inline int flush_task_priority(int how)
 +static int flush_task_priority(int how)
  {
        switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
                case FLUSH_HIGHPRI:
@@@ -773,31 -775,15 +773,31 @@@ static void nfs_write_rpcsetup(struct n
                unsigned int count, unsigned int offset,
                int how)
  {
 -      struct inode            *inode;
 -      int flags;
 +      struct inode *inode = req->wb_context->path.dentry->d_inode;
 +      int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 +      int priority = flush_task_priority(how);
 +      struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_argp = &data->args,
 +              .rpc_resp = &data->res,
 +              .rpc_cred = req->wb_context->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = NFS_CLIENT(inode),
 +              .task = &data->task,
 +              .rpc_message = &msg,
 +              .callback_ops = call_ops,
 +              .callback_data = data,
 +              .flags = flags,
 +              .priority = priority,
 +      };
  
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
  
        data->req = req;
        data->inode = inode = req->wb_context->path.dentry->d_inode;
 -      data->cred = req->wb_context->cred;
 +      data->cred = msg.rpc_cred;
  
        data->args.fh     = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pages  = data->pagevec;
        data->args.count  = count;
        data->args.context = req->wb_context;
 +      data->args.stable  = NFS_UNSTABLE;
 +      if (how & FLUSH_STABLE) {
 +              data->args.stable = NFS_DATA_SYNC;
 +              if (!NFS_I(inode)->ncommit)
 +                      data->args.stable = NFS_FILE_SYNC;
 +      }
  
        data->res.fattr   = &data->fattr;
        data->res.count   = count;
        nfs_fattr_init(&data->fattr);
  
        /* Set up the initial task struct.  */
 -      flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 -      rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
 -      NFS_PROTO(inode)->write_setup(data, how);
 -
 -      data->task.tk_priority = flush_task_priority(how);
 -      data->task.tk_cookie = (unsigned long)inode;
 +      NFS_PROTO(inode)->write_setup(data, &msg);
  
        dprintk("NFS: %5u initiated write call "
                "(req %s/%Ld, %u bytes @ offset %Lu)\n",
                (long long)NFS_FILEID(inode),
                count,
                (unsigned long long)data->args.offset);
 -}
  
 -static void nfs_execute_write(struct nfs_write_data *data)
 -{
 -      rpc_execute(&data->task);
 +      task = rpc_run_task(&task_setup_data);
 +      if (!IS_ERR(task))
 +              rpc_put_task(task);
  }
  
  /*
@@@ -877,6 -863,7 +877,6 @@@ static int nfs_flush_multi(struct inod
                                   wsize, offset, how);
                offset += wsize;
                nbytes -= wsize;
 -              nfs_execute_write(data);
        } while (nbytes != 0);
  
        return 0;
@@@ -924,6 -911,7 +924,6 @@@ static int nfs_flush_one(struct inode *
        /* Set up the argument struct */
        nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
  
 -      nfs_execute_write(data);
        return 0;
   out_bad:
        while (!list_empty(head)) {
  static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
                                  struct inode *inode, int ioflags)
  {
 -      int wsize = NFS_SERVER(inode)->wsize;
 +      size_t wsize = NFS_SERVER(inode)->wsize;
  
        if (wsize < PAGE_CACHE_SIZE)
                nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
@@@ -1153,33 -1141,19 +1153,33 @@@ static void nfs_commit_rpcsetup(struct 
                struct nfs_write_data *data,
                int how)
  {
 -      struct nfs_page         *first;
 -      struct inode            *inode;
 -      int flags;
 +      struct nfs_page *first = nfs_list_entry(head->next);
 +      struct inode *inode = first->wb_context->path.dentry->d_inode;
 +      int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 +      int priority = flush_task_priority(how);
 +      struct rpc_task *task;
 +      struct rpc_message msg = {
 +              .rpc_argp = &data->args,
 +              .rpc_resp = &data->res,
 +              .rpc_cred = first->wb_context->cred,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .task = &data->task,
 +              .rpc_client = NFS_CLIENT(inode),
 +              .rpc_message = &msg,
 +              .callback_ops = &nfs_commit_ops,
 +              .callback_data = data,
 +              .flags = flags,
 +              .priority = priority,
 +      };
  
        /* Set up the RPC argument and reply structs
         * NB: take care not to mess about with data->commit et al. */
  
        list_splice_init(head, &data->pages);
 -      first = nfs_list_entry(data->pages.next);
 -      inode = first->wb_context->path.dentry->d_inode;
  
        data->inode       = inode;
 -      data->cred        = first->wb_context->cred;
 +      data->cred        = msg.rpc_cred;
  
        data->args.fh     = NFS_FH(data->inode);
        /* Note: we always request a commit of the entire inode */
        nfs_fattr_init(&data->fattr);
  
        /* Set up the initial task struct.  */
 -      flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 -      rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
 -      NFS_PROTO(inode)->commit_setup(data, how);
 +      NFS_PROTO(inode)->commit_setup(data, &msg);
  
 -      data->task.tk_priority = flush_task_priority(how);
 -      data->task.tk_cookie = (unsigned long)inode;
 -      
        dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 +
 +      task = rpc_run_task(&task_setup_data);
 +      if (!IS_ERR(task))
 +              rpc_put_task(task);
  }
  
  /*
@@@ -1217,6 -1192,7 +1217,6 @@@ nfs_commit_list(struct inode *inode, st
        /* Set up the argument struct */
        nfs_commit_rpcsetup(head, data, how);
  
 -      nfs_execute_write(data);
        return 0;
   out_bad:
        while (!list_empty(head)) {
diff --combined fs/proc/array.c
index eb97f2897e2b50724046446be149951c1685c650,5be663e5dad1bac105c4f59b55695da3ec87ce05..b380313092bd5e42173af45654e99b5a41aee908
@@@ -141,12 -141,7 +141,7 @@@ static const char *task_state_array[] 
  
  static inline const char *get_task_state(struct task_struct *tsk)
  {
-       unsigned int state = (tsk->state & (TASK_RUNNING |
-                                           TASK_INTERRUPTIBLE |
-                                           TASK_UNINTERRUPTIBLE |
-                                           TASK_STOPPED |
-                                           TASK_TRACED)) |
-                                          tsk->exit_state;
+       unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
        const char **p = &task_state_array[0];
  
        while (state) {
@@@ -169,7 -164,7 +164,7 @@@ static inline char *task_state(struct t
        ppid = pid_alive(p) ?
                task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
        tpid = pid_alive(p) && p->ptrace ?
 -              task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
 +              task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
        buffer += sprintf(buffer,
                "State:\t%s\n"
                "Tgid:\t%d\n"
@@@ -464,8 -459,8 +459,8 @@@ static int do_task_stat(struct task_str
                }
  
                sid = task_session_nr_ns(task, ns);
 +              ppid = task_tgid_nr_ns(task->real_parent, ns);
                pgid = task_pgrp_nr_ns(task, ns);
 -              ppid = task_ppid_nr_ns(task, ns);
  
                unlock_task_sighand(task, &flags);
        }
diff --combined fs/proc/base.c
index 91fa8e6ce8ad6591cb496ca54731ab5531cef521,e88ee1a0323ac35aa51717e8c2d10e49c84d03d7..9fa9708cc7153c12ad10d62b760df88ba50e9681
@@@ -199,29 -199,9 +199,29 @@@ static int proc_root_link(struct inode 
        (task == current || \
        (task->parent == current && \
        (task->ptrace & PT_PTRACED) && \
-        (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
+        (task_is_stopped_or_traced(task)) && \
         security_ptrace(current,task) == 0))
  
 +struct mm_struct *mm_for_maps(struct task_struct *task)
 +{
 +      struct mm_struct *mm = get_task_mm(task);
 +      if (!mm)
 +              return NULL;
 +      down_read(&mm->mmap_sem);
 +      task_lock(task);
 +      if (task->mm != mm)
 +              goto out;
 +      if (task->mm != current->mm && __ptrace_may_attach(task) < 0)
 +              goto out;
 +      task_unlock(task);
 +      return mm;
 +out:
 +      task_unlock(task);
 +      up_read(&mm->mmap_sem);
 +      mmput(mm);
 +      return NULL;
 +}
 +
  static int proc_pid_cmdline(struct task_struct *task, char * buffer)
  {
        int res = 0;
@@@ -310,77 -290,6 +310,77 @@@ static int proc_pid_schedstat(struct ta
  }
  #endif
  
 +#ifdef CONFIG_LATENCYTOP
 +static int lstats_show_proc(struct seq_file *m, void *v)
 +{
 +      int i;
 +      struct task_struct *task = m->private;
 +      seq_puts(m, "Latency Top version : v0.1\n");
 +
 +      for (i = 0; i < 32; i++) {
 +              if (task->latency_record[i].backtrace[0]) {
 +                      int q;
 +                      seq_printf(m, "%i %li %li ",
 +                              task->latency_record[i].count,
 +                              task->latency_record[i].time,
 +                              task->latency_record[i].max);
 +                      for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
 +                              char sym[KSYM_NAME_LEN];
 +                              char *c;
 +                              if (!task->latency_record[i].backtrace[q])
 +                                      break;
 +                              if (task->latency_record[i].backtrace[q] == ULONG_MAX)
 +                                      break;
 +                              sprint_symbol(sym, task->latency_record[i].backtrace[q]);
 +                              c = strchr(sym, '+');
 +                              if (c)
 +                                      *c = 0;
 +                              seq_printf(m, "%s ", sym);
 +                      }
 +                      seq_printf(m, "\n");
 +              }
 +
 +      }
 +      return 0;
 +}
 +
 +static int lstats_open(struct inode *inode, struct file *file)
 +{
 +      int ret;
 +      struct seq_file *m;
 +      struct task_struct *task = get_proc_task(inode);
 +
 +      ret = single_open(file, lstats_show_proc, NULL);
 +      if (!ret) {
 +              m = file->private_data;
 +              m->private = task;
 +      }
 +      return ret;
 +}
 +
 +static ssize_t lstats_write(struct file *file, const char __user *buf,
 +                          size_t count, loff_t *offs)
 +{
 +      struct seq_file *m;
 +      struct task_struct *task;
 +
 +      m = file->private_data;
 +      task = m->private;
 +      clear_all_latency_tracing(task);
 +
 +      return count;
 +}
 +
 +static const struct file_operations proc_lstats_operations = {
 +      .open           = lstats_open,
 +      .read           = seq_read,
 +      .write          = lstats_write,
 +      .llseek         = seq_lseek,
 +      .release        = single_release,
 +};
 +
 +#endif
 +
  /* The badness from the OOM killer */
  unsigned long badness(struct task_struct *p, unsigned long uptime);
  static int proc_oom_score(struct task_struct *task, char *buffer)
@@@ -1091,7 -1000,6 +1091,7 @@@ static const struct file_operations pro
  };
  #endif
  
 +
  #ifdef CONFIG_SCHED_DEBUG
  /*
   * Print out various scheduling related per-task fields:
@@@ -2302,9 -2210,6 +2302,9 @@@ static const struct pid_entry tgid_base
  #ifdef CONFIG_SCHEDSTATS
        INF("schedstat",  S_IRUGO, pid_schedstat),
  #endif
 +#ifdef CONFIG_LATENCYTOP
 +      REG("latency",  S_IRUGO, lstats),
 +#endif
  #ifdef CONFIG_PROC_PID_CPUSET
        REG("cpuset",     S_IRUGO, cpuset),
  #endif
@@@ -2630,9 -2535,6 +2630,9 @@@ static const struct pid_entry tid_base_
  #ifdef CONFIG_SCHEDSTATS
        INF("schedstat", S_IRUGO, pid_schedstat),
  #endif
 +#ifdef CONFIG_LATENCYTOP
 +      REG("latency",  S_IRUGO, lstats),
 +#endif
  #ifdef CONFIG_PROC_PID_CPUSET
        REG("cpuset",    S_IRUGO, cpuset),
  #endif
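For orientation, a minimal caller sketch for the new mm_for_maps() helper added above; it is not part of this merge, and the -EACCES mapping for a NULL return is an assumption.

/*
 * Sketch only: mm_for_maps() returns the mm with mmap_sem held for
 * read (or NULL), so the caller walks the VMAs and then drops both
 * the semaphore and the mm reference.
 */
static int walk_task_vmas(struct task_struct *task)
{
	struct mm_struct *mm = mm_for_maps(task);
	struct vm_area_struct *vma;
	int nr = 0;

	if (!mm)
		return -EACCES;	/* assumption: treat NULL as "not allowed" */
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		nr++;		/* a real /proc reader would emit one record per VMA */
	up_read(&mm->mmap_sem);
	mmput(mm);
	return nr;
}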
diff --combined include/linux/nfs_fs.h
index 099ddb4481c07d9d64b4708db3ce519de622ed84,2814bd40edf6e20a8c8248c998e92680439555f7..a69ba80f2dfe1398b9de602e0b793fedc69e4db9
@@@ -196,67 -196,28 +196,67 @@@ struct nfs_inode 
  #define NFS_INO_STALE         (2)             /* possible stale inode */
  #define NFS_INO_ACL_LRU_SET   (3)             /* Inode is on the LRU list */
  
 -static inline struct nfs_inode *NFS_I(struct inode *inode)
 +static inline struct nfs_inode *NFS_I(const struct inode *inode)
  {
        return container_of(inode, struct nfs_inode, vfs_inode);
  }
 -#define NFS_SB(s)             ((struct nfs_server *)(s->s_fs_info))
  
 -#define NFS_FH(inode)                 (&NFS_I(inode)->fh)
 -#define NFS_SERVER(inode)             (NFS_SB(inode->i_sb))
 -#define NFS_CLIENT(inode)             (NFS_SERVER(inode)->client)
 -#define NFS_PROTO(inode)              (NFS_SERVER(inode)->nfs_client->rpc_ops)
 -#define NFS_COOKIEVERF(inode)         (NFS_I(inode)->cookieverf)
 -#define NFS_MINATTRTIMEO(inode) \
 -      (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
 -                             : NFS_SERVER(inode)->acregmin)
 -#define NFS_MAXATTRTIMEO(inode) \
 -      (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
 -                             : NFS_SERVER(inode)->acregmax)
 +static inline struct nfs_server *NFS_SB(const struct super_block *s)
 +{
 +      return (struct nfs_server *)(s->s_fs_info);
 +}
 +
 +static inline struct nfs_fh *NFS_FH(const struct inode *inode)
 +{
 +      return &NFS_I(inode)->fh;
 +}
 +
 +static inline struct nfs_server *NFS_SERVER(const struct inode *inode)
 +{
 +      return NFS_SB(inode->i_sb);
 +}
 +
 +static inline struct rpc_clnt *NFS_CLIENT(const struct inode *inode)
 +{
 +      return NFS_SERVER(inode)->client;
 +}
 +
 +static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
 +{
 +      return NFS_SERVER(inode)->nfs_client->rpc_ops;
 +}
 +
 +static inline __be32 *NFS_COOKIEVERF(const struct inode *inode)
 +{
 +      return NFS_I(inode)->cookieverf;
 +}
 +
 +static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
 +{
 +      struct nfs_server *nfss = NFS_SERVER(inode);
 +      return S_ISDIR(inode->i_mode) ? nfss->acdirmin : nfss->acregmin;
 +}
  
 -#define NFS_FLAGS(inode)              (NFS_I(inode)->flags)
 -#define NFS_STALE(inode)              (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
 +static inline unsigned NFS_MAXATTRTIMEO(const struct inode *inode)
 +{
 +      struct nfs_server *nfss = NFS_SERVER(inode);
 +      return S_ISDIR(inode->i_mode) ? nfss->acdirmax : nfss->acregmax;
 +}
  
 -#define NFS_FILEID(inode)             (NFS_I(inode)->fileid)
 +static inline int NFS_STALE(const struct inode *inode)
 +{
 +      return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
 +}
 +
 +static inline __u64 NFS_FILEID(const struct inode *inode)
 +{
 +      return NFS_I(inode)->fileid;
 +}
 +
 +static inline void set_nfs_fileid(struct inode *inode, __u64 fileid)
 +{
 +      NFS_I(inode)->fileid = fileid;
 +}
  
  static inline void nfs_mark_for_revalidate(struct inode *inode)
  {
@@@ -276,7 -237,7 +276,7 @@@ static inline int nfs_server_capable(st
  
  static inline int NFS_USE_READDIRPLUS(struct inode *inode)
  {
 -      return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
 +      return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
  }
  
  static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
@@@ -405,7 -366,6 +405,7 @@@ extern const struct inode_operations nf
  extern const struct file_operations nfs_dir_operations;
  extern struct dentry_operations nfs_dentry_operations;
  
 +extern void nfs_force_lookup_revalidate(struct inode *dir);
  extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
  extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
  extern void nfs_access_zap_cache(struct inode *inode);
@@@ -556,14 -516,7 +556,7 @@@ extern void * nfs_root_data(void)
  
  #define nfs_wait_event(clnt, wq, condition)                           \
  ({                                                                    \
-       int __retval = 0;                                               \
-       if (clnt->cl_intr) {                                            \
-               sigset_t oldmask;                                       \
-               rpc_clnt_sigmask(clnt, &oldmask);                       \
-               __retval = wait_event_interruptible(wq, condition);     \
-               rpc_clnt_sigunmask(clnt, &oldmask);                     \
-       } else                                                          \
-               wait_event(wq, condition);                              \
+       int __retval = wait_event_killable(wq, condition);              \
        __retval;                                                       \
  })
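The converted nfs_wait_event() now funnels straight into wait_event_killable(); below is a hedged caller sketch (not from this merge) showing how the return value would typically be handled, assuming the usual wait_event_killable() contract of 0 on success and -ERESTARTSYS when a fatal signal interrupts the wait.

/*
 * Sketch: the clnt argument is kept only for the macro's signature;
 * a fatal signal now breaks the wait instead of the old "intr"
 * sigmask dance.
 */
static int nfs_wait_done_example(struct rpc_clnt *clnt,
				 wait_queue_head_t *wq, atomic_t *done)
{
	int err = nfs_wait_event(clnt, *wq, atomic_read(done) != 0);

	if (err)
		return err;	/* fatal signal pending, bail out */
	return 0;
}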
  
diff --combined include/linux/sched.h
index 9d4797609aa5e368439671d603da5f7e7bedb979,e4921aad4063d5d0fc23209ed88fe32f12dd78bd..6c333579d9da0e2c66788516948e83886b0fe7ed
@@@ -27,7 -27,6 +27,7 @@@
  #define CLONE_NEWUSER         0x10000000      /* New user namespace */
  #define CLONE_NEWPID          0x20000000      /* New pid namespace */
  #define CLONE_NEWNET          0x40000000      /* New network namespace */
 +#define CLONE_IO              0x80000000      /* Clone io context */
  
  /*
   * Scheduling policies
@@@ -79,6 -78,7 +79,6 @@@ struct sched_param 
  #include <linux/proportions.h>
  #include <linux/seccomp.h>
  #include <linux/rcupdate.h>
 -#include <linux/futex.h>
  #include <linux/rtmutex.h>
  
  #include <linux/time.h>
  #include <linux/hrtimer.h>
  #include <linux/task_io_accounting.h>
  #include <linux/kobject.h>
 +#include <linux/latencytop.h>
  
  #include <asm/processor.h>
  
  struct exec_domain;
  struct futex_pi_state;
 +struct robust_list_head;
  struct bio;
  
  /*
@@@ -172,13 -170,35 +172,35 @@@ print_cfs_rq(struct seq_file *m, int cp
  #define TASK_RUNNING          0
  #define TASK_INTERRUPTIBLE    1
  #define TASK_UNINTERRUPTIBLE  2
- #define TASK_STOPPED          4
- #define TASK_TRACED           8
+ #define __TASK_STOPPED                4
+ #define __TASK_TRACED         8
  /* in tsk->exit_state */
  #define EXIT_ZOMBIE           16
  #define EXIT_DEAD             32
  /* in tsk->state again */
  #define TASK_DEAD             64
+ #define TASK_WAKEKILL         128
+ /* Convenience macros for the sake of set_task_state */
+ #define TASK_KILLABLE         (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+ #define TASK_STOPPED          (TASK_WAKEKILL | __TASK_STOPPED)
+ #define TASK_TRACED           (TASK_WAKEKILL | __TASK_TRACED)
+ /* Convenience macros for the sake of wake_up */
+ #define TASK_NORMAL           (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+ #define TASK_ALL              (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+ /* get_task_state() */
+ #define TASK_REPORT           (TASK_RUNNING | TASK_INTERRUPTIBLE | \
+                                TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+                                __TASK_TRACED)
+ #define task_is_traced(task)  ((task->state & __TASK_TRACED) != 0)
+ #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
+ #define task_is_stopped_or_traced(task)       \
+                       ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+ #define task_contributes_to_load(task)        \
+                               ((task->state & TASK_UNINTERRUPTIBLE) != 0)
  
  #define __set_task_state(tsk, state_value)            \
        do { (tsk)->state = (state_value); } while (0)
@@@ -232,8 -252,6 +254,8 @@@ static inline int select_nohz_load_bala
  }
  #endif
  
 +extern unsigned long rt_needs_cpu(int cpu);
 +
  /*
   * Only dump TASK_* tasks. (0 for all tasks)
   */
@@@ -261,19 -279,13 +283,19 @@@ extern void trap_init(void)
  extern void account_process_tick(struct task_struct *task, int user);
  extern void update_process_times(int user);
  extern void scheduler_tick(void);
 +extern void hrtick_resched(void);
 +
 +extern void sched_show_task(struct task_struct *p);
  
  #ifdef CONFIG_DETECT_SOFTLOCKUP
  extern void softlockup_tick(void);
  extern void spawn_softlockup_task(void);
  extern void touch_softlockup_watchdog(void);
  extern void touch_all_softlockup_watchdogs(void);
 -extern int softlockup_thresh;
 +extern unsigned long  softlockup_thresh;
 +extern unsigned long sysctl_hung_task_check_count;
 +extern unsigned long sysctl_hung_task_timeout_secs;
 +extern unsigned long sysctl_hung_task_warnings;
  #else
  static inline void softlockup_tick(void)
  {
@@@ -302,6 -314,7 +324,7 @@@ extern int in_sched_functions(unsigned 
  #define       MAX_SCHEDULE_TIMEOUT    LONG_MAX
  extern signed long FASTCALL(schedule_timeout(signed long timeout));
  extern signed long schedule_timeout_interruptible(signed long timeout);
+ extern signed long schedule_timeout_killable(signed long timeout);
  extern signed long schedule_timeout_uninterruptible(signed long timeout);
  asmlinkage void schedule(void);
  
@@@ -562,13 -575,18 +585,13 @@@ struct user_struct 
  #ifdef CONFIG_FAIR_USER_SCHED
        struct task_group *tg;
  #ifdef CONFIG_SYSFS
 -      struct kset kset;
 -      struct subsys_attribute user_attr;
 +      struct kobject kobj;
        struct work_struct work;
  #endif
  #endif
  };
  
 -#ifdef CONFIG_FAIR_USER_SCHED
 -extern int uids_kobject_init(void);
 -#else
 -static inline int uids_kobject_init(void) { return 0; }
 -#endif
 +extern int uids_sysfs_init(void);
  
  extern struct user_struct *find_user(uid_t);
  
@@@ -832,7 -850,6 +855,7 @@@ struct sched_class 
        void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
        void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
        void (*yield_task) (struct rq *rq);
 +      int  (*select_task_rq)(struct task_struct *p, int sync);
  
        void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
  
        int (*move_one_task) (struct rq *this_rq, int this_cpu,
                              struct rq *busiest, struct sched_domain *sd,
                              enum cpu_idle_type idle);
 +      void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 +      void (*post_schedule) (struct rq *this_rq);
 +      void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
  #endif
  
        void (*set_curr_task) (struct rq *rq);
 -      void (*task_tick) (struct rq *rq, struct task_struct *p);
 +      void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
        void (*task_new) (struct rq *rq, struct task_struct *p);
 +      void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
 +
 +      void (*join_domain)(struct rq *rq);
 +      void (*leave_domain)(struct rq *rq);
 +
 +      void (*switched_from) (struct rq *this_rq, struct task_struct *task,
 +                             int running);
 +      void (*switched_to) (struct rq *this_rq, struct task_struct *task,
 +                           int running);
 +      void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
 +                           int oldprio, int running);
  };
  
  struct load_weight {
@@@ -896,8 -899,6 +919,8 @@@ struct sched_entity 
  #ifdef CONFIG_SCHEDSTATS
        u64                     wait_start;
        u64                     wait_max;
 +      u64                     wait_count;
 +      u64                     wait_sum;
  
        u64                     sleep_start;
        u64                     sleep_max;
  #endif
  };
  
 +struct sched_rt_entity {
 +      struct list_head run_list;
 +      unsigned int time_slice;
 +      unsigned long timeout;
 +      int nr_cpus_allowed;
 +
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      struct sched_rt_entity  *parent;
 +      /* rq on which this entity is (to be) queued: */
 +      struct rt_rq            *rt_rq;
 +      /* rq "owned" by this entity/group: */
 +      struct rt_rq            *my_q;
 +#endif
 +};
 +
  struct task_struct {
        volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
        void *stack;
  #endif
  
        int prio, static_prio, normal_prio;
 -      struct list_head run_list;
        const struct sched_class *sched_class;
        struct sched_entity se;
 +      struct sched_rt_entity rt;
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
        /* list of struct preempt_notifier: */
        struct hlist_head preempt_notifiers;
  #endif
  
 -      unsigned short ioprio;
        /*
         * fpu_counter contains the number of consecutive context switches
         * that the FPU is used. If this is over a threshold, the lazy fpu
  
        unsigned int policy;
        cpumask_t cpus_allowed;
 -      unsigned int time_slice;
 +
 +#ifdef CONFIG_PREEMPT_RCU
 +      int rcu_read_lock_nesting;
 +      int rcu_flipctr_idx;
 +#endif /* #ifdef CONFIG_PREEMPT_RCU */
  
  #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
        struct sched_info sched_info;
  /* ipc stuff */
        struct sysv_sem sysvsem;
  #endif
 +#ifdef CONFIG_DETECT_SOFTLOCKUP
 +/* hung task detection */
 +      unsigned long last_switch_timestamp;
 +      unsigned long last_switch_count;
 +#endif
  /* CPU-specific state of this task */
        struct thread_struct thread;
  /* filesystem information */
        int make_it_fail;
  #endif
        struct prop_local_single dirties;
 +#ifdef CONFIG_LATENCYTOP
 +      int latency_record_count;
 +      struct latency_record latency_record[LT_SAVECOUNT];
 +#endif
  };
  
  /*
@@@ -1304,6 -1278,13 +1327,6 @@@ struct pid_namespace
   *
   * set_task_vxid()   : assigns a virtual id to a task;
   *
 - * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
 - *                     the result depends on the namespace and whether the
 - *                     task in question is the namespace's init. e.g. for the
 - *                     namespace's init this will return 0 when called from
 - *                     the namespace of this init, or appropriate id otherwise.
 - *
 - *
   * see also pid_nr() etc in include/linux/pid.h
   */
  
@@@ -1359,6 -1340,12 +1382,6 @@@ static inline pid_t task_session_vnr(st
  }
  
  
 -static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
 -              struct pid_namespace *ns)
 -{
 -      return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
 -}
 -
  /**
   * pid_alive - check that a task structure is not stale
   * @p: Task structure to be checked.
@@@ -1507,12 -1494,6 +1530,12 @@@ extern unsigned int sysctl_sched_child_
  extern unsigned int sysctl_sched_features;
  extern unsigned int sysctl_sched_migration_cost;
  extern unsigned int sysctl_sched_nr_migrate;
 +extern unsigned int sysctl_sched_rt_period;
 +extern unsigned int sysctl_sched_rt_ratio;
 +#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 +extern unsigned int sysctl_sched_min_bal_int_shares;
 +extern unsigned int sysctl_sched_max_bal_int_shares;
 +#endif
  
  int sched_nr_latency_handler(struct ctl_table *table, int write,
                struct file *file, void __user *buffer, size_t *length,
@@@ -1892,7 -1873,14 +1915,14 @@@ static inline int signal_pending(struc
  {
        return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
  }
-   
+ extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+ static inline int fatal_signal_pending(struct task_struct *p)
+ {
+       return signal_pending(p) && __fatal_signal_pending(p);
+ }
  static inline int need_resched(void)
  {
        return unlikely(test_thread_flag(TIF_NEED_RESCHED));
   * cond_resched_lock() will drop the spinlock before scheduling,
   * cond_resched_softirq() will enable bhs before scheduling.
   */
 -extern int cond_resched(void);
 -extern int cond_resched_lock(spinlock_t * lock);
 -extern int cond_resched_softirq(void);
 -
 -/*
 - * Does a critical section need to be broken due to another
 - * task waiting?:
 - */
 -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
 -# define need_lockbreak(lock) ((lock)->break_lock)
 +#ifdef CONFIG_PREEMPT
 +static inline int cond_resched(void)
 +{
 +      return 0;
 +}
  #else
 -# define need_lockbreak(lock) 0
 +extern int _cond_resched(void);
 +static inline int cond_resched(void)
 +{
 +      return _cond_resched();
 +}
  #endif
 +extern int cond_resched_lock(spinlock_t * lock);
 +extern int cond_resched_softirq(void);
  
  /*
   * Does a critical section need to be broken due to another
 - * task waiting or preemption being signalled:
 + * task waiting?: (technically does not depend on CONFIG_PREEMPT,
 + * but a general need for low latency)
   */
 -static inline int lock_need_resched(spinlock_t *lock)
 +static inline int spin_needbreak(spinlock_t *lock)
  {
 -      if (need_lockbreak(lock) || need_resched())
 -              return 1;
 +#ifdef CONFIG_PREEMPT
 +      return spin_is_contended(lock);
 +#else
        return 0;
 +#endif
  }
  
  /*
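Taken together, the TASK_WAKEKILL/TASK_KILLABLE additions in the hunk above enable the sleep pattern this series is built around; here is a minimal illustrative loop (not part of the patch) using the new state and fatal_signal_pending(). The flag is assumed to be set by a waker that also calls wake_up_process() on the sleeper.

/*
 * Sketch: sleep in TASK_KILLABLE so ordinary signals are ignored but
 * SIGKILL still wakes the task, which it detects with
 * fatal_signal_pending().
 */
static int wait_for_flag_killable(volatile int *flag)
{
	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (*flag)
			break;
		schedule();
		if (fatal_signal_pending(current)) {
			__set_current_state(TASK_RUNNING);
			return -ERESTARTSYS;
		}
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}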
diff --combined include/linux/sunrpc/clnt.h
index 3e9addc741c1ad8a57b41c9172d3114b53faee1b,01879365f4ec747eb409533989fc1228420b3dc8..129a86e25d2989df395e29913fe4322b614701c7
@@@ -41,12 -41,10 +41,11 @@@ struct rpc_clnt 
        struct rpc_iostats *    cl_metrics;     /* per-client statistics */
  
        unsigned int            cl_softrtry : 1,/* soft timeouts */
-                               cl_intr     : 1,/* interruptible */
                                cl_discrtry : 1,/* disconnect before retry */
                                cl_autobind : 1;/* use getport() */
  
        struct rpc_rtt *        cl_rtt;         /* RTO estimator data */
 +      const struct rpc_timeout *cl_timeout;   /* Timeout strategy */
  
        int                     cl_nodelen;     /* nodename length */
        char                    cl_nodename[UNX_MAXNODENAME];
@@@ -55,7 -53,6 +54,7 @@@
        struct dentry *         cl_dentry;      /* inode */
        struct rpc_clnt *       cl_parent;      /* Points to parent of clones */
        struct rpc_rtt          cl_rtt_default;
 +      struct rpc_timeout      cl_timeout_default;
        struct rpc_program *    cl_program;
        char                    cl_inline_name[32];
  };
@@@ -101,7 -98,7 +100,7 @@@ struct rpc_create_args 
        struct sockaddr         *address;
        size_t                  addrsize;
        struct sockaddr         *saddress;
 -      struct rpc_timeout      *timeout;
 +      const struct rpc_timeout *timeout;
        char                    *servername;
        struct rpc_program      *program;
        u32                     version;
  
  /* Values for "flags" field */
  #define RPC_CLNT_CREATE_HARDRTRY      (1UL << 0)
- #define RPC_CLNT_CREATE_INTR          (1UL << 1)
  #define RPC_CLNT_CREATE_AUTOBIND      (1UL << 2)
  #define RPC_CLNT_CREATE_NONPRIVPORT   (1UL << 3)
  #define RPC_CLNT_CREATE_NOPING                (1UL << 4)
@@@ -125,10 -121,11 +123,10 @@@ void            rpc_shutdown_client(struct rpc_cl
  void          rpc_release_client(struct rpc_clnt *);
  
  int           rpcb_register(u32, u32, int, unsigned short, int *);
 -int           rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
 +int           rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
  void          rpcb_getport_async(struct rpc_task *);
  
 -void          rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
 -
 +void          rpc_call_start(struct rpc_task *);
  int           rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
                               int flags, const struct rpc_call_ops *tk_ops,
                               void *calldata);
@@@ -137,13 -134,13 +135,11 @@@ int             rpc_call_sync(struct rpc_clnt *cln
  struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
                               int flags);
  void          rpc_restart_call(struct rpc_task *);
--void          rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
--void          rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
  void          rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
  size_t                rpc_max_payload(struct rpc_clnt *);
  void          rpc_force_rebind(struct rpc_clnt *);
  size_t                rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
 -char *                rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 +const char    *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
  
  #endif /* __KERNEL__ */
  #endif /* _LINUX_SUNRPC_CLNT_H */
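With cl_intr and RPC_CLNT_CREATE_INTR gone, an rpc_create() caller simply stops passing the intr flag. The sketch below is illustrative only: rpc_create() itself and any fields not visible in this hunk are assumptions, and the values are placeholders.

/* Illustrative only -- not taken from this merge. */
struct rpc_create_args args = {
	.address	= (struct sockaddr *)&server_addr,
	.addrsize	= sizeof(server_addr),
	.timeout	= &nfs_timeout,		/* now a const pointer */
	.servername	= hostname,
	.program	= &nfs_program,
	.version	= 3,
	.flags		= RPC_CLNT_CREATE_AUTOBIND,	/* RPC_CLNT_CREATE_INTR no longer exists */
};
struct rpc_clnt *clnt = rpc_create(&args);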
diff --combined include/linux/sunrpc/sched.h
index ce3d1b13272901a7ac3cfa07c9b3042bebb6043e,19160e63d6ad242a29e7f0cf37e41d43005778ce..f689f02e6793de2aa6ee21d9a3054af669714cb1
@@@ -56,6 -56,8 +56,6 @@@ struct rpc_task 
        __u8                    tk_garb_retry;
        __u8                    tk_cred_retry;
  
 -      unsigned long           tk_cookie;      /* Cookie for batching tasks */
 -
        /*
         * timeout_fn   to be executed by timer bottom half
         * callback     to be executed after waking up
@@@ -76,6 -78,7 +76,6 @@@
        struct timer_list       tk_timer;       /* kernel timer */
        unsigned long           tk_timeout;     /* timeout for rpc_sleep() */
        unsigned short          tk_flags;       /* misc flags */
 -      unsigned char           tk_priority : 2;/* Task priority */
        unsigned long           tk_runstate;    /* Task run status */
        struct workqueue_struct *tk_workqueue;  /* Normally rpciod, but could
                                                 * be any workqueue
@@@ -91,9 -94,6 +91,9 @@@
        unsigned long           tk_start;       /* RPC task init timestamp */
        long                    tk_rtt;         /* round-trip time (jiffies) */
  
 +      pid_t                   tk_owner;       /* Process id for batching tasks */
 +      unsigned char           tk_priority : 2;/* Task priority */
 +
  #ifdef RPC_DEBUG
        unsigned short          tk_pid;         /* debugging aid */
  #endif
@@@ -117,15 -117,6 +117,15 @@@ struct rpc_call_ops 
        void (*rpc_release)(void *);
  };
  
 +struct rpc_task_setup {
 +      struct rpc_task *task;
 +      struct rpc_clnt *rpc_client;
 +      const struct rpc_message *rpc_message;
 +      const struct rpc_call_ops *callback_ops;
 +      void *callback_data;
 +      unsigned short flags;
 +      signed char priority;
 +};
  
  /*
   * RPC task flags
  #define RPC_TASK_DYNAMIC      0x0080          /* task was kmalloc'ed */
  #define RPC_TASK_KILLED               0x0100          /* task was killed */
  #define RPC_TASK_SOFT         0x0200          /* Use soft timeouts */
- #define RPC_TASK_NOINTR               0x0400          /* uninterruptible task */
  
  #define RPC_IS_ASYNC(t)               ((t)->tk_flags & RPC_TASK_ASYNC)
  #define RPC_IS_SWAPPER(t)     ((t)->tk_flags & RPC_TASK_SWAPPER)
  #define RPC_ASSASSINATED(t)   ((t)->tk_flags & RPC_TASK_KILLED)
  #define RPC_DO_CALLBACK(t)    ((t)->tk_callback != NULL)
  #define RPC_IS_SOFT(t)                ((t)->tk_flags & RPC_TASK_SOFT)
- #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
  
  #define RPC_TASK_RUNNING      0
  #define RPC_TASK_QUEUED               1
   * Note: if you change these, you must also change
   * the task initialization definitions below.
   */
 -#define RPC_PRIORITY_LOW      0
 -#define RPC_PRIORITY_NORMAL   1
 -#define RPC_PRIORITY_HIGH     2
 -#define RPC_NR_PRIORITY               (RPC_PRIORITY_HIGH+1)
 +#define RPC_PRIORITY_LOW      (-1)
 +#define RPC_PRIORITY_NORMAL   (0)
 +#define RPC_PRIORITY_HIGH     (1)
 +#define RPC_NR_PRIORITY               (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
  
  /*
   * RPC synchronization objects
  struct rpc_wait_queue {
        spinlock_t              lock;
        struct list_head        tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
 -      unsigned long           cookie;                 /* cookie of last task serviced */
 +      pid_t                   owner;                  /* process id of last task serviced */
        unsigned char           maxpriority;            /* maximum priority (0 if queue is not a priority queue) */
        unsigned char           priority;               /* current priority */
        unsigned char           count;                  /* # task groups remaining serviced so far */
   * performance of NFS operations such as read/write.
   */
  #define RPC_BATCH_COUNT                       16
 -
 -#ifndef RPC_DEBUG
 -# define RPC_WAITQ_INIT(var,qname) { \
 -              .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
 -              .tasks = { \
 -                      [0] = LIST_HEAD_INIT(var.tasks[0]), \
 -                      [1] = LIST_HEAD_INIT(var.tasks[1]), \
 -                      [2] = LIST_HEAD_INIT(var.tasks[2]), \
 -              }, \
 -      }
 -#else
 -# define RPC_WAITQ_INIT(var,qname) { \
 -              .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
 -              .tasks = { \
 -                      [0] = LIST_HEAD_INIT(var.tasks[0]), \
 -                      [1] = LIST_HEAD_INIT(var.tasks[1]), \
 -                      [2] = LIST_HEAD_INIT(var.tasks[2]), \
 -              }, \
 -              .name = qname, \
 -      }
 -#endif
 -# define RPC_WAITQ(var,qname)      struct rpc_wait_queue var = RPC_WAITQ_INIT(var,qname)
 -
  #define RPC_IS_PRIORITY(q)            ((q)->maxpriority > 0)
  
  /*
   * Function prototypes
   */
 -struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
 -                              const struct rpc_call_ops *ops, void *data);
 -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 -                              const struct rpc_call_ops *ops, void *data);
 -void          rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
 -                              int flags, const struct rpc_call_ops *ops,
 -                              void *data);
 +struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
 +struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
  void          rpc_put_task(struct rpc_task *);
  void          rpc_exit_task(struct rpc_task *);
  void          rpc_release_calldata(const struct rpc_call_ops *, void *);
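The new rpc_task_setup structure replaces the old rpc_call_setup()/rpc_new_task() argument lists; the sketch below shows the intended calling pattern. The client, message and ops here are placeholders, and the error handling assumes an ERR_PTR-style return from rpc_run_task().

/* Sketch of the new setup-struct pattern; not lifted from this merge. */
struct rpc_task_setup task_setup_data = {
	.rpc_client	= clnt,
	.rpc_message	= &msg,
	.callback_ops	= &example_call_ops,
	.callback_data	= calldata,
	.flags		= RPC_TASK_ASYNC,
	.priority	= RPC_PRIORITY_NORMAL,
};
struct rpc_task *task;

task = rpc_run_task(&task_setup_data);
if (!IS_ERR(task))
	rpc_put_task(task);	/* drop our reference; rpciod keeps its own */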
diff --combined kernel/ptrace.c
index e6e9b8be4b053c5f0074bdeca6f1a4ac9517e8fb,26f9923baddc6abb454022f61bbe403894c28103..b0d4ab4dfd3d27ee60e3a044f9e0bc3bba665d88
@@@ -51,7 -51,7 +51,7 @@@ void __ptrace_link(struct task_struct *
  void ptrace_untrace(struct task_struct *child)
  {
        spin_lock(&child->sighand->siglock);
-       if (child->state == TASK_TRACED) {
+       if (task_is_traced(child)) {
                if (child->signal->flags & SIGNAL_STOP_STOPPED) {
                        child->state = TASK_STOPPED;
                } else {
@@@ -79,7 -79,7 +79,7 @@@ void __ptrace_unlink(struct task_struc
                add_parent(child);
        }
  
-       if (child->state == TASK_TRACED)
+       if (task_is_traced(child))
                ptrace_untrace(child);
  }
  
@@@ -103,9 -103,9 +103,9 @@@ int ptrace_check_attach(struct task_str
            && child->signal != NULL) {
                ret = 0;
                spin_lock_irq(&child->sighand->siglock);
-               if (child->state == TASK_STOPPED) {
+               if (task_is_stopped(child)) {
                        child->state = TASK_TRACED;
-               } else if (child->state != TASK_TRACED && !kill) {
+               } else if (!task_is_traced(child) && !kill) {
                        ret = -ESRCH;
                }
                spin_unlock_irq(&child->sighand->siglock);
        return ret;
  }
  
 -static int may_attach(struct task_struct *task)
 +int __ptrace_may_attach(struct task_struct *task)
  {
        /* May we inspect the given task?
         * This check is used both for attaching with ptrace
@@@ -154,7 -154,7 +154,7 @@@ int ptrace_may_attach(struct task_struc
  {
        int err;
        task_lock(task);
 -      err = may_attach(task);
 +      err = __ptrace_may_attach(task);
        task_unlock(task);
        return !err;
  }
@@@ -196,7 -196,7 +196,7 @@@ repeat
        /* the same process cannot be attached many times */
        if (task->ptrace & PT_PTRACED)
                goto bad;
 -      retval = may_attach(task);
 +      retval = __ptrace_may_attach(task);
        if (retval)
                goto bad;
  
@@@ -366,73 -366,12 +366,73 @@@ static int ptrace_setsiginfo(struct tas
        return error;
  }
  
 +
 +#ifdef PTRACE_SINGLESTEP
 +#define is_singlestep(request)                ((request) == PTRACE_SINGLESTEP)
 +#else
 +#define is_singlestep(request)                0
 +#endif
 +
 +#ifdef PTRACE_SINGLEBLOCK
 +#define is_singleblock(request)               ((request) == PTRACE_SINGLEBLOCK)
 +#else
 +#define is_singleblock(request)               0
 +#endif
 +
 +#ifdef PTRACE_SYSEMU
 +#define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
 +#else
 +#define is_sysemu_singlestep(request) 0
 +#endif
 +
 +static int ptrace_resume(struct task_struct *child, long request, long data)
 +{
 +      if (!valid_signal(data))
 +              return -EIO;
 +
 +      if (request == PTRACE_SYSCALL)
 +              set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 +      else
 +              clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 +
 +#ifdef TIF_SYSCALL_EMU
 +      if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
 +              set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 +      else
 +              clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 +#endif
 +
 +      if (is_singleblock(request)) {
 +              if (unlikely(!arch_has_block_step()))
 +                      return -EIO;
 +              user_enable_block_step(child);
 +      } else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
 +              if (unlikely(!arch_has_single_step()))
 +                      return -EIO;
 +              user_enable_single_step(child);
 +      }
 +      else
 +              user_disable_single_step(child);
 +
 +      child->exit_code = data;
 +      wake_up_process(child);
 +
 +      return 0;
 +}
 +
  int ptrace_request(struct task_struct *child, long request,
                   long addr, long data)
  {
        int ret = -EIO;
  
        switch (request) {
 +      case PTRACE_PEEKTEXT:
 +      case PTRACE_PEEKDATA:
 +              return generic_ptrace_peekdata(child, addr, data);
 +      case PTRACE_POKETEXT:
 +      case PTRACE_POKEDATA:
 +              return generic_ptrace_pokedata(child, addr, data);
 +
  #ifdef PTRACE_OLDSETOPTIONS
        case PTRACE_OLDSETOPTIONS:
  #endif
        case PTRACE_DETACH:      /* detach a process that was attached. */
                ret = ptrace_detach(child, data);
                break;
 +
 +#ifdef PTRACE_SINGLESTEP
 +      case PTRACE_SINGLESTEP:
 +#endif
 +#ifdef PTRACE_SINGLEBLOCK
 +      case PTRACE_SINGLEBLOCK:
 +#endif
 +#ifdef PTRACE_SYSEMU
 +      case PTRACE_SYSEMU:
 +      case PTRACE_SYSEMU_SINGLESTEP:
 +#endif
 +      case PTRACE_SYSCALL:
 +      case PTRACE_CONT:
 +              return ptrace_resume(child, request, data);
 +
 +      case PTRACE_KILL:
 +              if (child->exit_state)  /* already dead */
 +                      return 0;
 +              return ptrace_resume(child, request, SIGKILL);
 +
        default:
                break;
        }
@@@ -551,8 -470,6 +551,8 @@@ asmlinkage long sys_ptrace(long request
        lock_kernel();
        if (request == PTRACE_TRACEME) {
                ret = ptrace_traceme();
 +              if (!ret)
 +                      arch_ptrace_attach(current);
                goto out;
        }
  
@@@ -607,87 -524,3 +607,87 @@@ int generic_ptrace_pokedata(struct task
        copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
        return (copied == sizeof(data)) ? 0 : -EIO;
  }
 +
 +#ifdef CONFIG_COMPAT
 +#include <linux/compat.h>
 +
 +int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 +                        compat_ulong_t addr, compat_ulong_t data)
 +{
 +      compat_ulong_t __user *datap = compat_ptr(data);
 +      compat_ulong_t word;
 +      int ret;
 +
 +      switch (request) {
 +      case PTRACE_PEEKTEXT:
 +      case PTRACE_PEEKDATA:
 +              ret = access_process_vm(child, addr, &word, sizeof(word), 0);
 +              if (ret != sizeof(word))
 +                      ret = -EIO;
 +              else
 +                      ret = put_user(word, datap);
 +              break;
 +
 +      case PTRACE_POKETEXT:
 +      case PTRACE_POKEDATA:
 +              ret = access_process_vm(child, addr, &data, sizeof(data), 1);
 +              ret = (ret != sizeof(data) ? -EIO : 0);
 +              break;
 +
 +      case PTRACE_GETEVENTMSG:
 +              ret = put_user((compat_ulong_t) child->ptrace_message, datap);
 +              break;
 +
 +      default:
 +              ret = ptrace_request(child, request, addr, data);
 +      }
 +
 +      return ret;
 +}
 +
 +#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
 +asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
 +                                compat_long_t addr, compat_long_t data)
 +{
 +      struct task_struct *child;
 +      long ret;
 +
 +      /*
 +       * This lock_kernel fixes a subtle race with suid exec
 +       */
 +      lock_kernel();
 +      if (request == PTRACE_TRACEME) {
 +              ret = ptrace_traceme();
 +              goto out;
 +      }
 +
 +      child = ptrace_get_task_struct(pid);
 +      if (IS_ERR(child)) {
 +              ret = PTR_ERR(child);
 +              goto out;
 +      }
 +
 +      if (request == PTRACE_ATTACH) {
 +              ret = ptrace_attach(child);
 +              /*
 +               * Some architectures need to do book-keeping after
 +               * a ptrace attach.
 +               */
 +              if (!ret)
 +                      arch_ptrace_attach(child);
 +              goto out_put_task_struct;
 +      }
 +
 +      ret = ptrace_check_attach(child, request == PTRACE_KILL);
 +      if (!ret)
 +              ret = compat_arch_ptrace(child, request, addr, data);
 +
 + out_put_task_struct:
 +      put_task_struct(child);
 + out:
 +      unlock_kernel();
 +      return ret;
 +}
 +#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
 +
 +#endif        /* CONFIG_COMPAT */
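PTRACE_PEEKTEXT/PEEKDATA and POKETEXT/POKEDATA are now handled generically in ptrace_request(); the userspace semantics are unchanged, as in this hedged userspace sketch (not from the patch).

/* Userspace view: the peeked word comes back as the return value of
 * ptrace(2), so errno must be cleared first to tell a legitimate -1
 * from a failure. */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <errno.h>

long peek_word(pid_t pid, void *addr)
{
	errno = 0;
	long word = ptrace(PTRACE_PEEKDATA, pid, addr, NULL);
	if (word == -1 && errno != 0)
		return -1;	/* real failure, e.g. bad address */
	return word;
}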
diff --combined kernel/sched.c
index 8355e007e0213e1f2384ad80d059e56e342b0d6d,d2f77fab0f4621a68b2b8bb95558bf146b40cec7..9474b23c28bf41f5989df3b94c5e810b0f1e971a
@@@ -22,8 -22,6 +22,8 @@@
   *              by Peter Williams
   *  2007-05-06  Interactivity improvements to CFS by Mike Galbraith
   *  2007-07-01  Group scheduling enhancements by Srivatsa Vaddagiri
 + *  2007-11-29  RT balancing improvements by Steven Rostedt, Gregory Haskins,
 + *              Thomas Gleixner, Mike Kravetz
   */
  
  #include <linux/mm.h>
@@@ -65,7 -63,6 +65,7 @@@
  #include <linux/reciprocal_div.h>
  #include <linux/unistd.h>
  #include <linux/pagemap.h>
 +#include <linux/hrtimer.h>
  
  #include <asm/tlb.h>
  #include <asm/irq_regs.h>
@@@ -99,9 -96,10 +99,9 @@@ unsigned long long __attribute__((weak)
  #define MAX_USER_PRIO         (USER_PRIO(MAX_PRIO))
  
  /*
 - * Some helpers for converting nanosecond timing to jiffy resolution
 + * Helpers for converting nanosecond timing to jiffy resolution
   */
  #define NS_TO_JIFFIES(TIME)   ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
 -#define JIFFIES_TO_NS(TIME)   ((TIME) * (NSEC_PER_SEC / HZ))
  
  #define NICE_0_LOAD           SCHED_LOAD_SCALE
  #define NICE_0_SHIFT          SCHED_LOAD_SHIFT
@@@ -161,8 -159,6 +161,8 @@@ struct rt_prio_array 
  
  struct cfs_rq;
  
 +static LIST_HEAD(task_groups);
 +
  /* task group related information */
  struct task_group {
  #ifdef CONFIG_FAIR_CGROUP_SCHED
        struct sched_entity **se;
        /* runqueue "owned" by this group on each cpu */
        struct cfs_rq **cfs_rq;
 +
 +      struct sched_rt_entity **rt_se;
 +      struct rt_rq **rt_rq;
 +
 +      unsigned int rt_ratio;
 +
 +      /*
 +       * The shares assigned to a task group govern how much cpu bandwidth
 +       * is allocated to the group: the more shares a group has, the more
 +       * cpu bandwidth it is allocated.
 +       *
 +       * For example, let's say that there are three task groups, A, B and C, which
 +       * have been assigned shares 1000, 2000 and 3000 respectively. Then,
 +       * cpu bandwidth allocated by the scheduler to task groups A, B and C
 +       * should be:
 +       *
 +       *      Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
 +       *      Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
 +       *      Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
 +       *
 +       * The weight assigned to a task group's schedulable entities on every
 +       * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
 +       * group's shares. For example, let's say that task group A has been
 +       * assigned shares of 1000 and there are two CPUs in a system. Then,
 +       *
 +       *  tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
 +       *
 +       * Note: It is not necessary that each of a task group's schedulable
 +       *       entities has the same weight on all CPUs. If the group
 +       *       has 2 of its tasks on CPU0 and 1 task on CPU1, then a
 +       *       better distribution of weight could be:
 +       *
 +       *      tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
 +       *      tg_A->se[1]->load.weight = 1/3 * 2000 =  667
 +       *
 +       * rebalance_shares() is responsible for distributing the shares of a
 +       * task group like this among the group's schedulable entities across
 +       * cpus.
 +       *
 +       */
        unsigned long shares;
 -      /* spinlock to serialize modification to shares */
 -      spinlock_t lock;
 +
        struct rcu_head rcu;
 +      struct list_head list;
  };
  
  /* Default task group's sched entity on each cpu */
@@@ -223,51 -179,24 +223,51 @@@ static DEFINE_PER_CPU(struct sched_enti
  /* Default task group's cfs_rq on each cpu */
  static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
  
 +static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 +static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 +
  static struct sched_entity *init_sched_entity_p[NR_CPUS];
  static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
  
 +static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
 +static struct rt_rq *init_rt_rq_p[NR_CPUS];
 +
 +/* task_group_mutex serializes add/remove of task groups and also changes to
 + * a task group's cpu shares.
 + */
 +static DEFINE_MUTEX(task_group_mutex);
 +
 +/* doms_cur_mutex serializes access to doms_cur[] array */
 +static DEFINE_MUTEX(doms_cur_mutex);
 +
 +#ifdef CONFIG_SMP
 +/* kernel thread that runs rebalance_shares() periodically */
 +static struct task_struct *lb_monitor_task;
 +static int load_balance_monitor(void *unused);
 +#endif
 +
 +static void set_se_shares(struct sched_entity *se, unsigned long shares);
 +
  /* Default task group.
   *    Every task in the system belongs to this group at bootup.
   */
  struct task_group init_task_group = {
 -      .se     = init_sched_entity_p,
 +      .se     = init_sched_entity_p,
        .cfs_rq = init_cfs_rq_p,
 +
 +      .rt_se  = init_sched_rt_entity_p,
 +      .rt_rq  = init_rt_rq_p,
  };
  
  #ifdef CONFIG_FAIR_USER_SCHED
 -# define INIT_TASK_GRP_LOAD   2*NICE_0_LOAD
 +# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
  #else
 -# define INIT_TASK_GRP_LOAD   NICE_0_LOAD
 +# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
  #endif
  
 -static int init_task_group_load = INIT_TASK_GRP_LOAD;
 +#define MIN_GROUP_SHARES      2
 +
 +static int init_task_group_load = INIT_TASK_GROUP_LOAD;
  
  /* return group to which a task belongs */
  static inline struct task_group *task_group(struct task_struct *p)
  }
  
  /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 -static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu)
 +static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
  {
        p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
        p->se.parent = task_group(p)->se[cpu];
 +
 +      p->rt.rt_rq  = task_group(p)->rt_rq[cpu];
 +      p->rt.parent = task_group(p)->rt_se[cpu];
 +}
 +
 +static inline void lock_task_group_list(void)
 +{
 +      mutex_lock(&task_group_mutex);
 +}
 +
 +static inline void unlock_task_group_list(void)
 +{
 +      mutex_unlock(&task_group_mutex);
 +}
 +
 +static inline void lock_doms_cur(void)
 +{
 +      mutex_lock(&doms_cur_mutex);
 +}
 +
 +static inline void unlock_doms_cur(void)
 +{
 +      mutex_unlock(&doms_cur_mutex);
  }
  
  #else
  
 -static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
 +static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 +static inline void lock_task_group_list(void) { }
 +static inline void unlock_task_group_list(void) { }
 +static inline void lock_doms_cur(void) { }
 +static inline void unlock_doms_cur(void) { }
  
  #endif        /* CONFIG_FAIR_GROUP_SCHED */
  
@@@ -362,56 -264,10 +362,56 @@@ struct cfs_rq 
  /* Real-Time classes' related field in a runqueue: */
  struct rt_rq {
        struct rt_prio_array active;
 -      int rt_load_balance_idx;
 -      struct list_head *rt_load_balance_head, *rt_load_balance_curr;
 +      unsigned long rt_nr_running;
 +#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
 +      int highest_prio; /* highest queued rt task prio */
 +#endif
 +#ifdef CONFIG_SMP
 +      unsigned long rt_nr_migratory;
 +      int overloaded;
 +#endif
 +      int rt_throttled;
 +      u64 rt_time;
 +
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      struct rq *rq;
 +      struct list_head leaf_rt_rq_list;
 +      struct task_group *tg;
 +      struct sched_rt_entity *rt_se;
 +#endif
 +};
 +
 +#ifdef CONFIG_SMP
 +
 +/*
 + * We add the notion of a root-domain which will be used to define per-domain
 + * variables. Each exclusive cpuset essentially defines an island domain by
 + * fully partitioning the member cpus from any other cpuset. Whenever a new
 + * exclusive cpuset is created, we also create and attach a new root-domain
 + * object.
 + *
 + */
 +struct root_domain {
 +      atomic_t refcount;
 +      cpumask_t span;
 +      cpumask_t online;
 +
 +      /*
 +       * The "RT overload" flag: it gets set if a CPU has more than
 +       * one runnable RT task.
 +       */
 +      cpumask_t rto_mask;
 +      atomic_t rto_count;
  };
  
 +/*
 + * By default the system creates a single root-domain with all cpus as
 + * members (mimicking the global state we have today).
 + */
 +static struct root_domain def_root_domain;
 +
 +#endif
 +
  /*
   * This is the main, per-CPU runqueue data structure.
   *
@@@ -440,15 -296,11 +440,15 @@@ struct rq 
        u64 nr_switches;
  
        struct cfs_rq cfs;
 +      struct rt_rq rt;
 +      u64 rt_period_expire;
 +      int rt_throttled;
 +
  #ifdef CONFIG_FAIR_GROUP_SCHED
        /* list of leaf cfs_rq on this cpu: */
        struct list_head leaf_cfs_rq_list;
 +      struct list_head leaf_rt_rq_list;
  #endif
 -      struct rt_rq rt;
  
        /*
         * This is part of a global counter where only the total sum
        u64 clock, prev_clock_raw;
        s64 clock_max_delta;
  
 -      unsigned int clock_warps, clock_overflows;
 +      unsigned int clock_warps, clock_overflows, clock_underflows;
        u64 idle_clock;
        unsigned int clock_deep_idle_events;
        u64 tick_timestamp;
        atomic_t nr_iowait;
  
  #ifdef CONFIG_SMP
 +      struct root_domain *rd;
        struct sched_domain *sd;
  
        /* For active balancing */
        struct list_head migration_queue;
  #endif
  
 +#ifdef CONFIG_SCHED_HRTICK
 +      unsigned long hrtick_flags;
 +      ktime_t hrtick_expire;
 +      struct hrtimer hrtick_timer;
 +#endif
 +
  #ifdef CONFIG_SCHEDSTATS
        /* latency stats */
        struct sched_info rq_sched_info;
  };
  
  static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 -static DEFINE_MUTEX(sched_hotcpu_mutex);
  
  static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
  {
@@@ -595,23 -441,6 +595,23 @@@ static void update_rq_clock(struct rq *
  #define task_rq(p)            cpu_rq(task_cpu(p))
  #define cpu_curr(cpu)         (cpu_rq(cpu)->curr)
  
 +unsigned long rt_needs_cpu(int cpu)
 +{
 +      struct rq *rq = cpu_rq(cpu);
 +      u64 delta;
 +
 +      if (!rq->rt_throttled)
 +              return 0;
 +
 +      if (rq->clock > rq->rt_period_expire)
 +              return 1;
 +
 +      delta = rq->rt_period_expire - rq->clock;
 +      do_div(delta, NSEC_PER_SEC / HZ);
 +
 +      return (unsigned long)delta;
 +}
 +
  /*
   * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
   */
@@@ -630,8 -459,6 +630,8 @@@ enum 
        SCHED_FEAT_START_DEBIT          = 4,
        SCHED_FEAT_TREE_AVG             = 8,
        SCHED_FEAT_APPROX_AVG           = 16,
 +      SCHED_FEAT_HRTICK               = 32,
 +      SCHED_FEAT_DOUBLE_TICK          = 64,
  };
  
  const_debug unsigned int sysctl_sched_features =
                SCHED_FEAT_WAKEUP_PREEMPT       * 1 |
                SCHED_FEAT_START_DEBIT          * 1 |
                SCHED_FEAT_TREE_AVG             * 0 |
 -              SCHED_FEAT_APPROX_AVG           * 0;
 +              SCHED_FEAT_APPROX_AVG           * 0 |
 +              SCHED_FEAT_HRTICK               * 1 |
 +              SCHED_FEAT_DOUBLE_TICK          * 0;
  
  #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
  
   */
  const_debug unsigned int sysctl_sched_nr_migrate = 32;
  
 +/*
 + * period over which we measure -rt task cpu usage in ms.
 + * default: 1s
 + */
 +const_debug unsigned int sysctl_sched_rt_period = 1000;
 +
 +#define SCHED_RT_FRAC_SHIFT   16
 +#define SCHED_RT_FRAC         (1UL << SCHED_RT_FRAC_SHIFT)
 +
 +/*
 + * ratio of time -rt tasks may consume.
 + * default: 95%
 + */
 +const_debug unsigned int sysctl_sched_rt_ratio = 62259;
 +
  /*
   * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
   * clock constructed from sched_clock():
@@@ -678,12 -488,7 +678,12 @@@ unsigned long long cpu_clock(int cpu
  
        local_irq_save(flags);
        rq = cpu_rq(cpu);
 -      update_rq_clock(rq);
 +      /*
 +       * Only call sched_clock() if the scheduler has already been
 +       * initialized (some code might call cpu_clock() very early):
 +       */
 +      if (rq->idle)
 +              update_rq_clock(rq);
        now = rq->clock;
        local_irq_restore(flags);
  
@@@ -698,15 -503,10 +698,15 @@@ EXPORT_SYMBOL_GPL(cpu_clock)
  # define finish_arch_switch(prev)     do { } while (0)
  #endif
  
 +static inline int task_current(struct rq *rq, struct task_struct *p)
 +{
 +      return rq->curr == p;
 +}
 +
  #ifndef __ARCH_WANT_UNLOCKED_CTXSW
  static inline int task_running(struct rq *rq, struct task_struct *p)
  {
 -      return rq->curr == p;
 +      return task_current(rq, p);
  }
  
  static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
@@@ -735,7 -535,7 +735,7 @@@ static inline int task_running(struct r
  #ifdef CONFIG_SMP
        return p->oncpu;
  #else
 -      return rq->curr == p;
 +      return task_current(rq, p);
  #endif
  }
  
@@@ -869,177 -669,9 +869,177 @@@ void sched_clock_idle_wakeup_event(u64 
        rq->prev_clock_raw = now;
        rq->clock += delta_ns;
        spin_unlock(&rq->lock);
 +      touch_softlockup_watchdog();
  }
  EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
  
 +static void __resched_task(struct task_struct *p, int tif_bit);
 +
 +static inline void resched_task(struct task_struct *p)
 +{
 +      __resched_task(p, TIF_NEED_RESCHED);
 +}
 +
 +#ifdef CONFIG_SCHED_HRTICK
 +/*
 + * Use HR-timers to deliver accurate preemption points.
 + *
 + * It's all a bit involved since we cannot program an hrtimer while holding
 + * the rq->lock. So what we do is store a state in rq->hrtick_* and ask for a
 + * reschedule event.
 + *
 + * When we get rescheduled we reprogram the hrtick_timer outside of the
 + * rq->lock.
 + */
 +static inline void resched_hrt(struct task_struct *p)
 +{
 +      __resched_task(p, TIF_HRTICK_RESCHED);
 +}
 +
 +static inline void resched_rq(struct rq *rq)
 +{
 +      unsigned long flags;
 +
 +      spin_lock_irqsave(&rq->lock, flags);
 +      resched_task(rq->curr);
 +      spin_unlock_irqrestore(&rq->lock, flags);
 +}
 +
 +enum {
 +      HRTICK_SET,             /* re-program hrtick_timer */
 +      HRTICK_RESET,           /* not a new slice */
 +};
 +
 +/*
 + * Use hrtick when:
 + *  - enabled by features
 + *  - hrtimer is actually high res
 + */
 +static inline int hrtick_enabled(struct rq *rq)
 +{
 +      if (!sched_feat(HRTICK))
 +              return 0;
 +      return hrtimer_is_hres_active(&rq->hrtick_timer);
 +}
 +
 +/*
 + * Called to set the hrtick timer state.
 + *
 + * called with rq->lock held and irqs disabled
 + */
 +static void hrtick_start(struct rq *rq, u64 delay, int reset)
 +{
 +      assert_spin_locked(&rq->lock);
 +
 +      /*
 +       * preempt at: now + delay
 +       */
 +      rq->hrtick_expire =
 +              ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
 +      /*
 +       * indicate we need to program the timer
 +       */
 +      __set_bit(HRTICK_SET, &rq->hrtick_flags);
 +      if (reset)
 +              __set_bit(HRTICK_RESET, &rq->hrtick_flags);
 +
 +      /*
 +       * New slices are called from the schedule path and don't need a
 +       * forced reschedule.
 +       */
 +      if (reset)
 +              resched_hrt(rq->curr);
 +}
 +
 +static void hrtick_clear(struct rq *rq)
 +{
 +      if (hrtimer_active(&rq->hrtick_timer))
 +              hrtimer_cancel(&rq->hrtick_timer);
 +}
 +
 +/*
 + * Update the timer from the possible pending state.
 + */
 +static void hrtick_set(struct rq *rq)
 +{
 +      ktime_t time;
 +      int set, reset;
 +      unsigned long flags;
 +
 +      WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 +
 +      spin_lock_irqsave(&rq->lock, flags);
 +      set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
 +      reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
 +      time = rq->hrtick_expire;
 +      clear_thread_flag(TIF_HRTICK_RESCHED);
 +      spin_unlock_irqrestore(&rq->lock, flags);
 +
 +      if (set) {
 +              hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
 +              if (reset && !hrtimer_active(&rq->hrtick_timer))
 +                      resched_rq(rq);
 +      } else
 +              hrtick_clear(rq);
 +}
 +
 +/*
 + * High-resolution timer tick.
 + * Runs from hardirq context with interrupts disabled.
 + */
 +static enum hrtimer_restart hrtick(struct hrtimer *timer)
 +{
 +      struct rq *rq = container_of(timer, struct rq, hrtick_timer);
 +
 +      WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 +
 +      spin_lock(&rq->lock);
 +      __update_rq_clock(rq);
 +      rq->curr->sched_class->task_tick(rq, rq->curr, 1);
 +      spin_unlock(&rq->lock);
 +
 +      return HRTIMER_NORESTART;
 +}
 +
 +static inline void init_rq_hrtick(struct rq *rq)
 +{
 +      rq->hrtick_flags = 0;
 +      hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 +      rq->hrtick_timer.function = hrtick;
 +      rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 +}
 +
 +void hrtick_resched(void)
 +{
 +      struct rq *rq;
 +      unsigned long flags;
 +
 +      if (!test_thread_flag(TIF_HRTICK_RESCHED))
 +              return;
 +
 +      local_irq_save(flags);
 +      rq = cpu_rq(smp_processor_id());
 +      hrtick_set(rq);
 +      local_irq_restore(flags);
 +}
 +#else
 +static inline void hrtick_clear(struct rq *rq)
 +{
 +}
 +
 +static inline void hrtick_set(struct rq *rq)
 +{
 +}
 +
 +static inline void init_rq_hrtick(struct rq *rq)
 +{
 +}
 +
 +void hrtick_resched(void)
 +{
 +}
 +#endif
 +
  /*
   * resched_task - mark a task 'to be rescheduled now'.
   *
  #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
  #endif
  
 -static void resched_task(struct task_struct *p)
 +static void __resched_task(struct task_struct *p, int tif_bit)
  {
        int cpu;
  
        assert_spin_locked(&task_rq(p)->lock);
  
 -      if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
 +      if (unlikely(test_tsk_thread_flag(p, tif_bit)))
                return;
  
 -      set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 +      set_tsk_thread_flag(p, tif_bit);
  
        cpu = task_cpu(p);
        if (cpu == smp_processor_id())
@@@ -1085,10 -717,10 +1085,10 @@@ static void resched_cpu(int cpu
        spin_unlock_irqrestore(&rq->lock, flags);
  }
  #else
 -static inline void resched_task(struct task_struct *p)
 +static void __resched_task(struct task_struct *p, int tif_bit)
  {
        assert_spin_locked(&task_rq(p)->lock);
 -      set_tsk_need_resched(p);
 +      set_tsk_thread_flag(p, tif_bit);
  }
  #endif
  
@@@ -1228,23 -860,6 +1228,23 @@@ static void cpuacct_charge(struct task_
  static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
  #endif
  
 +static inline void inc_cpu_load(struct rq *rq, unsigned long load)
 +{
 +      update_load_add(&rq->load, load);
 +}
 +
 +static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 +{
 +      update_load_sub(&rq->load, load);
 +}
 +
 +#ifdef CONFIG_SMP
 +static unsigned long source_load(int cpu, int type);
 +static unsigned long target_load(int cpu, int type);
 +static unsigned long cpu_avg_load_per_task(int cpu);
 +static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 +#endif /* CONFIG_SMP */
 +
  #include "sched_stats.h"
  #include "sched_idletask.c"
  #include "sched_fair.c"
  
  #define sched_class_highest (&rt_sched_class)
  
 -/*
 - * Update delta_exec, delta_fair fields for rq.
 - *
 - * delta_fair clock advances at a rate inversely proportional to
 - * total load (rq->load.weight) on the runqueue, while
 - * delta_exec advances at the same rate as wall-clock (provided
 - * cpu is not idle).
 - *
 - * delta_exec / delta_fair is a measure of the (smoothened) load on this
 - * runqueue over any given interval. This (smoothened) load is used
 - * during load balance.
 - *
 - * This function is called /before/ updating rq->load
 - * and when switching tasks.
 - */
 -static inline void inc_load(struct rq *rq, const struct task_struct *p)
 -{
 -      update_load_add(&rq->load, p->se.load.weight);
 -}
 -
 -static inline void dec_load(struct rq *rq, const struct task_struct *p)
 -{
 -      update_load_sub(&rq->load, p->se.load.weight);
 -}
 -
 -static void inc_nr_running(struct task_struct *p, struct rq *rq)
 +static void inc_nr_running(struct rq *rq)
  {
        rq->nr_running++;
 -      inc_load(rq, p);
  }
  
 -static void dec_nr_running(struct task_struct *p, struct rq *rq)
 +static void dec_nr_running(struct rq *rq)
  {
        rq->nr_running--;
 -      dec_load(rq, p);
  }
  
  static void set_load_weight(struct task_struct *p)
@@@ -1350,11 -992,11 +1350,11 @@@ static int effective_prio(struct task_s
   */
  static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
  {
-       if (p->state == TASK_UNINTERRUPTIBLE)
+       if (task_contributes_to_load(p))
                rq->nr_uninterruptible--;
  
        enqueue_task(rq, p, wakeup);
 -      inc_nr_running(p, rq);
 +      inc_nr_running(rq);
  }
  
  /*
   */
  static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
  {
-       if (p->state == TASK_UNINTERRUPTIBLE)
+       if (task_contributes_to_load(p))
                rq->nr_uninterruptible++;
  
        dequeue_task(rq, p, sleep);
 -      dec_nr_running(p, rq);
 +      dec_nr_running(rq);
  }
  
  /**
@@@ -1386,7 -1028,7 +1386,7 @@@ unsigned long weighted_cpuload(const in
  
  static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
  {
 -      set_task_cfs_rq(p, cpu);
 +      set_task_rq(p, cpu);
  #ifdef CONFIG_SMP
        /*
         * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
  #endif
  }
  
 +static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 +                                     const struct sched_class *prev_class,
 +                                     int oldprio, int running)
 +{
 +      if (prev_class != p->sched_class) {
 +              if (prev_class->switched_from)
 +                      prev_class->switched_from(rq, p, running);
 +              p->sched_class->switched_to(rq, p, running);
 +      } else
 +              p->sched_class->prio_changed(rq, p, oldprio, running);
 +}
 +
  #ifdef CONFIG_SMP
  
  /*
   * Is this task likely cache-hot:
   */
 -static inline int
 +static int
  task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
  {
        s64 delta;
@@@ -1640,7 -1270,7 +1640,7 @@@ static unsigned long target_load(int cp
  /*
   * Return the average load per task on the cpu's run queue
   */
 -static inline unsigned long cpu_avg_load_per_task(int cpu)
 +static unsigned long cpu_avg_load_per_task(int cpu)
  {
        struct rq *rq = cpu_rq(cpu);
        unsigned long total = weighted_cpuload(cpu);
@@@ -1797,6 -1427,58 +1797,6 @@@ static int sched_balance_self(int cpu, 
  
  #endif /* CONFIG_SMP */
  
 -/*
 - * wake_idle() will wake a task on an idle cpu if task->cpu is
 - * not idle and an idle cpu is available.  The span of cpus to
 - * search starts with cpus closest then further out as needed,
 - * so we always favor a closer, idle cpu.
 - *
 - * Returns the CPU we should wake onto.
 - */
 -#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
 -static int wake_idle(int cpu, struct task_struct *p)
 -{
 -      cpumask_t tmp;
 -      struct sched_domain *sd;
 -      int i;
 -
 -      /*
 -       * If it is idle, then it is the best cpu to run this task.
 -       *
 -       * This cpu is also the best, if it has more than one task already.
 -       * Siblings must be also busy(in most cases) as they didn't already
 -       * pickup the extra load from this cpu and hence we need not check
 -       * sibling runqueue info. This will avoid the checks and cache miss
 -       * penalities associated with that.
 -       */
 -      if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
 -              return cpu;
 -
 -      for_each_domain(cpu, sd) {
 -              if (sd->flags & SD_WAKE_IDLE) {
 -                      cpus_and(tmp, sd->span, p->cpus_allowed);
 -                      for_each_cpu_mask(i, tmp) {
 -                              if (idle_cpu(i)) {
 -                                      if (i != task_cpu(p)) {
 -                                              schedstat_inc(p,
 -                                                      se.nr_wakeups_idle);
 -                                      }
 -                                      return i;
 -                              }
 -                      }
 -              } else {
 -                      break;
 -              }
 -      }
 -      return cpu;
 -}
 -#else
 -static inline int wake_idle(int cpu, struct task_struct *p)
 -{
 -      return cpu;
 -}
 -#endif
 -
  /***
   * try_to_wake_up - wake up a thread
   * @p: the to-be-woken-up thread
@@@ -1817,6 -1499,11 +1817,6 @@@ static int try_to_wake_up(struct task_s
        unsigned long flags;
        long old_state;
        struct rq *rq;
 -#ifdef CONFIG_SMP
 -      struct sched_domain *sd, *this_sd = NULL;
 -      unsigned long load, this_load;
 -      int new_cpu;
 -#endif
  
        rq = task_rq_lock(p, &flags);
        old_state = p->state;
        if (unlikely(task_running(rq, p)))
                goto out_activate;
  
 -      new_cpu = cpu;
 -
 -      schedstat_inc(rq, ttwu_count);
 -      if (cpu == this_cpu) {
 -              schedstat_inc(rq, ttwu_local);
 -              goto out_set_cpu;
 -      }
 -
 -      for_each_domain(this_cpu, sd) {
 -              if (cpu_isset(cpu, sd->span)) {
 -                      schedstat_inc(sd, ttwu_wake_remote);
 -                      this_sd = sd;
 -                      break;
 -              }
 -      }
 -
 -      if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 -              goto out_set_cpu;
 -
 -      /*
 -       * Check for affine wakeup and passive balancing possibilities.
 -       */
 -      if (this_sd) {
 -              int idx = this_sd->wake_idx;
 -              unsigned int imbalance;
 -
 -              imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
 -
 -              load = source_load(cpu, idx);
 -              this_load = target_load(this_cpu, idx);
 -
 -              new_cpu = this_cpu; /* Wake to this CPU if we can */
 -
 -              if (this_sd->flags & SD_WAKE_AFFINE) {
 -                      unsigned long tl = this_load;
 -                      unsigned long tl_per_task;
 -
 -                      /*
 -                       * Attract cache-cold tasks on sync wakeups:
 -                       */
 -                      if (sync && !task_hot(p, rq->clock, this_sd))
 -                              goto out_set_cpu;
 -
 -                      schedstat_inc(p, se.nr_wakeups_affine_attempts);
 -                      tl_per_task = cpu_avg_load_per_task(this_cpu);
 -
 -                      /*
 -                       * If sync wakeup then subtract the (maximum possible)
 -                       * effect of the currently running task from the load
 -                       * of the current CPU:
 -                       */
 -                      if (sync)
 -                              tl -= current->se.load.weight;
 -
 -                      if ((tl <= load &&
 -                              tl + target_load(cpu, idx) <= tl_per_task) ||
 -                             100*(tl + p->se.load.weight) <= imbalance*load) {
 -                              /*
 -                               * This domain has SD_WAKE_AFFINE and
 -                               * p is cache cold in this domain, and
 -                               * there is no bad imbalance.
 -                               */
 -                              schedstat_inc(this_sd, ttwu_move_affine);
 -                              schedstat_inc(p, se.nr_wakeups_affine);
 -                              goto out_set_cpu;
 -                      }
 -              }
 -
 -              /*
 -               * Start passive balancing when half the imbalance_pct
 -               * limit is reached.
 -               */
 -              if (this_sd->flags & SD_WAKE_BALANCE) {
 -                      if (imbalance*this_load <= 100*load) {
 -                              schedstat_inc(this_sd, ttwu_move_balance);
 -                              schedstat_inc(p, se.nr_wakeups_passive);
 -                              goto out_set_cpu;
 -                      }
 -              }
 -      }
 -
 -      new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
 -out_set_cpu:
 -      new_cpu = wake_idle(new_cpu, p);
 -      if (new_cpu != cpu) {
 -              set_task_cpu(p, new_cpu);
 +      cpu = p->sched_class->select_task_rq(p, sync);
 +      if (cpu != orig_cpu) {
 +              set_task_cpu(p, cpu);
                task_rq_unlock(rq, &flags);
                /* might preempt at this point */
                rq = task_rq_lock(p, &flags);
                cpu = task_cpu(p);
        }
  
 +#ifdef CONFIG_SCHEDSTATS
 +      schedstat_inc(rq, ttwu_count);
 +      if (cpu == this_cpu)
 +              schedstat_inc(rq, ttwu_local);
 +      else {
 +              struct sched_domain *sd;
 +              for_each_domain(this_cpu, sd) {
 +                      if (cpu_isset(cpu, sd->span)) {
 +                              schedstat_inc(sd, ttwu_wake_remote);
 +                              break;
 +                      }
 +              }
 +      }
 +#endif
 +
  out_activate:
  #endif /* CONFIG_SMP */
        schedstat_inc(p, se.nr_wakeups);
  
  out_running:
        p->state = TASK_RUNNING;
 +#ifdef CONFIG_SMP
 +      if (p->sched_class->task_wake_up)
 +              p->sched_class->task_wake_up(rq, p);
 +#endif
  out:
        task_rq_unlock(rq, &flags);
  
  
  int fastcall wake_up_process(struct task_struct *p)
  {
-       return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
-                                TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
+       return try_to_wake_up(p, TASK_ALL, 0);
  }
  EXPORT_SYMBOL(wake_up_process);
  
@@@ -1929,7 -1679,7 +1928,7 @@@ static void __sched_fork(struct task_st
        p->se.wait_max                  = 0;
  #endif
  
 -      INIT_LIST_HEAD(&p->run_list);
 +      INIT_LIST_HEAD(&p->rt.run_list);
        p->se.on_rq = 0;
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
@@@ -2006,13 -1756,9 +2005,13 @@@ void fastcall wake_up_new_task(struct t
                 * management (if any):
                 */
                p->sched_class->task_new(rq, p);
 -              inc_nr_running(p, rq);
 +              inc_nr_running(rq);
        }
        check_preempt_curr(rq, p);
 +#ifdef CONFIG_SMP
 +      if (p->sched_class->task_wake_up)
 +              p->sched_class->task_wake_up(rq, p);
 +#endif
        task_rq_unlock(rq, &flags);
  }
  
@@@ -2133,11 -1879,6 +2132,11 @@@ static void finish_task_switch(struct r
        prev_state = prev->state;
        finish_arch_switch(prev);
        finish_lock_switch(rq, prev);
 +#ifdef CONFIG_SMP
 +      if (current->sched_class->post_schedule)
 +              current->sched_class->post_schedule(rq);
 +#endif
 +
        fire_sched_in_preempt_notifiers(current);
        if (mm)
                mmdrop(mm);
@@@ -2371,13 -2112,11 +2370,13 @@@ static void double_rq_unlock(struct rq 
  /*
   * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
   */
 -static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
 +static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
        __releases(this_rq->lock)
        __acquires(busiest->lock)
        __acquires(this_rq->lock)
  {
 +      int ret = 0;
 +
        if (unlikely(!irqs_disabled())) {
                /* printk() doesn't work good under rq->lock */
                spin_unlock(&this_rq->lock);
                        spin_unlock(&this_rq->lock);
                        spin_lock(&busiest->lock);
                        spin_lock(&this_rq->lock);
 +                      ret = 1;
                } else
                        spin_lock(&busiest->lock);
        }
 +      return ret;
  }
  
  /*
@@@ -3591,7 -3328,7 +3590,7 @@@ unsigned long long task_sched_runtime(s
  
        rq = task_rq_lock(p, &flags);
        ns = p->se.sum_exec_runtime;
 -      if (rq->curr == p) {
 +      if (task_current(rq, p)) {
                update_rq_clock(rq);
                delta_exec = rq->clock - p->se.exec_start;
                if ((s64)delta_exec > 0)
@@@ -3736,14 -3473,12 +3735,14 @@@ void scheduler_tick(void
        /*
         * Let rq->clock advance by at least TICK_NSEC:
         */
 -      if (unlikely(rq->clock < next_tick))
 +      if (unlikely(rq->clock < next_tick)) {
                rq->clock = next_tick;
 +              rq->clock_underflows++;
 +      }
        rq->tick_timestamp = rq->clock;
        update_cpu_load(rq);
 -      if (curr != rq->idle) /* FIXME: needed? */
 -              curr->sched_class->task_tick(rq, curr);
 +      curr->sched_class->task_tick(rq, curr, 0);
 +      update_sched_rt_period(rq);
        spin_unlock(&rq->lock);
  
  #ifdef CONFIG_SMP
@@@ -3889,8 -3624,6 +3888,8 @@@ need_resched_nonpreemptible
  
        schedule_debug(prev);
  
 +      hrtick_clear(rq);
 +
        /*
         * Do the rq-clock update outside the rq lock:
         */
                switch_count = &prev->nvcsw;
        }
  
 +#ifdef CONFIG_SMP
 +      if (prev->sched_class->pre_schedule)
 +              prev->sched_class->pre_schedule(rq, prev);
 +#endif
 +
        if (unlikely(!rq->nr_running))
                idle_balance(cpu, rq);
  
                ++*switch_count;
  
                context_switch(rq, prev, next); /* unlocks the rq */
 +              /*
 +               * the context switch might have flipped the stack from under
 +               * us, hence refresh the local variables.
 +               */
 +              cpu = smp_processor_id();
 +              rq = cpu_rq(cpu);
        } else
                spin_unlock_irq(&rq->lock);
  
 -      if (unlikely(reacquire_kernel_lock(current) < 0)) {
 -              cpu = smp_processor_id();
 -              rq = cpu_rq(cpu);
 +      hrtick_set(rq);
 +
 +      if (unlikely(reacquire_kernel_lock(current) < 0))
                goto need_resched_nonpreemptible;
 -      }
 +
        preempt_enable_no_resched();
        if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
                goto need_resched;
@@@ -3957,9 -3679,10 +3956,9 @@@ EXPORT_SYMBOL(schedule)
  asmlinkage void __sched preempt_schedule(void)
  {
        struct thread_info *ti = current_thread_info();
 -#ifdef CONFIG_PREEMPT_BKL
        struct task_struct *task = current;
        int saved_lock_depth;
 -#endif
 +
        /*
         * If there is a non-zero preempt_count or interrupts are disabled,
         * we do not want to preempt the current task. Just return..
                 * clear ->lock_depth so that schedule() doesnt
                 * auto-release the semaphore:
                 */
 -#ifdef CONFIG_PREEMPT_BKL
                saved_lock_depth = task->lock_depth;
                task->lock_depth = -1;
 -#endif
                schedule();
 -#ifdef CONFIG_PREEMPT_BKL
                task->lock_depth = saved_lock_depth;
 -#endif
                sub_preempt_count(PREEMPT_ACTIVE);
  
                /*
@@@ -3999,9 -3726,10 +3998,9 @@@ EXPORT_SYMBOL(preempt_schedule)
  asmlinkage void __sched preempt_schedule_irq(void)
  {
        struct thread_info *ti = current_thread_info();
 -#ifdef CONFIG_PREEMPT_BKL
        struct task_struct *task = current;
        int saved_lock_depth;
 -#endif
 +
        /* Catch callers which need to be fixed */
        BUG_ON(ti->preempt_count || !irqs_disabled());
  
                 * clear ->lock_depth so that schedule() doesnt
                 * auto-release the semaphore:
                 */
 -#ifdef CONFIG_PREEMPT_BKL
                saved_lock_depth = task->lock_depth;
                task->lock_depth = -1;
 -#endif
                local_irq_enable();
                schedule();
                local_irq_disable();
 -#ifdef CONFIG_PREEMPT_BKL
                task->lock_depth = saved_lock_depth;
 -#endif
                sub_preempt_count(PREEMPT_ACTIVE);
  
                /*
@@@ -4124,8 -3856,7 +4123,7 @@@ void complete(struct completion *x
  
        spin_lock_irqsave(&x->wait.lock, flags);
        x->done++;
-       __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
-                        1, 0, NULL);
+       __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
        spin_unlock_irqrestore(&x->wait.lock, flags);
  }
  EXPORT_SYMBOL(complete);
@@@ -4136,8 -3867,7 +4134,7 @@@ void complete_all(struct completion *x
  
        spin_lock_irqsave(&x->wait.lock, flags);
        x->done += UINT_MAX/2;
-       __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
-                        0, 0, NULL);
+       __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
        spin_unlock_irqrestore(&x->wait.lock, flags);
  }
  EXPORT_SYMBOL(complete_all);
@@@ -4151,8 -3881,10 +4148,10 @@@ do_wait_for_common(struct completion *x
                wait.flags |= WQ_FLAG_EXCLUSIVE;
                __add_wait_queue_tail(&x->wait, &wait);
                do {
-                       if (state == TASK_INTERRUPTIBLE &&
-                           signal_pending(current)) {
+                       if ((state == TASK_INTERRUPTIBLE &&
+                            signal_pending(current)) ||
+                           (state == TASK_KILLABLE &&
+                            fatal_signal_pending(current))) {
                                __remove_wait_queue(&x->wait, &wait);
                                return -ERESTARTSYS;
                        }
@@@ -4212,6 -3944,15 +4211,15 @@@ wait_for_completion_interruptible_timeo
  }
  EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
  
+ int __sched wait_for_completion_killable(struct completion *x)
+ {
+       long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
+       if (t == -ERESTARTSYS)
+               return t;
+       return 0;
+ }
+ EXPORT_SYMBOL(wait_for_completion_killable);
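A caller uses the new helper much like the interruptible variant, treating -ERESTARTSYS as "a fatal signal is pending". A minimal hypothetical example (my_dev and its fields are illustrative and not part of this diff):

	/* Hypothetical caller: block until dev->done completes, but let SIGKILL through */
	static int my_dev_wait(struct my_dev *dev)
	{
		int ret = wait_for_completion_killable(&dev->done);

		if (ret)	/* -ERESTARTSYS: the task has a fatal signal pending */
			return ret;
		return dev->status;
	}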
  static long __sched
  sleep_on_common(wait_queue_head_t *q, int state, long timeout)
  {
@@@ -4275,7 -4016,6 +4283,7 @@@ void rt_mutex_setprio(struct task_struc
        unsigned long flags;
        int oldprio, on_rq, running;
        struct rq *rq;
 +      const struct sched_class *prev_class = p->sched_class;
  
        BUG_ON(prio < 0 || prio > MAX_PRIO);
  
  
        oldprio = p->prio;
        on_rq = p->se.on_rq;
 -      running = task_running(rq, p);
 +      running = task_current(rq, p);
        if (on_rq) {
                dequeue_task(rq, p, 0);
                if (running)
        if (on_rq) {
                if (running)
                        p->sched_class->set_curr_task(rq);
 +
                enqueue_task(rq, p, 0);
 -              /*
 -               * Reschedule if we are currently running on this runqueue and
 -               * our priority decreased, or if we are not currently running on
 -               * this runqueue and our priority is higher than the current's
 -               */
 -              if (running) {
 -                      if (p->prio > oldprio)
 -                              resched_task(rq->curr);
 -              } else {
 -                      check_preempt_curr(rq, p);
 -              }
 +
 +              check_class_changed(rq, p, prev_class, oldprio, running);
        }
        task_rq_unlock(rq, &flags);
  }
@@@ -4336,8 -4084,10 +4344,8 @@@ void set_user_nice(struct task_struct *
                goto out_unlock;
        }
        on_rq = p->se.on_rq;
 -      if (on_rq) {
 +      if (on_rq)
                dequeue_task(rq, p, 0);
 -              dec_load(rq, p);
 -      }
  
        p->static_prio = NICE_TO_PRIO(nice);
        set_load_weight(p);
  
        if (on_rq) {
                enqueue_task(rq, p, 0);
 -              inc_load(rq, p);
                /*
                 * If the task increased its priority or is running and
                 * lowered its priority, then reschedule its CPU:
@@@ -4504,7 -4255,6 +4512,7 @@@ int sched_setscheduler(struct task_stru
  {
        int retval, oldprio, oldpolicy = -1, on_rq, running;
        unsigned long flags;
 +      const struct sched_class *prev_class = p->sched_class;
        struct rq *rq;
  
        /* may grab non-irq protected spin_locks */
@@@ -4585,7 -4335,7 +4593,7 @@@ recheck
        }
        update_rq_clock(rq);
        on_rq = p->se.on_rq;
 -      running = task_running(rq, p);
 +      running = task_current(rq, p);
        if (on_rq) {
                deactivate_task(rq, p, 0);
                if (running)
        if (on_rq) {
                if (running)
                        p->sched_class->set_curr_task(rq);
 +
                activate_task(rq, p, 0);
 -              /*
 -               * Reschedule if we are currently running on this runqueue and
 -               * our priority decreased, or if we are not currently running on
 -               * this runqueue and our priority is higher than the current's
 -               */
 -              if (running) {
 -                      if (p->prio > oldprio)
 -                              resched_task(rq->curr);
 -              } else {
 -                      check_preempt_curr(rq, p);
 -              }
 +
 +              check_class_changed(rq, p, prev_class, oldprio, running);
        }
        __task_rq_unlock(rq);
        spin_unlock_irqrestore(&p->pi_lock, flags);
@@@ -4729,13 -4487,13 +4737,13 @@@ long sched_setaffinity(pid_t pid, cpuma
        struct task_struct *p;
        int retval;
  
 -      mutex_lock(&sched_hotcpu_mutex);
 +      get_online_cpus();
        read_lock(&tasklist_lock);
  
        p = find_process_by_pid(pid);
        if (!p) {
                read_unlock(&tasklist_lock);
 -              mutex_unlock(&sched_hotcpu_mutex);
 +              put_online_cpus();
                return -ESRCH;
        }
  
        }
  out_unlock:
        put_task_struct(p);
 -      mutex_unlock(&sched_hotcpu_mutex);
 +      put_online_cpus();
        return retval;
  }
  
@@@ -4832,7 -4590,7 +4840,7 @@@ long sched_getaffinity(pid_t pid, cpuma
        struct task_struct *p;
        int retval;
  
 -      mutex_lock(&sched_hotcpu_mutex);
 +      get_online_cpus();
        read_lock(&tasklist_lock);
  
        retval = -ESRCH;
  
  out_unlock:
        read_unlock(&tasklist_lock);
 -      mutex_unlock(&sched_hotcpu_mutex);
 +      put_online_cpus();
  
        return retval;
  }
@@@ -4922,8 -4680,7 +4930,8 @@@ static void __cond_resched(void
        } while (need_resched());
  }
  
 -int __sched cond_resched(void)
 +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
 +int __sched _cond_resched(void)
  {
        if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
                                        system_state == SYSTEM_RUNNING) {
        }
        return 0;
  }
 -EXPORT_SYMBOL(cond_resched);
 +EXPORT_SYMBOL(_cond_resched);
 +#endif
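The rename to _cond_resched() pairs with a wrapper on the include/linux/sched.h side of the merge, so that configurations which already preempt everywhere can skip the call; a sketch of the assumed counterpart (not shown in this excerpt):

	/* Assumed sched.h counterpart, for illustration only */
	#ifdef CONFIG_PREEMPT
	static inline int cond_resched(void) { return 0; }
	#else
	extern int _cond_resched(void);
	#define cond_resched() _cond_resched()
	#endif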
  
  /*
   * cond_resched_lock() - if a reschedule is pending, drop the given lock,
   */
  int cond_resched_lock(spinlock_t *lock)
  {
 +      int resched = need_resched() && system_state == SYSTEM_RUNNING;
        int ret = 0;
  
 -      if (need_lockbreak(lock)) {
 +      if (spin_needbreak(lock) || resched) {
                spin_unlock(lock);
 -              cpu_relax();
 -              ret = 1;
 -              spin_lock(lock);
 -      }
 -      if (need_resched() && system_state == SYSTEM_RUNNING) {
 -              spin_release(&lock->dep_map, 1, _THIS_IP_);
 -              _raw_spin_unlock(lock);
 -              preempt_enable_no_resched();
 -              __cond_resched();
 +              if (resched && need_resched())
 +                      __cond_resched();
 +              else
 +                      cpu_relax();
                ret = 1;
                spin_lock(lock);
        }
@@@ -5127,7 -4887,7 +5135,7 @@@ out_unlock
  
  static const char stat_nam[] = "RSDTtZX";
  
 -static void show_task(struct task_struct *p)
 +void sched_show_task(struct task_struct *p)
  {
        unsigned long free = 0;
        unsigned state;
        }
  #endif
        printk(KERN_CONT "%5lu %5d %6d\n", free,
 -              task_pid_nr(p), task_pid_nr(p->parent));
 +              task_pid_nr(p), task_pid_nr(p->real_parent));
  
 -      if (state != TASK_RUNNING)
 -              show_stack(p, NULL);
 +      show_stack(p, NULL);
  }
  
  void show_state_filter(unsigned long state_filter)
                 */
                touch_nmi_watchdog();
                if (!state_filter || (p->state & state_filter))
 -                      show_task(p);
 +                      sched_show_task(p);
        } while_each_thread(g, p);
  
        touch_all_softlockup_watchdogs();
@@@ -5228,8 -4989,11 +5236,8 @@@ void __cpuinit init_idle(struct task_st
        spin_unlock_irqrestore(&rq->lock, flags);
  
        /* Set the preempt count _outside_ the spinlocks! */
 -#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
 -      task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
 -#else
        task_thread_info(idle)->preempt_count = 0;
 -#endif
 +
        /*
         * The idle tasks have their own, simple scheduling class:
         */
@@@ -5310,13 -5074,7 +5318,13 @@@ int set_cpus_allowed(struct task_struc
                goto out;
        }
  
 -      p->cpus_allowed = new_mask;
 +      if (p->sched_class->set_cpus_allowed)
 +              p->sched_class->set_cpus_allowed(p, &new_mask);
 +      else {
 +              p->cpus_allowed = new_mask;
 +              p->rt.nr_cpus_allowed = cpus_weight(new_mask);
 +      }
 +
        /* Can the task run on the task's current CPU? If so, we're done */
        if (cpu_isset(task_cpu(p), new_mask))
                goto out;
@@@ -5808,6 -5566,9 +5816,6 @@@ migration_call(struct notifier_block *n
        struct rq *rq;
  
        switch (action) {
 -      case CPU_LOCK_ACQUIRE:
 -              mutex_lock(&sched_hotcpu_mutex);
 -              break;
  
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
        case CPU_ONLINE_FROZEN:
                /* Strictly unnecessary, as first user will wake it. */
                wake_up_process(cpu_rq(cpu)->migration_thread);
 +
 +              /* Update our root-domain */
 +              rq = cpu_rq(cpu);
 +              spin_lock_irqsave(&rq->lock, flags);
 +              if (rq->rd) {
 +                      BUG_ON(!cpu_isset(cpu, rq->rd->span));
 +                      cpu_set(cpu, rq->rd->online);
 +              }
 +              spin_unlock_irqrestore(&rq->lock, flags);
                break;
  
  #ifdef CONFIG_HOTPLUG_CPU
                }
                spin_unlock_irq(&rq->lock);
                break;
 -#endif
 -      case CPU_LOCK_RELEASE:
 -              mutex_unlock(&sched_hotcpu_mutex);
 +
 +      case CPU_DOWN_PREPARE:
 +              /* Update our root-domain */
 +              rq = cpu_rq(cpu);
 +              spin_lock_irqsave(&rq->lock, flags);
 +              if (rq->rd) {
 +                      BUG_ON(!cpu_isset(cpu, rq->rd->span));
 +                      cpu_clear(cpu, rq->rd->online);
 +              }
 +              spin_unlock_irqrestore(&rq->lock, flags);
                break;
 +#endif
        }
        return NOTIFY_OK;
  }
@@@ -6084,76 -5828,11 +6092,76 @@@ sd_parent_degenerate(struct sched_domai
        return 1;
  }
  
 +static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 +{
 +      unsigned long flags;
 +      const struct sched_class *class;
 +
 +      spin_lock_irqsave(&rq->lock, flags);
 +
 +      if (rq->rd) {
 +              struct root_domain *old_rd = rq->rd;
 +
 +              for (class = sched_class_highest; class; class = class->next) {
 +                      if (class->leave_domain)
 +                              class->leave_domain(rq);
 +              }
 +
 +              cpu_clear(rq->cpu, old_rd->span);
 +              cpu_clear(rq->cpu, old_rd->online);
 +
 +              if (atomic_dec_and_test(&old_rd->refcount))
 +                      kfree(old_rd);
 +      }
 +
 +      atomic_inc(&rd->refcount);
 +      rq->rd = rd;
 +
 +      cpu_set(rq->cpu, rd->span);
 +      if (cpu_isset(rq->cpu, cpu_online_map))
 +              cpu_set(rq->cpu, rd->online);
 +
 +      for (class = sched_class_highest; class; class = class->next) {
 +              if (class->join_domain)
 +                      class->join_domain(rq);
 +      }
 +
 +      spin_unlock_irqrestore(&rq->lock, flags);
 +}
 +
 +static void init_rootdomain(struct root_domain *rd)
 +{
 +      memset(rd, 0, sizeof(*rd));
 +
 +      cpus_clear(rd->span);
 +      cpus_clear(rd->online);
 +}
 +
 +static void init_defrootdomain(void)
 +{
 +      init_rootdomain(&def_root_domain);
 +      atomic_set(&def_root_domain.refcount, 1);
 +}
 +
 +static struct root_domain *alloc_rootdomain(void)
 +{
 +      struct root_domain *rd;
 +
 +      rd = kmalloc(sizeof(*rd), GFP_KERNEL);
 +      if (!rd)
 +              return NULL;
 +
 +      init_rootdomain(rd);
 +
 +      return rd;
 +}
 +
  /*
 - * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must
 + * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
   * hold the hotplug lock.
   */
 -static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 +static void
 +cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
  {
        struct rq *rq = cpu_rq(cpu);
        struct sched_domain *tmp;
  
        sched_domain_debug(sd, cpu);
  
 +      rq_attach_root(rq, rd);
        rcu_assign_pointer(rq->sd, sd);
  }
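rq_attach_root() manipulates three root_domain members that this excerpt never defines; the structure is assumed to look roughly like this (other fields omitted):

	/* Assumed shape of struct root_domain, declared earlier in sched.c (not in this excerpt) */
	struct root_domain {
		atomic_t	refcount;	/* freed in rq_attach_root() when the last rq detaches */
		cpumask_t	span;		/* cpus covered by this root domain */
		cpumask_t	online;		/* subset of span that is currently online */
	};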
  
@@@ -6547,7 -6225,6 +6555,7 @@@ static void init_sched_groups_power(in
  static int build_sched_domains(const cpumask_t *cpu_map)
  {
        int i;
 +      struct root_domain *rd;
  #ifdef CONFIG_NUMA
        struct sched_group **sched_group_nodes = NULL;
        int sd_allnodes = 0;
        sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
  #endif
  
 +      rd = alloc_rootdomain();
 +      if (!rd) {
 +              printk(KERN_WARNING "Cannot alloc root domain\n");
 +              return -ENOMEM;
 +      }
 +
        /*
         * Set up domains for cpus specified by the cpu_map.
         */
  #else
                sd = &per_cpu(phys_domains, i);
  #endif
 -              cpu_attach_domain(sd, i);
 +              cpu_attach_domain(sd, rd, i);
        }
  
        return 0;
@@@ -6844,7 -6515,7 +6852,7 @@@ static void detach_destroy_domains(cons
        unregister_sched_domain_sysctl();
  
        for_each_cpu_mask(i, *cpu_map)
 -              cpu_attach_domain(NULL, i);
 +              cpu_attach_domain(NULL, &def_root_domain, i);
        synchronize_sched();
        arch_destroy_sched_domains(cpu_map);
  }
@@@ -6874,8 -6545,6 +6882,8 @@@ void partition_sched_domains(int ndoms_
  {
        int i, j;
  
 +      lock_doms_cur();
 +
        /* always unregister in case we don't destroy any domains */
        unregister_sched_domain_sysctl();
  
@@@ -6916,8 -6585,6 +6924,8 @@@ match2
        ndoms_cur = ndoms_new;
  
        register_sched_domain_sysctl();
 +
 +      unlock_doms_cur();
  }
  
  #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@@ -6925,10 -6592,10 +6933,10 @@@ static int arch_reinit_sched_domains(vo
  {
        int err;
  
 -      mutex_lock(&sched_hotcpu_mutex);
 +      get_online_cpus();
        detach_destroy_domains(&cpu_online_map);
        err = arch_init_sched_domains(&cpu_online_map);
 -      mutex_unlock(&sched_hotcpu_mutex);
 +      put_online_cpus();
  
        return err;
  }
@@@ -7039,12 -6706,12 +7047,12 @@@ void __init sched_init_smp(void
  {
        cpumask_t non_isolated_cpus;
  
 -      mutex_lock(&sched_hotcpu_mutex);
 +      get_online_cpus();
        arch_init_sched_domains(&cpu_online_map);
        cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
        if (cpus_empty(non_isolated_cpus))
                cpu_set(smp_processor_id(), non_isolated_cpus);
 -      mutex_unlock(&sched_hotcpu_mutex);
 +      put_online_cpus();
        /* XXX: Theoretical race here - CPU may be hotplugged now */
        hotcpu_notifier(update_sched_domains, 0);
  
        if (set_cpus_allowed(current, non_isolated_cpus) < 0)
                BUG();
        sched_init_granularity();
 +
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      if (nr_cpu_ids == 1)
 +              return;
 +
 +      lb_monitor_task = kthread_create(load_balance_monitor, NULL,
 +                                       "group_balance");
 +      if (!IS_ERR(lb_monitor_task)) {
 +              lb_monitor_task->flags |= PF_NOFREEZE;
 +              wake_up_process(lb_monitor_task);
 +      } else {
 +              printk(KERN_ERR "Could not create load balance monitor thread"
 +                      " (error = %ld)\n", PTR_ERR(lb_monitor_task));
 +      }
 +#endif
  }
  #else
  void __init sched_init_smp(void)
@@@ -7091,87 -6743,13 +7099,87 @@@ static void init_cfs_rq(struct cfs_rq *
        cfs_rq->min_vruntime = (u64)(-(1LL << 20));
  }
  
 +static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 +{
 +      struct rt_prio_array *array;
 +      int i;
 +
 +      array = &rt_rq->active;
 +      for (i = 0; i < MAX_RT_PRIO; i++) {
 +              INIT_LIST_HEAD(array->queue + i);
 +              __clear_bit(i, array->bitmap);
 +      }
 +      /* delimiter for bitsearch: */
 +      __set_bit(MAX_RT_PRIO, array->bitmap);
 +
 +#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
 +      rt_rq->highest_prio = MAX_RT_PRIO;
 +#endif
 +#ifdef CONFIG_SMP
 +      rt_rq->rt_nr_migratory = 0;
 +      rt_rq->overloaded = 0;
 +#endif
 +
 +      rt_rq->rt_time = 0;
 +      rt_rq->rt_throttled = 0;
 +
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      rt_rq->rq = rq;
 +#endif
 +}
 +
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
 +              struct cfs_rq *cfs_rq, struct sched_entity *se,
 +              int cpu, int add)
 +{
 +      tg->cfs_rq[cpu] = cfs_rq;
 +      init_cfs_rq(cfs_rq, rq);
 +      cfs_rq->tg = tg;
 +      if (add)
 +              list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 +
 +      tg->se[cpu] = se;
 +      se->cfs_rq = &rq->cfs;
 +      se->my_q = cfs_rq;
 +      se->load.weight = tg->shares;
 +      se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
 +      se->parent = NULL;
 +}
 +
 +static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
 +              struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
 +              int cpu, int add)
 +{
 +      tg->rt_rq[cpu] = rt_rq;
 +      init_rt_rq(rt_rq, rq);
 +      rt_rq->tg = tg;
 +      rt_rq->rt_se = rt_se;
 +      if (add)
 +              list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 +
 +      tg->rt_se[cpu] = rt_se;
 +      rt_se->rt_rq = &rq->rt;
 +      rt_se->my_q = rt_rq;
 +      rt_se->parent = NULL;
 +      INIT_LIST_HEAD(&rt_se->run_list);
 +}
 +#endif
 +
  void __init sched_init(void)
  {
        int highest_cpu = 0;
        int i, j;
  
 +#ifdef CONFIG_SMP
 +      init_defrootdomain();
 +#endif
 +
 +#ifdef CONFIG_FAIR_GROUP_SCHED
 +      list_add(&init_task_group.list, &task_groups);
 +#endif
 +
        for_each_possible_cpu(i) {
 -              struct rt_prio_array *array;
                struct rq *rq;
  
                rq = cpu_rq(i);
                rq->nr_running = 0;
                rq->clock = 1;
                init_cfs_rq(&rq->cfs, rq);
 +              init_rt_rq(&rq->rt, rq);
  #ifdef CONFIG_FAIR_GROUP_SCHED
 -              INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 -              {
 -                      struct cfs_rq *cfs_rq = &per_cpu(init_cfs_rq, i);
 -                      struct sched_entity *se =
 -                                       &per_cpu(init_sched_entity, i);
 -
 -                      init_cfs_rq_p[i] = cfs_rq;
 -                      init_cfs_rq(cfs_rq, rq);
 -                      cfs_rq->tg = &init_task_group;
 -                      list_add(&cfs_rq->leaf_cfs_rq_list,
 -                                                       &rq->leaf_cfs_rq_list);
 -
 -                      init_sched_entity_p[i] = se;
 -                      se->cfs_rq = &rq->cfs;
 -                      se->my_q = cfs_rq;
 -                      se->load.weight = init_task_group_load;
 -                      se->load.inv_weight =
 -                               div64_64(1ULL<<32, init_task_group_load);
 -                      se->parent = NULL;
 -              }
                init_task_group.shares = init_task_group_load;
 -              spin_lock_init(&init_task_group.lock);
 +              INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
 +              init_tg_cfs_entry(rq, &init_task_group,
 +                              &per_cpu(init_cfs_rq, i),
 +                              &per_cpu(init_sched_entity, i), i, 1);
 +
 +              init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
 +              INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
 +              init_tg_rt_entry(rq, &init_task_group,
 +                              &per_cpu(init_rt_rq, i),
 +                              &per_cpu(init_sched_rt_entity, i), i, 1);
  #endif
 +              rq->rt_period_expire = 0;
 +              rq->rt_throttled = 0;
  
                for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
                        rq->cpu_load[j] = 0;
  #ifdef CONFIG_SMP
                rq->sd = NULL;
 +              rq->rd = NULL;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
                rq->push_cpu = 0;
                rq->cpu = i;
                rq->migration_thread = NULL;
                INIT_LIST_HEAD(&rq->migration_queue);
 +              rq_attach_root(rq, &def_root_domain);
  #endif
 +              init_rq_hrtick(rq);
                atomic_set(&rq->nr_iowait, 0);
 -
 -              array = &rq->rt.active;
 -              for (j = 0; j < MAX_RT_PRIO; j++) {
 -                      INIT_LIST_HEAD(array->queue + j);
 -                      __clear_bit(j, array->bitmap);
 -              }
                highest_cpu = i;
 -              /* delimiter for bitsearch: */
 -              __set_bit(MAX_RT_PRIO, array->bitmap);
        }
  
        set_load_weight(&init_task);
@@@ -7381,187 -6972,12 +7389,187 @@@ void set_curr_task(int cpu, struct task
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
  
 +#ifdef CONFIG_SMP
 +/*
 + * distribute shares of all task groups among their schedulable entities,
 + * to reflect load distribution across cpus.
 + */
 +static int rebalance_shares(struct sched_domain *sd, int this_cpu)
 +{
 +      struct cfs_rq *cfs_rq;
 +      struct rq *rq = cpu_rq(this_cpu);
 +      cpumask_t sdspan = sd->span;
 +      int balanced = 1;
 +
 +      /* Walk through all the task groups that we have */
 +      for_each_leaf_cfs_rq(rq, cfs_rq) {
 +              int i;
 +              unsigned long total_load = 0, total_shares;
 +              struct task_group *tg = cfs_rq->tg;
 +
 +              /* Gather total task load of this group across cpus */
 +              for_each_cpu_mask(i, sdspan)
 +                      total_load += tg->cfs_rq[i]->load.weight;
 +
 +              /* Nothing to do if this group has no load */
 +              if (!total_load)
 +                      continue;
 +
 +              /*
 +               * tg->shares represents the number of cpu shares the task group
 +               * is eligible to hold on a single cpu. On N cpus, it is
 +               * eligible to hold (N * tg->shares) number of cpu shares.
 +               * eligible to hold (N * tg->shares) cpu shares.
 +              total_shares = tg->shares * cpus_weight(sdspan);
 +
 +              /*
 +               * redistribute total_shares across cpus as per the task load
 +               * distribution.
 +               */
 +              for_each_cpu_mask(i, sdspan) {
 +                      unsigned long local_load, local_shares;
 +
 +                      local_load = tg->cfs_rq[i]->load.weight;
 +                      local_shares = (local_load * total_shares) / total_load;
 +                      if (!local_shares)
 +                              local_shares = MIN_GROUP_SHARES;
 +                      if (local_shares == tg->se[i]->load.weight)
 +                              continue;
 +
 +                      spin_lock_irq(&cpu_rq(i)->lock);
 +                      set_se_shares(tg->se[i], local_shares);
 +                      spin_unlock_irq(&cpu_rq(i)->lock);
 +                      balanced = 0;
 +              }
 +      }
 +
 +      return balanced;
 +}
 +
 +/*
 + * How frequently should we rebalance_shares() across cpus?
 + *
 + * The more frequently we rebalance shares, the more accurate the fairness
 + * of cpu bandwidth distribution between task groups is. However, a higher
 + * frequency also implies increased scheduling overhead.
 + *
 + * sysctl_sched_min_bal_int_shares represents the minimum interval between
 + * consecutive calls to rebalance_shares() in the same sched domain.
 + *
 + * sysctl_sched_max_bal_int_shares represents the maximum interval between
 + * consecutive calls to rebalance_shares() in the same sched domain.
 + *
 + * These settings allow for the appropriate trade-off between accuracy of
 + * fairness and the associated overhead.
 + *
 + */
 +
 +/* default: 8ms, units: milliseconds */
 +const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
 +
 +/* default: 128ms, units: milliseconds */
 +const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
 +
 +/* kernel thread that runs rebalance_shares() periodically */
 +static int load_balance_monitor(void *unused)
 +{
 +      unsigned int timeout = sysctl_sched_min_bal_int_shares;
 +      struct sched_param schedparm;
 +      int ret;
 +
 +      /*
 +       * We don't want this thread's execution to be limited by the shares
 +       * assigned to default group (init_task_group). Hence make it run
 +       * assigned to the default group (init_task_group). Hence make it run
 +       */
 +      schedparm.sched_priority = 1;
 +      ret = sched_setscheduler(current, SCHED_RR, &schedparm);
 +      if (ret)
 +              printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
 +                              " monitor thread (error = %d)\n", ret);
 +
 +      while (!kthread_should_stop()) {
 +              int i, cpu, balanced = 1;
 +
 +              /* Prevent cpus going down or coming up */
 +              get_online_cpus();
 +              /* lockout changes to doms_cur[] array */
 +              /* lock out changes to the doms_cur[] array */
 +              /*
 +               * Enter an RCU read-side critical section to safely walk rq->sd
 +               * chain on various cpus and to walk task group list
 +               * (rq->leaf_cfs_rq_list) in rebalance_shares().
 +               */
 +              rcu_read_lock();
 +
 +              for (i = 0; i < ndoms_cur; i++) {
 +                      cpumask_t cpumap = doms_cur[i];
 +                      struct sched_domain *sd = NULL, *sd_prev = NULL;
 +
 +                      cpu = first_cpu(cpumap);
 +
 +                      /* Find the highest domain at which to balance shares */
 +                      for_each_domain(cpu, sd) {
 +                              if (!(sd->flags & SD_LOAD_BALANCE))
 +                                      continue;
 +                              sd_prev = sd;
 +                      }
 +
 +                      sd = sd_prev;
 +                      /* sd == NULL? No load balancing required in this domain */
 +                      if (!sd)
 +                              continue;
 +
 +                      balanced &= rebalance_shares(sd, cpu);
 +              }
 +
 +              rcu_read_unlock();
 +
 +              unlock_doms_cur();
 +              put_online_cpus();
 +
 +              if (!balanced)
 +                      timeout = sysctl_sched_min_bal_int_shares;
 +              else if (timeout < sysctl_sched_max_bal_int_shares)
 +                      timeout *= 2;
 +
 +              msleep_interruptible(timeout);
 +      }
 +
 +      return 0;
 +}
 +#endif        /* CONFIG_SMP */
 +
 +static void free_sched_group(struct task_group *tg)
 +{
 +      int i;
 +
 +      for_each_possible_cpu(i) {
 +              if (tg->cfs_rq)
 +                      kfree(tg->cfs_rq[i]);
 +              if (tg->se)
 +                      kfree(tg->se[i]);
 +              if (tg->rt_rq)
 +                      kfree(tg->rt_rq[i]);
 +              if (tg->rt_se)
 +                      kfree(tg->rt_se[i]);
 +      }
 +
 +      kfree(tg->cfs_rq);
 +      kfree(tg->se);
 +      kfree(tg->rt_rq);
 +      kfree(tg->rt_se);
 +      kfree(tg);
 +}
 +
  /* allocate runqueue etc for a new task group */
  struct task_group *sched_create_group(void)
  {
        struct task_group *tg;
        struct cfs_rq *cfs_rq;
        struct sched_entity *se;
 +      struct rt_rq *rt_rq;
 +      struct sched_rt_entity *rt_se;
        struct rq *rq;
        int i;
  
        tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
        if (!tg->se)
                goto err;
 +      tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
 +      if (!tg->rt_rq)
 +              goto err;
 +      tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
 +      if (!tg->rt_se)
 +              goto err;
 +
 +      tg->shares = NICE_0_LOAD;
 +      tg->rt_ratio = 0; /* XXX */
  
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
  
 -              cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
 -                                                       cpu_to_node(i));
 +              cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
 +                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                if (!cfs_rq)
                        goto err;
  
 -              se = kmalloc_node(sizeof(struct sched_entity), GFP_KERNEL,
 -                                                      cpu_to_node(i));
 +              se = kmalloc_node(sizeof(struct sched_entity),
 +                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                if (!se)
                        goto err;
  
 -              memset(cfs_rq, 0, sizeof(struct cfs_rq));
 -              memset(se, 0, sizeof(struct sched_entity));
 +              rt_rq = kmalloc_node(sizeof(struct rt_rq),
 +                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 +              if (!rt_rq)
 +                      goto err;
  
 -              tg->cfs_rq[i] = cfs_rq;
 -              init_cfs_rq(cfs_rq, rq);
 -              cfs_rq->tg = tg;
 +              rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
 +                              GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
 +              if (!rt_se)
 +                      goto err;
  
 -              tg->se[i] = se;
 -              se->cfs_rq = &rq->cfs;
 -              se->my_q = cfs_rq;
 -              se->load.weight = NICE_0_LOAD;
 -              se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
 -              se->parent = NULL;
 +              init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
 +              init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
        }
  
 +      lock_task_group_list();
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
                cfs_rq = tg->cfs_rq[i];
                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 +              rt_rq = tg->rt_rq[i];
 +              list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
        }
 -
 -      tg->shares = NICE_0_LOAD;
 -      spin_lock_init(&tg->lock);
 +      list_add_rcu(&tg->list, &task_groups);
 +      unlock_task_group_list();
  
        return tg;
  
  err:
 -      for_each_possible_cpu(i) {
 -              if (tg->cfs_rq)
 -                      kfree(tg->cfs_rq[i]);
 -              if (tg->se)
 -                      kfree(tg->se[i]);
 -      }
 -      kfree(tg->cfs_rq);
 -      kfree(tg->se);
 -      kfree(tg);
 -
 +      free_sched_group(tg);
        return ERR_PTR(-ENOMEM);
  }
  
  /* rcu callback to free various structures associated with a task group */
 -static void free_sched_group(struct rcu_head *rhp)
 +static void free_sched_group_rcu(struct rcu_head *rhp)
  {
 -      struct task_group *tg = container_of(rhp, struct task_group, rcu);
 -      struct cfs_rq *cfs_rq;
 -      struct sched_entity *se;
 -      int i;
 -
        /* now it should be safe to free those cfs_rqs */
 -      for_each_possible_cpu(i) {
 -              cfs_rq = tg->cfs_rq[i];
 -              kfree(cfs_rq);
 -
 -              se = tg->se[i];
 -              kfree(se);
 -      }
 -
 -      kfree(tg->cfs_rq);
 -      kfree(tg->se);
 -      kfree(tg);
 +      free_sched_group(container_of(rhp, struct task_group, rcu));
  }
  
  /* Destroy runqueue etc associated with a task group */
  void sched_destroy_group(struct task_group *tg)
  {
        struct cfs_rq *cfs_rq = NULL;
 +      struct rt_rq *rt_rq = NULL;
        int i;
  
 +      lock_task_group_list();
        for_each_possible_cpu(i) {
                cfs_rq = tg->cfs_rq[i];
                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
 +              rt_rq = tg->rt_rq[i];
 +              list_del_rcu(&rt_rq->leaf_rt_rq_list);
        }
 +      list_del_rcu(&tg->list);
 +      unlock_task_group_list();
  
        BUG_ON(!cfs_rq);
  
        /* wait for possible concurrent references to cfs_rqs complete */
 -      call_rcu(&tg->rcu, free_sched_group);
 +      call_rcu(&tg->rcu, free_sched_group_rcu);
  }
  
  /* change task's runqueue when it moves between groups.
@@@ -7673,9 -7097,14 +7681,9 @@@ void sched_move_task(struct task_struc
  
        rq = task_rq_lock(tsk, &flags);
  
 -      if (tsk->sched_class != &fair_sched_class) {
 -              set_task_cfs_rq(tsk, task_cpu(tsk));
 -              goto done;
 -      }
 -
        update_rq_clock(rq);
  
 -      running = task_running(rq, tsk);
 +      running = task_current(rq, tsk);
        on_rq = tsk->se.on_rq;
  
        if (on_rq) {
                        tsk->sched_class->put_prev_task(rq, tsk);
        }
  
 -      set_task_cfs_rq(tsk, task_cpu(tsk));
 +      set_task_rq(tsk, task_cpu(tsk));
  
        if (on_rq) {
                if (unlikely(running))
                enqueue_task(rq, tsk, 0);
        }
  
 -done:
        task_rq_unlock(rq, &flags);
  }
  
 +/* rq->lock to be locked by caller */
  static void set_se_shares(struct sched_entity *se, unsigned long shares)
  {
        struct cfs_rq *cfs_rq = se->cfs_rq;
        struct rq *rq = cfs_rq->rq;
        int on_rq;
  
 -      spin_lock_irq(&rq->lock);
 +      if (!shares)
 +              shares = MIN_GROUP_SHARES;
  
        on_rq = se->on_rq;
 -      if (on_rq)
 +      if (on_rq) {
                dequeue_entity(cfs_rq, se, 0);
 +              dec_cpu_load(rq, se->load.weight);
 +      }
  
        se->load.weight = shares;
        se->load.inv_weight = div64_64((1ULL<<32), shares);
  
 -      if (on_rq)
 +      if (on_rq) {
                enqueue_entity(cfs_rq, se, 0);
 -
 -      spin_unlock_irq(&rq->lock);
 +              inc_cpu_load(rq, se->load.weight);
 +      }
  }
  
  int sched_group_set_shares(struct task_group *tg, unsigned long shares)
  {
        int i;
 +      struct cfs_rq *cfs_rq;
 +      struct rq *rq;
  
 -      spin_lock(&tg->lock);
 +      lock_task_group_list();
        if (tg->shares == shares)
                goto done;
  
 +      if (shares < MIN_GROUP_SHARES)
 +              shares = MIN_GROUP_SHARES;
 +
 +      /*
 +       * Prevent any load balance activity (rebalance_shares,
 +       * load_balance_fair) from referring to this group first,
 +       * by taking it off the rq->leaf_cfs_rq_list on each cpu.
 +       */
 +      for_each_possible_cpu(i) {
 +              cfs_rq = tg->cfs_rq[i];
 +              list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
 +      }
 +
 +      /* wait for any ongoing reference to this group to finish */
 +      synchronize_sched();
 +
 +      /*
 +       * Now we are free to modify the group's shares on each cpu
 +       * without tripping rebalance_shares or load_balance_fair.
 +       */
        tg->shares = shares;
 -      for_each_possible_cpu(i)
 +      for_each_possible_cpu(i) {
 +              spin_lock_irq(&cpu_rq(i)->lock);
                set_se_shares(tg->se[i], shares);
 +              spin_unlock_irq(&cpu_rq(i)->lock);
 +      }
  
 +      /*
 +       * Enable load balance activity on this group, by inserting it back on
 +       * each cpu's rq->leaf_cfs_rq_list.
 +       */
 +      for_each_possible_cpu(i) {
 +              rq = cpu_rq(i);
 +              cfs_rq = tg->cfs_rq[i];
 +              list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 +      }
  done:
 -      spin_unlock(&tg->lock);
 +      unlock_task_group_list();
        return 0;
  }
  
@@@ -7776,31 -7168,6 +7784,31 @@@ unsigned long sched_group_shares(struc
        return tg->shares;
  }
  
 +/*
 + * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
 + */
 +int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
 +{
 +      struct task_group *tgi;
 +      unsigned long total = 0;
 +
 +      rcu_read_lock();
 +      list_for_each_entry_rcu(tgi, &task_groups, list)
 +              total += tgi->rt_ratio;
 +      rcu_read_unlock();
 +
 +      if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
 +              return -EINVAL;
 +
 +      tg->rt_ratio = rt_ratio;
 +      return 0;
 +}
 +
 +unsigned long sched_group_rt_ratio(struct task_group *tg)
 +{
 +      return tg->rt_ratio;
 +}
 +
  #endif        /* CONFIG_FAIR_GROUP_SCHED */
  
  #ifdef CONFIG_FAIR_CGROUP_SCHED
@@@ -7876,30 -7243,12 +7884,30 @@@ static u64 cpu_shares_read_uint(struct 
        return (u64) tg->shares;
  }
  
 +static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
 +              u64 rt_ratio_val)
 +{
 +      return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
 +}
 +
 +static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
 +{
 +      struct task_group *tg = cgroup_tg(cgrp);
 +
 +      return (u64) tg->rt_ratio;
 +}
 +
  static struct cftype cpu_files[] = {
        {
                .name = "shares",
                .read_uint = cpu_shares_read_uint,
                .write_uint = cpu_shares_write_uint,
        },
 +      {
 +              .name = "rt_ratio",
 +              .read_uint = cpu_rt_ratio_read_uint,
 +              .write_uint = cpu_rt_ratio_write_uint,
 +      },
  };
  
  static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
diff --combined kernel/signal.c
index bf49ce6f016bee66cb89bdb39dd416d4d20635ba,657aa16d97cbfa860b73463bee90a15830fa8b31..8054dd4e2d76c22ae86eb7e0380acbb99de4b853
@@@ -456,15 -456,15 +456,15 @@@ void signal_wake_up(struct task_struct 
        set_tsk_thread_flag(t, TIF_SIGPENDING);
  
        /*
-        * For SIGKILL, we want to wake it up in the stopped/traced case.
-        * We don't check t->state here because there is a race with it
+        * For SIGKILL, we want to wake it up in the stopped/traced/killable
+        * case. We don't check t->state here because there is a race with it
         * executing another processor and just now entering stopped state.
         * By using wake_up_state, we ensure the process will wake up and
         * handle its death signal.
         */
        mask = TASK_INTERRUPTIBLE;
        if (resume)
-               mask |= TASK_STOPPED | TASK_TRACED;
+               mask |= TASK_WAKEKILL;
        if (!wake_up_state(t, mask))
                kick_process(t);
  }
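TASK_WAKEKILL itself is defined on the include/linux/sched.h side of this merge; the state macros are presumably composed so the bit is present in every state that a fatal signal must be able to wake, roughly:

	/* Assumed composition from the TASK_WAKEKILL patches (definitions not shown here) */
	#define TASK_WAKEKILL	128
	#define TASK_KILLABLE	(TASK_WAKEKILL | __TASK_UNINTERRUPTIBLE)
	#define TASK_STOPPED	(TASK_WAKEKILL | __TASK_STOPPED)
	#define TASK_TRACED	(TASK_WAKEKILL | __TASK_TRACED)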
@@@ -620,7 -620,7 +620,7 @@@ static void handle_stop_signal(int sig
                         * Wake up the stopped thread _after_ setting
                         * TIF_SIGPENDING
                         */
-                       state = TASK_STOPPED;
+                       state = __TASK_STOPPED;
                        if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
                                set_tsk_thread_flag(t, TIF_SIGPENDING);
                                state |= TASK_INTERRUPTIBLE;
@@@ -733,13 -733,13 +733,13 @@@ static void print_fatal_signal(struct p
                current->comm, task_pid_nr(current), signr);
  
  #if defined(__i386__) && !defined(__arch_um__)
 -      printk("code at %08lx: ", regs->eip);
 +      printk("code at %08lx: ", regs->ip);
        {
                int i;
                for (i = 0; i < 16; i++) {
                        unsigned char insn;
  
 -                      __get_user(insn, (unsigned char *)(regs->eip + i));
 +                      __get_user(insn, (unsigned char *)(regs->ip + i));
                        printk("%02x ", insn);
                }
        }
@@@ -838,7 -838,7 +838,7 @@@ static inline int wants_signal(int sig
                return 0;
        if (sig == SIGKILL)
                return 1;
-       if (p->state & (TASK_STOPPED | TASK_TRACED))
+       if (task_is_stopped_or_traced(p))
                return 0;
        return task_curr(p) || !signal_pending(p);
  }
@@@ -994,6 -994,11 +994,11 @@@ void zap_other_threads(struct task_stru
        }
  }
  
+ int fastcall __fatal_signal_pending(struct task_struct *tsk)
+ {
+       return sigismember(&tsk->pending.signal, SIGKILL);
+ }
  /*
   * Must be called under rcu_read_lock() or with tasklist_lock read-held.
   */
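__fatal_signal_pending() backs the fatal_signal_pending() helper added in sched.h (not shown here); a killable sleep checks it where an interruptible sleep would check signal_pending(). A sketch of the pattern, with 'condition' standing in for the caller's wakeup test:

	/* Illustrative only: open-coded TASK_KILLABLE wait (the real helpers use waitqueues) */
	int ret = 0;

	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (condition)
			break;
		if (fatal_signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
	}
	__set_current_state(TASK_RUNNING);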
@@@ -1441,7 -1446,7 +1446,7 @@@ void do_notify_parent(struct task_struc
        BUG_ON(sig == -1);
  
        /* do_notify_parent_cldstop should have been called instead.  */
-       BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
+       BUG_ON(task_is_stopped_or_traced(tsk));
  
        BUG_ON(!tsk->ptrace &&
               (tsk->group_leader != tsk || !thread_group_empty(tsk)));
@@@ -1729,7 -1734,7 +1734,7 @@@ static int do_signal_stop(int signr
                         * so this check has no races.
                         */
                        if (!t->exit_state &&
-                           !(t->state & (TASK_STOPPED|TASK_TRACED))) {
+                           !task_is_stopped_or_traced(t)) {
                                stop_count++;
                                signal_wake_up(t, 0);
                        }
diff --combined kernel/timer.c
index 23f7ead78faeae25b07ad78819f4cadefdd3b4b6,66d7d8bca1a3a5cec5652085d167e1c10bc260da..9fbb472b8cf0a4e3016480fb205c42eda38dabf8
@@@ -58,57 -58,59 +58,57 @@@ EXPORT_SYMBOL(jiffies_64)
  #define TVN_MASK (TVN_SIZE - 1)
  #define TVR_MASK (TVR_SIZE - 1)
  
 -typedef struct tvec_s {
 +struct tvec {
        struct list_head vec[TVN_SIZE];
 -} tvec_t;
 +};
  
 -typedef struct tvec_root_s {
 +struct tvec_root {
        struct list_head vec[TVR_SIZE];
 -} tvec_root_t;
 +};
  
 -struct tvec_t_base_s {
 +struct tvec_base {
        spinlock_t lock;
        struct timer_list *running_timer;
        unsigned long timer_jiffies;
 -      tvec_root_t tv1;
 -      tvec_t tv2;
 -      tvec_t tv3;
 -      tvec_t tv4;
 -      tvec_t tv5;
 +      struct tvec_root tv1;
 +      struct tvec tv2;
 +      struct tvec tv3;
 +      struct tvec tv4;
 +      struct tvec tv5;
  } ____cacheline_aligned;
  
 -typedef struct tvec_t_base_s tvec_base_t;
 -
 -tvec_base_t boot_tvec_bases;
 +struct tvec_base boot_tvec_bases;
  EXPORT_SYMBOL(boot_tvec_bases);
 -static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
 +static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
  
  /*
 - * Note that all tvec_bases is 2 byte aligned and lower bit of
 + * Note that all tvec_bases are 2 byte aligned and lower bit of
   * base in timer_list is guaranteed to be zero. Use the LSB for
   * the new flag to indicate whether the timer is deferrable
   */
  #define TBASE_DEFERRABLE_FLAG         (0x1)
  
  /* Functions below help us manage 'deferrable' flag */
 -static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
 +static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
  {
        return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
  }
  
 -static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
 +static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
  {
 -      return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
 +      return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
  }
  
  static inline void timer_set_deferrable(struct timer_list *timer)
  {
 -      timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
 +      timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
                                       TBASE_DEFERRABLE_FLAG));
  }
  
  static inline void
 -timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
 +timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
  {
 -      timer->base = (tvec_base_t *)((unsigned long)(new_base) |
 +      timer->base = (struct tvec_base *)((unsigned long)(new_base) |
                                      tbase_get_deferrable(timer->base));
  }
  
@@@ -244,7 -246,7 +244,7 @@@ unsigned long round_jiffies_relative(un
  EXPORT_SYMBOL_GPL(round_jiffies_relative);
  
  
 -static inline void set_running_timer(tvec_base_t *base,
 +static inline void set_running_timer(struct tvec_base *base,
                                        struct timer_list *timer)
  {
  #ifdef CONFIG_SMP
  #endif
  }
  
 -static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
 +static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
  {
        unsigned long expires = timer->expires;
        unsigned long idx = expires - base->timer_jiffies;
@@@ -369,14 -371,14 +369,14 @@@ static inline void detach_timer(struct 
   * possible to set timer->base = NULL and drop the lock: the timer remains
   * locked.
   */
 -static tvec_base_t *lock_timer_base(struct timer_list *timer,
 +static struct tvec_base *lock_timer_base(struct timer_list *timer,
                                        unsigned long *flags)
        __acquires(timer->base->lock)
  {
 -      tvec_base_t *base;
 +      struct tvec_base *base;
  
        for (;;) {
 -              tvec_base_t *prelock_base = timer->base;
 +              struct tvec_base *prelock_base = timer->base;
                base = tbase_get_base(prelock_base);
                if (likely(base != NULL)) {
                        spin_lock_irqsave(&base->lock, *flags);
  
  int __mod_timer(struct timer_list *timer, unsigned long expires)
  {
 -      tvec_base_t *base, *new_base;
 +      struct tvec_base *base, *new_base;
        unsigned long flags;
        int ret = 0;
  
@@@ -443,7 -445,7 +443,7 @@@ EXPORT_SYMBOL(__mod_timer)
   */
  void add_timer_on(struct timer_list *timer, int cpu)
  {
 -      tvec_base_t *base = per_cpu(tvec_bases, cpu);
 +      struct tvec_base *base = per_cpu(tvec_bases, cpu);
        unsigned long flags;
  
        timer_stats_timer_set_start_info(timer);
@@@ -506,7 -508,7 +506,7 @@@ EXPORT_SYMBOL(mod_timer)
   */
  int del_timer(struct timer_list *timer)
  {
 -      tvec_base_t *base;
 +      struct tvec_base *base;
        unsigned long flags;
        int ret = 0;
  
@@@ -537,7 -539,7 +537,7 @@@ EXPORT_SYMBOL(del_timer)
   */
  int try_to_del_timer_sync(struct timer_list *timer)
  {
 -      tvec_base_t *base;
 +      struct tvec_base *base;
        unsigned long flags;
        int ret = -1;
  
@@@ -589,7 -591,7 +589,7 @@@ int del_timer_sync(struct timer_list *t
  EXPORT_SYMBOL(del_timer_sync);
  #endif
  
 -static int cascade(tvec_base_t *base, tvec_t *tv, int index)
 +static int cascade(struct tvec_base *base, struct tvec *tv, int index)
  {
        /* cascade all the timers from tv up one level */
        struct timer_list *timer, *tmp;
   * This function cascades all vectors and executes all expired timer
   * vectors.
   */
 -static inline void __run_timers(tvec_base_t *base)
 +static inline void __run_timers(struct tvec_base *base)
  {
        struct timer_list *timer;
  
                                int preempt_count = preempt_count();
                                fn(data);
                                if (preempt_count != preempt_count()) {
 -                                      printk(KERN_WARNING "huh, entered %p "
 +                                      printk(KERN_ERR "huh, entered %p "
                                               "with preempt_count %08x, exited"
                                               " with %08x?\n",
                                               fn, preempt_count,
   * is used on S/390 to stop all activity when a cpu is idle.
   * This function needs to be called with interrupts disabled.
   */
 -static unsigned long __next_timer_interrupt(tvec_base_t *base)
 +static unsigned long __next_timer_interrupt(struct tvec_base *base)
  {
        unsigned long timer_jiffies = base->timer_jiffies;
        unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
        int index, slot, array, found = 0;
        struct timer_list *nte;
 -      tvec_t *varray[4];
 +      struct tvec *varray[4];
  
        /* Look for timer events in tv1. */
        index = slot = timer_jiffies & TVR_MASK;
@@@ -714,7 -716,7 +714,7 @@@ cascade
        varray[3] = &base->tv5;
  
        for (array = 0; array < 4; array++) {
 -              tvec_t *varp = varray[array];
 +              struct tvec *varp = varray[array];
  
                index = slot = timer_jiffies & TVN_MASK;
                do {
@@@ -793,7 -795,7 +793,7 @@@ static unsigned long cmp_next_hrtimer_e
   */
  unsigned long get_next_timer_interrupt(unsigned long now)
  {
 -      tvec_base_t *base = __get_cpu_var(tvec_bases);
 +      struct tvec_base *base = __get_cpu_var(tvec_bases);
        unsigned long expires;
  
        spin_lock(&base->lock);
@@@ -892,9 -894,9 +892,9 @@@ static inline void calc_load(unsigned l
   */
  static void run_timer_softirq(struct softirq_action *h)
  {
 -      tvec_base_t *base = __get_cpu_var(tvec_bases);
 +      struct tvec_base *base = __get_cpu_var(tvec_bases);
  
 -      hrtimer_run_queues();
 +      hrtimer_run_pending();
  
        if (time_after_eq(jiffies, base->timer_jiffies))
                __run_timers(base);
   */
  void run_local_timers(void)
  {
 +      hrtimer_run_queues();
        raise_softirq(TIMER_SOFTIRQ);
        softlockup_tick();
  }
@@@ -977,7 -978,7 +977,7 @@@ asmlinkage long sys_getppid(void
        int pid;
  
        rcu_read_lock();
 -      pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
 +      pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns);
        rcu_read_unlock();
  
        return pid;
@@@ -1099,6 -1100,13 +1099,13 @@@ signed long __sched schedule_timeout_in
  }
  EXPORT_SYMBOL(schedule_timeout_interruptible);
  
+ signed long __sched schedule_timeout_killable(signed long timeout)
+ {
+       __set_current_state(TASK_KILLABLE);
+       return schedule_timeout(timeout);
+ }
+ EXPORT_SYMBOL(schedule_timeout_killable);
  signed long __sched schedule_timeout_uninterruptible(signed long timeout)
  {
        __set_current_state(TASK_UNINTERRUPTIBLE);
@@@ -1218,11 -1226,11 +1225,11 @@@ asmlinkage long sys_sysinfo(struct sysi
   */
  static struct lock_class_key base_lock_keys[NR_CPUS];
  
 -static int __devinit init_timers_cpu(int cpu)
 +static int __cpuinit init_timers_cpu(int cpu)
  {
        int j;
 -      tvec_base_t *base;
 -      static char __devinitdata tvec_base_done[NR_CPUS];
 +      struct tvec_base *base;
 +      static char __cpuinitdata tvec_base_done[NR_CPUS];
  
        if (!tvec_base_done[cpu]) {
                static char boot_done;
  }
  
  #ifdef CONFIG_HOTPLUG_CPU
 -static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
 +static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
  {
        struct timer_list *timer;
  
        }
  }
  
 -static void __devinit migrate_timers(int cpu)
 +static void __cpuinit migrate_timers(int cpu)
  {
 -      tvec_base_t *old_base;
 -      tvec_base_t *new_base;
 +      struct tvec_base *old_base;
 +      struct tvec_base *new_base;
        int i;
  
        BUG_ON(cpu_online(cpu));
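
schedule_timeout_killable(), added above, follows the same pattern as the _interruptible and _uninterruptible variants: set the task state, then let schedule_timeout() handle the timer. A minimal, hypothetical caller (the two-second delay and the wait_for_hw_settle() name are illustrative only) could use it like this:

#include <linux/sched.h>
#include <linux/jiffies.h>

static int wait_for_hw_settle(void)
{
	/* Sleep for up to two seconds; a fatal signal (SIGKILL) ends the sleep early. */
	schedule_timeout_killable(2 * HZ);

	return fatal_signal_pending(current) ? -EINTR : 0;
}
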
diff --combined mm/filemap.c
index f4d0cded0e10aa21b02707fcaf99c4cbcafa4f06,455119cc7f40c532540b92a39d2354522ef09dcc..89ce6fe5f8be152e71218085af5396dcb72d315c
@@@ -124,18 -124,6 +124,18 @@@ void __remove_from_page_cache(struct pa
        mapping->nrpages--;
        __dec_zone_page_state(page, NR_FILE_PAGES);
        BUG_ON(page_mapped(page));
 +
 +      /*
 +       * Some filesystems seem to re-dirty the page even after
 +       * the VM has canceled the dirty bit (eg ext3 journaling).
 +       *
 +       * Fix it up by doing a final dirty accounting check after
 +       * having removed the page entirely.
 +       */
 +      if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
 +              dec_zone_page_state(page, NR_FILE_DIRTY);
 +              dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
 +      }
  }
  
  void remove_from_page_cache(struct page *page)
@@@ -185,6 -173,12 +185,12 @@@ static int sync_page(void *word
        return 0;
  }
  
+ static int sync_page_killable(void *word)
+ {
+       sync_page(word);
+       return fatal_signal_pending(current) ? -EINTR : 0;
+ }
  /**
   * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
   * @mapping:  address space structure to write
@@@ -589,6 -583,14 +595,14 @@@ void fastcall __lock_page(struct page *
  }
  EXPORT_SYMBOL(__lock_page);
  
+ int fastcall __lock_page_killable(struct page *page)
+ {
+       DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+       return __wait_on_bit_lock(page_waitqueue(page), &wait,
+                                       sync_page_killable, TASK_KILLABLE);
+ }
  /*
   * Variant of lock_page that does not require the caller to hold a reference
   * on the page's mapping.
@@@ -980,7 -982,8 +994,8 @@@ page_ok
  
  page_not_up_to_date:
                /* Get exclusive access to the page ... */
-               lock_page(page);
+               if (lock_page_killable(page))
+                       goto readpage_eio;
  
                /* Did it get truncated before we got the lock? */
                if (!page->mapping) {
@@@ -1008,7 -1011,8 +1023,8 @@@ readpage
                }
  
                if (!PageUptodate(page)) {
-                       lock_page(page);
+                       if (lock_page_killable(page))
+                               goto readpage_eio;
                        if (!PageUptodate(page)) {
                                if (page->mapping == NULL) {
                                        /*
                                        goto find_page;
                                }
                                unlock_page(page);
-                               error = -EIO;
                                shrink_readahead_size_eio(filp, ra);
-                               goto readpage_error;
+                               goto readpage_eio;
                        }
                        unlock_page(page);
                }
  
                goto page_ok;
  
+ readpage_eio:
+               error = -EIO;
  readpage_error:
                /* UHHUH! A synchronous read error occurred. Report it */
                desc->error = error;
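
lock_page_killable(), used in the read path above, returns 0 once the page is locked and -EINTR if a fatal signal arrived while waiting, so callers can bail out instead of sleeping unkillably on a page that may never become unlocked. A generic caller follows the same shape (my_fill_page() is a made-up stand-in for whatever work needs the page lock):

#include <linux/mm.h>
#include <linux/pagemap.h>

static int touch_page_killable(struct page *page)
{
	int err = lock_page_killable(page);

	if (err)
		return err;		/* -EINTR: task has a pending SIGKILL */

	my_fill_page(page);		/* hypothetical work done under the page lock */
	unlock_page(page);
	return 0;
}
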
diff --combined net/sunrpc/auth.c
index bcd9abdb031c49a4473b75294d44ae2b2d6c02ac,1ea27559b1deb43dbeb9613fc3c9c501ef6aa942..eca941ce298b6465507c2f672dc5bf44350fffe6
@@@ -51,7 -51,6 +51,7 @@@ rpcauth_register(const struct rpc_autho
        spin_unlock(&rpc_authflavor_lock);
        return ret;
  }
 +EXPORT_SYMBOL_GPL(rpcauth_register);
  
  int
  rpcauth_unregister(const struct rpc_authops *ops)
@@@ -69,7 -68,6 +69,7 @@@
        spin_unlock(&rpc_authflavor_lock);
        return ret;
  }
 +EXPORT_SYMBOL_GPL(rpcauth_unregister);
  
  struct rpc_auth *
  rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
  out:
        return auth;
  }
 +EXPORT_SYMBOL_GPL(rpcauth_create);
  
  void
  rpcauth_release(struct rpc_auth *auth)
@@@ -154,7 -151,6 +154,7 @@@ rpcauth_init_credcache(struct rpc_auth 
        auth->au_credcache = new;
        return 0;
  }
 +EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
  
  /*
   * Destroy a list of credentials
@@@ -217,7 -213,6 +217,7 @@@ rpcauth_destroy_credcache(struct rpc_au
                kfree(cache);
        }
  }
 +EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
  
  /*
   * Remove stale credentials. Avoid sleeping inside the loop.
@@@ -337,7 -332,6 +337,7 @@@ found
  out:
        return cred;
  }
 +EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
  
  struct rpc_cred *
  rpcauth_lookupcred(struct rpc_auth *auth, int flags)
        put_group_info(acred.group_info);
        return ret;
  }
 +EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
  
  void
  rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
  #endif
        cred->cr_uid = acred->uid;
  }
 -EXPORT_SYMBOL(rpcauth_init_cred);
 +EXPORT_SYMBOL_GPL(rpcauth_init_cred);
  
  struct rpc_cred *
  rpcauth_bindcred(struct rpc_task *task)
                .group_info = current->group_info,
        };
        struct rpc_cred *ret;
-       sigset_t oldset;
        int flags = 0;
  
        dprintk("RPC: %5u looking up %s cred\n",
        get_group_info(acred.group_info);
        if (task->tk_flags & RPC_TASK_ROOTCREDS)
                flags |= RPCAUTH_LOOKUP_ROOTCREDS;
-       rpc_clnt_sigmask(task->tk_client, &oldset);
        ret = auth->au_ops->lookup_cred(auth, &acred, flags);
-       rpc_clnt_sigunmask(task->tk_client, &oldset);
        if (!IS_ERR(ret))
                task->tk_msg.rpc_cred = ret;
        else
@@@ -445,7 -435,6 +442,7 @@@ need_lock
  out_destroy:
        cred->cr_ops->crdestroy(cred);
  }
 +EXPORT_SYMBOL_GPL(put_rpccred);
  
  void
  rpcauth_unbindcred(struct rpc_task *task)
diff --combined net/sunrpc/clnt.c
index 924916ceaa435b531b043b190cd006268492d28b,a99729ff450e3342434440950bebb68e7088dfaa..0998e6d0966469df4ceb8f5adde3a842956093d5
@@@ -30,7 -30,6 +30,7 @@@
  #include <linux/smp_lock.h>
  #include <linux/utsname.h>
  #include <linux/workqueue.h>
 +#include <linux/in6.h>
  
  #include <linux/sunrpc/clnt.h>
  #include <linux/sunrpc/rpc_pipe_fs.h>
@@@ -122,9 -121,8 +122,9 @@@ rpc_setup_pipedir(struct rpc_clnt *clnt
        }
  }
  
 -static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
 +static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
  {
 +      struct rpc_program      *program = args->program;
        struct rpc_version      *version;
        struct rpc_clnt         *clnt = NULL;
        struct rpc_auth         *auth;
  
        /* sanity check the name before trying to print it */
        err = -EINVAL;
 -      len = strlen(servname);
 +      len = strlen(args->servername);
        if (len > RPC_MAXNETNAMELEN)
                goto out_no_rpciod;
        len++;
  
        dprintk("RPC:       creating %s client for %s (xprt %p)\n",
 -                      program->name, servname, xprt);
 +                      program->name, args->servername, xprt);
  
        err = rpciod_up();
        if (err)
        err = -EINVAL;
        if (!xprt)
                goto out_no_xprt;
 -      if (vers >= program->nrvers || !(version = program->version[vers]))
 +
 +      if (args->version >= program->nrvers)
 +              goto out_err;
 +      version = program->version[args->version];
 +      if (version == NULL)
                goto out_err;
  
        err = -ENOMEM;
        clnt->cl_server = clnt->cl_inline_name;
        if (len > sizeof(clnt->cl_inline_name)) {
                char *buf = kmalloc(len, GFP_KERNEL);
 -              if (buf != 0)
 +              if (buf != NULL)
                        clnt->cl_server = buf;
                else
                        len = sizeof(clnt->cl_inline_name);
        }
 -      strlcpy(clnt->cl_server, servname, len);
 +      strlcpy(clnt->cl_server, args->servername, len);
  
        clnt->cl_xprt     = xprt;
        clnt->cl_procinfo = version->procs;
        if (!xprt_bound(clnt->cl_xprt))
                clnt->cl_autobind = 1;
  
 +      clnt->cl_timeout = xprt->timeout;
 +      if (args->timeout != NULL) {
 +              memcpy(&clnt->cl_timeout_default, args->timeout,
 +                              sizeof(clnt->cl_timeout_default));
 +              clnt->cl_timeout = &clnt->cl_timeout_default;
 +      }
 +
        clnt->cl_rtt = &clnt->cl_rtt_default;
 -      rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
 +      rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
  
        kref_init(&clnt->cl_kref);
  
        if (err < 0)
                goto out_no_path;
  
 -      auth = rpcauth_create(flavor, clnt);
 +      auth = rpcauth_create(args->authflavor, clnt);
        if (IS_ERR(auth)) {
                printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
 -                              flavor);
 +                              args->authflavor);
                err = PTR_ERR(auth);
                goto out_no_auth;
        }
@@@ -258,8 -245,9 +258,8 @@@ struct rpc_clnt *rpc_create(struct rpc_
                .srcaddr = args->saddress,
                .dstaddr = args->address,
                .addrlen = args->addrsize,
 -              .timeout = args->timeout
        };
 -      char servername[20];
 +      char servername[48];
  
        xprt = xprt_create_transport(&xprtargs);
        if (IS_ERR(xprt))
         * up a string representation of the passed-in address.
         */
        if (args->servername == NULL) {
 -              struct sockaddr_in *addr =
 -                                      (struct sockaddr_in *) args->address;
 -              snprintf(servername, sizeof(servername), NIPQUAD_FMT,
 -                      NIPQUAD(addr->sin_addr.s_addr));
 +              servername[0] = '\0';
 +              switch (args->address->sa_family) {
 +              case AF_INET: {
 +                      struct sockaddr_in *sin =
 +                                      (struct sockaddr_in *)args->address;
 +                      snprintf(servername, sizeof(servername), NIPQUAD_FMT,
 +                               NIPQUAD(sin->sin_addr.s_addr));
 +                      break;
 +              }
 +              case AF_INET6: {
 +                      struct sockaddr_in6 *sin =
 +                                      (struct sockaddr_in6 *)args->address;
 +                      snprintf(servername, sizeof(servername), NIP6_FMT,
 +                               NIP6(sin->sin6_addr));
 +                      break;
 +              }
 +              default:
 +                      /* caller wants default server name, but
 +                       * address family isn't recognized. */
 +                      return ERR_PTR(-EINVAL);
 +              }
                args->servername = servername;
        }
  
 +      xprt = xprt_create_transport(&xprtargs);
 +      if (IS_ERR(xprt))
 +              return (struct rpc_clnt *)xprt;
 +
        /*
         * By default, kernel RPC client connects from a reserved port.
         * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
        if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
                xprt->resvport = 0;
  
 -      clnt = rpc_new_client(xprt, args->servername, args->program,
 -                              args->version, args->authflavor);
 +      clnt = rpc_new_client(args, xprt);
        if (IS_ERR(clnt))
                return clnt;
  
        if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
-               int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+               int err = rpc_ping(clnt, RPC_TASK_SOFT);
                if (err != 0) {
                        rpc_shutdown_client(clnt);
                        return ERR_PTR(err);
        if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
                clnt->cl_softrtry = 0;
  
-       if (args->flags & RPC_CLNT_CREATE_INTR)
-               clnt->cl_intr = 1;
        if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
                clnt->cl_autobind = 1;
        if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
@@@ -354,7 -320,7 +352,7 @@@ rpc_clone_client(struct rpc_clnt *clnt
        new->cl_autobind = 0;
        INIT_LIST_HEAD(&new->cl_tasks);
        spin_lock_init(&new->cl_lock);
 -      rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
 +      rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
        new->cl_metrics = rpc_alloc_iostats(clnt);
        if (new->cl_metrics == NULL)
                goto out_no_stats;
@@@ -377,7 -343,6 +375,7 @@@ out_no_clnt
        dprintk("RPC:       %s: returned error %d\n", __FUNCTION__, err);
        return ERR_PTR(err);
  }
 +EXPORT_SYMBOL_GPL(rpc_clone_client);
  
  /*
   * Properly shut down an RPC client, terminating all outstanding
@@@ -396,7 -361,6 +394,7 @@@ void rpc_shutdown_client(struct rpc_cln
  
        rpc_release_client(clnt);
  }
 +EXPORT_SYMBOL_GPL(rpc_shutdown_client);
  
  /*
   * Free an RPC client
@@@ -493,7 -457,7 +491,7 @@@ struct rpc_clnt *rpc_bind_new_program(s
        clnt->cl_prog     = program->number;
        clnt->cl_vers     = version->number;
        clnt->cl_stats    = program->stats;
-       err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+       err = rpc_ping(clnt, RPC_TASK_SOFT);
        if (err != 0) {
                rpc_shutdown_client(clnt);
                clnt = ERR_PTR(err);
  out:
        return clnt;
  }
 +EXPORT_SYMBOL_GPL(rpc_bind_new_program);
  
  /*
   * Default callback for async RPC calls
@@@ -515,81 -478,36 +513,34 @@@ static const struct rpc_call_ops rpc_de
        .rpc_call_done = rpc_default_callback,
  };
  
- /*
-  *    Export the signal mask handling for synchronous code that
-  *    sleeps on RPC calls
-  */
- #define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM))
- static void rpc_save_sigmask(sigset_t *oldset, int intr)
- {
-       unsigned long   sigallow = sigmask(SIGKILL);
-       sigset_t sigmask;
-       /* Block all signals except those listed in sigallow */
-       if (intr)
-               sigallow |= RPC_INTR_SIGNALS;
-       siginitsetinv(&sigmask, sigallow);
-       sigprocmask(SIG_BLOCK, &sigmask, oldset);
- }
- static void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
- {
-       rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
- }
- static void rpc_restore_sigmask(sigset_t *oldset)
- {
-       sigprocmask(SIG_SETMASK, oldset, NULL);
- }
- void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
- {
-       rpc_save_sigmask(oldset, clnt->cl_intr);
- }
- EXPORT_SYMBOL_GPL(rpc_clnt_sigmask);
- void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
- {
-       rpc_restore_sigmask(oldset);
- }
- EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask);
 -static
 -struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
 -              struct rpc_message *msg,
 -              int flags,
 -              const struct rpc_call_ops *ops,
 -              void *data)
 +/**
 + * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
 + * @task_setup_data: pointer to task initialisation data
 + */
 +struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
  {
        struct rpc_task *task, *ret;
-       sigset_t oldset;
  
 -      task = rpc_new_task(clnt, flags, ops, data);
 +      task = rpc_new_task(task_setup_data);
        if (task == NULL) {
 -              rpc_release_calldata(ops, data);
 -              return ERR_PTR(-ENOMEM);
 +              rpc_release_calldata(task_setup_data->callback_ops,
 +                              task_setup_data->callback_data);
 +              ret = ERR_PTR(-ENOMEM);
 +              goto out;
        }
  
 -      /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
 -      if (msg != NULL) {
 -              rpc_call_setup(task, msg, 0);
 -              if (task->tk_status != 0) {
 -                      ret = ERR_PTR(task->tk_status);
 -                      rpc_put_task(task);
 -                      goto out;
 -              }
 +      if (task->tk_status != 0) {
 +              ret = ERR_PTR(task->tk_status);
 +              rpc_put_task(task);
 +              goto out;
        }
        atomic_inc(&task->tk_count);
-       /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
-       if (!RPC_IS_ASYNC(task)) {
-               rpc_task_sigmask(task, &oldset);
-               rpc_execute(task);
-               rpc_restore_sigmask(&oldset);
-       } else
-               rpc_execute(task);
+       rpc_execute(task);
        ret = task;
  out:
        return ret;
  }
 +EXPORT_SYMBOL_GPL(rpc_run_task);
  
  /**
   * rpc_call_sync - Perform a synchronous RPC call
  int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
  {
        struct rpc_task *task;
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = clnt,
 +              .rpc_message = msg,
 +              .callback_ops = &rpc_default_ops,
 +              .flags = flags,
 +      };
        int status;
  
        BUG_ON(flags & RPC_TASK_ASYNC);
  
 -      task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        status = task->tk_status;
        rpc_put_task(task);
        return status;
  }
 +EXPORT_SYMBOL_GPL(rpc_call_sync);
  
  /**
   * rpc_call_async - Perform an asynchronous RPC call
@@@ -632,28 -543,45 +583,28 @@@ rpc_call_async(struct rpc_clnt *clnt, s
               const struct rpc_call_ops *tk_ops, void *data)
  {
        struct rpc_task *task;
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = clnt,
 +              .rpc_message = msg,
 +              .callback_ops = tk_ops,
 +              .callback_data = data,
 +              .flags = flags|RPC_TASK_ASYNC,
 +      };
  
 -      task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
 +      task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return PTR_ERR(task);
        rpc_put_task(task);
        return 0;
  }
 -
 -/**
 - * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
 - * @clnt: pointer to RPC client
 - * @flags: RPC flags
 - * @ops: RPC call ops
 - * @data: user call data
 - */
 -struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 -                                      const struct rpc_call_ops *tk_ops,
 -                                      void *data)
 -{
 -      return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
 -}
 -EXPORT_SYMBOL(rpc_run_task);
 +EXPORT_SYMBOL_GPL(rpc_call_async);
  
  void
 -rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 +rpc_call_start(struct rpc_task *task)
  {
 -      task->tk_msg   = *msg;
 -      task->tk_flags |= flags;
 -      /* Bind the user cred */
 -      if (task->tk_msg.rpc_cred != NULL)
 -              rpcauth_holdcred(task);
 -      else
 -              rpcauth_bindcred(task);
 -
 -      if (task->tk_status == 0)
 -              task->tk_action = call_start;
 -      else
 -              task->tk_action = rpc_exit_task;
 +      task->tk_action = call_start;
  }
 +EXPORT_SYMBOL_GPL(rpc_call_start);
  
  /**
   * rpc_peeraddr - extract remote peer address from clnt's xprt
@@@ -682,8 -610,7 +633,8 @@@ EXPORT_SYMBOL_GPL(rpc_peeraddr)
   * @format: address format
   *
   */
 -char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
 +const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
 +                           enum rpc_display_format_t format)
  {
        struct rpc_xprt *xprt = clnt->cl_xprt;
  
@@@ -701,7 -628,6 +652,7 @@@ rpc_setbufsize(struct rpc_clnt *clnt, u
        if (xprt->ops->set_buffer_size)
                xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
  }
 +EXPORT_SYMBOL_GPL(rpc_setbufsize);
  
  /*
   * Return size of largest payload RPC client can support, in bytes
@@@ -741,7 -667,6 +692,7 @@@ rpc_restart_call(struct rpc_task *task
  
        task->tk_action = call_start;
  }
 +EXPORT_SYMBOL_GPL(rpc_restart_call);
  
  /*
   * 0.  Initial state
@@@ -1169,7 -1094,7 +1120,7 @@@ call_status(struct rpc_task *task
        case -ETIMEDOUT:
                task->tk_action = call_timeout;
                if (task->tk_client->cl_discrtry)
 -                      xprt_disconnect(task->tk_xprt);
 +                      xprt_force_disconnect(task->tk_xprt);
                break;
        case -ECONNREFUSED:
        case -ENOTCONN:
@@@ -1292,7 -1217,7 +1243,7 @@@ out_retry
        req->rq_received = req->rq_private_buf.len = 0;
        task->tk_status = 0;
        if (task->tk_client->cl_discrtry)
 -              xprt_disconnect(task->tk_xprt);
 +              xprt_force_disconnect(task->tk_xprt);
  }
  
  /*
@@@ -1549,15 -1474,9 +1500,15 @@@ struct rpc_task *rpc_call_null(struct r
                .rpc_proc = &rpcproc_null,
                .rpc_cred = cred,
        };
 -      return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = clnt,
 +              .rpc_message = &msg,
 +              .callback_ops = &rpc_default_ops,
 +              .flags = flags,
 +      };
 +      return rpc_run_task(&task_setup_data);
  }
 -EXPORT_SYMBOL(rpc_call_null);
 +EXPORT_SYMBOL_GPL(rpc_call_null);
  
  #ifdef RPC_DEBUG
  void rpc_show_tasks(void)
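
Two clnt.c changes above go together: the RPC_CLNT_CREATE_INTR flag and the sigmask save/restore disappear because synchronous RPC waits are now TASK_KILLABLE, and every call path funnels a struct rpc_task_setup through rpc_run_task(). A hypothetical client-creation sketch (my_program, srvaddr and the version number are placeholders, not taken from this merge) now simply omits the _INTR flag:

#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>

extern struct rpc_program my_program;	/* hypothetical program definition */

static struct rpc_clnt *my_create_client(struct sockaddr_in *srvaddr)
{
	struct rpc_create_args args = {
		.protocol	= XPRT_TRANSPORT_TCP,
		.address	= (struct sockaddr *)srvaddr,
		.addrsize	= sizeof(*srvaddr),
		.servername	= NULL,		/* rpc_create() builds one from the address */
		.program	= &my_program,
		.version	= 1,
		.authflavor	= RPC_AUTH_UNIX,
		.flags		= RPC_CLNT_CREATE_NOPING,	/* no RPC_CLNT_CREATE_INTR needed */
	};

	return rpc_create(&args);
}
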
diff --combined net/sunrpc/rpcb_clnt.c
index fa5b8f202d5b3a358e090f484cff3475d4fc228e,c35b6e7fc68046b769b785ed64014657e1df601a..3164a0871cf039ca7c2f9ebb80ad2dc1eba7b180
@@@ -54,6 -54,45 +54,6 @@@ enum 
  #define RPCB_HIGHPROC_3               RPCBPROC_TADDR2UADDR
  #define RPCB_HIGHPROC_4               RPCBPROC_GETSTAT
  
 -/*
 - * r_addr
 - *
 - * Quoting RFC 3530, section 2.2:
 - *
 - * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
 - * US-ASCII string:
 - *
 - *    h1.h2.h3.h4.p1.p2
 - *
 - * The prefix, "h1.h2.h3.h4", is the standard textual form for
 - * representing an IPv4 address, which is always four octets long.
 - * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
 - * the first through fourth octets each converted to ASCII-decimal.
 - * Assuming big-endian ordering, p1 and p2 are, respectively, the first
 - * and second octets each converted to ASCII-decimal.  For example, if a
 - * host, in big-endian order, has an address of 0x0A010307 and there is
 - * a service listening on, in big endian order, port 0x020F (decimal
 - * 527), then the complete universal address is "10.1.3.7.2.15".
 - *
 - * ...
 - *
 - * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
 - * US-ASCII string:
 - *
 - *    x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
 - *
 - * The suffix "p1.p2" is the service port, and is computed the same way
 - * as with universal addresses for TCP and UDP over IPv4.  The prefix,
 - * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
 - * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
 - * Additionally, the two alternative forms specified in Section 2.2 of
 - * [RFC2373] are also acceptable.
 - *
 - * XXX: Currently this implementation does not explicitly convert the
 - *      stored address to US-ASCII on non-ASCII systems.
 - */
 -#define RPCB_MAXADDRLEN               (128u)
 -
  /*
   * r_owner
   *
@@@ -73,9 -112,9 +73,9 @@@ struct rpcbind_args 
        u32                     r_vers;
        u32                     r_prot;
        unsigned short          r_port;
 -      char *                  r_netid;
 -      char                    r_addr[RPCB_MAXADDRLEN];
 -      char *                  r_owner;
 +      const char *            r_netid;
 +      const char *            r_addr;
 +      const char *            r_owner;
  };
  
  static struct rpc_procinfo rpcb_procedures2[];
@@@ -89,6 -128,19 +89,6 @@@ struct rpcb_info 
  static struct rpcb_info rpcb_next_version[];
  static struct rpcb_info rpcb_next_version6[];
  
 -static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
 -{
 -      struct rpcbind_args *map = calldata;
 -      struct rpc_xprt *xprt = map->r_xprt;
 -      struct rpc_message msg = {
 -              .rpc_proc       = rpcb_next_version[xprt->bind_index].rpc_proc,
 -              .rpc_argp       = map,
 -              .rpc_resp       = &map->r_port,
 -      };
 -
 -      rpc_call_setup(task, &msg, 0);
 -}
 -
  static void rpcb_map_release(void *data)
  {
        struct rpcbind_args *map = data;
  }
  
  static const struct rpc_call_ops rpcb_getport_ops = {
 -      .rpc_call_prepare       = rpcb_getport_prepare,
        .rpc_call_done          = rpcb_getport_done,
        .rpc_release            = rpcb_map_release,
  };
@@@ -109,19 -162,17 +109,18 @@@ static void rpcb_wake_rpcbind_waiters(s
  }
  
  static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
 -                                      int proto, int version, int privileged)
 +                                  size_t salen, int proto, u32 version,
 +                                  int privileged)
  {
        struct rpc_create_args args = {
                .protocol       = proto,
                .address        = srvaddr,
 -              .addrsize       = sizeof(struct sockaddr_in),
 +              .addrsize       = salen,
                .servername     = hostname,
                .program        = &rpcb_program,
                .version        = version,
                .authflavor     = RPC_AUTH_UNIX,
-               .flags          = (RPC_CLNT_CREATE_NOPING |
-                                  RPC_CLNT_CREATE_INTR),
+               .flags          = RPC_CLNT_CREATE_NOPING,
        };
  
        switch (srvaddr->sa_family) {
@@@ -178,7 -229,7 +177,7 @@@ int rpcb_register(u32 prog, u32 vers, i
                        prog, vers, prot, port);
  
        rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
 -                                      XPRT_TRANSPORT_UDP, 2, 1);
 +                              sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
        if (IS_ERR(rpcb_clnt))
                return PTR_ERR(rpcb_clnt);
  
   * @vers: RPC version number to bind
   * @prot: transport protocol to use to make this request
   *
 + * Return value is the requested advertised port number,
 + * or a negative errno value.
 + *
   * Called from outside the RPC client in a synchronous task context.
   * Uses default timeout parameters specified by underlying transport.
   *
 - * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
 + * XXX: Needs to support IPv6
   */
 -int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
 -                    __u32 vers, int prot)
 +int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
  {
        struct rpcbind_args map = {
                .r_prog         = prog,
                .rpc_resp       = &map.r_port,
        };
        struct rpc_clnt *rpcb_clnt;
 -      char hostname[40];
        int status;
  
        dprintk("RPC:       %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
                __FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
  
 -      sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
 -      rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
 +      rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
 +                              sizeof(*sin), prot, 2, 0);
        if (IS_ERR(rpcb_clnt))
                return PTR_ERR(rpcb_clnt);
  
  }
  EXPORT_SYMBOL_GPL(rpcb_getport_sync);
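
Per the comment above, rpcb_getport_sync() now returns either the advertised port or a negative errno, so a synchronous caller needs no extra plumbing. A minimal sketch (probe_nfs_port() is hypothetical; 100003 is the well-known NFS program number, and the protocol argument matches what the transport uses):

#include <linux/in.h>
#include <linux/sunrpc/clnt.h>

static int probe_nfs_port(struct sockaddr_in *sin)
{
	/* Advertised port on success, negative errno on failure. */
	return rpcb_getport_sync(sin, 100003 /* NFS */, 3, IPPROTO_TCP);
}
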
  
 +static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
 +{
 +      struct rpc_message msg = {
 +              .rpc_proc = rpcb_next_version[version].rpc_proc,
 +              .rpc_argp = map,
 +              .rpc_resp = &map->r_port,
 +      };
 +      struct rpc_task_setup task_setup_data = {
 +              .rpc_client = rpcb_clnt,
 +              .rpc_message = &msg,
 +              .callback_ops = &rpcb_getport_ops,
 +              .callback_data = map,
 +              .flags = RPC_TASK_ASYNC,
 +      };
 +
 +      return rpc_run_task(&task_setup_data);
 +}
 +
  /**
   * rpcb_getport_async - obtain the port for a given RPC service on a given host
   * @task: task that is waiting for portmapper request
  void rpcb_getport_async(struct rpc_task *task)
  {
        struct rpc_clnt *clnt = task->tk_client;
 -      int bind_version;
 +      u32 bind_version;
        struct rpc_xprt *xprt = task->tk_xprt;
        struct rpc_clnt *rpcb_clnt;
        static struct rpcbind_args *map;
        struct rpc_task *child;
 -      struct sockaddr addr;
 +      struct sockaddr_storage addr;
 +      struct sockaddr *sap = (struct sockaddr *)&addr;
 +      size_t salen;
        int status;
        struct rpcb_info *info;
  
                goto bailout_nofree;
        }
  
 -      rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
 +      salen = rpc_peeraddr(clnt, sap, sizeof(addr));
  
        /* Don't ever use rpcbind v2 for AF_INET6 requests */
 -      switch (addr.sa_family) {
 +      switch (sap->sa_family) {
        case AF_INET:
                info = rpcb_next_version;
                break;
        dprintk("RPC: %5u %s: trying rpcbind version %u\n",
                task->tk_pid, __FUNCTION__, bind_version);
  
 -      rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
 +      rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
                                bind_version, 0);
        if (IS_ERR(rpcb_clnt)) {
                status = PTR_ERR(rpcb_clnt);
        map->r_port = 0;
        map->r_xprt = xprt_get(xprt);
        map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
 -      memcpy(map->r_addr,
 -             rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
 -             sizeof(map->r_addr));
 +      map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
        map->r_owner = RPCB_OWNER_STRING;       /* ignored for GETADDR */
  
 -      child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
 +      child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
        rpc_release_client(rpcb_clnt);
        if (IS_ERR(child)) {
                status = -EIO;
@@@ -485,7 -517,7 +484,7 @@@ static int rpcb_decode_getaddr(struct r
         * Simple sanity check.  The smallest possible universal
         * address is an IPv4 address string containing 11 bytes.
         */
 -      if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
 +      if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
                goto out_err;
  
        /*
@@@ -536,7 -568,7 +535,7 @@@ out_err
  #define RPCB_boolean_sz               (1u)
  
  #define RPCB_netid_sz         (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
 -#define RPCB_addr_sz          (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
 +#define RPCB_addr_sz          (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
  #define RPCB_ownerstring_sz   (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
  
  #define RPCB_mappingargs_sz   RPCB_program_sz+RPCB_version_sz+        \
diff --combined net/sunrpc/sched.c
index 40ce6f6672d6bb8b735d2ed379dff9add6ff087b,4b22910b446106bff68781b94d380da0b479dfc5..4c669121e607f774b7739a326639e8f2aec146a3
@@@ -45,7 -45,7 +45,7 @@@ static void                    rpc_release_task(struct 
  /*
   * RPC tasks sit here while waiting for conditions to improve.
   */
 -static RPC_WAITQ(delay_queue, "delayq");
 +static struct rpc_wait_queue delay_queue;
  
  /*
   * rpciod-related stuff
@@@ -135,7 -135,7 +135,7 @@@ static void __rpc_add_wait_queue_priori
        if (unlikely(task->tk_priority > queue->maxpriority))
                q = &queue->tasks[queue->maxpriority];
        list_for_each_entry(t, q, u.tk_wait.list) {
 -              if (t->tk_cookie == task->tk_cookie) {
 +              if (t->tk_owner == task->tk_owner) {
                        list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
                        return;
                }
@@@ -208,26 -208,26 +208,26 @@@ static inline void rpc_set_waitqueue_pr
        queue->count = 1 << (priority * 2);
  }
  
 -static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie)
 +static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
  {
 -      queue->cookie = cookie;
 +      queue->owner = pid;
        queue->nr = RPC_BATCH_COUNT;
  }
  
  static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
  {
        rpc_set_waitqueue_priority(queue, queue->maxpriority);
 -      rpc_set_waitqueue_cookie(queue, 0);
 +      rpc_set_waitqueue_owner(queue, 0);
  }
  
 -static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio)
 +static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
  {
        int i;
  
        spin_lock_init(&queue->lock);
        for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
                INIT_LIST_HEAD(&queue->tasks[i]);
 -      queue->maxpriority = maxprio;
 +      queue->maxpriority = nr_queues - 1;
        rpc_reset_waitqueue_priority(queue);
  #ifdef RPC_DEBUG
        queue->name = qname;
  
  void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
  {
 -      __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
 +      __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
  }
  
  void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
  {
 -      __rpc_init_priority_wait_queue(queue, qname, 0);
 +      __rpc_init_priority_wait_queue(queue, qname, 1);
  }
 -EXPORT_SYMBOL(rpc_init_wait_queue);
 +EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
  
- static int rpc_wait_bit_interruptible(void *word)
+ static int rpc_wait_bit_killable(void *word)
  {
-       if (signal_pending(current))
+       if (fatal_signal_pending(current))
                return -ERESTARTSYS;
        schedule();
        return 0;
@@@ -299,11 -299,11 +299,11 @@@ static void rpc_mark_complete_task(stru
  int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
  {
        if (action == NULL)
-               action = rpc_wait_bit_interruptible;
+               action = rpc_wait_bit_killable;
        return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
-                       action, TASK_INTERRUPTIBLE);
+                       action, TASK_KILLABLE);
  }
 -EXPORT_SYMBOL(__rpc_wait_for_completion_task);
 +EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
  
  /*
   * Make an RPC task runnable.
@@@ -373,7 -373,6 +373,7 @@@ void rpc_sleep_on(struct rpc_wait_queu
        __rpc_sleep_on(q, task, action, timer);
        spin_unlock_bh(&q->lock);
  }
 +EXPORT_SYMBOL_GPL(rpc_sleep_on);
  
  /**
   * __rpc_do_wake_up_task - wake up a single rpc_task
@@@ -445,7 -444,6 +445,7 @@@ void rpc_wake_up_task(struct rpc_task *
        }
        rcu_read_unlock_bh();
  }
 +EXPORT_SYMBOL_GPL(rpc_wake_up_task);
  
  /*
   * Wake up the next task on a priority queue.
@@@ -456,12 -454,12 +456,12 @@@ static struct rpc_task * __rpc_wake_up_
        struct rpc_task *task;
  
        /*
 -       * Service a batch of tasks from a single cookie.
 +       * Service a batch of tasks from a single owner.
         */
        q = &queue->tasks[queue->priority];
        if (!list_empty(q)) {
                task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
 -              if (queue->cookie == task->tk_cookie) {
 +              if (queue->owner == task->tk_owner) {
                        if (--queue->nr)
                                goto out;
                        list_move_tail(&task->u.tk_wait.list, q);
                 * Check if we need to switch queues.
                 */
                if (--queue->count)
 -                      goto new_cookie;
 +                      goto new_owner;
        }
  
        /*
  
  new_queue:
        rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
 -new_cookie:
 -      rpc_set_waitqueue_cookie(queue, task->tk_cookie);
 +new_owner:
 +      rpc_set_waitqueue_owner(queue, task->tk_owner);
  out:
        __rpc_wake_up_task(task);
        return task;
@@@ -521,7 -519,6 +521,7 @@@ struct rpc_task * rpc_wake_up_next(stru
  
        return task;
  }
 +EXPORT_SYMBOL_GPL(rpc_wake_up_next);
  
  /**
   * rpc_wake_up - wake up all rpc_tasks
@@@ -547,7 -544,6 +547,7 @@@ void rpc_wake_up(struct rpc_wait_queue 
        spin_unlock(&queue->lock);
        rcu_read_unlock_bh();
  }
 +EXPORT_SYMBOL_GPL(rpc_wake_up);
  
  /**
   * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
@@@ -576,7 -572,6 +576,7 @@@ void rpc_wake_up_status(struct rpc_wait
        spin_unlock(&queue->lock);
        rcu_read_unlock_bh();
  }
 +EXPORT_SYMBOL_GPL(rpc_wake_up_status);
  
  static void __rpc_atrun(struct rpc_task *task)
  {
@@@ -591,7 -586,6 +591,7 @@@ void rpc_delay(struct rpc_task *task, u
        task->tk_timeout = delay;
        rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
  }
 +EXPORT_SYMBOL_GPL(rpc_delay);
  
  /*
   * Helper to call task->tk_ops->rpc_call_prepare
@@@ -620,7 -614,7 +620,7 @@@ void rpc_exit_task(struct rpc_task *tas
                }
        }
  }
 -EXPORT_SYMBOL(rpc_exit_task);
 +EXPORT_SYMBOL_GPL(rpc_exit_task);
  
  void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
  {
@@@ -696,10 -690,9 +696,9 @@@ static void __rpc_execute(struct rpc_ta
  
                /* sync task: sleep here */
                dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
-               /* Note: Caller should be using rpc_clnt_sigmask() */
                status = out_of_line_wait_on_bit(&task->tk_runstate,
-                               RPC_TASK_QUEUED, rpc_wait_bit_interruptible,
-                               TASK_INTERRUPTIBLE);
+                               RPC_TASK_QUEUED, rpc_wait_bit_killable,
+                               TASK_KILLABLE);
                if (status == -ERESTARTSYS) {
                        /*
                         * When a sync task receives a signal, it exits with
@@@ -814,49 -807,38 +813,47 @@@ EXPORT_SYMBOL_GPL(rpc_free)
  /*
   * Creation and deletion of RPC task structures
   */
 -void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 +static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
  {
        memset(task, 0, sizeof(*task));
 -      init_timer(&task->tk_timer);
 -      task->tk_timer.data     = (unsigned long) task;
 -      task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
 +      setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
 +                      (unsigned long)task);
        atomic_set(&task->tk_count, 1);
 -      task->tk_client = clnt;
 -      task->tk_flags  = flags;
 -      task->tk_ops = tk_ops;
 -      if (tk_ops->rpc_call_prepare != NULL)
 -              task->tk_action = rpc_prepare_task;
 -      task->tk_calldata = calldata;
 +      task->tk_flags  = task_setup_data->flags;
 +      task->tk_ops = task_setup_data->callback_ops;
 +      task->tk_calldata = task_setup_data->callback_data;
        INIT_LIST_HEAD(&task->tk_task);
  
        /* Initialize retry counters */
        task->tk_garb_retry = 2;
        task->tk_cred_retry = 2;
  
 -      task->tk_priority = RPC_PRIORITY_NORMAL;
 -      task->tk_cookie = (unsigned long)current;
 +      task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
 +      task->tk_owner = current->tgid;
  
        /* Initialize workqueue for async tasks */
        task->tk_workqueue = rpciod_workqueue;
  
 -      if (clnt) {
 -              kref_get(&clnt->cl_kref);
 -              if (clnt->cl_softrtry)
 +      task->tk_client = task_setup_data->rpc_client;
 +      if (task->tk_client != NULL) {
 +              kref_get(&task->tk_client->cl_kref);
 +              if (task->tk_client->cl_softrtry)
                        task->tk_flags |= RPC_TASK_SOFT;
-               if (!task->tk_client->cl_intr)
-                       task->tk_flags |= RPC_TASK_NOINTR;
        }
  
 -      BUG_ON(task->tk_ops == NULL);
 +      if (task->tk_ops->rpc_call_prepare != NULL)
 +              task->tk_action = rpc_prepare_task;
 +
 +      if (task_setup_data->rpc_message != NULL) {
 +              memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg));
 +              /* Bind the user cred */
 +              if (task->tk_msg.rpc_cred != NULL)
 +                      rpcauth_holdcred(task);
 +              else
 +                      rpcauth_bindcred(task);
 +              if (task->tk_action == NULL)
 +                      rpc_call_start(task);
 +      }
  
        /* starting timestamp */
        task->tk_start = jiffies;
@@@ -881,22 -863,18 +878,22 @@@ static void rpc_free_task(struct rcu_he
  /*
   * Create a new task for the specified client.
   */
 -struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 +struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
  {
 -      struct rpc_task *task;
 -
 -      task = rpc_alloc_task();
 -      if (!task)
 -              goto out;
 +      struct rpc_task *task = setup_data->task;
 +      unsigned short flags = 0;
 +
 +      if (task == NULL) {
 +              task = rpc_alloc_task();
 +              if (task == NULL)
 +                      goto out;
 +              flags = RPC_TASK_DYNAMIC;
 +      }
  
 -      rpc_init_task(task, clnt, flags, tk_ops, calldata);
 +      rpc_init_task(task, setup_data);
  
 +      task->tk_flags |= flags;
        dprintk("RPC:       allocated task %p\n", task);
 -      task->tk_flags |= RPC_TASK_DYNAMIC;
  out:
        return task;
  }
@@@ -922,7 -900,7 +919,7 @@@ void rpc_put_task(struct rpc_task *task
                call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
        rpc_release_calldata(tk_ops, calldata);
  }
 -EXPORT_SYMBOL(rpc_put_task);
 +EXPORT_SYMBOL_GPL(rpc_put_task);
  
  static void rpc_release_task(struct rpc_task *task)
  {
@@@ -979,7 -957,6 +976,7 @@@ void rpc_killall_tasks(struct rpc_clnt 
        }
        spin_unlock(&clnt->cl_lock);
  }
 +EXPORT_SYMBOL_GPL(rpc_killall_tasks);
  
  int rpciod_up(void)
  {
@@@ -1059,11 -1036,6 +1056,11 @@@ rpc_init_mempool(void
                goto err_nomem;
        if (!rpciod_start())
                goto err_nomem;
 +      /*
 +       * The following is not strictly a mempool initialisation,
 +       * but there is no harm in doing it here
 +       */
 +      rpc_init_wait_queue(&delay_queue, "delayq");
        return 0;
  err_nomem:
        rpc_destroy_mempool();