* 'task_killable' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc: (22 commits)
Remove commented-out code copied from NFS
NFS: Switch from intr mount option to TASK_KILLABLE
Add wait_for_completion_killable
Add wait_event_killable
Add schedule_timeout_killable
Use mutex_lock_killable in vfs_readdir
Add mutex_lock_killable
Use lock_page_killable
Add lock_page_killable
Add fatal_signal_pending
Add TASK_WAKEKILL
exit: Use task_is_*
signal: Use task_is_*
sched: Use task_contributes_to_load, TASK_ALL and TASK_NORMAL
ptrace: Use task_is_*
power: Use task_is_*
wait: Use TASK_NORMAL
proc/base.c: Use task_is_*
proc/array.c: Use TASK_REPORT
perfmon: Use task_is_*
...
Fixed up conflicts in NFS/sunrpc manually..
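The common thread: sleeps that were TASK_UNINTERRUPTIBLE (or gated on the
old "intr" mount option) become TASK_KILLABLE, so only fatal signals such
as SIGKILL can break them. A minimal sketch of the new primitives in use
(hypothetical waitqueue and condition, not code from the diff below):

	static int wait_for_server_reply(wait_queue_head_t *wq, int *done)
	{
		/* Sleeps in TASK_KILLABLE: returns 0 once *done becomes
		 * true, or -ERESTARTSYS if a fatal signal arrives first. */
		return wait_event_killable(*wq, *done);
	}

The same idea shows up throughout the diff as fatal_signal_pending(current)
checks and the schedule_timeout_killable()/mutex_lock_killable() variants.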
#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/inet.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
#include <linux/nfs_xdr.h>
#include <asm/system.h>
};
#endif /* CONFIG_NFS_V3_ACL */
+struct nfs_client_initdata {
+ const char *hostname;
+ const struct sockaddr *addr;
+ size_t addrlen;
+ const struct nfs_rpc_ops *rpc_ops;
+ int proto;
+};
+
/*
* Allocate a shared client record
*
* Since these are allocated/deallocated very rarely, we don't
* bother putting them in a slab cache...
*/
-static struct nfs_client *nfs_alloc_client(const char *hostname,
- const struct sockaddr_in *addr,
- int nfsversion)
+static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp;
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
- if (nfsversion == 4) {
+ clp->rpc_ops = cl_init->rpc_ops;
+
+ if (cl_init->rpc_ops->version == 4) {
if (nfs_callback_up() < 0)
goto error_2;
__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
atomic_set(&clp->cl_count, 1);
clp->cl_cons_state = NFS_CS_INITING;
- clp->cl_nfsversion = nfsversion;
- memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+ memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
+ clp->cl_addrlen = cl_init->addrlen;
- if (hostname) {
- clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (cl_init->hostname) {
+ clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
if (!clp->cl_hostname)
goto error_3;
}
INIT_LIST_HEAD(&clp->cl_superblocks);
clp->cl_rpcclient = ERR_PTR(-EINVAL);
+ clp->cl_proto = cl_init->proto;
+
#ifdef CONFIG_NFS_V4
init_rwsem(&clp->cl_sem);
INIT_LIST_HEAD(&clp->cl_delegations);
*/
static void nfs_free_client(struct nfs_client *clp)
{
- dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+ dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
nfs4_shutdown_client(clp);
}
}
+static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
+ const struct sockaddr_in *sa2)
+{
+ return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
+}
+
+static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1,
+ const struct sockaddr_in6 *sa2)
+{
+ return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr);
+}
+
+static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ switch (sa1->sa_family) {
+ case AF_INET:
+ return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
+ (const struct sockaddr_in *)sa2);
+ case AF_INET6:
+ return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1,
+ (const struct sockaddr_in6 *)sa2);
+ }
+ BUG();
+}
+
/*
- * Find a client by address
- * - caller must hold nfs_client_lock
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
*/
-static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port)
+struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
{
struct nfs_client *clp;
+ spin_lock(&nfs_client_lock);
list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
/* Don't match clients that failed to initialise properly */
- if (clp->cl_cons_state < 0)
+ if (clp->cl_cons_state != NFS_CS_READY)
continue;
/* Different NFS versions cannot share the same nfs_client */
- if (clp->cl_nfsversion != nfsversion)
+ if (clp->rpc_ops->version != nfsversion)
continue;
- if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
- sizeof(clp->cl_addr.sin_addr)) != 0)
+ if (addr->sa_family != clap->sa_family)
+ continue;
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(addr, clap))
continue;
- if (!match_port || clp->cl_addr.sin_port == addr->sin_port)
- goto found;
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
}
-
+ spin_unlock(&nfs_client_lock);
return NULL;
-
-found:
- atomic_inc(&clp->cl_count);
- return clp;
}
/*
* Find a client by IP address and protocol version
* - returns NULL if no such client
*/
-struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
{
- struct nfs_client *clp;
+ struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
+ u32 nfsvers = clp->rpc_ops->version;
spin_lock(&nfs_client_lock);
- clp = __nfs_find_client(addr, nfsversion, 0);
+ list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
+ /* Don't match clients that failed to initialise properly */
+ if (clp->cl_cons_state != NFS_CS_READY)
+ continue;
+
+ /* Different NFS versions cannot share the same nfs_client */
+ if (clp->rpc_ops->version != nfsvers)
+ continue;
+
+ if (sap->sa_family != clap->sa_family)
+ continue;
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(sap, clap))
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+ }
spin_unlock(&nfs_client_lock);
- if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) {
- nfs_put_client(clp);
- clp = NULL;
+ return NULL;
+}
+
+/*
+ * Find an nfs_client on the list that matches the initialisation data
+ * that is supplied.
+ */
+static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
+{
+ struct nfs_client *clp;
+
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ /* Don't match clients that failed to initialise properly */
+ if (clp->cl_cons_state < 0)
+ continue;
+
+ /* Different NFS versions cannot share the same nfs_client */
+ if (clp->rpc_ops != data->rpc_ops)
+ continue;
+
+ if (clp->cl_proto != data->proto)
+ continue;
+
+ /* Match the full socket address */
+ if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ return clp;
}
- return clp;
+ return NULL;
}
/*
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
*/
-static struct nfs_client *nfs_get_client(const char *hostname,
- const struct sockaddr_in *addr,
- int nfsversion)
+static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp, *new = NULL;
int error;
- dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
- hostname ?: "", NIPQUAD(addr->sin_addr),
- addr->sin_port, nfsversion);
+ dprintk("--> nfs_get_client(%s,v%u)\n",
+ cl_init->hostname ?: "", cl_init->rpc_ops->version);
/* see if the client already exists */
do {
spin_lock(&nfs_client_lock);
- clp = __nfs_find_client(addr, nfsversion, 1);
+ clp = nfs_match_client(cl_init);
if (clp)
goto found_client;
if (new)
spin_unlock(&nfs_client_lock);
- new = nfs_alloc_client(hostname, addr, nfsversion);
+ new = nfs_alloc_client(cl_init);
} while (new);
return ERR_PTR(-ENOMEM);
if (new)
nfs_free_client(new);
- error = wait_event_interruptible(nfs_client_active_wq,
+ error = wait_event_killable(nfs_client_active_wq,
clp->cl_cons_state != NFS_CS_INITING);
if (error < 0) {
nfs_put_client(clp);
switch (proto) {
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
- if (!to->to_initval)
+ if (to->to_initval == 0)
to->to_initval = 60 * HZ;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
to->to_increment = to->to_initval;
to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+ if (to->to_maxval > NFS_MAX_TCP_TIMEOUT)
+ to->to_maxval = NFS_MAX_TCP_TIMEOUT;
+ if (to->to_maxval < to->to_initval)
+ to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
case XPRT_TRANSPORT_UDP:
/*
* Create an RPC client handle
*/
-static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
- unsigned int timeo,
- unsigned int retrans,
- rpc_authflavor_t flavor,
- int flags)
+static int nfs_create_rpc_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ rpc_authflavor_t flavor,
+ int flags)
{
- struct rpc_timeout timeparms;
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
- .protocol = proto,
+ .protocol = clp->cl_proto,
.address = (struct sockaddr *)&clp->cl_addr,
- .addrsize = sizeof(clp->cl_addr),
- .timeout = &timeparms,
+ .addrsize = clp->cl_addrlen,
+ .timeout = timeparms,
.servername = clp->cl_hostname,
.program = &nfs_program,
.version = clp->rpc_ops->version,
if (!IS_ERR(clp->cl_rpcclient))
return 0;
- nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
- clp->retrans_timeo = timeparms.to_initval;
- clp->retrans_count = timeparms.to_retries;
-
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
dprintk("%s: cannot create RPC client. Error = %ld\n",
*/
static void nfs_destroy_server(struct nfs_server *server)
{
- if (!IS_ERR(server->client_acl))
- rpc_shutdown_client(server->client_acl);
-
if (!(server->flags & NFS_MOUNT_NONLM))
- lockd_down(); /* release rpc.lockd */
+ nlmclnt_done(server->nlm_host);
}
/*
*/
static int nfs_start_lockd(struct nfs_server *server)
{
- int error = 0;
+ struct nlm_host *host;
+ struct nfs_client *clp = server->nfs_client;
+ struct nlmclnt_initdata nlm_init = {
+ .hostname = clp->cl_hostname,
+ .address = (struct sockaddr *)&clp->cl_addr,
+ .addrlen = clp->cl_addrlen,
+ .protocol = server->flags & NFS_MOUNT_TCP ?
+ IPPROTO_TCP : IPPROTO_UDP,
+ .nfs_version = clp->rpc_ops->version,
+ };
- if (server->nfs_client->cl_nfsversion > 3)
- goto out;
+ if (nlm_init.nfs_version > 3)
+ return 0;
if (server->flags & NFS_MOUNT_NONLM)
- goto out;
- error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
- IPPROTO_TCP : IPPROTO_UDP);
- if (error < 0)
- server->flags |= NFS_MOUNT_NONLM;
- else
- server->destroy = nfs_destroy_server;
-out:
- return error;
+ return 0;
+
+ host = nlmclnt_init(&nlm_init);
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+ server->nlm_host = host;
+ server->destroy = nfs_destroy_server;
+ return 0;
}
/*
#ifdef CONFIG_NFS_V3_ACL
static void nfs_init_server_aclclient(struct nfs_server *server)
{
- if (server->nfs_client->cl_nfsversion != 3)
+ if (server->nfs_client->rpc_ops->version != 3)
goto out_noacl;
if (server->flags & NFS_MOUNT_NOACL)
goto out_noacl;
/*
* Create a general RPC client
*/
-static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+static int nfs_init_server_rpcclient(struct nfs_server *server,
+ const struct rpc_timeout *timeo,
+ rpc_authflavor_t pseudoflavour)
{
struct nfs_client *clp = server->nfs_client;
return PTR_ERR(server->client);
}
+ memcpy(&server->client->cl_timeout_default,
+ timeo,
+ sizeof(server->client->cl_timeout_default));
+ server->client->cl_timeout = &server->client->cl_timeout_default;
+
if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
struct rpc_auth *auth;
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
- server->client->cl_intr = 0;
- if (server->flags & NFS4_MOUNT_INTR)
- server->client->cl_intr = 1;
-
return 0;
}
* Initialise an NFS2 or NFS3 client
*/
static int nfs_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
const struct nfs_parsed_mount_data *data)
{
int error;
return 0;
}
- /* Check NFS protocol revision and initialize RPC op vector */
- clp->rpc_ops = &nfs_v2_clientops;
-#ifdef CONFIG_NFS_V3
- if (clp->cl_nfsversion == 3)
- clp->rpc_ops = &nfs_v3_clientops;
-#endif
/*
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
- error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
- data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
+ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
static int nfs_init_server(struct nfs_server *server,
const struct nfs_parsed_mount_data *data)
{
+ struct nfs_client_initdata cl_init = {
+ .hostname = data->nfs_server.hostname,
+ .addr = (const struct sockaddr *)&data->nfs_server.address,
+ .addrlen = data->nfs_server.addrlen,
+ .rpc_ops = &nfs_v2_clientops,
+ .proto = data->nfs_server.protocol,
+ };
+ struct rpc_timeout timeparms;
struct nfs_client *clp;
- int error, nfsvers = 2;
+ int error;
dprintk("--> nfs_init_server()\n");
#ifdef CONFIG_NFS_V3
if (data->flags & NFS_MOUNT_VER3)
- nfsvers = 3;
+ cl_init.rpc_ops = &nfs_v3_clientops;
#endif
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(data->nfs_server.hostname,
- &data->nfs_server.address, nfsvers);
+ clp = nfs_get_client(&cl_init);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
}
- error = nfs_init_client(clp, data);
+ nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+ data->timeo, data->retrans);
+ error = nfs_init_client(clp, &timeparms, data);
if (error < 0)
goto error;
if (error < 0)
goto error;
- error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
if (error < 0)
goto error;
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
+ init_waitqueue_head(&server->active_wq);
+ atomic_set(&server->active, 0);
+
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
kfree(server);
if (server->destroy != NULL)
server->destroy(server);
+
+ if (!IS_ERR(server->client_acl))
+ rpc_shutdown_client(server->client_acl);
if (!IS_ERR(server->client))
rpc_shutdown_client(server->client);
* Initialise an NFS4 client record
*/
static int nfs4_init_client(struct nfs_client *clp,
- int proto, int timeo, int retrans,
+ const struct rpc_timeout *timeparms,
const char *ip_addr,
rpc_authflavor_t authflavour)
{
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
- error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour,
+ error = nfs_create_rpc_client(clp, timeparms, authflavour,
RPC_CLNT_CREATE_DISCRTRY);
if (error < 0)
goto error;
* Set up an NFS4 client
*/
static int nfs4_set_client(struct nfs_server *server,
- const char *hostname, const struct sockaddr_in *addr,
+ const char *hostname,
+ const struct sockaddr *addr,
+ const size_t addrlen,
const char *ip_addr,
rpc_authflavor_t authflavour,
- int proto, int timeo, int retrans)
+ int proto, const struct rpc_timeout *timeparms)
{
+ struct nfs_client_initdata cl_init = {
+ .hostname = hostname,
+ .addr = addr,
+ .addrlen = addrlen,
+ .rpc_ops = &nfs_v4_clientops,
+ .proto = proto,
+ };
struct nfs_client *clp;
int error;
dprintk("--> nfs4_set_client()\n");
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(hostname, addr, 4);
+ clp = nfs_get_client(&cl_init);
if (IS_ERR(clp)) {
error = PTR_ERR(clp);
goto error;
}
- error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour);
+ error = nfs4_init_client(clp, timeparms, ip_addr, authflavour);
if (error < 0)
goto error_put;
static int nfs4_init_server(struct nfs_server *server,
const struct nfs_parsed_mount_data *data)
{
+ struct rpc_timeout timeparms;
int error;
dprintk("--> nfs4_init_server()\n");
+ nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+ data->timeo, data->retrans);
+
+ /* Get a client record */
+ error = nfs4_set_client(server,
+ data->nfs_server.hostname,
+ (const struct sockaddr *)&data->nfs_server.address,
+ data->nfs_server.addrlen,
+ data->client_address,
+ data->auth_flavors[0],
+ data->nfs_server.protocol,
+ &timeparms);
+ if (error < 0)
+ goto error;
+
/* Initialise the client representation from the mount data */
server->flags = data->flags & NFS_MOUNT_FLAGMASK;
server->caps |= NFS_CAP_ATOMIC_OPEN;
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
- error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+error:
/* Done */
dprintk("<-- nfs4_init_server() = %d\n", error);
return error;
if (!server)
return ERR_PTR(-ENOMEM);
- /* Get a client record */
- error = nfs4_set_client(server,
- data->nfs_server.hostname,
- &data->nfs_server.address,
- data->client_address,
- data->auth_flavors[0],
- data->nfs_server.protocol,
- data->timeo, data->retrans);
- if (error < 0)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
/* Get a client representation.
* Note: NFSv4 always uses TCP, */
- error = nfs4_set_client(server, data->hostname, data->addr,
- parent_client->cl_ipaddr,
- data->authflavor,
- parent_server->client->cl_xprt->prot,
- parent_client->retrans_timeo,
- parent_client->retrans_count);
+ error = nfs4_set_client(server, data->hostname,
+ data->addr,
+ data->addrlen,
+ parent_client->cl_ipaddr,
+ data->authflavor,
+ parent_server->client->cl_xprt->prot,
+ parent_server->client->cl_timeout);
if (error < 0)
goto error;
nfs_server_copy_userdata(server, parent_server);
server->caps |= NFS_CAP_ATOMIC_OPEN;
- error = nfs_init_server_rpcclient(server, data->authflavor);
+ error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
if (error < 0)
goto error;
server->fsid = fattr->fsid;
- error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+ error = nfs_init_server_rpcclient(server,
+ source->client->cl_timeout,
+ source->client->cl_auth->au_flavor);
if (error < 0)
goto out_free_server;
if (!IS_ERR(source->client_acl))
/* display one transport per line on subsequent lines */
clp = list_entry(v, struct nfs_client, cl_share_link);
- seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
- clp->cl_nfsversion,
- NIPQUAD(clp->cl_addr.sin_addr),
- ntohs(clp->cl_addr.sin_port),
+ seq_printf(m, "v%u %s %s %3d %s\n",
+ clp->rpc_ops->version,
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
atomic_read(&clp->cl_count),
clp->cl_hostname);
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
- clp->cl_nfsversion,
- NIPQUAD(clp->cl_addr.sin_addr),
- ntohs(clp->cl_addr.sin_port),
+ seq_printf(m, "v%u %s %s %-7s %-17s\n",
+ clp->rpc_ops->version,
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
dev,
fsid);
static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
{
ssize_t result = -EIOCBQUEUED;
- struct rpc_clnt *clnt;
- sigset_t oldset;
/* Async requests don't wait here */
if (dreq->iocb)
goto out;
- clnt = NFS_CLIENT(dreq->inode);
- rpc_clnt_sigmask(clnt, &oldset);
- result = wait_for_completion_interruptible(&dreq->completion);
- rpc_clnt_sigunmask(clnt, &oldset);
+ result = wait_for_completion_killable(&dreq->completion);
if (!result)
result = dreq->error;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_read_direct_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
unsigned int pgbase;
int result;
ssize_t started = 0;
data->req = (struct nfs_page *) dreq;
data->inode = inode;
- data->cred = ctx->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = pos;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
- rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
- &nfs_read_direct_ops, data);
- NFS_PROTO(inode)->read_setup(data);
+ task_setup_data.task = &data->task;
+ task_setup_data.callback_data = data;
+ NFS_PROTO(inode)->read_setup(data, &msg);
- data->task.tk_cookie = (unsigned long) inode;
-
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
dprintk("NFS: %5u initiated direct read call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
struct inode *inode = dreq->inode;
struct list_head *p;
struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_cred = dreq->ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .callback_ops = &nfs_write_direct_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
dreq->count = 0;
get_dreq(dreq);
get_dreq(dreq);
+ /* Use stable writes */
+ data->args.stable = NFS_FILE_SYNC;
+
/*
* Reset data->res.
*/
* Reuse data->task; data->args should not have changed
* since the original request was sent.
*/
- rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
- &nfs_write_direct_ops, data);
- NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
-
- data->task.tk_priority = RPC_PRIORITY_NORMAL;
- data->task.tk_cookie = (unsigned long) inode;
+ task_setup_data.task = &data->task;
+ task_setup_data.callback_data = data;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ NFS_PROTO(inode)->write_setup(data, &msg);
/*
* We're called via an RPC callback, so BKL is already held.
*/
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
struct nfs_write_data *data = dreq->commit_data;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = dreq->ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .task = &data->task,
+ .rpc_client = NFS_CLIENT(dreq->inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_commit_direct_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
data->inode = dreq->inode;
- data->cred = dreq->ctx->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(data->inode);
data->args.offset = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
- rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
- &nfs_commit_direct_ops, data);
- NFS_PROTO(data->inode)->commit_setup(data, 0);
+ NFS_PROTO(data->inode)->commit_setup(data, &msg);
- data->task.tk_priority = RPC_PRIORITY_NORMAL;
- data->task.tk_cookie = (unsigned long)data->inode;
/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
dreq->commit_data = NULL;
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
}
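All of the direct-I/O call sites above follow one conversion: the old
rpc_init_task() + rpc_call_setup() + rpc_execute() sequence is replaced by
filling in a struct rpc_message and a struct rpc_task_setup and handing
both to rpc_run_task(). Condensed from the hunks above (the callback_ops
table named here stands in for e.g. nfs_commit_direct_ops):

	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,	/* optional pre-allocated task */
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = &my_direct_ops,
		.callback_data = data,
		.flags = RPC_TASK_ASYNC,
	};
	struct rpc_task *task = rpc_run_task(&task_setup_data);

	if (!IS_ERR(task))
		rpc_put_task(task);	/* drop our reference; the task runs on */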
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
struct inode *inode = ctx->path.dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_write_direct_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
data->req = (struct nfs_page *) dreq;
data->inode = inode;
- data->cred = ctx->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
data->args.count = bytes;
+ data->args.stable = sync;
data->res.fattr = &data->fattr;
data->res.count = bytes;
data->res.verf = &data->verf;
- rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
- &nfs_write_direct_ops, data);
- NFS_PROTO(inode)->write_setup(data, sync);
+ task_setup_data.task = &data->task;
+ task_setup_data.callback_data = data;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ NFS_PROTO(inode)->write_setup(data, &msg);
- data->task.tk_priority = RPC_PRIORITY_NORMAL;
- data->task.tk_cookie = (unsigned long) inode;
-
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
dprintk("NFS: %5u initiated direct write call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
size_t wsize = NFS_SERVER(inode)->wsize;
- int sync = 0;
+ int sync = NFS_UNSTABLE;
dreq = nfs_direct_req_alloc();
if (!dreq)
nfs_alloc_commit_data(dreq);
if (dreq->commit_data == NULL || count < wsize)
- sync = FLUSH_STABLE;
+ sync = NFS_FILE_SYNC;
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
- if (!count)
- goto out; /* return 0 */
retval = -EINVAL;
if ((ssize_t) count < 0)
*/
static void nfs_invalidate_inode(struct inode *inode)
{
- set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
nfs_zap_caches_locked(inode);
}
struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
struct nfs_fattr *fattr = desc->fattr;
- NFS_FILEID(inode) = fattr->fileid;
+ set_nfs_fileid(inode, fattr->fileid);
nfs_copy_fh(NFS_FH(inode), desc->fh);
return 0;
}
inode->i_fop = &nfs_dir_operations;
if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
&& fattr->size <= NFS_LIMIT_READDIRPLUS)
- set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+ set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
/* Deal with crossing mountpoints */
if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
*/
static int nfs_wait_on_inode(struct inode *inode)
{
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_inode *nfsi = NFS_I(inode);
- sigset_t oldmask;
int error;
- rpc_clnt_sigmask(clnt, &oldmask);
error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
- nfs_wait_schedule, TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldmask);
+ nfs_wait_schedule, TASK_KILLABLE);
return error;
}
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
int err;
- /* Flush out writes to the server in order to update c/mtime */
- if (S_ISREG(inode->i_mode))
+ /*
+ * Flush out writes to the server in order to update c/mtime.
+ *
+ * Hold the i_mutex to suspend application writes temporarily;
+ * this prevents long-running writing applications from blocking
+ * nfs_wb_nocommit.
+ */
+ if (S_ISREG(inode->i_mode)) {
+ mutex_lock(&inode->i_mutex);
nfs_wb_nocommit(inode);
+ mutex_unlock(&inode->i_mutex);
+ }
/*
* We may force a getattr if the user cares about atime.
if (status == -ESTALE) {
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
- set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
}
goto out;
}
if (S_ISDIR(inode->i_mode))
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
}
- if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
- inode->i_size = fattr->size;
+ if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
+ nfsi->npages == 0)
+ inode->i_size = nfs_size_to_loff_t(fattr->size);
}
}
dprintk("NFS: mtime change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- nfsi->cache_change_attribute = now;
+ if (S_ISDIR(inode->i_mode))
+ nfs_force_lookup_revalidate(inode);
}
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = now;
+ if (S_ISDIR(inode->i_mode))
+ nfs_force_lookup_revalidate(inode);
}
/* Check if our cached file size is stale */
void nfs4_clear_inode(struct inode *inode)
{
/* If we are holding a delegation, return it! */
- nfs_inode_return_delegation(inode);
+ nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
}
static int
nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
- sigset_t oldset;
int res;
- rpc_clnt_sigmask(clnt, &oldset);
do {
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
+ schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
- } while (!signalled());
- rpc_clnt_sigunmask(clnt, &oldset);
+ } while (!fatal_signal_pending(current));
return res;
}
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_READ],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
- data->args.stable = NFS_UNSTABLE;
- if (how & FLUSH_STABLE) {
- data->args.stable = NFS_FILE_SYNC;
- if (NFS_I(data->inode)->ncommit)
- data->args.stable = NFS_DATA_SYNC;
- }
-
- /* Finalize the task. */
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
static int
nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
- return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
+ struct inode *inode = filp->f_path.dentry->d_inode;
+
+ return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
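Taken together with the nfs_start_lockd() and nfs_destroy_server() hunks
earlier, the NLM client now has an explicit per-server lifecycle (sketch
assembled from those hunks; error handling elided):

	/* mount: bring up lockd and get a handle for this server */
	host = nlmclnt_init(&nlm_init);

	/* each NFSv2/v3 lock request goes through that handle */
	status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);

	/* umount: release the handle, letting lockd wind down */
	nlmclnt_done(server->nlm_host);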
const struct nfs_rpc_ops nfs_v3_clientops = {
spin_lock(&dir->i_lock);
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
- nfsi->cache_change_attribute = jiffies;
+ nfs_force_lookup_revalidate(dir);
nfsi->change_attr = cinfo->after;
spin_unlock(&dir->i_lock);
}
static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
{
- sigset_t oldset;
int ret;
- rpc_clnt_sigmask(task->tk_client, &oldset);
ret = rpc_wait_for_completion_task(task);
- rpc_clnt_sigunmask(task->tk_client, &oldset);
return ret;
}
return err;
}
-static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs4_opendata *data = calldata;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
- .rpc_argp = &data->c_arg,
- .rpc_resp = &data->c_res,
- .rpc_cred = data->owner->so_cred,
- };
- data->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
-}
-
static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
if (data->rpc_status == 0) {
memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
sizeof(data->o_res.stateid.data));
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
}
- nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
}
/* In case of error, no cleanup! */
if (!data->rpc_done)
goto out_free;
- nfs_confirm_seqid(&data->owner->so_seqid, 0);
state = nfs4_opendata_to_nfs4_state(data);
if (!IS_ERR(state))
nfs4_close_state(&data->path, state, data->o_arg.open_flags);
}
static const struct rpc_call_ops nfs4_open_confirm_ops = {
- .rpc_call_prepare = nfs4_open_confirm_prepare,
.rpc_call_done = nfs4_open_confirm_done,
.rpc_release = nfs4_open_confirm_release,
};
{
struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+ .rpc_argp = &data->c_arg,
+ .rpc_resp = &data->c_res,
+ .rpc_cred = data->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_open_confirm_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
int status;
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
- task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
+ data->timestamp = jiffies;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
{
struct nfs4_opendata *data = calldata;
struct nfs4_state_owner *sp = data->owner;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
- .rpc_argp = &data->o_arg,
- .rpc_resp = &data->o_res,
- .rpc_cred = sp->so_cred,
- };
-
+
if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
return;
/*
data->o_arg.id = sp->so_owner_id.id;
data->o_arg.clientid = sp->so_client->cl_clientid;
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
}
data->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
+ rpc_call_start(task);
return;
out_no_action:
task->tk_action = NULL;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
- nfs_confirm_seqid(&data->owner->so_seqid, 0);
state = nfs4_opendata_to_nfs4_state(data);
if (!IS_ERR(state))
nfs4_close_state(&data->path, state, data->o_arg.open_flags);
struct nfs_openargs *o_arg = &data->o_arg;
struct nfs_openres *o_res = &data->o_res;
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
+ .rpc_argp = o_arg,
+ .rpc_resp = o_res,
+ .rpc_cred = data->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_open_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
int status;
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
data->cancelled = 0;
- task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
{
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
- .rpc_argp = &calldata->arg,
- .rpc_resp = &calldata->res,
- .rpc_cred = state->owner->so_cred,
- };
int clear_rd, clear_wr, clear_rdwr;
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
}
nfs_fattr_init(calldata->res.fattr);
if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
calldata->arg.open_flags = FMODE_READ;
} else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
calldata->arg.open_flags = FMODE_WRITE;
}
calldata->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
+ rpc_call_start(task);
}
static const struct rpc_call_ops nfs4_close_ops = {
struct nfs4_closedata *calldata;
struct nfs4_state_owner *sp = state->owner;
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_close_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
int status = -ENOMEM;
calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
calldata->path.mnt = mntget(path->mnt);
calldata->path.dentry = dget(path->dentry);
- task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+ msg.rpc_argp = &calldata->arg;
+ msg.rpc_resp = &calldata->res;
+ task_setup_data.callback_data = calldata;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = 0;
return 0;
}
-static void nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
data->timestamp = jiffies;
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
- struct inode *inode = data->inode;
- struct nfs_server *server = NFS_SERVER(inode);
- int stable;
-
- if (how & FLUSH_STABLE) {
- if (!NFS_I(inode)->ncommit)
- stable = NFS_FILE_SYNC;
- else
- stable = NFS_DATA_SYNC;
- } else
- stable = NFS_UNSTABLE;
- data->args.stable = stable;
+ struct nfs_server *server = NFS_SERVER(data->inode);
+
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
-
data->timestamp = jiffies;
- /* Finalize the task. */
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
}
static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
}
/*
return 0;
}
- static int nfs4_wait_bit_interruptible(void *word)
+ static int nfs4_wait_bit_killable(void *word)
{
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
return -ERESTARTSYS;
schedule();
return 0;
static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
{
- sigset_t oldset;
int res;
might_sleep();
rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
- rpc_clnt_sigmask(clnt, &oldset);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
- nfs4_wait_bit_interruptible,
- TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldset);
+ nfs4_wait_bit_killable, TASK_KILLABLE);
rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
return res;
static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
{
- sigset_t oldset;
int res = 0;
might_sleep();
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- rpc_clnt_sigmask(clnt, &oldset);
- if (clnt->cl_intr) {
- schedule_timeout_interruptible(*timeout);
- if (signalled())
- res = -ERESTARTSYS;
- } else
- schedule_timeout_uninterruptible(*timeout);
- rpc_clnt_sigunmask(clnt, &oldset);
+ schedule_timeout_killable(*timeout);
+ if (fatal_signal_pending(current))
+ res = -ERESTARTSYS;
*timeout <<= 1;
return res;
}
for(;;) {
setclientid.sc_name_len = scnprintf(setclientid.sc_name,
- sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
- clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
+ sizeof(setclientid.sc_name), "%s/%s %s %s %u",
+ clp->cl_ipaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_PROTO),
cred->cr_ops->cr_name,
clp->cl_id_uniquifier);
setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
- sizeof(setclientid.sc_netid), "tcp");
+ sizeof(setclientid.sc_netid),
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_NETID));
setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
- sizeof(setclientid.sc_uaddr), "%s.%d.%d",
+ sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
struct nfs4_delegreturnres res;
struct nfs_fh fh;
nfs4_stateid stateid;
- struct rpc_cred *cred;
unsigned long timestamp;
struct nfs_fattr fattr;
int rpc_status;
};
-static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs4_delegreturndata *data = calldata;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
- nfs_fattr_init(data->res.fattr);
- rpc_call_setup(task, &msg, 0);
-}
-
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_delegreturndata *data = calldata;
static void nfs4_delegreturn_release(void *calldata)
{
- struct nfs4_delegreturndata *data = calldata;
-
- put_rpccred(data->cred);
kfree(calldata);
}
static const struct rpc_call_ops nfs4_delegreturn_ops = {
- .rpc_call_prepare = nfs4_delegreturn_prepare,
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
};
-static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
struct rpc_task *task;
- int status;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
+ .rpc_cred = cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_delegreturn_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
+ int status = 0;
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
memcpy(&data->stateid, stateid, sizeof(data->stateid));
data->res.fattr = &data->fattr;
data->res.server = server;
- data->cred = get_rpccred(cred);
+ nfs_fattr_init(data->res.fattr);
data->timestamp = jiffies;
data->rpc_status = 0;
- task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
+ task_setup_data.callback_data = data;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (!issync)
+ goto out;
status = nfs4_wait_for_completion_rpc_task(task);
- if (status == 0) {
- status = data->rpc_status;
- if (status == 0)
- nfs_refresh_inode(inode, &data->fattr);
- }
+ if (status != 0)
+ goto out;
+ status = data->rpc_status;
+ if (status != 0)
+ goto out;
+ nfs_refresh_inode(inode, &data->fattr);
+out:
rpc_put_task(task);
return status;
}
-int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_proc_delegreturn(inode, cred, stateid);
+ err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- schedule_timeout_interruptible(timeout);
+ schedule_timeout_killable(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
static void nfs4_locku_prepare(struct rpc_task *task, void *data)
{
struct nfs4_unlockdata *calldata = data;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
- .rpc_argp = &calldata->arg,
- .rpc_resp = &calldata->res,
- .rpc_cred = calldata->lsp->ls_state->owner->so_cred,
- };
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
return;
return;
}
calldata->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
+ rpc_call_start(task);
}
static const struct rpc_call_ops nfs4_locku_ops = {
struct nfs_seqid *seqid)
{
struct nfs4_unlockdata *data;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(lsp->ls_state->inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_locku_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
/* Ensure this is an unlock - when canceling a lock, the
* canceled lock is passed in, and it won't be an unlock.
return ERR_PTR(-ENOMEM);
}
- return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ return rpc_run_task(&task_setup_data);
}
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
+ p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
+ if (p->arg.open_seqid == NULL)
+ goto out_free;
p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
if (p->arg.lock_seqid == NULL)
- goto out_free;
+ goto out_free_seqid;
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_id.id;
p->ctx = get_nfs_open_context(ctx);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
+out_free_seqid:
+ nfs_free_seqid(p->arg.open_seqid);
out_free:
kfree(p);
return NULL;
{
struct nfs4_lockdata *data = calldata;
struct nfs4_state *state = data->lsp->ls_state;
- struct nfs4_state_owner *sp = state->owner;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
- .rpc_argp = &data->arg,
- .rpc_resp = &data->res,
- .rpc_cred = sp->so_cred,
- };
+ dprintk("%s: begin!\n", __FUNCTION__);
if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
return;
- dprintk("%s: begin!\n", __FUNCTION__);
/* Do we need to do an open_to_lock_owner? */
if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
- data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid);
- if (data->arg.open_seqid == NULL) {
- data->rpc_status = -ENOMEM;
- task->tk_action = NULL;
- goto out;
- }
+ if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
+ return;
data->arg.open_stateid = &state->stateid;
data->arg.new_lock_owner = 1;
- }
+ } else
+ data->arg.new_lock_owner = 0;
data->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
-out:
+ rpc_call_start(task);
dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
}
struct nfs4_lockdata *data = calldata;
dprintk("%s: begin!\n", __FUNCTION__);
- if (data->arg.open_seqid != NULL)
- nfs_free_seqid(data->arg.open_seqid);
+ nfs_free_seqid(data->arg.open_seqid);
if (data->cancelled != 0) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
{
struct nfs4_lockdata *data;
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(state->inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_lock_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
int ret;
dprintk("%s: begin!\n", __FUNCTION__);
data->arg.block = 1;
if (reclaim != 0)
data->arg.reclaim = 1;
- task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
- &nfs4_lock_ops, data);
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
ret = nfs4_wait_for_completion_rpc_task(task);
if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
return -EOPNOTSUPP;
- if (!S_ISREG(inode->i_mode) &&
- (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
- return -EPERM;
-
return nfs4_proc_set_acl(inode, buf, buflen);
}
struct page *page,
unsigned int offset, unsigned int count)
{
-- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_page *req;
for (;;) {
if (req != NULL)
break;
- if (signalled() && (server->flags & NFS_MOUNT_INTR))
+ if (fatal_signal_pending(current))
return ERR_PTR(-ERESTARTSYS);
yield();
}
* nfs_set_page_tag_locked - Tag a request as locked
* @req:
*/
-static int nfs_set_page_tag_locked(struct nfs_page *req)
+int nfs_set_page_tag_locked(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
- if (!nfs_lock_request(req))
+ if (!nfs_lock_request_dontget(req))
return 0;
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ if (req->wb_page != NULL)
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
if (req->wb_page != NULL) {
spin_lock(&inode->i_lock);
radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ nfs_unlock_request(req);
spin_unlock(&inode->i_lock);
- }
- nfs_unlock_request(req);
+ } else
+ nfs_unlock_request(req);
}
/**
kref_put(&req->wb_kref, nfs_free_request);
}
- static int nfs_wait_bit_interruptible(void *word)
+ static int nfs_wait_bit_killable(void *word)
{
int ret = 0;
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
ret = -ERESTARTSYS;
else
schedule();
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
*
- * Interruptible by signals only if mounted with intr flag.
+ * Interruptible by fatal signals only.
* The user is responsible for holding a count on the request.
*/
int
nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
- sigset_t oldmask;
int ret = 0;
if (!test_bit(PG_BUSY, &req->wb_flags))
goto out;
- /*
- * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
- * are not interrupted if intr flag is not set
- */
- rpc_clnt_sigmask(clnt, &oldmask);
ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldmask);
+ nfs_wait_bit_killable, TASK_KILLABLE);
out:
return ret;
}
goto out;
idx_start = req->wb_index + 1;
if (nfs_set_page_tag_locked(req)) {
+ kref_get(&req->wb_kref);
nfs_list_remove_request(req);
radix_tree_tag_clear(&nfsi->nfs_page_tree,
req->wb_index, tag);
#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/inet.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
#include <linux/nfs_xdr.h>
#include <linux/magic.h>
#include <linux/parser.h>
Opt_actimeo,
Opt_namelen,
Opt_mountport,
- Opt_mountprog, Opt_mountvers,
- Opt_nfsprog, Opt_nfsvers,
+ Opt_mountvers,
+ Opt_nfsvers,
/* Mount options that take string arguments */
- Opt_sec, Opt_proto, Opt_mountproto,
+ Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
Opt_addr, Opt_mountaddr, Opt_clientaddr,
/* Mount options that are ignored */
{ Opt_userspace, "retry=%u" },
{ Opt_namelen, "namlen=%u" },
{ Opt_mountport, "mountport=%u" },
- { Opt_mountprog, "mountprog=%u" },
{ Opt_mountvers, "mountvers=%u" },
- { Opt_nfsprog, "nfsprog=%u" },
{ Opt_nfsvers, "nfsvers=%u" },
{ Opt_nfsvers, "vers=%u" },
{ Opt_mountproto, "mountproto=%s" },
{ Opt_addr, "addr=%s" },
{ Opt_clientaddr, "clientaddr=%s" },
- { Opt_userspace, "mounthost=%s" },
+ { Opt_mounthost, "mounthost=%s" },
{ Opt_mountaddr, "mountaddr=%s" },
{ Opt_err, NULL }
static int nfs_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static void nfs_kill_super(struct super_block *);
+static void nfs_put_super(struct super_block *);
static struct file_system_type nfs_fs_type = {
.owner = THIS_MODULE,
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
+ .put_super = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
unregister_filesystem(&nfs_fs_type);
}
+void nfs_sb_active(struct nfs_server *server)
+{
+ atomic_inc(&server->active);
+}
+
+void nfs_sb_deactive(struct nfs_server *server)
+{
+ if (atomic_dec_and_test(&server->active))
+ wake_up(&server->active_wq);
+}
+
+static void nfs_put_super(struct super_block *sb)
+{
+ struct nfs_server *server = NFS_SB(sb);
+ /*
+ * Make sure there are no outstanding ops to this server.
+ * If there are, wait for them to finish before allowing the
+ * unmount to continue.
+ */
+ wait_event(server->active_wq, atomic_read(&server->active) == 0);
+}
+
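The counter is meant to be bumped around asynchronous operations that can
outlive the last user-visible reference to the mount (the callers are not
part of this excerpt), roughly:

	nfs_sb_active(server);	/* pin: nfs_put_super() will wait for us */
	/* ... fire off an async RPC that may outlive the syscall ... */
	nfs_sb_deactive(server);	/* unpin; wakes the unmount waiter */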
/*
* Deliver file system statistics to userspace
*/
const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
- { NFS_MOUNT_INTR, ",intr", ",nointr" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
{ NFS_MOUNT_NONLM, ",nolock", "" },
}
seq_printf(m, ",proto=%s",
rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
- seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
- seq_printf(m, ",retrans=%u", clp->retrans_count);
+ seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
+ seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
}
nfs_show_mount_options(m, nfss, 0);
- seq_printf(m, ",addr="NIPQUAD_FMT,
- NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
+ seq_printf(m, ",addr=%s",
+ rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
+ RPC_DISPLAY_ADDR));
return 0;
}
seq_printf(m, ",namelen=%d", nfss->namelen);
#ifdef CONFIG_NFS_V4
- if (nfss->nfs_client->cl_nfsversion == 4) {
+ if (nfss->nfs_client->rpc_ops->version == 4) {
seq_printf(m, "\n\tnfsv4:\t");
seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
}
/*
- * Sanity-check a server address provided by the mount command
+ * Set the port number in an address. Be agnostic about the address family.
+ */
+static void nfs_set_port(struct sockaddr *sap, unsigned short port)
+{
+ switch (sap->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+ ap->sin_port = htons(port);
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+ ap->sin6_port = htons(port);
+ break;
+ }
+ }
+}
+
+/*
+ * Sanity-check a server address provided by the mount command.
+ *
+ * Address family must be initialized, and address must not be
+ * the ANY address for that family.
*/
static int nfs_verify_server_address(struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET: {
- struct sockaddr_in *sa = (struct sockaddr_in *) addr;
- if (sa->sin_addr.s_addr != INADDR_ANY)
- return 1;
- break;
+ struct sockaddr_in *sa = (struct sockaddr_in *)addr;
+ return sa->sin_addr.s_addr != INADDR_ANY;
+ }
+ case AF_INET6: {
+ struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ return !ipv6_addr_any(sa);
}
}
return 0;
}
+/*
+ * Parse string addresses passed in via a mount option,
+ * and construct a sockaddr based on the result.
+ *
+ * If address parsing fails, set the sockaddr's address
+ * family to AF_UNSPEC to force nfs_verify_server_address()
+ * to punt the mount.
+ */
+static void nfs_parse_server_address(char *value,
+ struct sockaddr *sap,
+ size_t *len)
+{
+ if (strchr(value, ':')) {
+ struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+ u8 *addr = (u8 *)&ap->sin6_addr.in6_u;
+
+ ap->sin6_family = AF_INET6;
+ *len = sizeof(*ap);
+ if (in6_pton(value, -1, addr, '\0', NULL))
+ return;
+ } else {
+ struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+ u8 *addr = (u8 *)&ap->sin_addr.s_addr;
+
+ ap->sin_family = AF_INET;
+ *len = sizeof(*ap);
+ if (in4_pton(value, -1, addr, '\0', NULL))
+ return;
+ }
+
+ sap->sa_family = AF_UNSPEC;
+ *len = 0;
+}
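To illustrate the dispatch above (hypothetical inputs, not part of the patch): any string containing a ':' takes the IPv6 path, everything else the IPv4 path, and a parse failure leaves AF_UNSPEC so nfs_verify_server_address() rejects the mount:

	struct sockaddr_storage ss;
	size_t len;

	nfs_parse_server_address("192.168.1.1", (struct sockaddr *)&ss, &len);
	/* ss.ss_family == AF_INET, len == sizeof(struct sockaddr_in) */

	nfs_parse_server_address("fe80::1", (struct sockaddr *)&ss, &len);
	/* ss.ss_family == AF_INET6, len == sizeof(struct sockaddr_in6) */

	nfs_parse_server_address("bogus", (struct sockaddr *)&ss, &len);
	/* ss.ss_family == AF_UNSPEC, len == 0: the mount is punted */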
+
/*
* Error-check and convert a string of mount options from user space into
* a data structure
struct nfs_parsed_mount_data *mnt)
{
char *p, *string;
+ unsigned short port = 0;
if (!raw) {
dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
mnt->flags &= ~NFS_MOUNT_SOFT;
break;
case Opt_intr:
- mnt->flags |= NFS_MOUNT_INTR;
- break;
case Opt_nointr:
- mnt->flags &= ~NFS_MOUNT_INTR;
break;
case Opt_posix:
mnt->flags |= NFS_MOUNT_POSIX;
return 0;
if (option < 0 || option > 65535)
return 0;
- mnt->nfs_server.address.sin_port = htons(option);
+ port = option;
break;
case Opt_rsize:
if (match_int(args, &mnt->rsize))
return 0;
mnt->mount_server.port = option;
break;
- case Opt_mountprog:
- if (match_int(args, &option))
- return 0;
- if (option < 0)
- return 0;
- mnt->mount_server.program = option;
- break;
case Opt_mountvers:
if (match_int(args, &option))
return 0;
return 0;
mnt->mount_server.version = option;
break;
- case Opt_nfsprog:
- if (match_int(args, &option))
- return 0;
- if (option < 0)
- return 0;
- mnt->nfs_server.program = option;
- break;
case Opt_nfsvers:
if (match_int(args, &option))
return 0;
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
- mnt->nfs_server.address.sin_family = AF_INET;
- mnt->nfs_server.address.sin_addr.s_addr =
- in_aton(string);
+ nfs_parse_server_address(string, (struct sockaddr *)
+ &mnt->nfs_server.address,
+ &mnt->nfs_server.addrlen);
kfree(string);
break;
case Opt_clientaddr:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
+ kfree(mnt->client_address);
mnt->client_address = string;
break;
+ case Opt_mounthost:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ kfree(mnt->mount_server.hostname);
+ mnt->mount_server.hostname = string;
+ break;
case Opt_mountaddr:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
- mnt->mount_server.address.sin_family = AF_INET;
- mnt->mount_server.address.sin_addr.s_addr =
- in_aton(string);
+ nfs_parse_server_address(string, (struct sockaddr *)
+ &mnt->mount_server.address,
+ &mnt->mount_server.addrlen);
kfree(string);
break;
}
}
+ nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port);
+
return 1;
out_nomem:
static int nfs_try_mount(struct nfs_parsed_mount_data *args,
struct nfs_fh *root_fh)
{
- struct sockaddr_in sin;
+ struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address;
+ char *hostname;
int status;
if (args->mount_server.version == 0) {
args->mount_server.version = NFS_MNT_VERSION;
}
+ if (args->mount_server.hostname)
+ hostname = args->mount_server.hostname;
+ else
+ hostname = args->nfs_server.hostname;
+
/*
* Construct the mount server's address.
*/
- if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
- sin = args->mount_server.address;
- else
- sin = args->nfs_server.address;
+ if (args->mount_server.address.ss_family == AF_UNSPEC) {
+ memcpy(sap, &args->nfs_server.address,
+ args->nfs_server.addrlen);
+ args->mount_server.addrlen = args->nfs_server.addrlen;
+ }
+
/*
* autobind will be used if mount_server.port == 0
*/
- sin.sin_port = htons(args->mount_server.port);
+ nfs_set_port(sap, args->mount_server.port);
/*
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount((struct sockaddr *) &sin,
- sizeof(sin),
- args->nfs_server.hostname,
+ status = nfs_mount(sap,
+ args->mount_server.addrlen,
+ hostname,
args->nfs_server.export_path,
args->mount_server.version,
args->mount_server.protocol,
if (status == 0)
return 0;
- dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
- ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
+ dfprintk(MOUNT, "NFS: unable to mount server %s, error %d",
+ hostname, status);
return status;
}
*
* + breaking back: trying proto=udp after proto=tcp, v2 after v3,
* mountproto=tcp after mountproto=udp, and so on
- *
- * XXX: as far as I can tell, changing the NFS program number is not
- * supported in the NFS client.
*/
static int nfs_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args,
args->acdirmin = 30;
args->acdirmax = 60;
args->mount_server.protocol = XPRT_TRANSPORT_UDP;
- args->mount_server.program = NFS_MNT_PROGRAM;
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- args->nfs_server.program = NFS_PROGRAM;
switch (data->version) {
case 1:
memset(mntfh->data + mntfh->size, 0,
sizeof(mntfh->data) - mntfh->size);
- if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
- goto out_no_address;
-
/*
* Translate to nfs_parsed_mount_data, which nfs_fill_super
* can deal with.
args->acregmax = data->acregmax;
args->acdirmin = data->acdirmin;
args->acdirmax = data->acdirmax;
- args->nfs_server.address = data->addr;
+
+ memcpy(&args->nfs_server.address, &data->addr,
+ sizeof(data->addr));
+ args->nfs_server.addrlen = sizeof(data->addr);
+ if (!nfs_verify_server_address((struct sockaddr *)
+ &args->nfs_server.address))
+ goto out_no_address;
+
if (!(data->flags & NFS_MOUNT_TCP))
args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
/* N.B. caller will free nfs_server.hostname in all cases */
return ret;
}
+static int nfs_compare_super_address(struct nfs_server *server1,
+ struct nfs_server *server2)
+{
+ struct sockaddr *sap1, *sap2;
+
+ sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr;
+ sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr;
+
+ if (sap1->sa_family != sap2->sa_family)
+ return 0;
+
+ switch (sap1->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1;
+ struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2;
+ if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr)
+ return 0;
+ if (sin1->sin_port != sin2->sin_port)
+ return 0;
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1;
+ struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2;
+ if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
+ return 0;
+ if (sin1->sin6_port != sin2->sin6_port)
+ return 0;
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
static int nfs_compare_super(struct super_block *sb, void *data)
{
struct nfs_sb_mountdata *sb_mntdata = data;
struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
int mntflags = sb_mntdata->mntflags;
- if (memcmp(&old->nfs_client->cl_addr,
- &server->nfs_client->cl_addr,
- sizeof(old->nfs_client->cl_addr)) != 0)
+ if (!nfs_compare_super_address(old, server))
return 0;
/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
if (old->flags & NFS_MOUNT_UNSHARED)
out:
kfree(data.nfs_server.hostname);
+ kfree(data.mount_server.hostname);
return error;
out_err_nosb:
error = PTR_ERR(mntroot);
goto error_splat_super;
}
- if (mntroot->d_inode->i_op != &nfs_dir_inode_operations) {
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
dput(mntroot);
error = -ESTALE;
goto error_splat_super;
nfs_initialise_sb(sb);
}
+/*
+ * If the user didn't specify a port, set the port number to
+ * the NFS version 4 default port.
+ */
+static void nfs4_default_port(struct sockaddr *sap)
+{
+ switch (sap->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+ if (ap->sin_port == 0)
+ ap->sin_port = htons(NFS_PORT);
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+ if (ap->sin6_port == 0)
+ ap->sin6_port = htons(NFS_PORT);
+ break;
+ }
+ }
+}
+
/*
* Validate NFSv4 mount options
*/
struct nfs_parsed_mount_data *args,
const char *dev_name)
{
+ struct sockaddr_in *ap;
struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
char *c;
switch (data->version) {
case 1:
- if (data->host_addrlen != sizeof(args->nfs_server.address))
+ ap = (struct sockaddr_in *)&args->nfs_server.address;
+ if (data->host_addrlen > sizeof(args->nfs_server.address))
+ goto out_no_address;
+ if (data->host_addrlen == 0)
goto out_no_address;
- if (copy_from_user(&args->nfs_server.address,
- data->host_addr,
- sizeof(args->nfs_server.address)))
+ args->nfs_server.addrlen = data->host_addrlen;
+ if (copy_from_user(ap, data->host_addr, data->host_addrlen))
return -EFAULT;
- if (args->nfs_server.address.sin_port == 0)
- args->nfs_server.address.sin_port = htons(NFS_PORT);
if (!nfs_verify_server_address((struct sockaddr *)
&args->nfs_server.address))
goto out_no_address;
+ nfs4_default_port((struct sockaddr *)
+ &args->nfs_server.address);
+
switch (data->auth_flavourlen) {
case 0:
args->auth_flavors[0] = RPC_AUTH_UNIX;
&args->nfs_server.address))
return -EINVAL;
+ nfs4_default_port((struct sockaddr *)
+ &args->nfs_server.address);
+
switch (args->auth_flavor_len) {
case 0:
args->auth_flavors[0] = RPC_AUTH_UNIX;
len = c - dev_name;
if (len > NFS4_MAXNAMLEN)
return -ENAMETOOLONG;
- args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
- if (args->nfs_server.hostname == NULL)
- return -ENOMEM;
- strncpy(args->nfs_server.hostname, dev_name, len - 1);
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
+ if (!args->nfs_server.hostname)
+ return -ENOMEM;
c++; /* step over the ':' */
len = strlen(c);
if (len > NFS4_MAXPATHLEN)
return -ENAMETOOLONG;
- args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
- if (args->nfs_server.export_path == NULL)
- return -ENOMEM;
- strncpy(args->nfs_server.export_path, c, len);
+ args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
+ if (!args->nfs_server.export_path)
+ return -ENOMEM;
- dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
+ dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
if (args->client_address == NULL)
goto out_no_client_address;
error = PTR_ERR(mntroot);
goto error_splat_super;
}
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
+ dput(mntroot);
+ error = -ESTALE;
+ goto error_splat_super;
+ }
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
error = PTR_ERR(mntroot);
goto error_splat_super;
}
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
+ dput(mntroot);
+ error = -ESTALE;
+ goto error_splat_super;
+ }
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
}
/* Update file length */
nfs_grow_file(page, offset, count);
- nfs_unlock_request(req);
+ nfs_clear_page_tag_locked(req);
return 0;
}
struct page *page)
{
struct inode *inode = page->mapping->host;
- struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
int ret;
spin_unlock(&inode->i_lock);
return 0;
}
- if (nfs_lock_request_dontget(req))
+ if (nfs_set_page_tag_locked(req))
break;
/* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request_dontget() will always
+ * then the call to nfs_set_page_tag_locked() will always
* succeed provided that someone hasn't already marked the
* request as dirty (in which case we don't care).
*/
if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
/* This request is marked for commit */
spin_unlock(&inode->i_lock);
- nfs_unlock_request(req);
+ nfs_clear_page_tag_locked(req);
nfs_pageio_complete(pgio);
return 0;
}
spin_unlock(&inode->i_lock);
BUG();
}
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
- NFS_PAGE_TAG_LOCKED);
spin_unlock(&inode->i_lock);
nfs_pageio_add_request(pgio, req);
return 0;
set_page_private(req->wb_page, (unsigned long)req);
nfsi->npages++;
kref_get(&req->wb_kref);
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 0;
}
/*
* Wait for a request to complete.
*
- * Interruptible by signals only if mounted with intr flag.
+ * Interruptible by fatal signals only.
*/
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
if (req) {
- if (!nfs_lock_request_dontget(req)) {
+ if (!nfs_set_page_tag_locked(req)) {
int error;
spin_unlock(&inode->i_lock);
|| req->wb_page != page
|| !nfs_dirty_request(req)
|| offset > rqend || end < req->wb_offset) {
- nfs_unlock_request(req);
+ nfs_clear_page_tag_locked(req);
return ERR_PTR(-EBUSY);
}
nfs_clear_page_tag_locked(req);
}
-static inline int flush_task_priority(int how)
+static int flush_task_priority(int how)
{
switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
case FLUSH_HIGHPRI:
unsigned int count, unsigned int offset,
int how)
{
- struct inode *inode;
- int flags;
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ int priority = flush_task_priority(how);
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = req->wb_context->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .task = &data->task,
+ .rpc_message = &msg,
+ .callback_ops = call_ops,
+ .callback_data = data,
+ .flags = flags,
+ .priority = priority,
+ };
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
- data->cred = req->wb_context->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
data->args.pages = data->pagevec;
data->args.count = count;
data->args.context = req->wb_context;
+ data->args.stable = NFS_UNSTABLE;
+ if (how & FLUSH_STABLE) {
+ data->args.stable = NFS_DATA_SYNC;
+ if (!NFS_I(inode)->ncommit)
+ data->args.stable = NFS_FILE_SYNC;
+ }
data->res.fattr = &data->fattr;
data->res.count = count;
nfs_fattr_init(&data->fattr);
/* Set up the initial task struct. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
- rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
- NFS_PROTO(inode)->write_setup(data, how);
-
- data->task.tk_priority = flush_task_priority(how);
- data->task.tk_cookie = (unsigned long)inode;
+ NFS_PROTO(inode)->write_setup(data, &msg);
dprintk("NFS: %5u initiated write call "
"(req %s/%Ld, %u bytes @ offset %Lu)\n",
(long long)NFS_FILEID(inode),
count,
(unsigned long long)data->args.offset);
-}
-static void nfs_execute_write(struct nfs_write_data *data)
-{
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
}
/*
wsize, offset, how);
offset += wsize;
nbytes -= wsize;
- nfs_execute_write(data);
} while (nbytes != 0);
return 0;
/* Set up the argument struct */
nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
- nfs_execute_write(data);
return 0;
out_bad:
while (!list_empty(head)) {
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
- int wsize = NFS_SERVER(inode)->wsize;
+ size_t wsize = NFS_SERVER(inode)->wsize;
if (wsize < PAGE_CACHE_SIZE)
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
struct nfs_write_data *data,
int how)
{
- struct nfs_page *first;
- struct inode *inode;
- int flags;
+ struct nfs_page *first = nfs_list_entry(head->next);
+ struct inode *inode = first->wb_context->path.dentry->d_inode;
+ int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ int priority = flush_task_priority(how);
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = first->wb_context->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .task = &data->task,
+ .rpc_client = NFS_CLIENT(inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_commit_ops,
+ .callback_data = data,
+ .flags = flags,
+ .priority = priority,
+ };
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
list_splice_init(head, &data->pages);
- first = nfs_list_entry(data->pages.next);
- inode = first->wb_context->path.dentry->d_inode;
data->inode = inode;
- data->cred = first->wb_context->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(data->inode);
/* Note: we always request a commit of the entire inode */
nfs_fattr_init(&data->fattr);
/* Set up the initial task struct. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
- rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
- NFS_PROTO(inode)->commit_setup(data, how);
+ NFS_PROTO(inode)->commit_setup(data, &msg);
- data->task.tk_priority = flush_task_priority(how);
- data->task.tk_cookie = (unsigned long)inode;
-
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
}
/*
/* Set up the argument struct */
nfs_commit_rpcsetup(head, data, how);
- nfs_execute_write(data);
return 0;
out_bad:
while (!list_empty(head)) {
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & (TASK_RUNNING |
- TASK_INTERRUPTIBLE |
- TASK_UNINTERRUPTIBLE |
- TASK_STOPPED |
- TASK_TRACED)) |
- tsk->exit_state;
+ unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
const char **p = &task_state_array[0];
while (state) {
ppid = pid_alive(p) ?
task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
tpid = pid_alive(p) && p->ptrace ?
- task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
+ task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
buffer += sprintf(buffer,
"State:\t%s\n"
"Tgid:\t%d\n"
}
sid = task_session_nr_ns(task, ns);
+ ppid = task_tgid_nr_ns(task->real_parent, ns);
pgid = task_pgrp_nr_ns(task, ns);
- ppid = task_ppid_nr_ns(task, ns);
unlock_task_sighand(task, &flags);
}
(task == current || \
(task->parent == current && \
(task->ptrace & PT_PTRACED) && \
- (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
+ (task_is_stopped_or_traced(task)) && \
security_ptrace(current,task) == 0))
+struct mm_struct *mm_for_maps(struct task_struct *task)
+{
+ struct mm_struct *mm = get_task_mm(task);
+ if (!mm)
+ return NULL;
+ down_read(&mm->mmap_sem);
+ task_lock(task);
+ if (task->mm != mm)
+ goto out;
+ if (task->mm != current->mm && __ptrace_may_attach(task) < 0)
+ goto out;
+ task_unlock(task);
+ return mm;
+out:
+ task_unlock(task);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return NULL;
+}
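The expected caller pattern (a sketch with a hypothetical caller; mm_for_maps() returns with mmap_sem held for read, so the caller must release both the semaphore and the mm reference):

	struct mm_struct *mm = mm_for_maps(task);

	if (mm) {
		/* safe to walk mm->mmap here */
		up_read(&mm->mmap_sem);
		mmput(mm);
	}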
+
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
}
#endif
+#ifdef CONFIG_LATENCYTOP
+static int lstats_show_proc(struct seq_file *m, void *v)
+{
+ int i;
+ struct task_struct *task = m->private;
+ seq_puts(m, "Latency Top version : v0.1\n");
+
+ for (i = 0; i < 32; i++) {
+ if (task->latency_record[i].backtrace[0]) {
+ int q;
+ seq_printf(m, "%i %li %li ",
+ task->latency_record[i].count,
+ task->latency_record[i].time,
+ task->latency_record[i].max);
+ for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
+ char sym[KSYM_NAME_LEN];
+ char *c;
+ if (!task->latency_record[i].backtrace[q])
+ break;
+ if (task->latency_record[i].backtrace[q] == ULONG_MAX)
+ break;
+ sprint_symbol(sym, task->latency_record[i].backtrace[q]);
+ c = strchr(sym, '+');
+ if (c)
+ *c = 0;
+ seq_printf(m, "%s ", sym);
+ }
+ seq_printf(m, "\n");
+ }
+
+ }
+ return 0;
+}
+
+static int lstats_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ struct seq_file *m;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ ret = single_open(file, lstats_show_proc, NULL);
+ if (!ret) {
+ m = file->private_data;
+ m->private = task;
+ }
+ return ret;
+}
+
+static ssize_t lstats_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offs)
+{
+ struct seq_file *m;
+ struct task_struct *task;
+
+ m = file->private_data;
+ task = m->private;
+ clear_all_latency_tracing(task);
+
+ return count;
+}
+
+static const struct file_operations proc_lstats_operations = {
+ .open = lstats_open,
+ .read = seq_read,
+ .write = lstats_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif
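The record format of the resulting /proc/<pid>/latency file follows directly from the seq_printf() calls above; a hypothetical entry (illustrative values only) would look like:

	Latency Top version : v0.1
	12 3409 182 do_sys_poll sys_poll

i.e. count, total time and maximum latency, followed by the backtrace symbols. Writing anything to the file clears the records via lstats_write().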
+
/* The badness from the OOM killer */
unsigned long badness(struct task_struct *p, unsigned long uptime);
static int proc_oom_score(struct task_struct *task, char *buffer)
};
#endif
+
#ifdef CONFIG_SCHED_DEBUG
/*
* Print out various scheduling related per-task fields:
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, pid_schedstat),
#endif
+#ifdef CONFIG_LATENCYTOP
+ REG("latency", S_IRUGO, lstats),
+#endif
#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, cpuset),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, pid_schedstat),
#endif
+#ifdef CONFIG_LATENCYTOP
+ REG("latency", S_IRUGO, lstats),
+#endif
#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, cpuset),
#endif
#define NFS_INO_STALE (2) /* possible stale inode */
#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */
-static inline struct nfs_inode *NFS_I(struct inode *inode)
+static inline struct nfs_inode *NFS_I(const struct inode *inode)
{
return container_of(inode, struct nfs_inode, vfs_inode);
}
-#define NFS_SB(s) ((struct nfs_server *)(s->s_fs_info))
-#define NFS_FH(inode) (&NFS_I(inode)->fh)
-#define NFS_SERVER(inode) (NFS_SB(inode->i_sb))
-#define NFS_CLIENT(inode) (NFS_SERVER(inode)->client)
-#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops)
-#define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf)
-#define NFS_MINATTRTIMEO(inode) \
- (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
- : NFS_SERVER(inode)->acregmin)
-#define NFS_MAXATTRTIMEO(inode) \
- (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
- : NFS_SERVER(inode)->acregmax)
+static inline struct nfs_server *NFS_SB(const struct super_block *s)
+{
+ return (struct nfs_server *)(s->s_fs_info);
+}
+
+static inline struct nfs_fh *NFS_FH(const struct inode *inode)
+{
+ return &NFS_I(inode)->fh;
+}
+
+static inline struct nfs_server *NFS_SERVER(const struct inode *inode)
+{
+ return NFS_SB(inode->i_sb);
+}
+
+static inline struct rpc_clnt *NFS_CLIENT(const struct inode *inode)
+{
+ return NFS_SERVER(inode)->client;
+}
+
+static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
+{
+ return NFS_SERVER(inode)->nfs_client->rpc_ops;
+}
+
+static inline __be32 *NFS_COOKIEVERF(const struct inode *inode)
+{
+ return NFS_I(inode)->cookieverf;
+}
+
+static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
+{
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ return S_ISDIR(inode->i_mode) ? nfss->acdirmin : nfss->acregmin;
+}
-#define NFS_FLAGS(inode) (NFS_I(inode)->flags)
-#define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
+static inline unsigned NFS_MAXATTRTIMEO(const struct inode *inode)
+{
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ return S_ISDIR(inode->i_mode) ? nfss->acdirmax : nfss->acregmax;
+}
-#define NFS_FILEID(inode) (NFS_I(inode)->fileid)
+static inline int NFS_STALE(const struct inode *inode)
+{
+ return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+}
+
+static inline __u64 NFS_FILEID(const struct inode *inode)
+{
+ return NFS_I(inode)->fileid;
+}
+
+static inline void set_nfs_fileid(struct inode *inode, __u64 fileid)
+{
+ NFS_I(inode)->fileid = fileid;
+}
static inline void nfs_mark_for_revalidate(struct inode *inode)
{
static inline int NFS_USE_READDIRPLUS(struct inode *inode)
{
- return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+ return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
}
static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
extern const struct file_operations nfs_dir_operations;
extern struct dentry_operations nfs_dentry_operations;
+extern void nfs_force_lookup_revalidate(struct inode *dir);
extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
extern void nfs_access_zap_cache(struct inode *inode);
#define nfs_wait_event(clnt, wq, condition) \
({ \
- int __retval = 0; \
- if (clnt->cl_intr) { \
- sigset_t oldmask; \
- rpc_clnt_sigmask(clnt, &oldmask); \
- __retval = wait_event_interruptible(wq, condition); \
- rpc_clnt_sigunmask(clnt, &oldmask); \
- } else \
- wait_event(wq, condition); \
+ int __retval = wait_event_killable(wq, condition); \
__retval; \
})
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#define CLONE_IO 0x80000000 /* Clone io context */
/*
* Scheduling policies
#include <linux/proportions.h>
#include <linux/seccomp.h>
#include <linux/rcupdate.h>
-#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/task_io_accounting.h>
#include <linux/kobject.h>
+#include <linux/latencytop.h>
#include <asm/processor.h>
struct exec_domain;
struct futex_pi_state;
+struct robust_list_head;
struct bio;
/*
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
- #define TASK_STOPPED 4
- #define TASK_TRACED 8
+ #define __TASK_STOPPED 4
+ #define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
+ #define TASK_WAKEKILL 128
+
+ /* Convenience macros for the sake of set_task_state */
+ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+ #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
+ #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
+
+ /* Convenience macros for the sake of wake_up */
+ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+ #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+ /* get_task_state() */
+ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
+ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+ __TASK_TRACED)
+
+ #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
+ #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
+ #define task_is_stopped_or_traced(task) \
+ ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+ #define task_contributes_to_load(task) \
+ ((task->state & TASK_UNINTERRUPTIBLE) != 0)
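To make TASK_WAKEKILL concrete, a minimal sketch of the wait pattern this series builds on (simplified from wait_event_killable(); 'condition' is a placeholder):

	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (condition)
			break;
		if (fatal_signal_pending(current))
			break;	/* only SIGKILL gets through */
		schedule();
	}
	__set_current_state(TASK_RUNNING);

The task sleeps as if TASK_UNINTERRUPTIBLE, but TASK_WAKEKILL lets a fatal signal wake it so it can exit instead of hanging forever.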
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
}
#endif
+extern unsigned long rt_needs_cpu(int cpu);
+
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
+extern void hrtick_resched(void);
+
+extern void sched_show_task(struct task_struct *p);
#ifdef CONFIG_DETECT_SOFTLOCKUP
extern void softlockup_tick(void);
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int softlockup_thresh;
+extern unsigned long softlockup_thresh;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
#else
static inline void softlockup_tick(void)
{
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
extern signed long FASTCALL(schedule_timeout(signed long timeout));
extern signed long schedule_timeout_interruptible(signed long timeout);
+ extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
#ifdef CONFIG_FAIR_USER_SCHED
struct task_group *tg;
#ifdef CONFIG_SYSFS
- struct kset kset;
- struct subsys_attribute user_attr;
+ struct kobject kobj;
struct work_struct work;
#endif
#endif
};
-#ifdef CONFIG_FAIR_USER_SCHED
-extern int uids_kobject_init(void);
-#else
-static inline int uids_kobject_init(void) { return 0; }
-#endif
+extern int uids_sysfs_init(void);
extern struct user_struct *find_user(uid_t);
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
void (*yield_task) (struct rq *rq);
+ int (*select_task_rq)(struct task_struct *p, int sync);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
int (*move_one_task) (struct rq *this_rq, int this_cpu,
struct rq *busiest, struct sched_domain *sd,
enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
#endif
void (*set_curr_task) (struct rq *rq);
- void (*task_tick) (struct rq *rq, struct task_struct *p);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_new) (struct rq *rq, struct task_struct *p);
+ void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
+
+ void (*join_domain)(struct rq *rq);
+ void (*leave_domain)(struct rq *rq);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
};
struct load_weight {
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
u64 sleep_start;
u64 sleep_max;
#endif
};
+struct sched_rt_entity {
+ struct list_head run_list;
+ unsigned int time_slice;
+ unsigned long timeout;
+ int nr_cpus_allowed;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_rt_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct rt_rq *rt_rq;
+ /* rq "owned" by this entity/group: */
+ struct rt_rq *my_q;
+#endif
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
#endif
int prio, static_prio, normal_prio;
- struct list_head run_list;
const struct sched_class *sched_class;
struct sched_entity se;
+ struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
- unsigned short ioprio;
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
unsigned int policy;
cpumask_t cpus_allowed;
- unsigned int time_slice;
+
+#ifdef CONFIG_PREEMPT_RCU
+ int rcu_read_lock_nesting;
+ int rcu_flipctr_idx;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+/* hung task detection */
+ unsigned long last_switch_timestamp;
+ unsigned long last_switch_count;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
int make_it_fail;
#endif
struct prop_local_single dirties;
+#ifdef CONFIG_LATENCYTOP
+ int latency_record_count;
+ struct latency_record latency_record[LT_SAVECOUNT];
+#endif
};
/*
*
* set_task_vxid() : assigns a virtual id to a task;
*
- * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
- * the result depends on the namespace and whether the
- * task in question is the namespace's init. e.g. for the
- * namespace's init this will return 0 when called from
- * the namespace of this init, or appropriate id otherwise.
- *
- *
* see also pid_nr() etc in include/linux/pid.h
*/
}
-static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
-}
-
/**
* pid_alive - check that a task structure is not stale
* @p: Task structure to be checked.
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_rt_period;
+extern unsigned int sysctl_sched_rt_ratio;
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+extern unsigned int sysctl_sched_min_bal_int_shares;
+extern unsigned int sysctl_sched_max_bal_int_shares;
+#endif
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-
+
+ extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+
+ static inline int fatal_signal_pending(struct task_struct *p)
+ {
+ return signal_pending(p) && __fatal_signal_pending(p);
+ }
+
static inline int need_resched(void)
{
return unlikely(test_thread_flag(TIF_NEED_RESCHED));
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
-extern int cond_resched(void);
-extern int cond_resched_lock(spinlock_t * lock);
-extern int cond_resched_softirq(void);
-
-/*
- * Does a critical section need to be broken due to another
- * task waiting?:
- */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
-# define need_lockbreak(lock) ((lock)->break_lock)
+#ifdef CONFIG_PREEMPT
+static inline int cond_resched(void)
+{
+ return 0;
+}
#else
-# define need_lockbreak(lock) 0
+extern int _cond_resched(void);
+static inline int cond_resched(void)
+{
+ return _cond_resched();
+}
#endif
+extern int cond_resched_lock(spinlock_t * lock);
+extern int cond_resched_softirq(void);
/*
* Does a critical section need to be broken due to another
- * task waiting or preemption being signalled:
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * but a general need for low latency)
*/
-static inline int lock_need_resched(spinlock_t *lock)
+static inline int spin_needbreak(spinlock_t *lock)
{
- if (need_lockbreak(lock) || need_resched())
- return 1;
+#ifdef CONFIG_PREEMPT
+ return spin_is_contended(lock);
+#else
return 0;
+#endif
}
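Typical use is inside a loop that holds a spinlock for a long stretch (a sketch with a hypothetical queue; cond_resched_lock() itself checks both need_resched() and spin_needbreak()):

	spin_lock(&q->lock);
	while (!list_empty(&q->items)) {
		process_one_item(q);
		/* drop the lock briefly if a waiter or resched is pending */
		cond_resched_lock(&q->lock);
	}
	spin_unlock(&q->lock);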
/*
struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
- cl_intr : 1,/* interruptible */
cl_discrtry : 1,/* disconnect before retry */
cl_autobind : 1;/* use getport() */
struct rpc_rtt * cl_rtt; /* RTO estimator data */
+ const struct rpc_timeout *cl_timeout; /* Timeout strategy */
int cl_nodelen; /* nodename length */
char cl_nodename[UNX_MAXNODENAME];
struct dentry * cl_dentry; /* inode */
struct rpc_clnt * cl_parent; /* Points to parent of clones */
struct rpc_rtt cl_rtt_default;
+ struct rpc_timeout cl_timeout_default;
struct rpc_program * cl_program;
char cl_inline_name[32];
};
struct sockaddr *address;
size_t addrsize;
struct sockaddr *saddress;
- struct rpc_timeout *timeout;
+ const struct rpc_timeout *timeout;
char *servername;
struct rpc_program *program;
u32 version;
/* Values for "flags" field */
#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0)
- #define RPC_CLNT_CREATE_INTR (1UL << 1)
#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2)
#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3)
#define RPC_CLNT_CREATE_NOPING (1UL << 4)
void rpc_release_client(struct rpc_clnt *);
int rpcb_register(u32, u32, int, unsigned short, int *);
-int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
+int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
void rpcb_getport_async(struct rpc_task *);
-void rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
-
+void rpc_call_start(struct rpc_task *);
int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
int flags, const struct rpc_call_ops *tk_ops,
void *calldata);
struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
int flags);
void rpc_restart_call(struct rpc_task *);
-void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
-void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
size_t rpc_max_payload(struct rpc_clnt *);
void rpc_force_rebind(struct rpc_clnt *);
size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
-char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_CLNT_H */
__u8 tk_garb_retry;
__u8 tk_cred_retry;
- unsigned long tk_cookie; /* Cookie for batching tasks */
-
/*
* timeout_fn to be executed by timer bottom half
* callback to be executed after waking up
struct timer_list tk_timer; /* kernel timer */
unsigned long tk_timeout; /* timeout for rpc_sleep() */
unsigned short tk_flags; /* misc flags */
- unsigned char tk_priority : 2;/* Task priority */
unsigned long tk_runstate; /* Task run status */
struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
unsigned long tk_start; /* RPC task init timestamp */
long tk_rtt; /* round-trip time (jiffies) */
+ pid_t tk_owner; /* Process id for batching tasks */
+ unsigned char tk_priority : 2;/* Task priority */
+
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
void (*rpc_release)(void *);
};
+struct rpc_task_setup {
+ struct rpc_task *task;
+ struct rpc_clnt *rpc_client;
+ const struct rpc_message *rpc_message;
+ const struct rpc_call_ops *callback_ops;
+ void *callback_data;
+ unsigned short flags;
+ signed char priority;
+};
/*
* RPC task flags
#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
#define RPC_TASK_KILLED 0x0100 /* task was killed */
#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */
- #define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */
#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC)
#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER)
#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED)
#define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL)
#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT)
- #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
* Note: if you change these, you must also change
* the task initialization definitions below.
*/
-#define RPC_PRIORITY_LOW 0
-#define RPC_PRIORITY_NORMAL 1
-#define RPC_PRIORITY_HIGH 2
-#define RPC_NR_PRIORITY (RPC_PRIORITY_HIGH+1)
+#define RPC_PRIORITY_LOW (-1)
+#define RPC_PRIORITY_NORMAL (0)
+#define RPC_PRIORITY_HIGH (1)
+#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
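Since RPC_PRIORITY_LOW is now negative, code indexing the per-priority task lists has to normalize first; the arithmetic amounts to (the helper name here is illustrative, not the scheduler's):

	/* maps -1..1 onto array index 0..2; RPC_NR_PRIORITY == 3 */
	static inline unsigned int rpc_priority_to_index(signed char priority)
	{
		return priority - RPC_PRIORITY_LOW;
	}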
/*
* RPC synchronization objects
struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
- unsigned long cookie; /* cookie of last task serviced */
+ pid_t owner; /* process id of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
unsigned char priority; /* current priority */
unsigned char count; /* # task groups remaining serviced so far */
* performance of NFS operations such as read/write.
*/
#define RPC_BATCH_COUNT 16
-
-#ifndef RPC_DEBUG
-# define RPC_WAITQ_INIT(var,qname) { \
- .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
- .tasks = { \
- [0] = LIST_HEAD_INIT(var.tasks[0]), \
- [1] = LIST_HEAD_INIT(var.tasks[1]), \
- [2] = LIST_HEAD_INIT(var.tasks[2]), \
- }, \
- }
-#else
-# define RPC_WAITQ_INIT(var,qname) { \
- .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
- .tasks = { \
- [0] = LIST_HEAD_INIT(var.tasks[0]), \
- [1] = LIST_HEAD_INIT(var.tasks[1]), \
- [2] = LIST_HEAD_INIT(var.tasks[2]), \
- }, \
- .name = qname, \
- }
-#endif
-# define RPC_WAITQ(var,qname) struct rpc_wait_queue var = RPC_WAITQ_INIT(var,qname)
-
#define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0)
/*
* Function prototypes
*/
-struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
- const struct rpc_call_ops *ops, void *data);
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
- const struct rpc_call_ops *ops, void *data);
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
- int flags, const struct rpc_call_ops *ops,
- void *data);
+struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
+struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
void rpc_put_task(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
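The replacement pattern for the removed interfaces, condensed from the NFS write-path conversion earlier in this merge (clnt, msg, my_call_ops and data are caller-supplied placeholders; leaving .task NULL asks rpc_run_task() to allocate one):

	struct rpc_task_setup task_setup_data = {
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = &my_call_ops,
		.callback_data = data,
		.flags = RPC_TASK_ASYNC,
		.priority = RPC_PRIORITY_NORMAL,
	};
	struct rpc_task *task = rpc_run_task(&task_setup_data);

	if (!IS_ERR(task))
		rpc_put_task(task);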
void ptrace_untrace(struct task_struct *child)
{
spin_lock(&child->sighand->siglock);
- if (child->state == TASK_TRACED) {
+ if (task_is_traced(child)) {
if (child->signal->flags & SIGNAL_STOP_STOPPED) {
child->state = TASK_STOPPED;
} else {
add_parent(child);
}
- if (child->state == TASK_TRACED)
+ if (task_is_traced(child))
ptrace_untrace(child);
}
&& child->signal != NULL) {
ret = 0;
spin_lock_irq(&child->sighand->siglock);
- if (child->state == TASK_STOPPED) {
+ if (task_is_stopped(child)) {
child->state = TASK_TRACED;
- } else if (child->state != TASK_TRACED && !kill) {
+ } else if (!task_is_traced(child) && !kill) {
ret = -ESRCH;
}
spin_unlock_irq(&child->sighand->siglock);
return ret;
}
-static int may_attach(struct task_struct *task)
+int __ptrace_may_attach(struct task_struct *task)
{
/* May we inspect the given task?
* This check is used both for attaching with ptrace
{
int err;
task_lock(task);
- err = may_attach(task);
+ err = __ptrace_may_attach(task);
task_unlock(task);
return !err;
}
/* the same process cannot be attached many times */
if (task->ptrace & PT_PTRACED)
goto bad;
- retval = may_attach(task);
+ retval = __ptrace_may_attach(task);
if (retval)
goto bad;
return error;
}
+
+#ifdef PTRACE_SINGLESTEP
+#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
+#else
+#define is_singlestep(request) 0
+#endif
+
+#ifdef PTRACE_SINGLEBLOCK
+#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
+#else
+#define is_singleblock(request) 0
+#endif
+
+#ifdef PTRACE_SYSEMU
+#define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
+#else
+#define is_sysemu_singlestep(request) 0
+#endif
+
+static int ptrace_resume(struct task_struct *child, long request, long data)
+{
+ if (!valid_signal(data))
+ return -EIO;
+
+ if (request == PTRACE_SYSCALL)
+ set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
+#ifdef TIF_SYSCALL_EMU
+ if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+
+ if (is_singleblock(request)) {
+ if (unlikely(!arch_has_block_step()))
+ return -EIO;
+ user_enable_block_step(child);
+ } else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+ if (unlikely(!arch_has_single_step()))
+ return -EIO;
+ user_enable_single_step(child);
+ } else {
+ user_disable_single_step(child);
+ }
+
+ child->exit_code = data;
+ wake_up_process(child);
+
+ return 0;
+}
+
int ptrace_request(struct task_struct *child, long request,
long addr, long data)
{
int ret = -EIO;
switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ return generic_ptrace_peekdata(child, addr, data);
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ return generic_ptrace_pokedata(child, addr, data);
+
#ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS:
#endif
case PTRACE_DETACH: /* detach a process that was attached. */
ret = ptrace_detach(child, data);
break;
+
+#ifdef PTRACE_SINGLESTEP
+ case PTRACE_SINGLESTEP:
+#endif
+#ifdef PTRACE_SINGLEBLOCK
+ case PTRACE_SINGLEBLOCK:
+#endif
+#ifdef PTRACE_SYSEMU
+ case PTRACE_SYSEMU:
+ case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+ case PTRACE_SYSCALL:
+ case PTRACE_CONT:
+ return ptrace_resume(child, request, data);
+
+ case PTRACE_KILL:
+ if (child->exit_state) /* already dead */
+ return 0;
+ return ptrace_resume(child, request, SIGKILL);
+
default:
break;
}
lock_kernel();
if (request == PTRACE_TRACEME) {
ret = ptrace_traceme();
+ if (!ret)
+ arch_ptrace_attach(current);
goto out;
}
copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
return (copied == sizeof(data)) ? 0 : -EIO;
}
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_ptrace_request(struct task_struct *child, compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data)
+{
+ compat_ulong_t __user *datap = compat_ptr(data);
+ compat_ulong_t word;
+ int ret;
+
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+ if (ret != sizeof(word))
+ ret = -EIO;
+ else
+ ret = put_user(word, datap);
+ break;
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+ ret = (ret != sizeof(data) ? -EIO : 0);
+ break;
+
+ case PTRACE_GETEVENTMSG:
+ ret = put_user((compat_ulong_t) child->ptrace_message, datap);
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ }
+
+ return ret;
+}
+
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+ compat_long_t addr, compat_long_t data)
+{
+ struct task_struct *child;
+ long ret;
+
+ /*
+ * This lock_kernel fixes a subtle race with suid exec
+ */
+ lock_kernel();
+ if (request == PTRACE_TRACEME) {
+ ret = ptrace_traceme();
+ goto out;
+ }
+
+ child = ptrace_get_task_struct(pid);
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
+ goto out;
+ }
+
+ if (request == PTRACE_ATTACH) {
+ ret = ptrace_attach(child);
+ /*
+ * Some architectures need to do book-keeping after
+ * a ptrace attach.
+ */
+ if (!ret)
+ arch_ptrace_attach(child);
+ goto out_put_task_struct;
+ }
+
+ ret = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (!ret)
+ ret = compat_arch_ptrace(child, request, addr, data);
+
+ out_put_task_struct:
+ put_task_struct(child);
+ out:
+ unlock_kernel();
+ return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
+#endif /* CONFIG_COMPAT */
* by Peter Williams
* 2007-05-06 Interactivity improvements to CFS by Mike Galbraith
* 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri
+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins,
+ * Thomas Gleixner, Mike Kravetz
*/
#include <linux/mm.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>
#include <linux/pagemap.h>
+#include <linux/hrtimer.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
/*
- * Some helpers for converting nanosecond timing to jiffy resolution
+ * Helpers for converting nanosecond timing to jiffy resolution
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
-#define JIFFIES_TO_NS(TIME) ((TIME) * (NSEC_PER_SEC / HZ))
#define NICE_0_LOAD SCHED_LOAD_SCALE
#define NICE_0_SHIFT SCHED_LOAD_SHIFT
struct cfs_rq;
+static LIST_HEAD(task_groups);
+
/* task group related information */
struct task_group {
#ifdef CONFIG_FAIR_CGROUP_SCHED
struct sched_entity **se;
/* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq;
+
+ struct sched_rt_entity **rt_se;
+ struct rt_rq **rt_rq;
+
+ unsigned int rt_ratio;
+
+ /*
+ * The shares assigned to a task group govern how much cpu bandwidth
+ * the group is allocated: the more shares a group has, the more cpu
+ * bandwidth it receives.
+ *
+ * For example, say there are three task groups, A, B and C, which
+ * have been assigned shares 1000, 2000 and 3000 respectively. The
+ * cpu bandwidth the scheduler allocates to task groups A, B and C
+ * should then be:
+ *
+ * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
+ * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
+ * Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
+ *
+ * The weight assigned to a task group's schedulable entities on every
+ * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
+ * group's shares. For example, say task group A has been assigned
+ * shares of 1000 and there are two CPUs in the system. Then,
+ *
+ * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
+ *
+ * Note: it is not necessary that each of a task group's schedulable
+ * entities have the same weight on all CPUs. If the group has 2 of
+ * its tasks on CPU0 and 1 task on CPU1, a better distribution of the
+ * total weight (2 * 1000) could be:
+ *
+ * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
+ * tg_A->se[1]->load.weight = 1/3 * 2000 = 667
+ *
+ * rebalance_shares() is responsible for distributing a task group's
+ * shares like this among the group's schedulable entities across
+ * cpus.
+ */
unsigned long shares;
- /* spinlock to serialize modification to shares */
- spinlock_t lock;
+
struct rcu_head rcu;
+ struct list_head list;
};
/* Default task group's sched entity on each cpu */
/* Default task group's cfs_rq on each cpu */
static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
+
static struct sched_entity *init_sched_entity_p[NR_CPUS];
static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
+static struct rt_rq *init_rt_rq_p[NR_CPUS];
+
+/* task_group_mutex serializes add/remove of task groups and also changes to
+ * a task group's cpu shares.
+ */
+static DEFINE_MUTEX(task_group_mutex);
+
+/* doms_cur_mutex serializes access to doms_cur[] array */
+static DEFINE_MUTEX(doms_cur_mutex);
+
+#ifdef CONFIG_SMP
+/* kernel thread that runs rebalance_shares() periodically */
+static struct task_struct *lb_monitor_task;
+static int load_balance_monitor(void *unused);
+#endif
+
+static void set_se_shares(struct sched_entity *se, unsigned long shares);
+
/* Default task group.
* Every task in system belong to this group at bootup.
*/
struct task_group init_task_group = {
- .se = init_sched_entity_p,
+ .se = init_sched_entity_p,
.cfs_rq = init_cfs_rq_p,
+
+ .rt_se = init_sched_rt_entity_p,
+ .rt_rq = init_rt_rq_p,
};
#ifdef CONFIG_FAIR_USER_SCHED
-# define INIT_TASK_GRP_LOAD 2*NICE_0_LOAD
+# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
#else
-# define INIT_TASK_GRP_LOAD NICE_0_LOAD
+# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
#endif
-static int init_task_group_load = INIT_TASK_GRP_LOAD;
+#define MIN_GROUP_SHARES 2
+
+static int init_task_group_load = INIT_TASK_GROUP_LOAD;
/* return group to which a task belongs */
static inline struct task_group *task_group(struct task_struct *p)
}
/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu)
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
p->se.parent = task_group(p)->se[cpu];
+
+ p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+ p->rt.parent = task_group(p)->rt_se[cpu];
+}
+
+static inline void lock_task_group_list(void)
+{
+ mutex_lock(&task_group_mutex);
+}
+
+static inline void unlock_task_group_list(void)
+{
+ mutex_unlock(&task_group_mutex);
+}
+
+static inline void lock_doms_cur(void)
+{
+ mutex_lock(&doms_cur_mutex);
+}
+
+static inline void unlock_doms_cur(void)
+{
+ mutex_unlock(&doms_cur_mutex);
}
#else
-static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline void lock_task_group_list(void) { }
+static inline void unlock_task_group_list(void) { }
+static inline void lock_doms_cur(void) { }
+static inline void unlock_doms_cur(void) { }
#endif /* CONFIG_FAIR_GROUP_SCHED */
/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
struct rt_prio_array active;
- int rt_load_balance_idx;
- struct list_head *rt_load_balance_head, *rt_load_balance_curr;
+ unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+ int highest_prio; /* highest queued rt task prio */
+#endif
+#ifdef CONFIG_SMP
+ unsigned long rt_nr_migratory;
+ int overloaded;
+#endif
+ int rt_throttled;
+ u64 rt_time;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct rq *rq;
+ struct list_head leaf_rt_rq_list;
+ struct task_group *tg;
+ struct sched_rt_entity *rt_se;
+#endif
+};
+
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+ atomic_t refcount;
+ cpumask_t span;
+ cpumask_t online;
+
+ /*
+ * The "RT overload" flag: it gets set if a CPU has more than
+ * one runnable RT task.
+ */
+ cpumask_t rto_mask;
+ atomic_t rto_count;
};
+/*
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+static struct root_domain def_root_domain;
+
+#endif
+
/*
* This is the main, per-CPU runqueue data structure.
*
u64 nr_switches;
struct cfs_rq cfs;
+ struct rt_rq rt;
+ u64 rt_period_expire;
+ int rt_throttled;
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
struct list_head leaf_cfs_rq_list;
+ struct list_head leaf_rt_rq_list;
#endif
- struct rt_rq rt;
/*
* This is part of a global counter where only the total sum
u64 clock, prev_clock_raw;
s64 clock_max_delta;
- unsigned int clock_warps, clock_overflows;
+ unsigned int clock_warps, clock_overflows, clock_underflows;
u64 idle_clock;
unsigned int clock_deep_idle_events;
u64 tick_timestamp;
atomic_t nr_iowait;
#ifdef CONFIG_SMP
+ struct root_domain *rd;
struct sched_domain *sd;
/* For active balancing */
struct list_head migration_queue;
#endif
+#ifdef CONFIG_SCHED_HRTICK
+ unsigned long hrtick_flags;
+ ktime_t hrtick_expire;
+ struct hrtimer hrtick_timer;
+#endif
+
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
struct sched_info rq_sched_info;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static DEFINE_MUTEX(sched_hotcpu_mutex);
static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
{
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+unsigned long rt_needs_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ u64 delta;
+
+ if (!rq->rt_throttled)
+ return 0;
+
+ if (rq->clock > rq->rt_period_expire)
+ return 1;
+
+ delta = rq->rt_period_expire - rq->clock;
+ do_div(delta, NSEC_PER_SEC / HZ);
+
+ return (unsigned long)delta;
+}
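A worked example of the return value (hypothetical numbers): with HZ=1000 a jiffy is NSEC_PER_SEC/HZ = 1,000,000 ns, so a throttled runqueue whose period expires 4,500,000 ns from now reports 4 jiffies; 0 means the runqueue is not throttled, and 1 means the period has already expired.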
+
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
SCHED_FEAT_START_DEBIT = 4,
SCHED_FEAT_TREE_AVG = 8,
SCHED_FEAT_APPROX_AVG = 16,
+ SCHED_FEAT_HRTICK = 32,
+ SCHED_FEAT_DOUBLE_TICK = 64,
};
const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_WAKEUP_PREEMPT * 1 |
SCHED_FEAT_START_DEBIT * 1 |
SCHED_FEAT_TREE_AVG * 0 |
- SCHED_FEAT_APPROX_AVG * 0;
+ SCHED_FEAT_APPROX_AVG * 0 |
+ SCHED_FEAT_HRTICK * 1 |
+ SCHED_FEAT_DOUBLE_TICK * 0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
*/
const_debug unsigned int sysctl_sched_nr_migrate = 32;
+/*
+ * period over which we measure -rt task cpu usage, in milliseconds.
+ * default: 1000ms (1s)
+ */
+const_debug unsigned int sysctl_sched_rt_period = 1000;
+
+#define SCHED_RT_FRAC_SHIFT 16
+#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT)
+
+/*
+ * ratio of time -rt tasks may consume.
+ * default: 95%
+ */
+const_debug unsigned int sysctl_sched_rt_ratio = 62259;
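The constant is a fixed-point fraction in units of SCHED_RT_FRAC = 1 << 16, so 95% works out to 0.95 * 65536 ~= 62259. Code applying the ratio would scale by the same shift, along these lines (a sketch, not the scheduler's actual throttling check):

	/* cpu time -rt tasks may consume per period, in the period's units */
	u64 rt_limit = ((u64)period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;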
+
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
local_irq_save(flags);
rq = cpu_rq(cpu);
- update_rq_clock(rq);
+ /*
+ * Only call sched_clock() if the scheduler has already been
+ * initialized (some code might call cpu_clock() very early):
+ */
+ if (rq->idle)
+ update_rq_clock(rq);
now = rq->clock;
local_irq_restore(flags);
# define finish_arch_switch(prev) do { } while (0)
#endif
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+ return rq->curr == p;
+}
+
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline int task_running(struct rq *rq, struct task_struct *p)
{
- return rq->curr == p;
+ return task_current(rq, p);
}
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
#ifdef CONFIG_SMP
return p->oncpu;
#else
- return rq->curr == p;
+ return task_current(rq, p);
#endif
}
rq->prev_clock_raw = now;
rq->clock += delta_ns;
spin_unlock(&rq->lock);
+ touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+static void __resched_task(struct task_struct *p, int tif_bit);
+
+static inline void resched_task(struct task_struct *p)
+{
+ __resched_task(p, TIF_NEED_RESCHED);
+}
+
+#ifdef CONFIG_SCHED_HRTICK
+/*
+ * Use HR-timers to deliver accurate preemption points.
+ *
+ * It's all a bit involved since we cannot program an hrtimer while
+ * holding the rq->lock. So what we do is store a state in rq->hrtick_*
+ * and ask for a reschedule event.
+ *
+ * When we get rescheduled we reprogram the hrtick_timer outside of the
+ * rq->lock.
+ */
+static inline void resched_hrt(struct task_struct *p)
+{
+ __resched_task(p, TIF_HRTICK_RESCHED);
+}
+
+static inline void resched_rq(struct rq *rq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ resched_task(rq->curr);
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+enum {
+ HRTICK_SET, /* re-program hrtick_timer */
+ HRTICK_RESET, /* not a new slice */
+};
+
+/*
+ * Use hrtick when:
+ * - enabled by features
+ * - hrtimer is actually high res
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+ if (!sched_feat(HRTICK))
+ return 0;
+ return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay, int reset)
+{
+ assert_spin_locked(&rq->lock);
+
+ /*
+ * preempt at: now + delay
+ */
+ rq->hrtick_expire =
+ ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
+ /*
+ * indicate we need to program the timer
+ */
+ __set_bit(HRTICK_SET, &rq->hrtick_flags);
+ if (reset)
+ __set_bit(HRTICK_RESET, &rq->hrtick_flags);
+
+ /*
+ * New slices are called from the schedule path and don't need a
+ * forced reschedule.
+ */
+ if (!reset)
+ resched_hrt(rq->curr);
+}
+
+static void hrtick_clear(struct rq *rq)
+{
+ if (hrtimer_active(&rq->hrtick_timer))
+ hrtimer_cancel(&rq->hrtick_timer);
+}
+
+/*
+ * Update the timer from the possible pending state.
+ */
+static void hrtick_set(struct rq *rq)
+{
+ ktime_t time;
+ int set, reset;
+ unsigned long flags;
+
+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+
+ spin_lock_irqsave(&rq->lock, flags);
+ set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
+ reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
+ time = rq->hrtick_expire;
+ clear_thread_flag(TIF_HRTICK_RESCHED);
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ if (set) {
+ hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
+ if (reset && !hrtimer_active(&rq->hrtick_timer))
+ resched_rq(rq);
+ } else
+ hrtick_clear(rq);
+}
+
+/*
+ * High-resolution timer tick.
+ * Runs from hardirq context with interrupts disabled.
+ */
+static enum hrtimer_restart hrtick(struct hrtimer *timer)
+{
+ struct rq *rq = container_of(timer, struct rq, hrtick_timer);
+
+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+
+ spin_lock(&rq->lock);
+ __update_rq_clock(rq);
+ rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+ spin_unlock(&rq->lock);
+
+ return HRTIMER_NORESTART;
+}
+
+static inline void init_rq_hrtick(struct rq *rq)
+{
+ rq->hrtick_flags = 0;
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rq->hrtick_timer.function = hrtick;
+ rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+}
+
+void hrtick_resched(void)
+{
+ struct rq *rq;
+ unsigned long flags;
+
+ if (!test_thread_flag(TIF_HRTICK_RESCHED))
+ return;
+
+ local_irq_save(flags);
+ rq = cpu_rq(smp_processor_id());
+ hrtick_set(rq);
+ local_irq_restore(flags);
+}
+#else
+static inline void hrtick_clear(struct rq *rq)
+{
+}
+
+static inline void hrtick_set(struct rq *rq)
+{
+}
+
+static inline void init_rq_hrtick(struct rq *rq)
+{
+}
+
+void hrtick_resched(void)
+{
+}
+#endif
+
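A sketch of how a scheduling class might arm the high-resolution preemption tick for the remainder of the current slice; the helper name and the slice argument are assumptions, not the actual per-class code:

/* Illustration only: arm the hrtick for the rest of a task's slice. */
static void hrtick_arm_slice(struct rq *rq, u64 slice_remaining_ns)
{
	if (!hrtick_enabled(rq))
		return;
	/* rq->lock is held; hrtick_set() programs the timer later. */
	hrtick_start(rq, slice_remaining_ns, 1);
}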
/*
* resched_task - mark a task 'to be rescheduled now'.
*
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif
-static void resched_task(struct task_struct *p)
+static void __resched_task(struct task_struct *p, int tif_bit)
{
int cpu;
assert_spin_locked(&task_rq(p)->lock);
- if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+ if (unlikely(test_tsk_thread_flag(p, tif_bit)))
return;
- set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+ set_tsk_thread_flag(p, tif_bit);
cpu = task_cpu(p);
if (cpu == smp_processor_id())
spin_unlock_irqrestore(&rq->lock, flags);
}
#else
-static inline void resched_task(struct task_struct *p)
+static void __resched_task(struct task_struct *p, int tif_bit)
{
assert_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched(p);
+ set_tsk_thread_flag(p, tif_bit);
}
#endif
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
#endif
+static inline void inc_cpu_load(struct rq *rq, unsigned long load)
+{
+ update_load_add(&rq->load, load);
+}
+
+static inline void dec_cpu_load(struct rq *rq, unsigned long load)
+{
+ update_load_sub(&rq->load, load);
+}
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static unsigned long cpu_avg_load_per_task(int cpu);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+#endif /* CONFIG_SMP */
+
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
#define sched_class_highest (&rt_sched_class)
-/*
- * Update delta_exec, delta_fair fields for rq.
- *
- * delta_fair clock advances at a rate inversely proportional to
- * total load (rq->load.weight) on the runqueue, while
- * delta_exec advances at the same rate as wall-clock (provided
- * cpu is not idle).
- *
- * delta_exec / delta_fair is a measure of the (smoothened) load on this
- * runqueue over any given interval. This (smoothened) load is used
- * during load balance.
- *
- * This function is called /before/ updating rq->load
- * and when switching tasks.
- */
-static inline void inc_load(struct rq *rq, const struct task_struct *p)
-{
- update_load_add(&rq->load, p->se.load.weight);
-}
-
-static inline void dec_load(struct rq *rq, const struct task_struct *p)
-{
- update_load_sub(&rq->load, p->se.load.weight);
-}
-
-static void inc_nr_running(struct task_struct *p, struct rq *rq)
+static void inc_nr_running(struct rq *rq)
{
rq->nr_running++;
- inc_load(rq, p);
}
-static void dec_nr_running(struct task_struct *p, struct rq *rq)
+static void dec_nr_running(struct rq *rq)
{
rq->nr_running--;
- dec_load(rq, p);
}
static void set_load_weight(struct task_struct *p)
*/
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
- if (p->state == TASK_UNINTERRUPTIBLE)
+ if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
enqueue_task(rq, p, wakeup);
- inc_nr_running(p, rq);
+ inc_nr_running(rq);
}
/*
*/
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
{
- if (p->state == TASK_UNINTERRUPTIBLE)
+ if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
dequeue_task(rq, p, sleep);
- dec_nr_running(p, rq);
+ dec_nr_running(rq);
}
/**
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
- set_task_cfs_rq(p, cpu);
+ set_task_rq(p, cpu);
#ifdef CONFIG_SMP
/*
* After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
#endif
}
+static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+{
+ if (prev_class != p->sched_class) {
+ if (prev_class->switched_from)
+ prev_class->switched_from(rq, p, running);
+ p->sched_class->switched_to(rq, p, running);
+ } else
+ p->sched_class->prio_changed(rq, p, oldprio, running);
+}
+
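For reference, a minimal sketch of the kind of hook check_class_changed() invokes; this example class is hypothetical:

/* Illustration only: preempt the current task when a class takes over. */
static void example_switched_to(struct rq *rq, struct task_struct *p,
				int running)
{
	if (!running && p->prio < rq->curr->prio)
		resched_task(rq->curr);
}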
#ifdef CONFIG_SMP
/*
* Is this task likely cache-hot:
*/
-static inline int
+static int
task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
{
s64 delta;
/*
* Return the average load per task on the cpu's run queue
*/
-static inline unsigned long cpu_avg_load_per_task(int cpu)
+static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long total = weighted_cpuload(cpu);
#endif /* CONFIG_SMP */
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, struct task_struct *p)
-{
- cpumask_t tmp;
- struct sched_domain *sd;
- int i;
-
- /*
- * If it is idle, then it is the best cpu to run this task.
- *
- * This cpu is also the best, if it has more than one task already.
- * Siblings must be also busy(in most cases) as they didn't already
- * pickup the extra load from this cpu and hence we need not check
- * sibling runqueue info. This will avoid the checks and cache miss
- * penalities associated with that.
- */
- if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
- return cpu;
-
- for_each_domain(cpu, sd) {
- if (sd->flags & SD_WAKE_IDLE) {
- cpus_and(tmp, sd->span, p->cpus_allowed);
- for_each_cpu_mask(i, tmp) {
- if (idle_cpu(i)) {
- if (i != task_cpu(p)) {
- schedstat_inc(p,
- se.nr_wakeups_idle);
- }
- return i;
- }
- }
- } else {
- break;
- }
- }
- return cpu;
-}
-#else
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
- return cpu;
-}
-#endif
-
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
unsigned long flags;
long old_state;
struct rq *rq;
-#ifdef CONFIG_SMP
- struct sched_domain *sd, *this_sd = NULL;
- unsigned long load, this_load;
- int new_cpu;
-#endif
rq = task_rq_lock(p, &flags);
old_state = p->state;
if (unlikely(task_running(rq, p)))
goto out_activate;
- new_cpu = cpu;
-
- schedstat_inc(rq, ttwu_count);
- if (cpu == this_cpu) {
- schedstat_inc(rq, ttwu_local);
- goto out_set_cpu;
- }
-
- for_each_domain(this_cpu, sd) {
- if (cpu_isset(cpu, sd->span)) {
- schedstat_inc(sd, ttwu_wake_remote);
- this_sd = sd;
- break;
- }
- }
-
- if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
- goto out_set_cpu;
-
- /*
- * Check for affine wakeup and passive balancing possibilities.
- */
- if (this_sd) {
- int idx = this_sd->wake_idx;
- unsigned int imbalance;
-
- imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
- load = source_load(cpu, idx);
- this_load = target_load(this_cpu, idx);
-
- new_cpu = this_cpu; /* Wake to this CPU if we can */
-
- if (this_sd->flags & SD_WAKE_AFFINE) {
- unsigned long tl = this_load;
- unsigned long tl_per_task;
-
- /*
- * Attract cache-cold tasks on sync wakeups:
- */
- if (sync && !task_hot(p, rq->clock, this_sd))
- goto out_set_cpu;
-
- schedstat_inc(p, se.nr_wakeups_affine_attempts);
- tl_per_task = cpu_avg_load_per_task(this_cpu);
-
- /*
- * If sync wakeup then subtract the (maximum possible)
- * effect of the currently running task from the load
- * of the current CPU:
- */
- if (sync)
- tl -= current->se.load.weight;
-
- if ((tl <= load &&
- tl + target_load(cpu, idx) <= tl_per_task) ||
- 100*(tl + p->se.load.weight) <= imbalance*load) {
- /*
- * This domain has SD_WAKE_AFFINE and
- * p is cache cold in this domain, and
- * there is no bad imbalance.
- */
- schedstat_inc(this_sd, ttwu_move_affine);
- schedstat_inc(p, se.nr_wakeups_affine);
- goto out_set_cpu;
- }
- }
-
- /*
- * Start passive balancing when half the imbalance_pct
- * limit is reached.
- */
- if (this_sd->flags & SD_WAKE_BALANCE) {
- if (imbalance*this_load <= 100*load) {
- schedstat_inc(this_sd, ttwu_move_balance);
- schedstat_inc(p, se.nr_wakeups_passive);
- goto out_set_cpu;
- }
- }
- }
-
- new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
-out_set_cpu:
- new_cpu = wake_idle(new_cpu, p);
- if (new_cpu != cpu) {
- set_task_cpu(p, new_cpu);
+ cpu = p->sched_class->select_task_rq(p, sync);
+ if (cpu != orig_cpu) {
+ set_task_cpu(p, cpu);
task_rq_unlock(rq, &flags);
/* might preempt at this point */
rq = task_rq_lock(p, &flags);
cpu = task_cpu(p);
}
+#ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+ if (cpu == this_cpu)
+ schedstat_inc(rq, ttwu_local);
+ else {
+ struct sched_domain *sd;
+ for_each_domain(this_cpu, sd) {
+ if (cpu_isset(cpu, sd->span)) {
+ schedstat_inc(sd, ttwu_wake_remote);
+ break;
+ }
+ }
+ }
+#endif
+
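The wake-up CPU choice now lives behind sched_class::select_task_rq(). A trivial sketch of the hook's shape (the real policies live in the per-class files; this version is an illustration only):

/* Illustration only: keep the task on the cpu it last ran on. */
static int example_select_task_rq(struct task_struct *p, int sync)
{
	return task_cpu(p);
}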
out_activate:
#endif /* CONFIG_SMP */
schedstat_inc(p, se.nr_wakeups);
out_running:
p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+#endif
out:
task_rq_unlock(rq, &flags);
int fastcall wake_up_process(struct task_struct *p)
{
- return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
- TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
+ return try_to_wake_up(p, TASK_ALL, 0);
}
EXPORT_SYMBOL(wake_up_process);
p->se.wait_max = 0;
#endif
- INIT_LIST_HEAD(&p->run_list);
+ INIT_LIST_HEAD(&p->rt.run_list);
p->se.on_rq = 0;
#ifdef CONFIG_PREEMPT_NOTIFIERS
* management (if any):
*/
p->sched_class->task_new(rq, p);
- inc_nr_running(p, rq);
+ inc_nr_running(rq);
}
check_preempt_curr(rq, p);
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+#endif
task_rq_unlock(rq, &flags);
}
prev_state = prev->state;
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
+#ifdef CONFIG_SMP
+ if (current->sched_class->post_schedule)
+ current->sched_class->post_schedule(rq);
+#endif
+
fire_sched_in_preempt_notifiers(current);
if (mm)
mmdrop(mm);
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
*/
-static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
__releases(this_rq->lock)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
+ int ret = 0;
+
if (unlikely(!irqs_disabled())) {
/* printk() doesn't work well under rq->lock */
spin_unlock(&this_rq->lock);
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
+ ret = 1;
} else
spin_lock(&busiest->lock);
}
+ return ret;
}
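Because double_lock_balance() may drop and retake this_rq->lock, callers must treat a non-zero return as "state may have changed" and revalidate. A hedged sketch of a hypothetical caller:

/* Illustration only: revalidate after the lock may have been dropped. */
static void example_pull_task(struct rq *this_rq, struct rq *busiest,
			      struct task_struct *p)
{
	if (double_lock_balance(this_rq, busiest)) {
		/* this_rq->lock was released: p may have migrated away */
		if (task_rq(p) != busiest)
			goto out;
	}
	/* ... safe to pull p from busiest to this_rq here ... */
out:
	spin_unlock(&busiest->lock);
}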
/*
rq = task_rq_lock(p, &flags);
ns = p->se.sum_exec_runtime;
- if (rq->curr == p) {
+ if (task_current(rq, p)) {
update_rq_clock(rq);
delta_exec = rq->clock - p->se.exec_start;
if ((s64)delta_exec > 0)
/*
* Let rq->clock advance by at least TICK_NSEC:
*/
- if (unlikely(rq->clock < next_tick))
+ if (unlikely(rq->clock < next_tick)) {
rq->clock = next_tick;
+ rq->clock_underflows++;
+ }
rq->tick_timestamp = rq->clock;
update_cpu_load(rq);
- if (curr != rq->idle) /* FIXME: needed? */
- curr->sched_class->task_tick(rq, curr);
+ curr->sched_class->task_tick(rq, curr, 0);
+ update_sched_rt_period(rq);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
schedule_debug(prev);
+ hrtick_clear(rq);
+
/*
* Do the rq-clock update outside the rq lock:
*/
switch_count = &prev->nvcsw;
}
+#ifdef CONFIG_SMP
+ if (prev->sched_class->pre_schedule)
+ prev->sched_class->pre_schedule(rq, prev);
+#endif
+
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
++*switch_count;
context_switch(rq, prev, next); /* unlocks the rq */
+ /*
+ * the context switch might have flipped the stack from under
+ * us, hence refresh the local variables.
+ */
+ cpu = smp_processor_id();
+ rq = cpu_rq(cpu);
} else
spin_unlock_irq(&rq->lock);
- if (unlikely(reacquire_kernel_lock(current) < 0)) {
- cpu = smp_processor_id();
- rq = cpu_rq(cpu);
+ hrtick_set(rq);
+
+ if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;
- }
+
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
asmlinkage void __sched preempt_schedule(void)
{
struct thread_info *ti = current_thread_info();
-#ifdef CONFIG_PREEMPT_BKL
struct task_struct *task = current;
int saved_lock_depth;
-#endif
+
/*
* If there is a non-zero preempt_count or interrupts are disabled,
* we do not want to preempt the current task. Just return..
* clear ->lock_depth so that schedule() doesn't
* auto-release the semaphore:
*/
-#ifdef CONFIG_PREEMPT_BKL
saved_lock_depth = task->lock_depth;
task->lock_depth = -1;
-#endif
schedule();
-#ifdef CONFIG_PREEMPT_BKL
task->lock_depth = saved_lock_depth;
-#endif
sub_preempt_count(PREEMPT_ACTIVE);
/*
asmlinkage void __sched preempt_schedule_irq(void)
{
struct thread_info *ti = current_thread_info();
-#ifdef CONFIG_PREEMPT_BKL
struct task_struct *task = current;
int saved_lock_depth;
-#endif
+
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
* clear ->lock_depth so that schedule() doesn't
* auto-release the semaphore:
*/
-#ifdef CONFIG_PREEMPT_BKL
saved_lock_depth = task->lock_depth;
task->lock_depth = -1;
-#endif
local_irq_enable();
schedule();
local_irq_disable();
-#ifdef CONFIG_PREEMPT_BKL
task->lock_depth = saved_lock_depth;
-#endif
sub_preempt_count(PREEMPT_ACTIVE);
/*
spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
- 1, 0, NULL);
+ __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
- 0, 0, NULL);
+ __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
wait.flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue_tail(&x->wait, &wait);
do {
- if (state == TASK_INTERRUPTIBLE &&
- signal_pending(current)) {
+ if ((state == TASK_INTERRUPTIBLE &&
+ signal_pending(current)) ||
+ (state == TASK_KILLABLE &&
+ fatal_signal_pending(current))) {
__remove_wait_queue(&x->wait, &wait);
return -ERESTARTSYS;
}
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
+int __sched wait_for_completion_killable(struct completion *x)
+{
+ long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
+ if (t == -ERESTARTSYS)
+ return t;
+ return 0;
+}
+EXPORT_SYMBOL(wait_for_completion_killable);
+
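Typical use of the killable variant, sketched for a hypothetical caller: the sleep behaves like an uninterruptible wait except that a fatal signal breaks it.

/* Illustration only: wait for an event, but let SIGKILL through. */
static int example_wait_for_fw(struct completion *fw_done)
{
	int err = wait_for_completion_killable(fw_done);

	if (err)	/* -ERESTARTSYS: the task got a fatal signal */
		return err;
	/* ... the event has completed ... */
	return 0;
}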
static long __sched
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
{
unsigned long flags;
int oldprio, on_rq, running;
struct rq *rq;
+ const struct sched_class *prev_class = p->sched_class;
BUG_ON(prio < 0 || prio > MAX_PRIO);
oldprio = p->prio;
on_rq = p->se.on_rq;
- running = task_running(rq, p);
+ running = task_current(rq, p);
if (on_rq) {
dequeue_task(rq, p, 0);
if (running)
if (on_rq) {
if (running)
p->sched_class->set_curr_task(rq);
+
enqueue_task(rq, p, 0);
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else {
- check_preempt_curr(rq, p);
- }
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
}
task_rq_unlock(rq, &flags);
}
goto out_unlock;
}
on_rq = p->se.on_rq;
- if (on_rq) {
+ if (on_rq)
dequeue_task(rq, p, 0);
- dec_load(rq, p);
- }
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p);
if (on_rq) {
enqueue_task(rq, p, 0);
- inc_load(rq, p);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
{
int retval, oldprio, oldpolicy = -1, on_rq, running;
unsigned long flags;
+ const struct sched_class *prev_class = p->sched_class;
struct rq *rq;
/* may grab non-irq protected spin_locks */
}
update_rq_clock(rq);
on_rq = p->se.on_rq;
- running = task_running(rq, p);
+ running = task_current(rq, p);
if (on_rq) {
deactivate_task(rq, p, 0);
if (running)
if (on_rq) {
if (running)
p->sched_class->set_curr_task(rq);
+
activate_task(rq, p, 0);
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else {
- check_preempt_curr(rq, p);
- }
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
}
__task_rq_unlock(rq);
spin_unlock_irqrestore(&p->pi_lock, flags);
struct task_struct *p;
int retval;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
read_lock(&tasklist_lock);
p = find_process_by_pid(pid);
if (!p) {
read_unlock(&tasklist_lock);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return -ESRCH;
}
}
out_unlock:
put_task_struct(p);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return retval;
}
struct task_struct *p;
int retval;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
read_lock(&tasklist_lock);
retval = -ESRCH;
out_unlock:
read_unlock(&tasklist_lock);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return retval;
}
} while (need_resched());
}
-int __sched cond_resched(void)
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
+int __sched _cond_resched(void)
{
if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
system_state == SYSTEM_RUNNING) {
}
return 0;
}
-EXPORT_SYMBOL(cond_resched);
+EXPORT_SYMBOL(_cond_resched);
+#endif
/*
* cond_resched_lock() - if a reschedule is pending, drop the given lock,
*/
int cond_resched_lock(spinlock_t *lock)
{
+ int resched = need_resched() && system_state == SYSTEM_RUNNING;
int ret = 0;
- if (need_lockbreak(lock)) {
+ if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- cpu_relax();
- ret = 1;
- spin_lock(lock);
- }
- if (need_resched() && system_state == SYSTEM_RUNNING) {
- spin_release(&lock->dep_map, 1, _THIS_IP_);
- _raw_spin_unlock(lock);
- preempt_enable_no_resched();
- __cond_resched();
+ if (resched && need_resched())
+ __cond_resched();
+ else
+ cpu_relax();
ret = 1;
spin_lock(lock);
}
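A sketch of the usual pattern, assuming a hypothetical list walk: cond_resched_lock() now drops the lock both when a reschedule is due and when another CPU spins on the lock, so the caller must revalidate whenever it returns 1.

/* Illustration only: a long walk that yields the lock when contended. */
static void example_walk(spinlock_t *lock, struct list_head *head)
{
	struct list_head *pos;

restart:
	spin_lock(lock);
	list_for_each(pos, head) {
		/* ... process the entry at pos ... */
		if (cond_resched_lock(lock)) {
			/* lock was dropped: pos may be stale */
			spin_unlock(lock);
			goto restart;
		}
	}
	spin_unlock(lock);
}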
static const char stat_nam[] = "RSDTtZX";
-static void show_task(struct task_struct *p)
+void sched_show_task(struct task_struct *p)
{
unsigned long free = 0;
unsigned state;
}
#endif
printk(KERN_CONT "%5lu %5d %6d\n", free,
- task_pid_nr(p), task_pid_nr(p->parent));
+ task_pid_nr(p), task_pid_nr(p->real_parent));
- if (state != TASK_RUNNING)
- show_stack(p, NULL);
+ show_stack(p, NULL);
}
void show_state_filter(unsigned long state_filter)
*/
touch_nmi_watchdog();
if (!state_filter || (p->state & state_filter))
- show_task(p);
+ sched_show_task(p);
} while_each_thread(g, p);
touch_all_softlockup_watchdogs();
spin_unlock_irqrestore(&rq->lock, flags);
/* Set the preempt count _outside_ the spinlocks! */
-#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
- task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
-#else
task_thread_info(idle)->preempt_count = 0;
-#endif
+
/*
* The idle tasks have their own, simple scheduling class:
*/
goto out;
}
- p->cpus_allowed = new_mask;
+ if (p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, &new_mask);
+ else {
+ p->cpus_allowed = new_mask;
+ p->rt.nr_cpus_allowed = cpus_weight(new_mask);
+ }
+
/* Can the task run on the task's current CPU? If so, we're done */
if (cpu_isset(task_cpu(p), new_mask))
goto out;
struct rq *rq;
switch (action) {
- case CPU_LOCK_ACQUIRE:
- mutex_lock(&sched_hotcpu_mutex);
- break;
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
case CPU_ONLINE_FROZEN:
/* Strictly unnecessary, as first user will wake it. */
wake_up_process(cpu_rq(cpu)->migration_thread);
+
+ /* Update our root-domain */
+ rq = cpu_rq(cpu);
+ spin_lock_irqsave(&rq->lock, flags);
+ if (rq->rd) {
+ BUG_ON(!cpu_isset(cpu, rq->rd->span));
+ cpu_set(cpu, rq->rd->online);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
break;
#ifdef CONFIG_HOTPLUG_CPU
}
spin_unlock_irq(&rq->lock);
break;
-#endif
- case CPU_LOCK_RELEASE:
- mutex_unlock(&sched_hotcpu_mutex);
+
+ case CPU_DOWN_PREPARE:
+ /* Update our root-domain */
+ rq = cpu_rq(cpu);
+ spin_lock_irqsave(&rq->lock, flags);
+ if (rq->rd) {
+ BUG_ON(!cpu_isset(cpu, rq->rd->span));
+ cpu_clear(cpu, rq->rd->online);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
break;
+#endif
}
return NOTIFY_OK;
}
return 1;
}
+static void rq_attach_root(struct rq *rq, struct root_domain *rd)
+{
+ unsigned long flags;
+ const struct sched_class *class;
+
+ spin_lock_irqsave(&rq->lock, flags);
+
+ if (rq->rd) {
+ struct root_domain *old_rd = rq->rd;
+
+ for (class = sched_class_highest; class; class = class->next) {
+ if (class->leave_domain)
+ class->leave_domain(rq);
+ }
+
+ cpu_clear(rq->cpu, old_rd->span);
+ cpu_clear(rq->cpu, old_rd->online);
+
+ if (atomic_dec_and_test(&old_rd->refcount))
+ kfree(old_rd);
+ }
+
+ atomic_inc(&rd->refcount);
+ rq->rd = rd;
+
+ cpu_set(rq->cpu, rd->span);
+ if (cpu_isset(rq->cpu, cpu_online_map))
+ cpu_set(rq->cpu, rd->online);
+
+ for (class = sched_class_highest; class; class = class->next) {
+ if (class->join_domain)
+ class->join_domain(rq);
+ }
+
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void init_rootdomain(struct root_domain *rd)
+{
+ memset(rd, 0, sizeof(*rd));
+
+ cpus_clear(rd->span);
+ cpus_clear(rd->online);
+}
+
+static void init_defrootdomain(void)
+{
+ init_rootdomain(&def_root_domain);
+ atomic_set(&def_root_domain.refcount, 1);
+}
+
+static struct root_domain *alloc_rootdomain(void)
+{
+ struct root_domain *rd;
+
+ rd = kmalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return NULL;
+
+ init_rootdomain(rd);
+
+ return rd;
+}
+
/*
- * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
+ * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
* hold the hotplug lock.
*/
-static void cpu_attach_domain(struct sched_domain *sd, int cpu)
+static void
+cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct sched_domain *tmp;
sched_domain_debug(sd, cpu);
+ rq_attach_root(rq, rd);
rcu_assign_pointer(rq->sd, sd);
}
static int build_sched_domains(const cpumask_t *cpu_map)
{
int i;
+ struct root_domain *rd;
#ifdef CONFIG_NUMA
struct sched_group **sched_group_nodes = NULL;
int sd_allnodes = 0;
sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif
+ rd = alloc_rootdomain();
+ if (!rd) {
+ printk(KERN_WARNING "Cannot alloc root domain\n");
+ return -ENOMEM;
+ }
+
/*
* Set up domains for cpus specified by the cpu_map.
*/
#else
sd = &per_cpu(phys_domains, i);
#endif
- cpu_attach_domain(sd, i);
+ cpu_attach_domain(sd, rd, i);
}
return 0;
unregister_sched_domain_sysctl();
for_each_cpu_mask(i, *cpu_map)
- cpu_attach_domain(NULL, i);
+ cpu_attach_domain(NULL, &def_root_domain, i);
synchronize_sched();
arch_destroy_sched_domains(cpu_map);
}
{
int i, j;
+ lock_doms_cur();
+
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
ndoms_cur = ndoms_new;
register_sched_domain_sysctl();
+
+ unlock_doms_cur();
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
{
int err;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
detach_destroy_domains(&cpu_online_map);
err = arch_init_sched_domains(&cpu_online_map);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return err;
}
{
cpumask_t non_isolated_cpus;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
arch_init_sched_domains(&cpu_online_map);
cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
if (cpus_empty(non_isolated_cpus))
cpu_set(smp_processor_id(), non_isolated_cpus);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();
sched_init_granularity();
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (nr_cpu_ids == 1)
+ return;
+
+ lb_monitor_task = kthread_create(load_balance_monitor, NULL,
+ "group_balance");
+ if (!IS_ERR(lb_monitor_task)) {
+ lb_monitor_task->flags |= PF_NOFREEZE;
+ wake_up_process(lb_monitor_task);
+ } else {
+ printk(KERN_ERR "Could not create load balance monitor thread"
+ "(error = %ld) \n", PTR_ERR(lb_monitor_task));
+ }
+#endif
}
#else
void __init sched_init_smp(void)
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
}
+static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+ struct rt_prio_array *array;
+ int i;
+
+ array = &rt_rq->active;
+ for (i = 0; i < MAX_RT_PRIO; i++) {
+ INIT_LIST_HEAD(array->queue + i);
+ __clear_bit(i, array->bitmap);
+ }
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+ rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
+#ifdef CONFIG_SMP
+ rt_rq->rt_nr_migratory = 0;
+ rt_rq->overloaded = 0;
+#endif
+
+ rt_rq->rt_time = 0;
+ rt_rq->rt_throttled = 0;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ rt_rq->rq = rq;
+#endif
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
+ struct cfs_rq *cfs_rq, struct sched_entity *se,
+ int cpu, int add)
+{
+ tg->cfs_rq[cpu] = cfs_rq;
+ init_cfs_rq(cfs_rq, rq);
+ cfs_rq->tg = tg;
+ if (add)
+ list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+
+ tg->se[cpu] = se;
+ se->cfs_rq = &rq->cfs;
+ se->my_q = cfs_rq;
+ se->load.weight = tg->shares;
+ se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
+ se->parent = NULL;
+}
+
+static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
+ struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
+ int cpu, int add)
+{
+ tg->rt_rq[cpu] = rt_rq;
+ init_rt_rq(rt_rq, rq);
+ rt_rq->tg = tg;
+ rt_rq->rt_se = rt_se;
+ if (add)
+ list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+
+ tg->rt_se[cpu] = rt_se;
+ rt_se->rt_rq = &rq->rt;
+ rt_se->my_q = rt_rq;
+ rt_se->parent = NULL;
+ INIT_LIST_HEAD(&rt_se->run_list);
+}
+#endif
+
void __init sched_init(void)
{
int highest_cpu = 0;
int i, j;
+#ifdef CONFIG_SMP
+ init_defrootdomain();
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ list_add(&init_task_group.list, &task_groups);
+#endif
+
for_each_possible_cpu(i) {
- struct rt_prio_array *array;
struct rq *rq;
rq = cpu_rq(i);
rq->nr_running = 0;
rq->clock = 1;
init_cfs_rq(&rq->cfs, rq);
+ init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
- INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
- {
- struct cfs_rq *cfs_rq = &per_cpu(init_cfs_rq, i);
- struct sched_entity *se =
- &per_cpu(init_sched_entity, i);
-
- init_cfs_rq_p[i] = cfs_rq;
- init_cfs_rq(cfs_rq, rq);
- cfs_rq->tg = &init_task_group;
- list_add(&cfs_rq->leaf_cfs_rq_list,
- &rq->leaf_cfs_rq_list);
-
- init_sched_entity_p[i] = se;
- se->cfs_rq = &rq->cfs;
- se->my_q = cfs_rq;
- se->load.weight = init_task_group_load;
- se->load.inv_weight =
- div64_64(1ULL<<32, init_task_group_load);
- se->parent = NULL;
- }
init_task_group.shares = init_task_group_load;
- spin_lock_init(&init_task_group.lock);
+ INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+ init_tg_cfs_entry(rq, &init_task_group,
+ &per_cpu(init_cfs_rq, i),
+ &per_cpu(init_sched_entity, i), i, 1);
+
+ init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+ INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
+ init_tg_rt_entry(rq, &init_task_group,
+ &per_cpu(init_rt_rq, i),
+ &per_cpu(init_sched_rt_entity, i), i, 1);
#endif
+ rq->rt_period_expire = 0;
+ rq->rt_throttled = 0;
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
#ifdef CONFIG_SMP
rq->sd = NULL;
+ rq->rd = NULL;
rq->active_balance = 0;
rq->next_balance = jiffies;
rq->push_cpu = 0;
rq->cpu = i;
rq->migration_thread = NULL;
INIT_LIST_HEAD(&rq->migration_queue);
+ rq_attach_root(rq, &def_root_domain);
#endif
+ init_rq_hrtick(rq);
atomic_set(&rq->nr_iowait, 0);
-
- array = &rq->rt.active;
- for (j = 0; j < MAX_RT_PRIO; j++) {
- INIT_LIST_HEAD(array->queue + j);
- __clear_bit(j, array->bitmap);
- }
highest_cpu = i;
- /* delimiter for bitsearch: */
- __set_bit(MAX_RT_PRIO, array->bitmap);
}
set_load_weight(&init_task);
#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_SMP
+/*
+ * distribute shares of all task groups among their schedulable entities,
+ * to reflect load distribution across cpus.
+ */
+static int rebalance_shares(struct sched_domain *sd, int this_cpu)
+{
+ struct cfs_rq *cfs_rq;
+ struct rq *rq = cpu_rq(this_cpu);
+ cpumask_t sdspan = sd->span;
+ int balanced = 1;
+
+ /* Walk through all the task groups that we have */
+ for_each_leaf_cfs_rq(rq, cfs_rq) {
+ int i;
+ unsigned long total_load = 0, total_shares;
+ struct task_group *tg = cfs_rq->tg;
+
+ /* Gather total task load of this group across cpus */
+ for_each_cpu_mask(i, sdspan)
+ total_load += tg->cfs_rq[i]->load.weight;
+
+ /* Nothing to do if this group has no load */
+ if (!total_load)
+ continue;
+
+ /*
+ * tg->shares represents the number of cpu shares the task group
+ * is eligible to hold on a single cpu. On N cpus, it is
+ * eligible to hold (N * tg->shares) cpu shares in total.
+ */
+ total_shares = tg->shares * cpus_weight(sdspan);
+
+ /*
+ * redistribute total_shares across cpus as per the task load
+ * distribution.
+ */
+ for_each_cpu_mask(i, sdspan) {
+ unsigned long local_load, local_shares;
+
+ local_load = tg->cfs_rq[i]->load.weight;
+ local_shares = (local_load * total_shares) / total_load;
+ if (!local_shares)
+ local_shares = MIN_GROUP_SHARES;
+ if (local_shares == tg->se[i]->load.weight)
+ continue;
+
+ spin_lock_irq(&cpu_rq(i)->lock);
+ set_se_shares(tg->se[i], local_shares);
+ spin_unlock_irq(&cpu_rq(i)->lock);
+ balanced = 0;
+ }
+ }
+
+ return balanced;
+}
+
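A worked example of the proportional redistribution above, using assumed numbers:

/*
 * Illustration only: tg->shares = 1024 on a 2-cpu domain gives
 * total_shares = 2048. If the group's load splits 300/100 across
 * the cpus, the per-cpu entity shares become
 *	2048 * 300 / 400 = 1536 and 2048 * 100 / 400 = 512,
 * matching the load split while preserving the group's total weight.
 */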
+/*
+ * How frequently should we rebalance_shares() across cpus?
+ *
+ * The more frequently we rebalance shares, the more accurate the fairness
+ * of cpu bandwidth distribution between task groups is. However, higher
+ * frequency also implies increased scheduling overhead.
+ *
+ * sysctl_sched_min_bal_int_shares represents the minimum interval between
+ * consecutive calls to rebalance_shares() in the same sched domain.
+ *
+ * sysctl_sched_max_bal_int_shares represents the maximum interval between
+ * consecutive calls to rebalance_shares() in the same sched domain.
+ *
+ * These settings allow for the appropriate trade-off between accuracy of
+ * fairness and the associated overhead.
+ */
+
+/* default: 8ms, units: milliseconds */
+const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
+
+/* default: 128ms, units: milliseconds */
+const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
+
+/* kernel thread that runs rebalance_shares() periodically */
+static int load_balance_monitor(void *unused)
+{
+ unsigned int timeout = sysctl_sched_min_bal_int_shares;
+ struct sched_param schedparm;
+ int ret;
+
+ /*
+ * We don't want this thread's execution to be limited by the shares
+ * assigned to the default group (init_task_group). Hence make it run
+ * as a SCHED_RR RT task at the lowest priority.
+ */
+ schedparm.sched_priority = 1;
+ ret = sched_setscheduler(current, SCHED_RR, &schedparm);
+ if (ret)
+ printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
+ " monitor thread (error = %d) \n", ret);
+
+ while (!kthread_should_stop()) {
+ int i, cpu, balanced = 1;
+
+ /* Prevent cpus going down or coming up */
+ get_online_cpus();
+ /* lock out changes to the doms_cur[] array */
+ lock_doms_cur();
+ /*
+ * Enter a rcu read-side critical section to safely walk rq->sd
+ * chain on various cpus and to walk task group list
+ * (rq->leaf_cfs_rq_list) in rebalance_shares().
+ */
+ rcu_read_lock();
+
+ for (i = 0; i < ndoms_cur; i++) {
+ cpumask_t cpumap = doms_cur[i];
+ struct sched_domain *sd = NULL, *sd_prev = NULL;
+
+ cpu = first_cpu(cpumap);
+
+ /* Find the highest domain at which to balance shares */
+ for_each_domain(cpu, sd) {
+ if (!(sd->flags & SD_LOAD_BALANCE))
+ continue;
+ sd_prev = sd;
+ }
+
+ sd = sd_prev;
+ /* sd == NULL? No load balancing required in this domain */
+ if (!sd)
+ continue;
+
+ balanced &= rebalance_shares(sd, cpu);
+ }
+
+ rcu_read_unlock();
+
+ unlock_doms_cur();
+ put_online_cpus();
+
+ if (!balanced)
+ timeout = sysctl_sched_min_bal_int_shares;
+ else if (timeout < sysctl_sched_max_bal_int_shares)
+ timeout *= 2;
+
+ msleep_interruptible(timeout);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+static void free_sched_group(struct task_group *tg)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (tg->cfs_rq)
+ kfree(tg->cfs_rq[i]);
+ if (tg->se)
+ kfree(tg->se[i]);
+ if (tg->rt_rq)
+ kfree(tg->rt_rq[i]);
+ if (tg->rt_se)
+ kfree(tg->rt_se[i]);
+ }
+
+ kfree(tg->cfs_rq);
+ kfree(tg->se);
+ kfree(tg->rt_rq);
+ kfree(tg->rt_se);
+ kfree(tg);
+}
+
/* allocate runqueue etc for a new task group */
struct task_group *sched_create_group(void)
{
struct task_group *tg;
struct cfs_rq *cfs_rq;
struct sched_entity *se;
+ struct rt_rq *rt_rq;
+ struct sched_rt_entity *rt_se;
struct rq *rq;
int i;
tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
if (!tg->se)
goto err;
+ tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+ if (!tg->rt_rq)
+ goto err;
+ tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+ if (!tg->rt_se)
+ goto err;
+
+ tg->shares = NICE_0_LOAD;
+ tg->rt_ratio = 0; /* XXX */
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
- cpu_to_node(i));
+ cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
if (!cfs_rq)
goto err;
- se = kmalloc_node(sizeof(struct sched_entity), GFP_KERNEL,
- cpu_to_node(i));
+ se = kmalloc_node(sizeof(struct sched_entity),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
if (!se)
goto err;
- memset(cfs_rq, 0, sizeof(struct cfs_rq));
- memset(se, 0, sizeof(struct sched_entity));
+ rt_rq = kmalloc_node(sizeof(struct rt_rq),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ if (!rt_rq)
+ goto err;
- tg->cfs_rq[i] = cfs_rq;
- init_cfs_rq(cfs_rq, rq);
- cfs_rq->tg = tg;
+ rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ if (!rt_se)
+ goto err;
- tg->se[i] = se;
- se->cfs_rq = &rq->cfs;
- se->my_q = cfs_rq;
- se->load.weight = NICE_0_LOAD;
- se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
- se->parent = NULL;
+ init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+ init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
}
+ lock_task_group_list();
for_each_possible_cpu(i) {
rq = cpu_rq(i);
cfs_rq = tg->cfs_rq[i];
list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+ rt_rq = tg->rt_rq[i];
+ list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
}
-
- tg->shares = NICE_0_LOAD;
- spin_lock_init(&tg->lock);
+ list_add_rcu(&tg->list, &task_groups);
+ unlock_task_group_list();
return tg;
err:
- for_each_possible_cpu(i) {
- if (tg->cfs_rq)
- kfree(tg->cfs_rq[i]);
- if (tg->se)
- kfree(tg->se[i]);
- }
- kfree(tg->cfs_rq);
- kfree(tg->se);
- kfree(tg);
-
+ free_sched_group(tg);
return ERR_PTR(-ENOMEM);
}
/* rcu callback to free various structures associated with a task group */
-static void free_sched_group(struct rcu_head *rhp)
+static void free_sched_group_rcu(struct rcu_head *rhp)
{
- struct task_group *tg = container_of(rhp, struct task_group, rcu);
- struct cfs_rq *cfs_rq;
- struct sched_entity *se;
- int i;
-
/* now it should be safe to free those cfs_rqs */
- for_each_possible_cpu(i) {
- cfs_rq = tg->cfs_rq[i];
- kfree(cfs_rq);
-
- se = tg->se[i];
- kfree(se);
- }
-
- kfree(tg->cfs_rq);
- kfree(tg->se);
- kfree(tg);
+ free_sched_group(container_of(rhp, struct task_group, rcu));
}
/* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg)
{
struct cfs_rq *cfs_rq = NULL;
+ struct rt_rq *rt_rq = NULL;
int i;
+ lock_task_group_list();
for_each_possible_cpu(i) {
cfs_rq = tg->cfs_rq[i];
list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+ rt_rq = tg->rt_rq[i];
+ list_del_rcu(&rt_rq->leaf_rt_rq_list);
}
+ list_del_rcu(&tg->list);
+ unlock_task_group_list();
BUG_ON(!cfs_rq);
/* wait for possible concurrent references to cfs_rqs complete */
- call_rcu(&tg->rcu, free_sched_group);
+ call_rcu(&tg->rcu, free_sched_group_rcu);
}
/* change task's runqueue when it moves between groups.
rq = task_rq_lock(tsk, &flags);
- if (tsk->sched_class != &fair_sched_class) {
- set_task_cfs_rq(tsk, task_cpu(tsk));
- goto done;
- }
-
update_rq_clock(rq);
- running = task_running(rq, tsk);
+ running = task_current(rq, tsk);
on_rq = tsk->se.on_rq;
if (on_rq) {
tsk->sched_class->put_prev_task(rq, tsk);
}
- set_task_cfs_rq(tsk, task_cpu(tsk));
+ set_task_rq(tsk, task_cpu(tsk));
if (on_rq) {
if (unlikely(running))
enqueue_task(rq, tsk, 0);
}
-done:
task_rq_unlock(rq, &flags);
}
+/* rq->lock to be locked by caller */
static void set_se_shares(struct sched_entity *se, unsigned long shares)
{
struct cfs_rq *cfs_rq = se->cfs_rq;
struct rq *rq = cfs_rq->rq;
int on_rq;
- spin_lock_irq(&rq->lock);
+ if (!shares)
+ shares = MIN_GROUP_SHARES;
on_rq = se->on_rq;
- if (on_rq)
+ if (on_rq) {
dequeue_entity(cfs_rq, se, 0);
+ dec_cpu_load(rq, se->load.weight);
+ }
se->load.weight = shares;
se->load.inv_weight = div64_64((1ULL<<32), shares);
- if (on_rq)
+ if (on_rq) {
enqueue_entity(cfs_rq, se, 0);
-
- spin_unlock_irq(&rq->lock);
+ inc_cpu_load(rq, se->load.weight);
+ }
}
int sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;
+ struct cfs_rq *cfs_rq;
+ struct rq *rq;
- spin_lock(&tg->lock);
+ lock_task_group_list();
if (tg->shares == shares)
goto done;
+ if (shares < MIN_GROUP_SHARES)
+ shares = MIN_GROUP_SHARES;
+
+ /*
+ * Prevent any load balance activity (rebalance_shares,
+ * load_balance_fair) from referring to this group first,
+ * by taking it off the rq->leaf_cfs_rq_list on each cpu.
+ */
+ for_each_possible_cpu(i) {
+ cfs_rq = tg->cfs_rq[i];
+ list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+ }
+
+ /* wait for any ongoing reference to this group to finish */
+ synchronize_sched();
+
+ /*
+ * Now we are free to modify the group's share on each cpu
+ * w/o tripping rebalance_shares or load_balance_fair.
+ */
tg->shares = shares;
- for_each_possible_cpu(i)
+ for_each_possible_cpu(i) {
+ spin_lock_irq(&cpu_rq(i)->lock);
set_se_shares(tg->se[i], shares);
+ spin_unlock_irq(&cpu_rq(i)->lock);
+ }
+ /*
+ * Enable load balance activity on this group, by inserting it back on
+ * each cpu's rq->leaf_cfs_rq_list.
+ */
+ for_each_possible_cpu(i) {
+ rq = cpu_rq(i);
+ cfs_rq = tg->cfs_rq[i];
+ list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+ }
done:
- spin_unlock(&tg->lock);
+ unlock_task_group_list();
return 0;
}
return tg->shares;
}
+/*
+ * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ */
+int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+{
+ struct task_group *tgi;
+ unsigned long total = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(tgi, &task_groups, list)
+ total += tgi->rt_ratio;
+ rcu_read_unlock();
+
+ if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
+ return -EINVAL;
+
+ tg->rt_ratio = rt_ratio;
+ return 0;
+}
+
+unsigned long sched_group_rt_ratio(struct task_group *tg)
+{
+ return tg->rt_ratio;
+}
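A worked example of the admission check, with assumed values:

/*
 * Illustration only: with sysctl_sched_rt_ratio = 62259 (~95%), two
 * groups holding rt_ratio 30000 and 20000 leave 12259 (~18.7%) of the
 * fixed-point budget; asking for a third group's ratio above 12259
 * makes total + rt_ratio exceed the sysctl and returns -EINVAL.
 */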
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_CGROUP_SCHED
return (u64) tg->shares;
}
+static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
+ u64 rt_ratio_val)
+{
+ return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+}
+
+static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct task_group *tg = cgroup_tg(cgrp);
+
+ return (u64) tg->rt_ratio;
+}
+
static struct cftype cpu_files[] = {
{
.name = "shares",
.read_uint = cpu_shares_read_uint,
.write_uint = cpu_shares_write_uint,
},
+ {
+ .name = "rt_ratio",
+ .read_uint = cpu_rt_ratio_read_uint,
+ .write_uint = cpu_rt_ratio_write_uint,
+ },
};
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
set_tsk_thread_flag(t, TIF_SIGPENDING);
/*
- * For SIGKILL, we want to wake it up in the stopped/traced case.
- * We don't check t->state here because there is a race with it
+ * For SIGKILL, we want to wake it up in the stopped/traced/killable
+ * case. We don't check t->state here because there is a race with it
* executing on another processor and just now entering stopped state.
* By using wake_up_state, we ensure the process will wake up and
* handle its death signal.
*/
mask = TASK_INTERRUPTIBLE;
if (resume)
- mask |= TASK_STOPPED | TASK_TRACED;
+ mask |= TASK_WAKEKILL;
if (!wake_up_state(t, mask))
kick_process(t);
}
* Wake up the stopped thread _after_ setting
* TIF_SIGPENDING
*/
- state = TASK_STOPPED;
+ state = __TASK_STOPPED;
if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
state |= TASK_INTERRUPTIBLE;
current->comm, task_pid_nr(current), signr);
#if defined(__i386__) && !defined(__arch_um__)
- printk("code at %08lx: ", regs->eip);
+ printk("code at %08lx: ", regs->ip);
{
int i;
for (i = 0; i < 16; i++) {
unsigned char insn;
- __get_user(insn, (unsigned char *)(regs->eip + i));
+ __get_user(insn, (unsigned char *)(regs->ip + i));
printk("%02x ", insn);
}
}
return 0;
if (sig == SIGKILL)
return 1;
- if (p->state & (TASK_STOPPED | TASK_TRACED))
+ if (task_is_stopped_or_traced(p))
return 0;
return task_curr(p) || !signal_pending(p);
}
}
}
+int fastcall __fatal_signal_pending(struct task_struct *tsk)
+{
+ return sigismember(&tsk->pending.signal, SIGKILL);
+}
+
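__fatal_signal_pending() underpins fatal_signal_pending(), which killable sleeps test. A minimal sketch of a killable poll loop, with a hypothetical readiness helper:

/* Illustration only: an otherwise-uninterruptible wait that honours
 * SIGKILL. example_ready() is a stand-in for the real condition. */
static int example_killable_poll(int (*example_ready)(void))
{
	while (!example_ready()) {
		if (fatal_signal_pending(current))
			return -EINTR;
		schedule_timeout_killable(HZ / 10);
	}
	return 0;
}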
/*
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
*/
BUG_ON(sig == -1);
/* do_notify_parent_cldstop should have been called instead. */
- BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
+ BUG_ON(task_is_stopped_or_traced(tsk));
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
* so this check has no races.
*/
if (!t->exit_state &&
- !(t->state & (TASK_STOPPED|TASK_TRACED))) {
+ !task_is_stopped_or_traced(t)) {
stop_count++;
signal_wake_up(t, 0);
}
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
-typedef struct tvec_s {
+struct tvec {
struct list_head vec[TVN_SIZE];
-} tvec_t;
+};
-typedef struct tvec_root_s {
+struct tvec_root {
struct list_head vec[TVR_SIZE];
-} tvec_root_t;
+};
-struct tvec_t_base_s {
+struct tvec_base {
spinlock_t lock;
struct timer_list *running_timer;
unsigned long timer_jiffies;
- tvec_root_t tv1;
- tvec_t tv2;
- tvec_t tv3;
- tvec_t tv4;
- tvec_t tv5;
+ struct tvec_root tv1;
+ struct tvec tv2;
+ struct tvec tv3;
+ struct tvec tv4;
+ struct tvec tv5;
} ____cacheline_aligned;
-typedef struct tvec_t_base_s tvec_base_t;
-
-tvec_base_t boot_tvec_bases;
+struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
/*
- * Note that all tvec_bases is 2 byte aligned and lower bit of
+ * Note that all tvec_bases are 2-byte aligned and the lower bit of
* base in timer_list is guaranteed to be zero. Use the LSB for
* the new flag to indicate whether the timer is deferrable
*/
#define TBASE_DEFERRABLE_FLAG (0x1)
/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}
-static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
- return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+ return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}
static inline void timer_set_deferrable(struct timer_list *timer)
{
- timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+ timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
TBASE_DEFERRABLE_FLAG));
}
static inline void
-timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
- timer->base = (tvec_base_t *)((unsigned long)(new_base) |
+ timer->base = (struct tvec_base *)((unsigned long)(new_base) |
tbase_get_deferrable(timer->base));
}
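The deferrable flag rides in the pointer itself; a short illustration of the tagging, with an assumed address:

/*
 * Illustration only: ____cacheline_aligned keeps bit 0 of every
 * tvec_base address clear, so it is free to carry the flag:
 *
 *	base        = 0xffff880012345600	(bit 0 clear)
 *	timer->base = base | TBASE_DEFERRABLE_FLAG
 *	tbase_get_base() masks the flag off before dereferencing.
 */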
EXPORT_SYMBOL_GPL(round_jiffies_relative);
-static inline void set_running_timer(tvec_base_t *base,
+static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
{
#ifdef CONFIG_SMP
#endif
}
-static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
* possible to set timer->base = NULL and drop the lock: the timer remains
* locked.
*/
-static tvec_base_t *lock_timer_base(struct timer_list *timer,
+static struct tvec_base *lock_timer_base(struct timer_list *timer,
unsigned long *flags)
__acquires(timer->base->lock)
{
- tvec_base_t *base;
+ struct tvec_base *base;
for (;;) {
- tvec_base_t *prelock_base = timer->base;
+ struct tvec_base *prelock_base = timer->base;
base = tbase_get_base(prelock_base);
if (likely(base != NULL)) {
spin_lock_irqsave(&base->lock, *flags);
int __mod_timer(struct timer_list *timer, unsigned long expires)
{
- tvec_base_t *base, *new_base;
+ struct tvec_base *base, *new_base;
unsigned long flags;
int ret = 0;
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
- tvec_base_t *base = per_cpu(tvec_bases, cpu);
+ struct tvec_base *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
timer_stats_timer_set_start_info(timer);
*/
int del_timer(struct timer_list *timer)
{
- tvec_base_t *base;
+ struct tvec_base *base;
unsigned long flags;
int ret = 0;
*/
int try_to_del_timer_sync(struct timer_list *timer)
{
- tvec_base_t *base;
+ struct tvec_base *base;
unsigned long flags;
int ret = -1;
EXPORT_SYMBOL(del_timer_sync);
#endif
-static int cascade(tvec_base_t *base, tvec_t *tv, int index)
+static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
/* cascade all the timers from tv up one level */
struct timer_list *timer, *tmp;
* This function cascades all vectors and executes all expired timer
* vectors.
*/
-static inline void __run_timers(tvec_base_t *base)
+static inline void __run_timers(struct tvec_base *base)
{
struct timer_list *timer;
int preempt_count = preempt_count();
fn(data);
if (preempt_count != preempt_count()) {
- printk(KERN_WARNING "huh, entered %p "
+ printk(KERN_ERR "huh, entered %p "
"with preempt_count %08x, exited"
" with %08x?\n",
fn, preempt_count,
* is used on S/390 to stop all activity when a cpu is idle.
* This function needs to be called with interrupts disabled.
*/
-static unsigned long __next_timer_interrupt(tvec_base_t *base)
+static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
unsigned long timer_jiffies = base->timer_jiffies;
unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
int index, slot, array, found = 0;
struct timer_list *nte;
- tvec_t *varray[4];
+ struct tvec *varray[4];
/* Look for timer events in tv1. */
index = slot = timer_jiffies & TVR_MASK;
varray[3] = &base->tv5;
for (array = 0; array < 4; array++) {
- tvec_t *varp = varray[array];
+ struct tvec *varp = varray[array];
index = slot = timer_jiffies & TVN_MASK;
do {
*/
unsigned long get_next_timer_interrupt(unsigned long now)
{
- tvec_base_t *base = __get_cpu_var(tvec_bases);
+ struct tvec_base *base = __get_cpu_var(tvec_bases);
unsigned long expires;
spin_lock(&base->lock);
*/
static void run_timer_softirq(struct softirq_action *h)
{
- tvec_base_t *base = __get_cpu_var(tvec_bases);
+ struct tvec_base *base = __get_cpu_var(tvec_bases);
- hrtimer_run_queues();
+ hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
*/
void run_local_timers(void)
{
+ hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
softlockup_tick();
}
int pid;
rcu_read_lock();
- pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
+ pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns);
rcu_read_unlock();
return pid;
}
EXPORT_SYMBOL(schedule_timeout_interruptible);
+signed long __sched schedule_timeout_killable(signed long timeout)
+{
+ __set_current_state(TASK_KILLABLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_killable);
+
signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
__set_current_state(TASK_UNINTERRUPTIBLE);
*/
static struct lock_class_key base_lock_keys[NR_CPUS];
-static int __devinit init_timers_cpu(int cpu)
+static int __cpuinit init_timers_cpu(int cpu)
{
int j;
- tvec_base_t *base;
- static char __devinitdata tvec_base_done[NR_CPUS];
+ struct tvec_base *base;
+ static char __cpuinitdata tvec_base_done[NR_CPUS];
if (!tvec_base_done[cpu]) {
static char boot_done;
}
#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
struct timer_list *timer;
}
}
-static void __devinit migrate_timers(int cpu)
+static void __cpuinit migrate_timers(int cpu)
{
- tvec_base_t *old_base;
- tvec_base_t *new_base;
+ struct tvec_base *old_base;
+ struct tvec_base *new_base;
int i;
BUG_ON(cpu_online(cpu));
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
BUG_ON(page_mapped(page));
+
+ /*
+ * Some filesystems seem to re-dirty the page even after
+ * the VM has canceled the dirty bit (eg ext3 journaling).
+ *
+ * Fix it up by doing a final dirty accounting check after
+ * having removed the page entirely.
+ */
+ if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
+ dec_zone_page_state(page, NR_FILE_DIRTY);
+ dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ }
}
void remove_from_page_cache(struct page *page)
return 0;
}
+static int sync_page_killable(void *word)
+{
+ sync_page(word);
+ return fatal_signal_pending(current) ? -EINTR : 0;
+}
+
/**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
}
EXPORT_SYMBOL(__lock_page);
+int fastcall __lock_page_killable(struct page *page)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+
+ return __wait_on_bit_lock(page_waitqueue(page), &wait,
+ sync_page_killable, TASK_KILLABLE);
+}
+
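A sketch of the calling convention, assuming a hypothetical reader: a fatal signal aborts the wait and the caller maps the error as it sees fit (do_generic_mapping_read below turns it into -EIO).

/* Illustration only: lock a page but give up on SIGKILL. */
static int example_read_locked(struct page *page)
{
	if (lock_page_killable(page))
		return -EINTR;	/* fatal signal while waiting */
	/* ... page is locked here ... */
	unlock_page(page);
	return 0;
}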
/*
* Variant of lock_page that does not require the caller to hold a reference
* on the page's mapping.
page_not_up_to_date:
/* Get exclusive access to the page ... */
- lock_page(page);
+ if (lock_page_killable(page))
+ goto readpage_eio;
/* Did it get truncated before we got the lock? */
if (!page->mapping) {
}
if (!PageUptodate(page)) {
- lock_page(page);
+ if (lock_page_killable(page))
+ goto readpage_eio;
if (!PageUptodate(page)) {
if (page->mapping == NULL) {
/*
goto find_page;
}
unlock_page(page);
- error = -EIO;
shrink_readahead_size_eio(filp, ra);
- goto readpage_error;
+ goto readpage_eio;
}
unlock_page(page);
}
goto page_ok;
+readpage_eio:
+ error = -EIO;
readpage_error:
/* UHHUH! A synchronous read error occurred. Report it */
desc->error = error;
spin_unlock(&rpc_authflavor_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_register);
int
rpcauth_unregister(const struct rpc_authops *ops)
spin_unlock(&rpc_authflavor_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_unregister);
struct rpc_auth *
rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
out:
return auth;
}
+EXPORT_SYMBOL_GPL(rpcauth_create);
void
rpcauth_release(struct rpc_auth *auth)
auth->au_credcache = new;
return 0;
}
+EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
/*
* Destroy a list of credentials
kfree(cache);
}
}
+EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
/*
* Remove stale credentials. Avoid sleeping inside the loop.
out:
return cred;
}
+EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int flags)
put_group_info(acred.group_info);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
#endif
cred->cr_uid = acred->uid;
}
-EXPORT_SYMBOL(rpcauth_init_cred);
+EXPORT_SYMBOL_GPL(rpcauth_init_cred);
struct rpc_cred *
rpcauth_bindcred(struct rpc_task *task)
.group_info = current->group_info,
};
struct rpc_cred *ret;
- sigset_t oldset;
int flags = 0;
dprintk("RPC: %5u looking up %s cred\n",
get_group_info(acred.group_info);
if (task->tk_flags & RPC_TASK_ROOTCREDS)
flags |= RPCAUTH_LOOKUP_ROOTCREDS;
- rpc_clnt_sigmask(task->tk_client, &oldset);
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
- rpc_clnt_sigunmask(task->tk_client, &oldset);
if (!IS_ERR(ret))
task->tk_msg.rpc_cred = ret;
else
out_destroy:
cred->cr_ops->crdestroy(cred);
}
+EXPORT_SYMBOL_GPL(put_rpccred);
void
rpcauth_unbindcred(struct rpc_task *task)
#include <linux/smp_lock.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>
+#include <linux/in6.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
}
}
-static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
+static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
{
+ struct rpc_program *program = args->program;
struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
struct rpc_auth *auth;
/* sanity check the name before trying to print it */
err = -EINVAL;
- len = strlen(servname);
+ len = strlen(args->servername);
if (len > RPC_MAXNETNAMELEN)
goto out_no_rpciod;
len++;
dprintk("RPC: creating %s client for %s (xprt %p)\n",
- program->name, servname, xprt);
+ program->name, args->servername, xprt);
err = rpciod_up();
if (err)
err = -EINVAL;
if (!xprt)
goto out_no_xprt;
- if (vers >= program->nrvers || !(version = program->version[vers]))
+
+ if (args->version >= program->nrvers)
+ goto out_err;
+ version = program->version[args->version];
+ if (version == NULL)
goto out_err;
err = -ENOMEM;
clnt->cl_server = clnt->cl_inline_name;
if (len > sizeof(clnt->cl_inline_name)) {
char *buf = kmalloc(len, GFP_KERNEL);
- if (buf != 0)
+ if (buf != NULL)
clnt->cl_server = buf;
else
len = sizeof(clnt->cl_inline_name);
}
- strlcpy(clnt->cl_server, servname, len);
+ strlcpy(clnt->cl_server, args->servername, len);
clnt->cl_xprt = xprt;
clnt->cl_procinfo = version->procs;
if (!xprt_bound(clnt->cl_xprt))
clnt->cl_autobind = 1;
+ clnt->cl_timeout = xprt->timeout;
+ if (args->timeout != NULL) {
+ memcpy(&clnt->cl_timeout_default, args->timeout,
+ sizeof(clnt->cl_timeout_default));
+ clnt->cl_timeout = &clnt->cl_timeout_default;
+ }
+
clnt->cl_rtt = &clnt->cl_rtt_default;
- rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+ rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
kref_init(&clnt->cl_kref);
if (err < 0)
goto out_no_path;
- auth = rpcauth_create(flavor, clnt);
+ auth = rpcauth_create(args->authflavor, clnt);
if (IS_ERR(auth)) {
printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
- flavor);
+ args->authflavor);
err = PTR_ERR(auth);
goto out_no_auth;
}
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
- .timeout = args->timeout
};
- char servername[20];
+ char servername[48];
- xprt = xprt_create_transport(&xprtargs);
- if (IS_ERR(xprt))
- return (struct rpc_clnt *)xprt;
* up a string representation of the passed-in address.
*/
if (args->servername == NULL) {
- struct sockaddr_in *addr =
- (struct sockaddr_in *) args->address;
- snprintf(servername, sizeof(servername), NIPQUAD_FMT,
- NIPQUAD(addr->sin_addr.s_addr));
+ servername[0] = '\0';
+ switch (args->address->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin =
+ (struct sockaddr_in *)args->address;
+ snprintf(servername, sizeof(servername), NIPQUAD_FMT,
+ NIPQUAD(sin->sin_addr.s_addr));
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin =
+ (struct sockaddr_in6 *)args->address;
+ snprintf(servername, sizeof(servername), NIP6_FMT,
+ NIP6(sin->sin6_addr));
+ break;
+ }
+ default:
+ /* caller wants default server name, but
+ * address family isn't recognized. */
+ return ERR_PTR(-EINVAL);
+ }
args->servername = servername;
}
+ xprt = xprt_create_transport(&xprtargs);
+ if (IS_ERR(xprt))
+ return (struct rpc_clnt *)xprt;
+
/*
* By default, kernel RPC client connects from a reserved port.
* CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- clnt = rpc_new_client(xprt, args->servername, args->program,
- args->version, args->authflavor);
+ clnt = rpc_new_client(args, xprt);
if (IS_ERR(clnt))
return clnt;
if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
- int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+ int err = rpc_ping(clnt, RPC_TASK_SOFT);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
clnt->cl_softrtry = 0;
- if (args->flags & RPC_CLNT_CREATE_INTR)
- clnt->cl_intr = 1;
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
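With the sa_family switch above, rpc_create() can now synthesize a default server name for AF_INET6 as well as AF_INET peers, so a caller may pass a bare sockaddr_in6 and leave servername NULL. A hedged caller-side sketch; my_program and the version number are hypothetical placeholders:

	/* Sketch: create a client over IPv6 and let rpc_create()
	 * derive the server name from the address (NIP6_FMT path). */
	struct sockaddr_in6 sin6 = {
		.sin6_family	= AF_INET6,
		/* .sin6_addr and .sin6_port filled in by the caller */
	};
	struct rpc_create_args args = {
		.protocol	= XPRT_TRANSPORT_TCP,
		.address	= (struct sockaddr *)&sin6,
		.addrsize	= sizeof(sin6),
		.servername	= NULL,
		.program	= &my_program,	/* hypothetical */
		.version	= 1,		/* hypothetical */
		.authflavor	= RPC_AUTH_UNIX,
		.flags		= RPC_CLNT_CREATE_NOPING,
	};
	struct rpc_clnt *clnt = rpc_create(&args);

	if (IS_ERR(clnt))
		return PTR_ERR(clnt);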
new->cl_autobind = 0;
INIT_LIST_HEAD(&new->cl_tasks);
spin_lock_init(&new->cl_lock);
- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
new->cl_metrics = rpc_alloc_iostats(clnt);
if (new->cl_metrics == NULL)
goto out_no_stats;
dprintk("RPC: %s: returned error %d\n", __FUNCTION__, err);
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(rpc_clone_client);
/*
* Properly shut down an RPC client, terminating all outstanding
rpc_release_client(clnt);
}
+EXPORT_SYMBOL_GPL(rpc_shutdown_client);
/*
* Free an RPC client
clnt->cl_prog = program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
- err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+ err = rpc_ping(clnt, RPC_TASK_SOFT);
if (err != 0) {
rpc_shutdown_client(clnt);
clnt = ERR_PTR(err);
out:
return clnt;
}
+EXPORT_SYMBOL_GPL(rpc_bind_new_program);
/*
* Default callback for async RPC calls
.rpc_call_done = rpc_default_callback,
};
- /*
- * Export the signal mask handling for synchronous code that
- * sleeps on RPC calls
- */
- #define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM))
-
- static void rpc_save_sigmask(sigset_t *oldset, int intr)
- {
- unsigned long sigallow = sigmask(SIGKILL);
- sigset_t sigmask;
-
- /* Block all signals except those listed in sigallow */
- if (intr)
- sigallow |= RPC_INTR_SIGNALS;
- siginitsetinv(&sigmask, sigallow);
- sigprocmask(SIG_BLOCK, &sigmask, oldset);
- }
-
- static void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
- {
- rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
- }
-
- static void rpc_restore_sigmask(sigset_t *oldset)
- {
- sigprocmask(SIG_SETMASK, oldset, NULL);
- }
-
- void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
- {
- rpc_save_sigmask(oldset, clnt->cl_intr);
- }
- EXPORT_SYMBOL_GPL(rpc_clnt_sigmask);
-
- void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
- {
- rpc_restore_sigmask(oldset);
- }
- EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask);
-
-static
-struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
- struct rpc_message *msg,
- int flags,
- const struct rpc_call_ops *ops,
- void *data)
+/**
+ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+ * @task_setup_data: pointer to task initialisation data
+ */
+struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
{
struct rpc_task *task, *ret;
- sigset_t oldset;
- task = rpc_new_task(clnt, flags, ops, data);
+ task = rpc_new_task(task_setup_data);
if (task == NULL) {
- rpc_release_calldata(ops, data);
- return ERR_PTR(-ENOMEM);
+ rpc_release_calldata(task_setup_data->callback_ops,
+ task_setup_data->callback_data);
+ ret = ERR_PTR(-ENOMEM);
+ goto out;
}
- /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
- if (msg != NULL) {
- rpc_call_setup(task, msg, 0);
- if (task->tk_status != 0) {
- ret = ERR_PTR(task->tk_status);
- rpc_put_task(task);
- goto out;
- }
+ if (task->tk_status != 0) {
+ ret = ERR_PTR(task->tk_status);
+ rpc_put_task(task);
+ goto out;
}
atomic_inc(&task->tk_count);
- /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
- if (!RPC_IS_ASYNC(task)) {
- rpc_task_sigmask(task, &oldset);
- rpc_execute(task);
- rpc_restore_sigmask(&oldset);
- } else
- rpc_execute(task);
+ rpc_execute(task);
ret = task;
out:
return ret;
}
+EXPORT_SYMBOL_GPL(rpc_run_task);
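rpc_run_task() now takes everything through struct rpc_task_setup and returns the task holding an extra reference, so the caller keeps a usable handle until it calls rpc_put_task(). A minimal async sketch, assuming an initialised clnt and msg; my_call_ops and my_data are hypothetical:

	/* Sketch: start an async call and keep the task handle. */
	struct rpc_task_setup setup = {
		.rpc_client	= clnt,
		.rpc_message	= &msg,
		.callback_ops	= &my_call_ops,	/* hypothetical ops */
		.callback_data	= my_data,
		.flags		= RPC_TASK_ASYNC,
	};
	struct rpc_task *task = rpc_run_task(&setup);

	if (IS_ERR(task))
		return PTR_ERR(task);
	/* ... task may be inspected or killed here ... */
	rpc_put_task(task);	/* drop the reference rpc_run_task took */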
/**
* rpc_call_sync - Perform a synchronous RPC call
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
struct rpc_task *task;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = msg,
+ .callback_ops = &rpc_default_ops,
+ .flags = flags,
+ };
int status;
BUG_ON(flags & RPC_TASK_ASYNC);
- task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
return status;
}
+EXPORT_SYMBOL_GPL(rpc_call_sync);
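Caller-side use of rpc_call_sync() is unchanged by this rewrite: the rpc_message still names the procedure and its argument and result buffers. A sketch with hypothetical procedure and buffer names:

	/* Sketch: a synchronous call through the rewritten helper. */
	struct rpc_message msg = {
		.rpc_proc	= &my_procedures[MYPROC_FOO],	/* hypothetical */
		.rpc_argp	= &foo_args,
		.rpc_resp	= &foo_res,
	};
	int status = rpc_call_sync(clnt, &msg, 0);

	if (status < 0)
		dprintk("RPC: foo call failed: %d\n", status);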
/**
* rpc_call_async - Perform an asynchronous RPC call
const struct rpc_call_ops *tk_ops, void *data)
{
struct rpc_task *task;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = msg,
+ .callback_ops = tk_ops,
+ .callback_data = data,
+ .flags = flags|RPC_TASK_ASYNC,
+ };
- task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
return 0;
}
-
-/**
- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
- * @clnt: pointer to RPC client
- * @flags: RPC flags
- * @ops: RPC call ops
- * @data: user call data
- */
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
- const struct rpc_call_ops *tk_ops,
- void *data)
-{
- return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
-}
-EXPORT_SYMBOL(rpc_run_task);
+EXPORT_SYMBOL_GPL(rpc_call_async);
void
-rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+rpc_call_start(struct rpc_task *task)
{
- task->tk_msg = *msg;
- task->tk_flags |= flags;
- /* Bind the user cred */
- if (task->tk_msg.rpc_cred != NULL)
- rpcauth_holdcred(task);
- else
- rpcauth_bindcred(task);
-
- if (task->tk_status == 0)
- task->tk_action = call_start;
- else
- task->tk_action = rpc_exit_task;
+ task->tk_action = call_start;
}
+EXPORT_SYMBOL_GPL(rpc_call_start);
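rpc_call_start() replaces the task-side half of the old rpc_call_setup(): a ->rpc_call_prepare callback finishes its per-call setup and then points tk_action at call_start. A sketch of the expected pattern, with a hypothetical callback:

	/* Sketch: prepare callback that kicks off the call proper. */
	static void my_prepare(struct rpc_task *task, void *calldata)
	{
		/* ... per-call setup (sequence numbers, state, ...) ... */
		rpc_call_start(task);	/* sets tk_action = call_start */
	}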
/**
* rpc_peeraddr - extract remote peer address from clnt's xprt
* @format: address format
*
*/
-char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
+const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
+ enum rpc_display_format_t format)
{
struct rpc_xprt *xprt = clnt->cl_xprt;
if (xprt->ops->set_buffer_size)
xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
}
+EXPORT_SYMBOL_GPL(rpc_setbufsize);
/*
* Return size of largest payload RPC client can support, in bytes
task->tk_action = call_start;
}
+EXPORT_SYMBOL_GPL(rpc_restart_call);
/*
* 0. Initial state
case -ETIMEDOUT:
task->tk_action = call_timeout;
if (task->tk_client->cl_discrtry)
- xprt_disconnect(task->tk_xprt);
+ xprt_force_disconnect(task->tk_xprt);
break;
case -ECONNREFUSED:
case -ENOTCONN:
req->rq_received = req->rq_private_buf.len = 0;
task->tk_status = 0;
if (task->tk_client->cl_discrtry)
- xprt_disconnect(task->tk_xprt);
+ xprt_force_disconnect(task->tk_xprt);
}
/*
.rpc_proc = &rpcproc_null,
.rpc_cred = cred,
};
- return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpc_default_ops,
+ .flags = flags,
+ };
+ return rpc_run_task(&task_setup_data);
}
-EXPORT_SYMBOL(rpc_call_null);
+EXPORT_SYMBOL_GPL(rpc_call_null);
#ifdef RPC_DEBUG
void rpc_show_tasks(void)
#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
-/*
- * r_addr
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
- * US-ASCII string:
- *
- * h1.h2.h3.h4.p1.p2
- *
- * The prefix, "h1.h2.h3.h4", is the standard textual form for
- * representing an IPv4 address, which is always four octets long.
- * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
- * the first through fourth octets each converted to ASCII-decimal.
- * Assuming big-endian ordering, p1 and p2 are, respectively, the first
- * and second octets each converted to ASCII-decimal. For example, if a
- * host, in big-endian order, has an address of 0x0A010307 and there is
- * a service listening on, in big endian order, port 0x020F (decimal
- * 527), then the complete universal address is "10.1.3.7.2.15".
- *
- * ...
- *
- * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
- * US-ASCII string:
- *
- * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
- *
- * The suffix "p1.p2" is the service port, and is computed the same way
- * as with universal addresses for TCP and UDP over IPv4. The prefix,
- * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
- * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
- * Additionally, the two alternative forms specified in Section 2.2 of
- * [RFC2373] are also acceptable.
- *
- * XXX: Currently this implementation does not explicitly convert the
- * stored address to US-ASCII on non-ASCII systems.
- */
-#define RPCB_MAXADDRLEN (128u)
-
/*
* r_owner
*
u32 r_vers;
u32 r_prot;
unsigned short r_port;
- char * r_netid;
- char r_addr[RPCB_MAXADDRLEN];
- char * r_owner;
+ const char * r_netid;
+ const char * r_addr;
+ const char * r_owner;
};
static struct rpc_procinfo rpcb_procedures2[];
static struct rpcb_info rpcb_next_version[];
static struct rpcb_info rpcb_next_version6[];
-static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
-{
- struct rpcbind_args *map = calldata;
- struct rpc_xprt *xprt = map->r_xprt;
- struct rpc_message msg = {
- .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc,
- .rpc_argp = map,
- .rpc_resp = &map->r_port,
- };
-
- rpc_call_setup(task, &msg, 0);
-}
-
static void rpcb_map_release(void *data)
{
struct rpcbind_args *map = data;
}
static const struct rpc_call_ops rpcb_getport_ops = {
- .rpc_call_prepare = rpcb_getport_prepare,
.rpc_call_done = rpcb_getport_done,
.rpc_release = rpcb_map_release,
};
}
static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
- int proto, int version, int privileged)
+ size_t salen, int proto, u32 version,
+ int privileged)
{
struct rpc_create_args args = {
.protocol = proto,
.address = srvaddr,
- .addrsize = sizeof(struct sockaddr_in),
+ .addrsize = salen,
.servername = hostname,
.program = &rpcb_program,
.version = version,
.authflavor = RPC_AUTH_UNIX,
- .flags = (RPC_CLNT_CREATE_NOPING |
- RPC_CLNT_CREATE_INTR),
+ .flags = RPC_CLNT_CREATE_NOPING,
};
switch (srvaddr->sa_family) {
prog, vers, prot, port);
rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
- XPRT_TRANSPORT_UDP, 2, 1);
+ sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
* @vers: RPC version number to bind
* @prot: transport protocol to use to make this request
*
+ * Return value is the requested advertised port number,
+ * or a negative errno value.
+ *
* Called from outside the RPC client in a synchronous task context.
* Uses default timeout parameters specified by underlying transport.
*
- * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
+ * XXX: Needs to support IPv6
*/
-int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
- __u32 vers, int prot)
+int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
{
struct rpcbind_args map = {
.r_prog = prog,
.rpc_resp = &map.r_port,
};
struct rpc_clnt *rpcb_clnt;
- char hostname[40];
int status;
dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
__FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
- sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
- rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
+ rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
+ sizeof(*sin), prot, 2, 0);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
}
EXPORT_SYMBOL_GPL(rpcb_getport_sync);
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
+{
+ struct rpc_message msg = {
+ .rpc_proc = rpcb_next_version[version].rpc_proc,
+ .rpc_argp = map,
+ .rpc_resp = &map->r_port,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = rpcb_clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpcb_getport_ops,
+ .callback_data = map,
+ .flags = RPC_TASK_ASYNC,
+ };
+
+ return rpc_run_task(&task_setup_data);
+}
+
/**
* rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
void rpcb_getport_async(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- int bind_version;
+ u32 bind_version;
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_clnt *rpcb_clnt;
static struct rpcbind_args *map;
struct rpc_task *child;
- struct sockaddr addr;
+ struct sockaddr_storage addr;
+ struct sockaddr *sap = (struct sockaddr *)&addr;
+ size_t salen;
int status;
struct rpcb_info *info;
goto bailout_nofree;
}
- rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+ salen = rpc_peeraddr(clnt, sap, sizeof(addr));
/* Don't ever use rpcbind v2 for AF_INET6 requests */
- switch (addr.sa_family) {
+ switch (sap->sa_family) {
case AF_INET:
info = rpcb_next_version;
break;
dprintk("RPC: %5u %s: trying rpcbind version %u\n",
task->tk_pid, __FUNCTION__, bind_version);
- rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+ rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
bind_version, 0);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
map->r_port = 0;
map->r_xprt = xprt_get(xprt);
map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
- memcpy(map->r_addr,
- rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
- sizeof(map->r_addr));
+ map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
- child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
+ child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
status = -EIO;
* Simple sanity check. The smallest possible universal
* address is an IPv4 address string containing 11 bytes.
*/
- if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+ if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
goto out_err;
/*
#define RPCB_boolean_sz (1u)
#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
-#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
+#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \
/*
* RPC tasks sit here while waiting for conditions to improve.
*/
-static RPC_WAITQ(delay_queue, "delayq");
+static struct rpc_wait_queue delay_queue;
/*
* rpciod-related stuff
if (unlikely(task->tk_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, u.tk_wait.list) {
- if (t->tk_cookie == task->tk_cookie) {
+ if (t->tk_owner == task->tk_owner) {
list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
return;
}
queue->count = 1 << (priority * 2);
}
-static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie)
+static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
{
- queue->cookie = cookie;
+ queue->owner = pid;
queue->nr = RPC_BATCH_COUNT;
}
static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
rpc_set_waitqueue_priority(queue, queue->maxpriority);
- rpc_set_waitqueue_cookie(queue, 0);
+ rpc_set_waitqueue_owner(queue, 0);
}
-static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio)
+static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
{
int i;
spin_lock_init(&queue->lock);
for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
INIT_LIST_HEAD(&queue->tasks[i]);
- queue->maxpriority = maxprio;
+ queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
#ifdef RPC_DEBUG
queue->name = qname;
void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
- __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
+ __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
}
void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
- __rpc_init_priority_wait_queue(queue, qname, 0);
+ __rpc_init_priority_wait_queue(queue, qname, 1);
}
-EXPORT_SYMBOL(rpc_init_wait_queue);
+EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
- static int rpc_wait_bit_interruptible(void *word)
+ static int rpc_wait_bit_killable(void *word)
{
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
return -ERESTARTSYS;
schedule();
return 0;
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
{
if (action == NULL)
- action = rpc_wait_bit_interruptible;
+ action = rpc_wait_bit_killable;
return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
- action, TASK_INTERRUPTIBLE);
+ action, TASK_KILLABLE);
}
-EXPORT_SYMBOL(__rpc_wait_for_completion_task);
+EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
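The killable-wait idiom above is generic: the bit-wait action returns -ERESTARTSYS only when a fatal signal is pending, and the sleep itself uses TASK_KILLABLE so ordinary signals cannot interrupt it. A sketch of the same pattern on a hypothetical flag bit MY_FLAG_BUSY:

	/* Sketch: wait killably for a flag bit to clear. */
	static int my_wait_bit_killable(void *word)
	{
		if (fatal_signal_pending(current))
			return -ERESTARTSYS;	/* only SIGKILL breaks out */
		schedule();
		return 0;
	}

	static int my_wait(unsigned long *flags)
	{
		return wait_on_bit(flags, MY_FLAG_BUSY,	/* hypothetical bit */
				   my_wait_bit_killable, TASK_KILLABLE);
	}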
/*
* Make an RPC task runnable.
__rpc_sleep_on(q, task, action, timer);
spin_unlock_bh(&q->lock);
}
+EXPORT_SYMBOL_GPL(rpc_sleep_on);
/**
* __rpc_do_wake_up_task - wake up a single rpc_task
}
rcu_read_unlock_bh();
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_task);
/*
* Wake up the next task on a priority queue.
struct rpc_task *task;
/*
- * Service a batch of tasks from a single cookie.
+ * Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
- if (queue->cookie == task->tk_cookie) {
+ if (queue->owner == task->tk_owner) {
if (--queue->nr)
goto out;
list_move_tail(&task->u.tk_wait.list, q);
* Check if we need to switch queues.
*/
if (--queue->count)
- goto new_cookie;
+ goto new_owner;
}
/*
new_queue:
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_cookie:
- rpc_set_waitqueue_cookie(queue, task->tk_cookie);
+new_owner:
+ rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
__rpc_wake_up_task(task);
return task;
return task;
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_next);
/**
* rpc_wake_up - wake up all rpc_tasks
spin_unlock(&queue->lock);
rcu_read_unlock_bh();
}
+EXPORT_SYMBOL_GPL(rpc_wake_up);
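Taken together, the newly exported queue primitives let a GPL module manage its own wait queue: initialise it once, park tasks on it, and wake them in bulk. A hedged sketch with a hypothetical embedding structure:

	/* Sketch: a module-private RPC wait queue. */
	struct my_ctx {
		struct rpc_wait_queue	wq;
	};

	static void my_ctx_init(struct my_ctx *ctx)
	{
		rpc_init_wait_queue(&ctx->wq, "my_ctx_wq");
	}

	/* A task parks itself (no timeout, no callbacks)... */
	static void my_block(struct my_ctx *ctx, struct rpc_task *task)
	{
		task->tk_timeout = 0;
		rpc_sleep_on(&ctx->wq, task, NULL, NULL);
	}

	/* ...and another context releases everything queued. */
	static void my_release(struct my_ctx *ctx)
	{
		rpc_wake_up(&ctx->wq);
	}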
/**
* rpc_wake_up_status - wake up all rpc_tasks and set their status value.
spin_unlock(&queue->lock);
rcu_read_unlock_bh();
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_status);
static void __rpc_atrun(struct rpc_task *task)
{
task->tk_timeout = delay;
rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}
+EXPORT_SYMBOL_GPL(rpc_delay);
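rpc_delay() parks the task on the (now lazily initialised) delay_queue for the given number of jiffies. A typical use is backing off and retrying from the top of the state machine, sketched here with the exported rpc_restart_call():

	/* Sketch: back off for one second, then retry the call. */
	static void my_retry_later(struct rpc_task *task)
	{
		rpc_delay(task, HZ);	/* sleep on delay_queue */
		rpc_restart_call(task);	/* resume from call_start */
	}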
/*
* Helper to call task->tk_ops->rpc_call_prepare
}
}
}
-EXPORT_SYMBOL(rpc_exit_task);
+EXPORT_SYMBOL_GPL(rpc_exit_task);
void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
{
/* sync task: sleep here */
dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
- /* Note: Caller should be using rpc_clnt_sigmask() */
status = out_of_line_wait_on_bit(&task->tk_runstate,
- RPC_TASK_QUEUED, rpc_wait_bit_interruptible,
- TASK_INTERRUPTIBLE);
+ RPC_TASK_QUEUED, rpc_wait_bit_killable,
+ TASK_KILLABLE);
if (status == -ERESTARTSYS) {
/*
* When a sync task receives a signal, it exits with
/*
* Creation and deletion of RPC task structures
*/
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
{
memset(task, 0, sizeof(*task));
- init_timer(&task->tk_timer);
- task->tk_timer.data = (unsigned long) task;
- task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
+ setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
+ (unsigned long)task);
atomic_set(&task->tk_count, 1);
- task->tk_client = clnt;
- task->tk_flags = flags;
- task->tk_ops = tk_ops;
- if (tk_ops->rpc_call_prepare != NULL)
- task->tk_action = rpc_prepare_task;
- task->tk_calldata = calldata;
+ task->tk_flags = task_setup_data->flags;
+ task->tk_ops = task_setup_data->callback_ops;
+ task->tk_calldata = task_setup_data->callback_data;
INIT_LIST_HEAD(&task->tk_task);
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
- task->tk_priority = RPC_PRIORITY_NORMAL;
- task->tk_cookie = (unsigned long)current;
+ task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
+ task->tk_owner = current->tgid;
/* Initialize workqueue for async tasks */
task->tk_workqueue = rpciod_workqueue;
- if (clnt) {
- kref_get(&clnt->cl_kref);
- if (clnt->cl_softrtry)
+ task->tk_client = task_setup_data->rpc_client;
+ if (task->tk_client != NULL) {
+ kref_get(&task->tk_client->cl_kref);
+ if (task->tk_client->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
- if (!task->tk_client->cl_intr)
- task->tk_flags |= RPC_TASK_NOINTR;
}
- BUG_ON(task->tk_ops == NULL);
+ if (task->tk_ops->rpc_call_prepare != NULL)
+ task->tk_action = rpc_prepare_task;
+
+ if (task_setup_data->rpc_message != NULL) {
+ memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg));
+ /* Bind the user cred */
+ if (task->tk_msg.rpc_cred != NULL)
+ rpcauth_holdcred(task);
+ else
+ rpcauth_bindcred(task);
+ if (task->tk_action == NULL)
+ rpc_call_start(task);
+ }
/* starting timestamp */
task->tk_start = jiffies;
/*
* Create a new task for the specified client.
*/
-struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
{
- struct rpc_task *task;
-
- task = rpc_alloc_task();
- if (!task)
- goto out;
+ struct rpc_task *task = setup_data->task;
+ unsigned short flags = 0;
+
+ if (task == NULL) {
+ task = rpc_alloc_task();
+ if (task == NULL)
+ goto out;
+ flags = RPC_TASK_DYNAMIC;
+ }
- rpc_init_task(task, clnt, flags, tk_ops, calldata);
+ rpc_init_task(task, setup_data);
+ task->tk_flags |= flags;
dprintk("RPC: allocated task %p\n", task);
- task->tk_flags |= RPC_TASK_DYNAMIC;
out:
return task;
}
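Because rpc_new_task() now honours setup_data->task, a caller can embed the rpc_task in a larger structure and hand it in; only tasks the scheduler allocated itself are marked RPC_TASK_DYNAMIC and freed on release. A hedged sketch, with a hypothetical request structure:

	/* Sketch: run an RPC on a caller-provided, embedded task.
	 * The caller must keep my_req alive until the task completes,
	 * since RPC_TASK_DYNAMIC is not set and nothing is kfree'd. */
	struct my_req {
		struct rpc_task		task;
		/* ... caller state ... */
	};

	static struct rpc_task *my_run(struct my_req *req,
				       struct rpc_task_setup *setup)
	{
		setup->task = &req->task;	/* skip rpc_alloc_task() */
		return rpc_run_task(setup);
	}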
call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
rpc_release_calldata(tk_ops, calldata);
}
-EXPORT_SYMBOL(rpc_put_task);
+EXPORT_SYMBOL_GPL(rpc_put_task);
static void rpc_release_task(struct rpc_task *task)
{
}
spin_unlock(&clnt->cl_lock);
}
+EXPORT_SYMBOL_GPL(rpc_killall_tasks);
int rpciod_up(void)
{
goto err_nomem;
if (!rpciod_start())
goto err_nomem;
+ /*
+ * The following is not strictly a mempool initialisation,
+ * but there is no harm in doing it here
+ */
+ rpc_init_wait_queue(&delay_queue, "delayq");
return 0;
err_nomem:
rpc_destroy_mempool();