* 'task_killable' of git://git.kernel.org/pub/scm/linux/kernel/git/willy/misc: (22 commits)
Remove commented-out code copied from NFS
NFS: Switch from intr mount option to TASK_KILLABLE
Add wait_for_completion_killable
Add wait_event_killable
Add schedule_timeout_killable
Use mutex_lock_killable in vfs_readdir
Add mutex_lock_killable
Use lock_page_killable
Add lock_page_killable
Add fatal_signal_pending
Add TASK_WAKEKILL
exit: Use task_is_*
signal: Use task_is_*
sched: Use task_contributes_to_load, TASK_ALL and TASK_NORMAL
ptrace: Use task_is_*
power: Use task_is_*
wait: Use TASK_NORMAL
proc/base.c: Use task_is_*
proc/array.c: Use TASK_REPORT
perfmon: Use task_is_*
...
Fixed up conflicts in NFS/sunrpc manually..
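The common thread: sleeps that were TASK_UNINTERRUPTIBLE (or gated on the
old "intr" mount option) become TASK_KILLABLE, so only fatal signals such
as SIGKILL can break them. A minimal sketch of the new primitives in use
(hypothetical waitqueue and condition, not code from the diff below):

	static int wait_for_server_reply(wait_queue_head_t *wq, int *done)
	{
		/* Sleeps in TASK_KILLABLE: returns 0 once *done becomes
		 * true, or -ERESTARTSYS if a fatal signal arrives first. */
		return wait_event_killable(*wq, *done);
	}

The same idea shows up throughout the diff as fatal_signal_pending(current)
checks and the schedule_timeout_killable()/mutex_lock_killable() variants.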
#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/inet.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
#include <linux/nfs_xdr.h>
#include <asm/system.h>
};
#endif /* CONFIG_NFS_V3_ACL */
+struct nfs_client_initdata {
+ const char *hostname;
+ const struct sockaddr *addr;
+ size_t addrlen;
+ const struct nfs_rpc_ops *rpc_ops;
+ int proto;
+};
+
/*
* Allocate a shared client record
*
* Since these are allocated/deallocated very rarely, we don't
* bother putting them in a slab cache...
*/
-static struct nfs_client *nfs_alloc_client(const char *hostname,
- const struct sockaddr_in *addr,
- int nfsversion)
+static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp;
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
- if (nfsversion == 4) {
+ clp->rpc_ops = cl_init->rpc_ops;
+
+ if (cl_init->rpc_ops->version == 4) {
if (nfs_callback_up() < 0)
goto error_2;
__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
atomic_set(&clp->cl_count, 1);
clp->cl_cons_state = NFS_CS_INITING;
- clp->cl_nfsversion = nfsversion;
- memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+ memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
+ clp->cl_addrlen = cl_init->addrlen;
- if (hostname) {
- clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+ if (cl_init->hostname) {
+ clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
if (!clp->cl_hostname)
goto error_3;
}
INIT_LIST_HEAD(&clp->cl_superblocks);
clp->cl_rpcclient = ERR_PTR(-EINVAL);
+ clp->cl_proto = cl_init->proto;
+
#ifdef CONFIG_NFS_V4
init_rwsem(&clp->cl_sem);
INIT_LIST_HEAD(&clp->cl_delegations);
*/
static void nfs_free_client(struct nfs_client *clp)
{
- dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+ dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
nfs4_shutdown_client(clp);
}
}
+static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
+ const struct sockaddr_in *sa2)
+{
+ return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
+}
+
+static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1,
+ const struct sockaddr_in6 *sa2)
+{
+ return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr);
+}
+
+static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
+{
+ switch (sa1->sa_family) {
+ case AF_INET:
+ return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
+ (const struct sockaddr_in *)sa2);
+ case AF_INET6:
+ return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1,
+ (const struct sockaddr_in6 *)sa2);
+ }
+ BUG();
+}
+
/*
- * Find a client by address
- * - caller must hold nfs_client_lock
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
*/
-static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port)
+struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
{
struct nfs_client *clp;
+ spin_lock(&nfs_client_lock);
list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
/* Don't match clients that failed to initialise properly */
- if (clp->cl_cons_state < 0)
+ if (clp->cl_cons_state != NFS_CS_READY)
continue;
/* Different NFS versions cannot share the same nfs_client */
- if (clp->cl_nfsversion != nfsversion)
+ if (clp->rpc_ops->version != nfsversion)
continue;
- if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
- sizeof(clp->cl_addr.sin_addr)) != 0)
+ if (addr->sa_family != clap->sa_family)
+ continue;
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(addr, clap))
continue;
- if (!match_port || clp->cl_addr.sin_port == addr->sin_port)
- goto found;
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
}
-
+ spin_unlock(&nfs_client_lock);
return NULL;
-
-found:
- atomic_inc(&clp->cl_count);
- return clp;
}
/*
* Find a client by IP address and protocol version
* - returns NULL if no such client
*/
-struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
{
- struct nfs_client *clp;
+ struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
+ u32 nfsvers = clp->rpc_ops->version;
spin_lock(&nfs_client_lock);
- clp = __nfs_find_client(addr, nfsversion, 0);
+ list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
+ /* Don't match clients that failed to initialise properly */
+ if (clp->cl_cons_state != NFS_CS_READY)
+ continue;
+
+ /* Different NFS versions cannot share the same nfs_client */
+ if (clp->rpc_ops->version != nfsvers)
+ continue;
+
+ if (sap->sa_family != clap->sa_family)
+ continue;
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(sap, clap))
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nfs_client_lock);
+ return clp;
+ }
spin_unlock(&nfs_client_lock);
- if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) {
- nfs_put_client(clp);
- clp = NULL;
+ return NULL;
+}
+
+/*
+ * Find an nfs_client on the list that matches the initialisation data
+ * that is supplied.
+ */
+static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
+{
+ struct nfs_client *clp;
+
+ list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+ /* Don't match clients that failed to initialise properly */
+ if (clp->cl_cons_state < 0)
+ continue;
+
+ /* Different NFS versions cannot share the same nfs_client */
+ if (clp->rpc_ops != data->rpc_ops)
+ continue;
+
+ if (clp->cl_proto != data->proto)
+ continue;
+
+ /* Match the full socket address */
+ if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ return clp;
}
- return clp;
+ return NULL;
}
/*
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
*/
-static struct nfs_client *nfs_get_client(const char *hostname,
- const struct sockaddr_in *addr,
- int nfsversion)
+static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp, *new = NULL;
int error;
- dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
- hostname ?: "", NIPQUAD(addr->sin_addr),
- addr->sin_port, nfsversion);
+ dprintk("--> nfs_get_client(%s,v%u)\n",
+ cl_init->hostname ?: "", cl_init->rpc_ops->version);
/* see if the client already exists */
do {
spin_lock(&nfs_client_lock);
- clp = __nfs_find_client(addr, nfsversion, 1);
+ clp = nfs_match_client(cl_init);
if (clp)
goto found_client;
if (new)
spin_unlock(&nfs_client_lock);
- new = nfs_alloc_client(hostname, addr, nfsversion);
+ new = nfs_alloc_client(cl_init);
} while (new);
return ERR_PTR(-ENOMEM);
if (new)
nfs_free_client(new);
- error = wait_event_interruptible(nfs_client_active_wq,
+ error = wait_event_killable(nfs_client_active_wq,
clp->cl_cons_state != NFS_CS_INITING);
if (error < 0) {
nfs_put_client(clp);
switch (proto) {
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
- if (!to->to_initval)
+ if (to->to_initval == 0)
to->to_initval = 60 * HZ;
if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
to->to_initval = NFS_MAX_TCP_TIMEOUT;
to->to_increment = to->to_initval;
to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+ if (to->to_maxval > NFS_MAX_TCP_TIMEOUT)
+ to->to_maxval = NFS_MAX_TCP_TIMEOUT;
+ if (to->to_maxval < to->to_initval)
+ to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
case XPRT_TRANSPORT_UDP:
/*
* Create an RPC client handle
*/
-static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
- unsigned int timeo,
- unsigned int retrans,
- rpc_authflavor_t flavor,
- int flags)
+static int nfs_create_rpc_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ rpc_authflavor_t flavor,
+ int flags)
{
- struct rpc_timeout timeparms;
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
- .protocol = proto,
+ .protocol = clp->cl_proto,
.address = (struct sockaddr *)&clp->cl_addr,
- .addrsize = sizeof(clp->cl_addr),
- .timeout = &timeparms,
+ .addrsize = clp->cl_addrlen,
+ .timeout = timeparms,
.servername = clp->cl_hostname,
.program = &nfs_program,
.version = clp->rpc_ops->version,
if (!IS_ERR(clp->cl_rpcclient))
return 0;
- nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
- clp->retrans_timeo = timeparms.to_initval;
- clp->retrans_count = timeparms.to_retries;
-
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
dprintk("%s: cannot create RPC client. Error = %ld\n",
*/
static void nfs_destroy_server(struct nfs_server *server)
{
- if (!IS_ERR(server->client_acl))
- rpc_shutdown_client(server->client_acl);
-
if (!(server->flags & NFS_MOUNT_NONLM))
- lockd_down(); /* release rpc.lockd */
+ nlmclnt_done(server->nlm_host);
}
/*
*/
static int nfs_start_lockd(struct nfs_server *server)
{
- int error = 0;
+ struct nlm_host *host;
+ struct nfs_client *clp = server->nfs_client;
+ struct nlmclnt_initdata nlm_init = {
+ .hostname = clp->cl_hostname,
+ .address = (struct sockaddr *)&clp->cl_addr,
+ .addrlen = clp->cl_addrlen,
+ .protocol = server->flags & NFS_MOUNT_TCP ?
+ IPPROTO_TCP : IPPROTO_UDP,
+ .nfs_version = clp->rpc_ops->version,
+ };
- if (server->nfs_client->cl_nfsversion > 3)
- goto out;
+ if (nlm_init.nfs_version > 3)
+ return 0;
if (server->flags & NFS_MOUNT_NONLM)
- goto out;
- error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
- IPPROTO_TCP : IPPROTO_UDP);
- if (error < 0)
- server->flags |= NFS_MOUNT_NONLM;
- else
- server->destroy = nfs_destroy_server;
-out:
- return error;
+ return 0;
+
+ host = nlmclnt_init(&nlm_init);
+ if (IS_ERR(host))
+ return PTR_ERR(host);
+
+ server->nlm_host = host;
+ server->destroy = nfs_destroy_server;
+ return 0;
}
/*
#ifdef CONFIG_NFS_V3_ACL
static void nfs_init_server_aclclient(struct nfs_server *server)
{
- if (server->nfs_client->cl_nfsversion != 3)
+ if (server->nfs_client->rpc_ops->version != 3)
goto out_noacl;
if (server->flags & NFS_MOUNT_NOACL)
goto out_noacl;
/*
* Create a general RPC client
*/
-static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+static int nfs_init_server_rpcclient(struct nfs_server *server,
+ const struct rpc_timeout *timeo,
+ rpc_authflavor_t pseudoflavour)
{
struct nfs_client *clp = server->nfs_client;
return PTR_ERR(server->client);
}
+ memcpy(&server->client->cl_timeout_default,
+ timeo,
+ sizeof(server->client->cl_timeout_default));
+ server->client->cl_timeout = &server->client->cl_timeout_default;
+
if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
struct rpc_auth *auth;
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
- server->client->cl_intr = 0;
- if (server->flags & NFS4_MOUNT_INTR)
- server->client->cl_intr = 1;
-
return 0;
}
* Initialise an NFS2 or NFS3 client
*/
static int nfs_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
const struct nfs_parsed_mount_data *data)
{
int error;
return 0;
}
- /* Check NFS protocol revision and initialize RPC op vector */
- clp->rpc_ops = &nfs_v2_clientops;
-#ifdef CONFIG_NFS_V3
- if (clp->cl_nfsversion == 3)
- clp->rpc_ops = &nfs_v3_clientops;
-#endif
/*
* Create a client RPC handle for doing FSSTAT with UNIX auth only
* - RFC 2623, sec 2.3.2
*/
- error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
- data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
+ error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
static int nfs_init_server(struct nfs_server *server,
const struct nfs_parsed_mount_data *data)
{
+ struct nfs_client_initdata cl_init = {
+ .hostname = data->nfs_server.hostname,
+ .addr = (const struct sockaddr *)&data->nfs_server.address,
+ .addrlen = data->nfs_server.addrlen,
+ .rpc_ops = &nfs_v2_clientops,
+ .proto = data->nfs_server.protocol,
+ };
+ struct rpc_timeout timeparms;
struct nfs_client *clp;
- int error, nfsvers = 2;
+ int error;
dprintk("--> nfs_init_server()\n");
#ifdef CONFIG_NFS_V3
if (data->flags & NFS_MOUNT_VER3)
- nfsvers = 3;
+ cl_init.rpc_ops = &nfs_v3_clientops;
#endif
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(data->nfs_server.hostname,
- &data->nfs_server.address, nfsvers);
+ clp = nfs_get_client(&cl_init);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
}
- error = nfs_init_client(clp, data);
+ nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+ data->timeo, data->retrans);
+ error = nfs_init_client(clp, &timeparms, data);
if (error < 0)
goto error;
if (error < 0)
goto error;
- error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
if (error < 0)
goto error;
INIT_LIST_HEAD(&server->client_link);
INIT_LIST_HEAD(&server->master_link);
+ init_waitqueue_head(&server->active_wq);
+ atomic_set(&server->active, 0);
+
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
kfree(server);
if (server->destroy != NULL)
server->destroy(server);
+
+ if (!IS_ERR(server->client_acl))
+ rpc_shutdown_client(server->client_acl);
if (!IS_ERR(server->client))
rpc_shutdown_client(server->client);
* Initialise an NFS4 client record
*/
static int nfs4_init_client(struct nfs_client *clp,
- int proto, int timeo, int retrans,
+ const struct rpc_timeout *timeparms,
const char *ip_addr,
rpc_authflavor_t authflavour)
{
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
- error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour,
+ error = nfs_create_rpc_client(clp, timeparms, authflavour,
RPC_CLNT_CREATE_DISCRTRY);
if (error < 0)
goto error;
* Set up an NFS4 client
*/
static int nfs4_set_client(struct nfs_server *server,
- const char *hostname, const struct sockaddr_in *addr,
+ const char *hostname,
+ const struct sockaddr *addr,
+ const size_t addrlen,
const char *ip_addr,
rpc_authflavor_t authflavour,
- int proto, int timeo, int retrans)
+ int proto, const struct rpc_timeout *timeparms)
{
+ struct nfs_client_initdata cl_init = {
+ .hostname = hostname,
+ .addr = addr,
+ .addrlen = addrlen,
+ .rpc_ops = &nfs_v4_clientops,
+ .proto = proto,
+ };
struct nfs_client *clp;
int error;
dprintk("--> nfs4_set_client()\n");
/* Allocate or find a client reference we can use */
- clp = nfs_get_client(hostname, addr, 4);
+ clp = nfs_get_client(&cl_init);
if (IS_ERR(clp)) {
error = PTR_ERR(clp);
goto error;
}
- error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour);
+ error = nfs4_init_client(clp, timeparms, ip_addr, authflavour);
if (error < 0)
goto error_put;
static int nfs4_init_server(struct nfs_server *server,
const struct nfs_parsed_mount_data *data)
{
+ struct rpc_timeout timeparms;
int error;
dprintk("--> nfs4_init_server()\n");
+ nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+ data->timeo, data->retrans);
+
+ /* Get a client record */
+ error = nfs4_set_client(server,
+ data->nfs_server.hostname,
+ (const struct sockaddr *)&data->nfs_server.address,
+ data->nfs_server.addrlen,
+ data->client_address,
+ data->auth_flavors[0],
+ data->nfs_server.protocol,
+ &timeparms);
+ if (error < 0)
+ goto error;
+
/* Initialise the client representation from the mount data */
server->flags = data->flags & NFS_MOUNT_FLAGMASK;
server->caps |= NFS_CAP_ATOMIC_OPEN;
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
- error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
+ error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+error:
/* Done */
dprintk("<-- nfs4_init_server() = %d\n", error);
return error;
if (!server)
return ERR_PTR(-ENOMEM);
- /* Get a client record */
- error = nfs4_set_client(server,
- data->nfs_server.hostname,
- &data->nfs_server.address,
- data->client_address,
- data->auth_flavors[0],
- data->nfs_server.protocol,
- data->timeo, data->retrans);
- if (error < 0)
- goto error;
-
/* set up the general RPC client */
error = nfs4_init_server(server, data);
if (error < 0)
/* Get a client representation.
* Note: NFSv4 always uses TCP, */
- error = nfs4_set_client(server, data->hostname, data->addr,
- parent_client->cl_ipaddr,
- data->authflavor,
- parent_server->client->cl_xprt->prot,
- parent_client->retrans_timeo,
- parent_client->retrans_count);
+ error = nfs4_set_client(server, data->hostname,
+ data->addr,
+ data->addrlen,
+ parent_client->cl_ipaddr,
+ data->authflavor,
+ parent_server->client->cl_xprt->prot,
+ parent_server->client->cl_timeout);
if (error < 0)
goto error;
nfs_server_copy_userdata(server, parent_server);
server->caps |= NFS_CAP_ATOMIC_OPEN;
- error = nfs_init_server_rpcclient(server, data->authflavor);
+ error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
if (error < 0)
goto error;
server->fsid = fattr->fsid;
- error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+ error = nfs_init_server_rpcclient(server,
+ source->client->cl_timeout,
+ source->client->cl_auth->au_flavor);
if (error < 0)
goto out_free_server;
if (!IS_ERR(source->client_acl))
/* display one transport per line on subsequent lines */
clp = list_entry(v, struct nfs_client, cl_share_link);
- seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
- clp->cl_nfsversion,
- NIPQUAD(clp->cl_addr.sin_addr),
- ntohs(clp->cl_addr.sin_port),
+ seq_printf(m, "v%u %s %s %3d %s\n",
+ clp->rpc_ops->version,
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
atomic_read(&clp->cl_count),
clp->cl_hostname);
(unsigned long long) server->fsid.major,
(unsigned long long) server->fsid.minor);
- seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
- clp->cl_nfsversion,
- NIPQUAD(clp->cl_addr.sin_addr),
- ntohs(clp->cl_addr.sin_port),
+ seq_printf(m, "v%u %s %s %-7s %-17s\n",
+ clp->rpc_ops->version,
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
dev,
fsid);
static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
{
ssize_t result = -EIOCBQUEUED;
- struct rpc_clnt *clnt;
- sigset_t oldset;
/* Async requests don't wait here */
if (dreq->iocb)
goto out;
- clnt = NFS_CLIENT(dreq->inode);
- rpc_clnt_sigmask(clnt, &oldset);
- result = wait_for_completion_interruptible(&dreq->completion);
- rpc_clnt_sigunmask(clnt, &oldset);
+ result = wait_for_completion_killable(&dreq->completion);
if (!result)
result = dreq->error;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
size_t rsize = NFS_SERVER(inode)->rsize;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_read_direct_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
unsigned int pgbase;
int result;
ssize_t started = 0;
data->req = (struct nfs_page *) dreq;
data->inode = inode;
- data->cred = ctx->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = pos;
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
- rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
- &nfs_read_direct_ops, data);
- NFS_PROTO(inode)->read_setup(data);
+ task_setup_data.task = &data->task;
+ task_setup_data.callback_data = data;
+ NFS_PROTO(inode)->read_setup(data, &msg);
- data->task.tk_cookie = (unsigned long) inode;
-
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
dprintk("NFS: %5u initiated direct read call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
struct inode *inode = dreq->inode;
struct list_head *p;
struct nfs_write_data *data;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_cred = dreq->ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .callback_ops = &nfs_write_direct_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
dreq->count = 0;
get_dreq(dreq);
get_dreq(dreq);
+ /* Use stable writes */
+ data->args.stable = NFS_FILE_SYNC;
+
/*
* Reset data->res.
*/
* Reuse data->task; data->args should not have changed
* since the original request was sent.
*/
- rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
- &nfs_write_direct_ops, data);
- NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
-
- data->task.tk_priority = RPC_PRIORITY_NORMAL;
- data->task.tk_cookie = (unsigned long) inode;
+ task_setup_data.task = &data->task;
+ task_setup_data.callback_data = data;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ NFS_PROTO(inode)->write_setup(data, &msg);
/*
* We're called via an RPC callback, so BKL is already held.
*/
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
{
struct nfs_write_data *data = dreq->commit_data;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = dreq->ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .task = &data->task,
+ .rpc_client = NFS_CLIENT(dreq->inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_commit_direct_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
data->inode = dreq->inode;
- data->cred = dreq->ctx->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(data->inode);
data->args.offset = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
- rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
- &nfs_commit_direct_ops, data);
- NFS_PROTO(data->inode)->commit_setup(data, 0);
+ NFS_PROTO(data->inode)->commit_setup(data, &msg);
- data->task.tk_priority = RPC_PRIORITY_NORMAL;
- data->task.tk_cookie = (unsigned long)data->inode;
/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
dreq->commit_data = NULL;
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
}
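All of the direct-I/O call sites above follow one conversion: the old
rpc_init_task() + rpc_call_setup() + rpc_execute() sequence is replaced by
filling in a struct rpc_message and a struct rpc_task_setup and handing
both to rpc_run_task(). Condensed from the hunks above (the callback_ops
table named here stands in for e.g. nfs_commit_direct_ops):

	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,	/* optional pre-allocated task */
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = &my_direct_ops,
		.callback_data = data,
		.flags = RPC_TASK_ASYNC,
	};
	struct rpc_task *task = rpc_run_task(&task_setup_data);

	if (!IS_ERR(task))
		rpc_put_task(task);	/* drop our reference; the task runs on */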
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
struct inode *inode = ctx->path.dentry->d_inode;
unsigned long user_addr = (unsigned long)iov->iov_base;
size_t count = iov->iov_len;
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_write_direct_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
size_t wsize = NFS_SERVER(inode)->wsize;
unsigned int pgbase;
int result;
data->req = (struct nfs_page *) dreq;
data->inode = inode;
- data->cred = ctx->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.context = ctx;
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
data->args.count = bytes;
+ data->args.stable = sync;
data->res.fattr = &data->fattr;
data->res.count = bytes;
data->res.verf = &data->verf;
- rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
- &nfs_write_direct_ops, data);
- NFS_PROTO(inode)->write_setup(data, sync);
+ task_setup_data.task = &data->task;
+ task_setup_data.callback_data = data;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ NFS_PROTO(inode)->write_setup(data, &msg);
- data->task.tk_priority = RPC_PRIORITY_NORMAL;
- data->task.tk_cookie = (unsigned long) inode;
-
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
dprintk("NFS: %5u initiated direct write call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
size_t wsize = NFS_SERVER(inode)->wsize;
- int sync = 0;
+ int sync = NFS_UNSTABLE;
dreq = nfs_direct_req_alloc();
if (!dreq)
nfs_alloc_commit_data(dreq);
if (dreq->commit_data == NULL || count < wsize)
- sync = FLUSH_STABLE;
+ sync = NFS_FILE_SYNC;
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
retval = generic_write_checks(file, &pos, &count, 0);
if (retval)
goto out;
- if (!count)
- goto out; /* return 0 */
retval = -EINVAL;
if ((ssize_t) count < 0)
*/
static void nfs_invalidate_inode(struct inode *inode)
{
- set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
nfs_zap_caches_locked(inode);
}
struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque;
struct nfs_fattr *fattr = desc->fattr;
- NFS_FILEID(inode) = fattr->fileid;
+ set_nfs_fileid(inode, fattr->fileid);
nfs_copy_fh(NFS_FH(inode), desc->fh);
return 0;
}
inode->i_fop = &nfs_dir_operations;
if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
&& fattr->size <= NFS_LIMIT_READDIRPLUS)
- set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+ set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
/* Deal with crossing mountpoints */
if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
*/
static int nfs_wait_on_inode(struct inode *inode)
{
- struct rpc_clnt *clnt = NFS_CLIENT(inode);
struct nfs_inode *nfsi = NFS_I(inode);
- sigset_t oldmask;
int error;
- rpc_clnt_sigmask(clnt, &oldmask);
error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
- nfs_wait_schedule, TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldmask);
+ nfs_wait_schedule, TASK_KILLABLE);
return error;
}
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
int err;
- /* Flush out writes to the server in order to update c/mtime */
- if (S_ISREG(inode->i_mode))
+ /*
+ * Flush out writes to the server in order to update c/mtime.
+ *
+ * Hold the i_mutex to suspend application writes temporarily;
+ * this prevents long-running writing applications from blocking
+ * nfs_wb_nocommit.
+ */
+ if (S_ISREG(inode->i_mode)) {
+ mutex_lock(&inode->i_mutex);
nfs_wb_nocommit(inode);
+ mutex_unlock(&inode->i_mutex);
+ }
/*
* We may force a getattr if the user cares about atime.
if (status == -ESTALE) {
nfs_zap_caches(inode);
if (!S_ISDIR(inode->i_mode))
- set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
+ set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
}
goto out;
}
if (S_ISDIR(inode->i_mode))
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
}
- if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
- inode->i_size = fattr->size;
+ if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
+ nfsi->npages == 0)
+ inode->i_size = nfs_size_to_loff_t(fattr->size);
}
}
dprintk("NFS: mtime change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- nfsi->cache_change_attribute = now;
+ if (S_ISDIR(inode->i_mode))
+ nfs_force_lookup_revalidate(inode);
}
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = now;
+ if (S_ISDIR(inode->i_mode))
+ nfs_force_lookup_revalidate(inode);
}
/* Check if our cached file size is stale */
void nfs4_clear_inode(struct inode *inode)
{
/* If we are holding a delegation, return it! */
- nfs_inode_return_delegation(inode);
+ nfs_inode_return_delegation_noreclaim(inode);
/* First call standard NFS clear_inode() code */
nfs_clear_inode(inode);
}
static int
nfs3_rpc_wrapper(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
- sigset_t oldset;
int res;
- rpc_clnt_sigmask(clnt, &oldset);
do {
res = rpc_call_sync(clnt, msg, flags);
if (res != -EJUKEBOX)
break;
- schedule_timeout_interruptible(NFS_JUKEBOX_RETRY_TIME);
+ schedule_timeout_killable(NFS_JUKEBOX_RETRY_TIME);
res = -ERESTARTSYS;
- } while (!signalled());
- rpc_clnt_sigunmask(clnt, &oldset);
+ } while (!fatal_signal_pending(current));
return res;
}
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_READ],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
- data->args.stable = NFS_UNSTABLE;
- if (how & FLUSH_STABLE) {
- data->args.stable = NFS_FILE_SYNC;
- if (NFS_I(data->inode)->ncommit)
- data->args.stable = NFS_DATA_SYNC;
- }
-
- /* Finalize the task. */
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
}
static int
nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
{
- return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
+ struct inode *inode = filp->f_path.dentry->d_inode;
+
+ return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
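Taken together with the nfs_start_lockd() and nfs_destroy_server() hunks
earlier, the NLM client now has an explicit per-server lifecycle (sketch
assembled from those hunks; error handling elided):

	/* mount: bring up lockd and get a handle for this server */
	host = nlmclnt_init(&nlm_init);

	/* each NFSv2/v3 lock request goes through that handle */
	status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);

	/* umount: release the handle, letting lockd wind down */
	nlmclnt_done(server->nlm_host);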
const struct nfs_rpc_ops nfs_v3_clientops = {
spin_lock(&dir->i_lock);
nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
- nfsi->cache_change_attribute = jiffies;
+ nfs_force_lookup_revalidate(dir);
nfsi->change_attr = cinfo->after;
spin_unlock(&dir->i_lock);
}
static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
{
- sigset_t oldset;
int ret;
- rpc_clnt_sigmask(task->tk_client, &oldset);
ret = rpc_wait_for_completion_task(task);
- rpc_clnt_sigunmask(task->tk_client, &oldset);
return ret;
}
return err;
}
-static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs4_opendata *data = calldata;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
- .rpc_argp = &data->c_arg,
- .rpc_resp = &data->c_res,
- .rpc_cred = data->owner->so_cred,
- };
- data->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
-}
-
static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
if (data->rpc_status == 0) {
memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
sizeof(data->o_res.stateid.data));
+ nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
}
- nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
}
/* In case of error, no cleanup! */
if (!data->rpc_done)
goto out_free;
- nfs_confirm_seqid(&data->owner->so_seqid, 0);
state = nfs4_opendata_to_nfs4_state(data);
if (!IS_ERR(state))
nfs4_close_state(&data->path, state, data->o_arg.open_flags);
}
static const struct rpc_call_ops nfs4_open_confirm_ops = {
- .rpc_call_prepare = nfs4_open_confirm_prepare,
.rpc_call_done = nfs4_open_confirm_done,
.rpc_release = nfs4_open_confirm_release,
};
{
struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+ .rpc_argp = &data->c_arg,
+ .rpc_resp = &data->c_res,
+ .rpc_cred = data->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_open_confirm_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
int status;
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
- task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
+ data->timestamp = jiffies;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
{
struct nfs4_opendata *data = calldata;
struct nfs4_state_owner *sp = data->owner;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
- .rpc_argp = &data->o_arg,
- .rpc_resp = &data->o_res,
- .rpc_cred = sp->so_cred,
- };
-
+
if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
return;
/*
data->o_arg.id = sp->so_owner_id.id;
data->o_arg.clientid = sp->so_client->cl_clientid;
if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
}
data->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
+ rpc_call_start(task);
return;
out_no_action:
task->tk_action = NULL;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
- nfs_confirm_seqid(&data->owner->so_seqid, 0);
state = nfs4_opendata_to_nfs4_state(data);
if (!IS_ERR(state))
nfs4_close_state(&data->path, state, data->o_arg.open_flags);
struct nfs_openargs *o_arg = &data->o_arg;
struct nfs_openres *o_res = &data->o_res;
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
+ .rpc_argp = o_arg,
+ .rpc_resp = o_res,
+ .rpc_cred = data->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_open_ops,
+ .callback_data = data,
+ .flags = RPC_TASK_ASYNC,
+ };
int status;
kref_get(&data->kref);
data->rpc_done = 0;
data->rpc_status = 0;
data->cancelled = 0;
- task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
{
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
- .rpc_argp = &calldata->arg,
- .rpc_resp = &calldata->res,
- .rpc_cred = state->owner->so_cred,
- };
int clear_rd, clear_wr, clear_rdwr;
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
}
nfs_fattr_init(calldata->res.fattr);
if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
calldata->arg.open_flags = FMODE_READ;
} else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
- msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+ task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
calldata->arg.open_flags = FMODE_WRITE;
}
calldata->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
+ rpc_call_start(task);
}
static const struct rpc_call_ops nfs4_close_ops = {
struct nfs4_closedata *calldata;
struct nfs4_state_owner *sp = state->owner;
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_close_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
int status = -ENOMEM;
calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
calldata->path.mnt = mntget(path->mnt);
calldata->path.dentry = dget(path->dentry);
- task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+ msg.rpc_argp = &calldata->arg;
+ msg.rpc_resp = &calldata->res;
+ task_setup_data.callback_data = calldata;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = 0;
return 0;
}
-static void nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
-
data->timestamp = jiffies;
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
- struct inode *inode = data->inode;
- struct nfs_server *server = NFS_SERVER(inode);
- int stable;
-
- if (how & FLUSH_STABLE) {
- if (!NFS_I(inode)->ncommit)
- stable = NFS_FILE_SYNC;
- else
- stable = NFS_DATA_SYNC;
- } else
- stable = NFS_UNSTABLE;
- data->args.stable = stable;
+ struct nfs_server *server = NFS_SERVER(data->inode);
+
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
-
data->timestamp = jiffies;
- /* Finalize the task. */
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
}
static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->res.server = server;
-
- rpc_call_setup(&data->task, &msg, 0);
+ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
}
/*
return 0;
}
- static int nfs4_wait_bit_interruptible(void *word)
+ static int nfs4_wait_bit_killable(void *word)
{
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
return -ERESTARTSYS;
schedule();
return 0;
static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
{
- sigset_t oldset;
int res;
might_sleep();
rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
- rpc_clnt_sigmask(clnt, &oldset);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
- nfs4_wait_bit_interruptible,
- TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldset);
+ nfs4_wait_bit_killable, TASK_KILLABLE);
rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
return res;
static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
{
- sigset_t oldset;
int res = 0;
might_sleep();
*timeout = NFS4_POLL_RETRY_MIN;
if (*timeout > NFS4_POLL_RETRY_MAX)
*timeout = NFS4_POLL_RETRY_MAX;
- rpc_clnt_sigmask(clnt, &oldset);
- if (clnt->cl_intr) {
- schedule_timeout_interruptible(*timeout);
- if (signalled())
- res = -ERESTARTSYS;
- } else
- schedule_timeout_uninterruptible(*timeout);
- rpc_clnt_sigunmask(clnt, &oldset);
+ schedule_timeout_killable(*timeout);
+ if (fatal_signal_pending(current))
+ res = -ERESTARTSYS;
*timeout <<= 1;
return res;
}
for(;;) {
setclientid.sc_name_len = scnprintf(setclientid.sc_name,
- sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
- clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
+ sizeof(setclientid.sc_name), "%s/%s %s %s %u",
+ clp->cl_ipaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_PROTO),
cred->cr_ops->cr_name,
clp->cl_id_uniquifier);
setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
- sizeof(setclientid.sc_netid), "tcp");
+ sizeof(setclientid.sc_netid),
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_NETID));
setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
- sizeof(setclientid.sc_uaddr), "%s.%d.%d",
+ sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);
struct nfs4_delegreturnres res;
struct nfs_fh fh;
nfs4_stateid stateid;
- struct rpc_cred *cred;
unsigned long timestamp;
struct nfs_fattr fattr;
int rpc_status;
};
-static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs4_delegreturndata *data = calldata;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->cred,
- };
- nfs_fattr_init(data->res.fattr);
- rpc_call_setup(task, &msg, 0);
-}
-
static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
{
struct nfs4_delegreturndata *data = calldata;
static void nfs4_delegreturn_release(void *calldata)
{
- struct nfs4_delegreturndata *data = calldata;
-
- put_rpccred(data->cred);
kfree(calldata);
}
static const struct rpc_call_ops nfs4_delegreturn_ops = {
- .rpc_call_prepare = nfs4_delegreturn_prepare,
.rpc_call_done = nfs4_delegreturn_done,
.rpc_release = nfs4_delegreturn_release,
};
-static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
{
struct nfs4_delegreturndata *data;
struct nfs_server *server = NFS_SERVER(inode);
struct rpc_task *task;
- int status;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
+ .rpc_cred = cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = server->client,
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_delegreturn_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
+ int status = 0;
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
memcpy(&data->stateid, stateid, sizeof(data->stateid));
data->res.fattr = &data->fattr;
data->res.server = server;
- data->cred = get_rpccred(cred);
+ nfs_fattr_init(data->res.fattr);
data->timestamp = jiffies;
data->rpc_status = 0;
- task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
+ task_setup_data.callback_data = data;
+ msg.rpc_argp = &data->args;
+ msg.rpc_resp = &data->res;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
+ if (!issync)
+ goto out;
status = nfs4_wait_for_completion_rpc_task(task);
- if (status == 0) {
- status = data->rpc_status;
- if (status == 0)
- nfs_refresh_inode(inode, &data->fattr);
- }
+ if (status != 0)
+ goto out;
+ status = data->rpc_status;
+ if (status != 0)
+ goto out;
+ nfs_refresh_inode(inode, &data->fattr);
+out:
rpc_put_task(task);
return status;
}
-int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
int err;
do {
- err = _nfs4_proc_delegreturn(inode, cred, stateid);
+ err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
static unsigned long
nfs4_set_lock_task_retry(unsigned long timeout)
{
- schedule_timeout_interruptible(timeout);
+ schedule_timeout_killable(timeout);
timeout <<= 1;
if (timeout > NFS4_LOCK_MAXTIMEOUT)
return NFS4_LOCK_MAXTIMEOUT;
static void nfs4_locku_prepare(struct rpc_task *task, void *data)
{
struct nfs4_unlockdata *calldata = data;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
- .rpc_argp = &calldata->arg,
- .rpc_resp = &calldata->res,
- .rpc_cred = calldata->lsp->ls_state->owner->so_cred,
- };
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
return;
return;
}
calldata->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
+ rpc_call_start(task);
}
static const struct rpc_call_ops nfs4_locku_ops = {
struct nfs_seqid *seqid)
{
struct nfs4_unlockdata *data;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
+ .rpc_cred = ctx->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(lsp->ls_state->inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_locku_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
/* Ensure this is an unlock - when canceling a lock, the
* canceled lock is passed in, and it won't be an unlock.
return ERR_PTR(-ENOMEM);
}
- return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ return rpc_run_task(&task_setup_data);
}
static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
+ p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
+ if (p->arg.open_seqid == NULL)
+ goto out_free;
p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
if (p->arg.lock_seqid == NULL)
- goto out_free;
+ goto out_free_seqid;
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_id.id;
p->ctx = get_nfs_open_context(ctx);
memcpy(&p->fl, fl, sizeof(p->fl));
return p;
+out_free_seqid:
+ nfs_free_seqid(p->arg.open_seqid);
out_free:
kfree(p);
return NULL;
{
struct nfs4_lockdata *data = calldata;
struct nfs4_state *state = data->lsp->ls_state;
- struct nfs4_state_owner *sp = state->owner;
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
- .rpc_argp = &data->arg,
- .rpc_resp = &data->res,
- .rpc_cred = sp->so_cred,
- };
+ dprintk("%s: begin!\n", __FUNCTION__);
if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
return;
- dprintk("%s: begin!\n", __FUNCTION__);
/* Do we need to do an open_to_lock_owner? */
if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
- data->arg.open_seqid = nfs_alloc_seqid(&sp->so_seqid);
- if (data->arg.open_seqid == NULL) {
- data->rpc_status = -ENOMEM;
- task->tk_action = NULL;
- goto out;
- }
+ if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
+ return;
data->arg.open_stateid = &state->stateid;
data->arg.new_lock_owner = 1;
- }
+ } else
+ data->arg.new_lock_owner = 0;
data->timestamp = jiffies;
- rpc_call_setup(task, &msg, 0);
-out:
+ rpc_call_start(task);
dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
}
struct nfs4_lockdata *data = calldata;
dprintk("%s: begin!\n", __FUNCTION__);
- if (data->arg.open_seqid != NULL)
- nfs_free_seqid(data->arg.open_seqid);
+ nfs_free_seqid(data->arg.open_seqid);
if (data->cancelled != 0) {
struct rpc_task *task;
task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
{
struct nfs4_lockdata *data;
struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
+ .rpc_cred = state->owner->so_cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(state->inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs4_lock_ops,
+ .flags = RPC_TASK_ASYNC,
+ };
int ret;
dprintk("%s: begin!\n", __FUNCTION__);
data->arg.block = 1;
if (reclaim != 0)
data->arg.reclaim = 1;
- task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
- &nfs4_lock_ops, data);
+ msg.rpc_argp = &data->arg;
+ msg.rpc_resp = &data->res;
+ task_setup_data.callback_data = data;
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
ret = nfs4_wait_for_completion_rpc_task(task);
if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
return -EOPNOTSUPP;
- if (!S_ISREG(inode->i_mode) &&
- (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
- return -EPERM;
-
return nfs4_proc_set_acl(inode, buf, buflen);
}
struct page *page,
unsigned int offset, unsigned int count)
{
-- struct nfs_server *server = NFS_SERVER(inode);
struct nfs_page *req;
for (;;) {
if (req != NULL)
break;
- if (signalled() && (server->flags & NFS_MOUNT_INTR))
+ if (fatal_signal_pending(current))
return ERR_PTR(-ERESTARTSYS);
yield();
}
* nfs_set_page_tag_locked - Tag a request as locked
* @req:
*/
-static int nfs_set_page_tag_locked(struct nfs_page *req)
+int nfs_set_page_tag_locked(struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
- if (!nfs_lock_request(req))
+ if (!nfs_lock_request_dontget(req))
return 0;
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ if (req->wb_page != NULL)
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 1;
}
if (req->wb_page != NULL) {
spin_lock(&inode->i_lock);
radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ nfs_unlock_request(req);
spin_unlock(&inode->i_lock);
- }
- nfs_unlock_request(req);
+ } else
+ nfs_unlock_request(req);
}
/**
kref_put(&req->wb_kref, nfs_free_request);
}
- static int nfs_wait_bit_interruptible(void *word)
+ static int nfs_wait_bit_killable(void *word)
{
int ret = 0;
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
ret = -ERESTARTSYS;
else
schedule();
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
*
- * Interruptible by signals only if mounted with intr flag.
+ * Interruptible by fatal signals only.
* The user is responsible for holding a count on the request.
*/
int
nfs_wait_on_request(struct nfs_page *req)
{
- struct rpc_clnt *clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
- sigset_t oldmask;
int ret = 0;
if (!test_bit(PG_BUSY, &req->wb_flags))
goto out;
- /*
- * Note: the call to rpc_clnt_sigmask() suffices to ensure that we
- * are not interrupted if intr flag is not set
- */
- rpc_clnt_sigmask(clnt, &oldmask);
ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_interruptible, TASK_INTERRUPTIBLE);
- rpc_clnt_sigunmask(clnt, &oldmask);
+ nfs_wait_bit_killable, TASK_KILLABLE);
out:
return ret;
}
goto out;
idx_start = req->wb_index + 1;
if (nfs_set_page_tag_locked(req)) {
+ kref_get(&req->wb_kref);
nfs_list_remove_request(req);
radix_tree_tag_clear(&nfsi->nfs_page_tree,
req->wb_index, tag);
#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/inet.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
#include <linux/nfs_xdr.h>
#include <linux/magic.h>
#include <linux/parser.h>
Opt_actimeo,
Opt_namelen,
Opt_mountport,
- Opt_mountprog, Opt_mountvers,
- Opt_nfsprog, Opt_nfsvers,
+ Opt_mountvers,
+ Opt_nfsvers,
/* Mount options that take string arguments */
- Opt_sec, Opt_proto, Opt_mountproto,
+ Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
Opt_addr, Opt_mountaddr, Opt_clientaddr,
/* Mount options that are ignored */
{ Opt_userspace, "retry=%u" },
{ Opt_namelen, "namlen=%u" },
{ Opt_mountport, "mountport=%u" },
- { Opt_mountprog, "mountprog=%u" },
{ Opt_mountvers, "mountvers=%u" },
- { Opt_nfsprog, "nfsprog=%u" },
{ Opt_nfsvers, "nfsvers=%u" },
{ Opt_nfsvers, "vers=%u" },
{ Opt_mountproto, "mountproto=%s" },
{ Opt_addr, "addr=%s" },
{ Opt_clientaddr, "clientaddr=%s" },
- { Opt_userspace, "mounthost=%s" },
+ { Opt_mounthost, "mounthost=%s" },
{ Opt_mountaddr, "mountaddr=%s" },
{ Opt_err, NULL }
static int nfs_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
static void nfs_kill_super(struct super_block *);
+static void nfs_put_super(struct super_block *);
static struct file_system_type nfs_fs_type = {
.owner = THIS_MODULE,
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
+ .put_super = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
unregister_filesystem(&nfs_fs_type);
}
+void nfs_sb_active(struct nfs_server *server)
+{
+ atomic_inc(&server->active);
+}
+
+void nfs_sb_deactive(struct nfs_server *server)
+{
+ if (atomic_dec_and_test(&server->active))
+ wake_up(&server->active_wq);
+}
+
+static void nfs_put_super(struct super_block *sb)
+{
+ struct nfs_server *server = NFS_SB(sb);
+ /*
+ * Make sure there are no outstanding ops to this server.
+ * If there are, wait for them to finish before allowing the
+ * unmount to continue.
+ */
+ wait_event(server->active_wq, atomic_read(&server->active) == 0);
+}
+
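The counter is meant to be bumped around asynchronous operations that can
outlive the last user-visible reference to the mount (the callers are not
part of this excerpt), roughly:

	nfs_sb_active(server);	/* pin: nfs_put_super() will wait for us */
	/* ... fire off an async RPC that may outlive the syscall ... */
	nfs_sb_deactive(server);	/* unpin; wakes the unmount waiter */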
/*
* Deliver file system statistics to userspace
*/
const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
- { NFS_MOUNT_INTR, ",intr", ",nointr" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
{ NFS_MOUNT_NONLM, ",nolock", "" },
}
seq_printf(m, ",proto=%s",
rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
- seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
- seq_printf(m, ",retrans=%u", clp->retrans_count);
+ seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
+ seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
}
nfs_show_mount_options(m, nfss, 0);
- seq_printf(m, ",addr="NIPQUAD_FMT,
- NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
+ seq_printf(m, ",addr=%s",
+ rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
+ RPC_DISPLAY_ADDR));
return 0;
}
seq_printf(m, ",namelen=%d", nfss->namelen);
#ifdef CONFIG_NFS_V4
- if (nfss->nfs_client->cl_nfsversion == 4) {
+ if (nfss->nfs_client->rpc_ops->version == 4) {
seq_printf(m, "\n\tnfsv4:\t");
seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
}
/*
- * Sanity-check a server address provided by the mount command
+ * Set the port number in an address. Be agnostic about the address family.
+ */
+static void nfs_set_port(struct sockaddr *sap, unsigned short port)
+{
+ switch (sap->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+ ap->sin_port = htons(port);
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+ ap->sin6_port = htons(port);
+ break;
+ }
+ }
+}
+
+/*
+ * Sanity-check a server address provided by the mount command.
+ *
+ * Address family must be initialized, and address must not be
+ * the ANY address for that family.
*/
static int nfs_verify_server_address(struct sockaddr *addr)
{
switch (addr->sa_family) {
case AF_INET: {
- struct sockaddr_in *sa = (struct sockaddr_in *) addr;
- if (sa->sin_addr.s_addr != INADDR_ANY)
- return 1;
- break;
+ struct sockaddr_in *sa = (struct sockaddr_in *)addr;
+ return sa->sin_addr.s_addr != INADDR_ANY;
+ }
+ case AF_INET6: {
+ struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
+ return !ipv6_addr_any(sa);
}
}
return 0;
}
+/*
+ * Parse string addresses passed in via a mount option,
+ * and construct a sockaddr based on the result.
+ *
+ * If address parsing fails, set the sockaddr's address
+ * family to AF_UNSPEC to force nfs_verify_server_address()
+ * to punt the mount.
+ */
+static void nfs_parse_server_address(char *value,
+ struct sockaddr *sap,
+ size_t *len)
+{
+ if (strchr(value, ':')) {
+ struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+ u8 *addr = (u8 *)&ap->sin6_addr.in6_u;
+
+ ap->sin6_family = AF_INET6;
+ *len = sizeof(*ap);
+ if (in6_pton(value, -1, addr, '\0', NULL))
+ return;
+ } else {
+ struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+ u8 *addr = (u8 *)&ap->sin_addr.s_addr;
+
+ ap->sin_family = AF_INET;
+ *len = sizeof(*ap);
+ if (in4_pton(value, -1, addr, '\0', NULL))
+ return;
+ }
+
+ sap->sa_family = AF_UNSPEC;
+ *len = 0;
+}
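To illustrate the dispatch above (hypothetical inputs, not part of the patch): any string containing a ':' takes the IPv6 path, everything else the IPv4 path, and a parse failure leaves AF_UNSPEC so nfs_verify_server_address() rejects the mount:

	struct sockaddr_storage ss;
	size_t len;

	nfs_parse_server_address("192.168.1.1", (struct sockaddr *)&ss, &len);
	/* ss.ss_family == AF_INET, len == sizeof(struct sockaddr_in) */

	nfs_parse_server_address("fe80::1", (struct sockaddr *)&ss, &len);
	/* ss.ss_family == AF_INET6, len == sizeof(struct sockaddr_in6) */

	nfs_parse_server_address("bogus", (struct sockaddr *)&ss, &len);
	/* ss.ss_family == AF_UNSPEC, len == 0: the mount is punted */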
+
/*
* Error-check and convert a string of mount options from user space into
* a data structure
struct nfs_parsed_mount_data *mnt)
{
char *p, *string;
+ unsigned short port = 0;
if (!raw) {
dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
mnt->flags &= ~NFS_MOUNT_SOFT;
break;
case Opt_intr:
- mnt->flags |= NFS_MOUNT_INTR;
- break;
case Opt_nointr:
- mnt->flags &= ~NFS_MOUNT_INTR;
break;
case Opt_posix:
mnt->flags |= NFS_MOUNT_POSIX;
return 0;
if (option < 0 || option > 65535)
return 0;
- mnt->nfs_server.address.sin_port = htons(option);
+ port = option;
break;
case Opt_rsize:
if (match_int(args, &mnt->rsize))
return 0;
mnt->mount_server.port = option;
break;
- case Opt_mountprog:
- if (match_int(args, &option))
- return 0;
- if (option < 0)
- return 0;
- mnt->mount_server.program = option;
- break;
case Opt_mountvers:
if (match_int(args, &option))
return 0;
return 0;
mnt->mount_server.version = option;
break;
- case Opt_nfsprog:
- if (match_int(args, &option))
- return 0;
- if (option < 0)
- return 0;
- mnt->nfs_server.program = option;
- break;
case Opt_nfsvers:
if (match_int(args, &option))
return 0;
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
- mnt->nfs_server.address.sin_family = AF_INET;
- mnt->nfs_server.address.sin_addr.s_addr =
- in_aton(string);
+ nfs_parse_server_address(string, (struct sockaddr *)
+ &mnt->nfs_server.address,
+ &mnt->nfs_server.addrlen);
kfree(string);
break;
case Opt_clientaddr:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
+ kfree(mnt->client_address);
mnt->client_address = string;
break;
+ case Opt_mounthost:
+ string = match_strdup(args);
+ if (string == NULL)
+ goto out_nomem;
+ kfree(mnt->mount_server.hostname);
+ mnt->mount_server.hostname = string;
+ break;
case Opt_mountaddr:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
- mnt->mount_server.address.sin_family = AF_INET;
- mnt->mount_server.address.sin_addr.s_addr =
- in_aton(string);
+ nfs_parse_server_address(string, (struct sockaddr *)
+ &mnt->mount_server.address,
+ &mnt->mount_server.addrlen);
kfree(string);
break;
}
}
+ nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port);
+
return 1;
out_nomem:
static int nfs_try_mount(struct nfs_parsed_mount_data *args,
struct nfs_fh *root_fh)
{
- struct sockaddr_in sin;
+ struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address;
+ char *hostname;
int status;
if (args->mount_server.version == 0) {
args->mount_server.version = NFS_MNT_VERSION;
}
+ if (args->mount_server.hostname)
+ hostname = args->mount_server.hostname;
+ else
+ hostname = args->nfs_server.hostname;
+
/*
* Construct the mount server's address.
*/
- if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
- sin = args->mount_server.address;
- else
- sin = args->nfs_server.address;
+ if (args->mount_server.address.ss_family == AF_UNSPEC) {
+ memcpy(sap, &args->nfs_server.address,
+ args->nfs_server.addrlen);
+ args->mount_server.addrlen = args->nfs_server.addrlen;
+ }
+
/*
* autobind will be used if mount_server.port == 0
*/
- sin.sin_port = htons(args->mount_server.port);
+ nfs_set_port(sap, args->mount_server.port);
/*
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount((struct sockaddr *) &sin,
- sizeof(sin),
- args->nfs_server.hostname,
+ status = nfs_mount(sap,
+ args->mount_server.addrlen,
+ hostname,
args->nfs_server.export_path,
args->mount_server.version,
args->mount_server.protocol,
if (status == 0)
return 0;
- dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
- ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
+ dfprintk(MOUNT, "NFS: unable to mount server %s, error %d",
+ hostname, status);
return status;
}
*
* + breaking back: trying proto=udp after proto=tcp, v2 after v3,
* mountproto=tcp after mountproto=udp, and so on
- *
- * XXX: as far as I can tell, changing the NFS program number is not
- * supported in the NFS client.
*/
static int nfs_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args,
args->acdirmin = 30;
args->acdirmax = 60;
args->mount_server.protocol = XPRT_TRANSPORT_UDP;
- args->mount_server.program = NFS_MNT_PROGRAM;
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
- args->nfs_server.program = NFS_PROGRAM;
switch (data->version) {
case 1:
memset(mntfh->data + mntfh->size, 0,
sizeof(mntfh->data) - mntfh->size);
- if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
- goto out_no_address;
-
/*
* Translate to nfs_parsed_mount_data, which nfs_fill_super
* can deal with.
args->acregmax = data->acregmax;
args->acdirmin = data->acdirmin;
args->acdirmax = data->acdirmax;
- args->nfs_server.address = data->addr;
+
+ memcpy(&args->nfs_server.address, &data->addr,
+ sizeof(data->addr));
+ args->nfs_server.addrlen = sizeof(data->addr);
+ if (!nfs_verify_server_address((struct sockaddr *)
+ &args->nfs_server.address))
+ goto out_no_address;
+
if (!(data->flags & NFS_MOUNT_TCP))
args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
/* N.B. caller will free nfs_server.hostname in all cases */
return ret;
}
+static int nfs_compare_super_address(struct nfs_server *server1,
+ struct nfs_server *server2)
+{
+ struct sockaddr *sap1, *sap2;
+
+ sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr;
+ sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr;
+
+ if (sap1->sa_family != sap2->sa_family)
+ return 0;
+
+ switch (sap1->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1;
+ struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2;
+ if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr)
+ return 0;
+ if (sin1->sin_port != sin2->sin_port)
+ return 0;
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1;
+ struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2;
+ if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
+ return 0;
+ if (sin1->sin6_port != sin2->sin6_port)
+ return 0;
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
static int nfs_compare_super(struct super_block *sb, void *data)
{
struct nfs_sb_mountdata *sb_mntdata = data;
struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
int mntflags = sb_mntdata->mntflags;
- if (memcmp(&old->nfs_client->cl_addr,
- &server->nfs_client->cl_addr,
- sizeof(old->nfs_client->cl_addr)) != 0)
+ if (!nfs_compare_super_address(old, server))
return 0;
/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
if (old->flags & NFS_MOUNT_UNSHARED)
out:
kfree(data.nfs_server.hostname);
+ kfree(data.mount_server.hostname);
return error;
out_err_nosb:
error = PTR_ERR(mntroot);
goto error_splat_super;
}
- if (mntroot->d_inode->i_op != &nfs_dir_inode_operations) {
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
dput(mntroot);
error = -ESTALE;
goto error_splat_super;
nfs_initialise_sb(sb);
}
+/*
+ * If the user didn't specify a port, set the port number to
+ * the NFS version 4 default port.
+ */
+static void nfs4_default_port(struct sockaddr *sap)
+{
+ switch (sap->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+ if (ap->sin_port == 0)
+ ap->sin_port = htons(NFS_PORT);
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+ if (ap->sin6_port == 0)
+ ap->sin6_port = htons(NFS_PORT);
+ break;
+ }
+ }
+}
+
/*
* Validate NFSv4 mount options
*/
struct nfs_parsed_mount_data *args,
const char *dev_name)
{
+ struct sockaddr_in *ap;
struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
char *c;
switch (data->version) {
case 1:
- if (data->host_addrlen != sizeof(args->nfs_server.address))
+ ap = (struct sockaddr_in *)&args->nfs_server.address;
+ if (data->host_addrlen > sizeof(args->nfs_server.address))
+ goto out_no_address;
+ if (data->host_addrlen == 0)
goto out_no_address;
- if (copy_from_user(&args->nfs_server.address,
- data->host_addr,
- sizeof(args->nfs_server.address)))
+ args->nfs_server.addrlen = data->host_addrlen;
+ if (copy_from_user(ap, data->host_addr, data->host_addrlen))
return -EFAULT;
- if (args->nfs_server.address.sin_port == 0)
- args->nfs_server.address.sin_port = htons(NFS_PORT);
if (!nfs_verify_server_address((struct sockaddr *)
&args->nfs_server.address))
goto out_no_address;
+ nfs4_default_port((struct sockaddr *)
+ &args->nfs_server.address);
+
switch (data->auth_flavourlen) {
case 0:
args->auth_flavors[0] = RPC_AUTH_UNIX;
&args->nfs_server.address))
return -EINVAL;
+ nfs4_default_port((struct sockaddr *)
+ &args->nfs_server.address);
+
switch (args->auth_flavor_len) {
case 0:
args->auth_flavors[0] = RPC_AUTH_UNIX;
len = c - dev_name;
if (len > NFS4_MAXNAMLEN)
return -ENAMETOOLONG;
- args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
- if (args->nfs_server.hostname == NULL)
- return -ENOMEM;
- strncpy(args->nfs_server.hostname, dev_name, len - 1);
+ /* N.B. caller will free nfs_server.hostname in all cases */
+ args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
+ if (!args->nfs_server.hostname)
+ return -ENOMEM;
c++; /* step over the ':' */
len = strlen(c);
if (len > NFS4_MAXPATHLEN)
return -ENAMETOOLONG;
- args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
- if (args->nfs_server.export_path == NULL)
- return -ENOMEM;
- strncpy(args->nfs_server.export_path, c, len);
+ args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
+ if (!args->nfs_server.export_path)
+ return -ENOMEM;
- dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
+ dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
if (args->client_address == NULL)
goto out_no_client_address;
error = PTR_ERR(mntroot);
goto error_splat_super;
}
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
+ dput(mntroot);
+ error = -ESTALE;
+ goto error_splat_super;
+ }
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
error = PTR_ERR(mntroot);
goto error_splat_super;
}
+ if (mntroot->d_inode->i_op != NFS_SB(s)->nfs_client->rpc_ops->dir_inode_ops) {
+ dput(mntroot);
+ error = -ESTALE;
+ goto error_splat_super;
+ }
s->s_flags |= MS_ACTIVE;
mnt->mnt_sb = s;
}
/* Update file length */
nfs_grow_file(page, offset, count);
- nfs_unlock_request(req);
+ nfs_clear_page_tag_locked(req);
return 0;
}
struct page *page)
{
struct inode *inode = page->mapping->host;
- struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
int ret;
spin_unlock(&inode->i_lock);
return 0;
}
- if (nfs_lock_request_dontget(req))
+ if (nfs_set_page_tag_locked(req))
break;
/* Note: If we hold the page lock, as is the case in nfs_writepage,
- * then the call to nfs_lock_request_dontget() will always
+ * then the call to nfs_set_page_tag_locked() will always
* succeed provided that someone hasn't already marked the
* request as dirty (in which case we don't care).
*/
if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
/* This request is marked for commit */
spin_unlock(&inode->i_lock);
- nfs_unlock_request(req);
+ nfs_clear_page_tag_locked(req);
nfs_pageio_complete(pgio);
return 0;
}
spin_unlock(&inode->i_lock);
BUG();
}
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
- NFS_PAGE_TAG_LOCKED);
spin_unlock(&inode->i_lock);
nfs_pageio_add_request(pgio, req);
return 0;
set_page_private(req->wb_page, (unsigned long)req);
nfsi->npages++;
kref_get(&req->wb_kref);
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
return 0;
}
/*
* Wait for a request to complete.
*
- * Interruptible by signals only if mounted with intr flag.
+ * Interruptible by fatal signals only.
*/
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
if (req) {
- if (!nfs_lock_request_dontget(req)) {
+ if (!nfs_set_page_tag_locked(req)) {
int error;
spin_unlock(&inode->i_lock);
|| req->wb_page != page
|| !nfs_dirty_request(req)
|| offset > rqend || end < req->wb_offset) {
- nfs_unlock_request(req);
+ nfs_clear_page_tag_locked(req);
return ERR_PTR(-EBUSY);
}
nfs_clear_page_tag_locked(req);
}
-static inline int flush_task_priority(int how)
+static int flush_task_priority(int how)
{
switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
case FLUSH_HIGHPRI:
unsigned int count, unsigned int offset,
int how)
{
- struct inode *inode;
- int flags;
+ struct inode *inode = req->wb_context->path.dentry->d_inode;
+ int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ int priority = flush_task_priority(how);
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = req->wb_context->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = NFS_CLIENT(inode),
+ .task = &data->task,
+ .rpc_message = &msg,
+ .callback_ops = call_ops,
+ .callback_data = data,
+ .flags = flags,
+ .priority = priority,
+ };
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
- data->cred = req->wb_context->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
data->args.pages = data->pagevec;
data->args.count = count;
data->args.context = req->wb_context;
+ data->args.stable = NFS_UNSTABLE;
+ if (how & FLUSH_STABLE) {
+ data->args.stable = NFS_DATA_SYNC;
+ if (!NFS_I(inode)->ncommit)
+ data->args.stable = NFS_FILE_SYNC;
+ }
data->res.fattr = &data->fattr;
data->res.count = count;
nfs_fattr_init(&data->fattr);
/* Set up the initial task struct. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
- rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
- NFS_PROTO(inode)->write_setup(data, how);
-
- data->task.tk_priority = flush_task_priority(how);
- data->task.tk_cookie = (unsigned long)inode;
+ NFS_PROTO(inode)->write_setup(data, &msg);
dprintk("NFS: %5u initiated write call "
"(req %s/%Ld, %u bytes @ offset %Lu)\n",
(long long)NFS_FILEID(inode),
count,
(unsigned long long)data->args.offset);
-}
-static void nfs_execute_write(struct nfs_write_data *data)
-{
- rpc_execute(&data->task);
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
}
/*
wsize, offset, how);
offset += wsize;
nbytes -= wsize;
- nfs_execute_write(data);
} while (nbytes != 0);
return 0;
/* Set up the argument struct */
nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
- nfs_execute_write(data);
return 0;
out_bad:
while (!list_empty(head)) {
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags)
{
- int wsize = NFS_SERVER(inode)->wsize;
+ size_t wsize = NFS_SERVER(inode)->wsize;
if (wsize < PAGE_CACHE_SIZE)
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
struct nfs_write_data *data,
int how)
{
- struct nfs_page *first;
- struct inode *inode;
- int flags;
+ struct nfs_page *first = nfs_list_entry(head->next);
+ struct inode *inode = first->wb_context->path.dentry->d_inode;
+ int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+ int priority = flush_task_priority(how);
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = first->wb_context->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .task = &data->task,
+ .rpc_client = NFS_CLIENT(inode),
+ .rpc_message = &msg,
+ .callback_ops = &nfs_commit_ops,
+ .callback_data = data,
+ .flags = flags,
+ .priority = priority,
+ };
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
list_splice_init(head, &data->pages);
- first = nfs_list_entry(data->pages.next);
- inode = first->wb_context->path.dentry->d_inode;
data->inode = inode;
- data->cred = first->wb_context->cred;
+ data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(data->inode);
/* Note: we always request a commit of the entire inode */
nfs_fattr_init(&data->fattr);
/* Set up the initial task struct. */
- flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
- rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
- NFS_PROTO(inode)->commit_setup(data, how);
+ NFS_PROTO(inode)->commit_setup(data, &msg);
- data->task.tk_priority = flush_task_priority(how);
- data->task.tk_cookie = (unsigned long)inode;
-
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task))
+ rpc_put_task(task);
}
/*
/* Set up the argument struct */
nfs_commit_rpcsetup(head, data, how);
- nfs_execute_write(data);
return 0;
out_bad:
while (!list_empty(head)) {
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & (TASK_RUNNING |
- TASK_INTERRUPTIBLE |
- TASK_UNINTERRUPTIBLE |
- TASK_STOPPED |
- TASK_TRACED)) |
- tsk->exit_state;
+ unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
const char **p = &task_state_array[0];
while (state) {
ppid = pid_alive(p) ?
task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
tpid = pid_alive(p) && p->ptrace ?
- task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0;
+ task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0;
buffer += sprintf(buffer,
"State:\t%s\n"
"Tgid:\t%d\n"
}
sid = task_session_nr_ns(task, ns);
+ ppid = task_tgid_nr_ns(task->real_parent, ns);
pgid = task_pgrp_nr_ns(task, ns);
- ppid = task_ppid_nr_ns(task, ns);
unlock_task_sighand(task, &flags);
}
(task == current || \
(task->parent == current && \
(task->ptrace & PT_PTRACED) && \
- (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \
+ (task_is_stopped_or_traced(task)) && \
security_ptrace(current,task) == 0))
+struct mm_struct *mm_for_maps(struct task_struct *task)
+{
+ struct mm_struct *mm = get_task_mm(task);
+ if (!mm)
+ return NULL;
+ down_read(&mm->mmap_sem);
+ task_lock(task);
+ if (task->mm != mm)
+ goto out;
+ if (task->mm != current->mm && __ptrace_may_attach(task) < 0)
+ goto out;
+ task_unlock(task);
+ return mm;
+out:
+ task_unlock(task);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return NULL;
+}
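The expected caller pattern (a sketch with a hypothetical caller; mm_for_maps() returns with mmap_sem held for read, so the caller must release both the semaphore and the mm reference):

	struct mm_struct *mm = mm_for_maps(task);

	if (mm) {
		/* safe to walk mm->mmap here */
		up_read(&mm->mmap_sem);
		mmput(mm);
	}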
+
static int proc_pid_cmdline(struct task_struct *task, char * buffer)
{
int res = 0;
}
#endif
+#ifdef CONFIG_LATENCYTOP
+static int lstats_show_proc(struct seq_file *m, void *v)
+{
+ int i;
+ struct task_struct *task = m->private;
+ seq_puts(m, "Latency Top version : v0.1\n");
+
+ for (i = 0; i < 32; i++) {
+ if (task->latency_record[i].backtrace[0]) {
+ int q;
+ seq_printf(m, "%i %li %li ",
+ task->latency_record[i].count,
+ task->latency_record[i].time,
+ task->latency_record[i].max);
+ for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
+ char sym[KSYM_NAME_LEN];
+ char *c;
+ if (!task->latency_record[i].backtrace[q])
+ break;
+ if (task->latency_record[i].backtrace[q] == ULONG_MAX)
+ break;
+ sprint_symbol(sym, task->latency_record[i].backtrace[q]);
+ c = strchr(sym, '+');
+ if (c)
+ *c = 0;
+ seq_printf(m, "%s ", sym);
+ }
+ seq_printf(m, "\n");
+ }
+
+ }
+ return 0;
+}
+
+static int lstats_open(struct inode *inode, struct file *file)
+{
+ int ret;
+ struct seq_file *m;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ ret = single_open(file, lstats_show_proc, NULL);
+ if (!ret) {
+ m = file->private_data;
+ m->private = task;
+ }
+ return ret;
+}
+
+static ssize_t lstats_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offs)
+{
+ struct seq_file *m;
+ struct task_struct *task;
+
+ m = file->private_data;
+ task = m->private;
+ clear_all_latency_tracing(task);
+
+ return count;
+}
+
+static const struct file_operations proc_lstats_operations = {
+ .open = lstats_open,
+ .read = seq_read,
+ .write = lstats_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+#endif
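The record format of the resulting /proc/<pid>/latency file follows directly from the seq_printf() calls above; a hypothetical entry (illustrative values only) would look like:

	Latency Top version : v0.1
	12 3409 182 do_sys_poll sys_poll

i.e. count, total time and maximum latency, followed by the backtrace symbols. Writing anything to the file clears the records via lstats_write().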
+
/* The badness from the OOM killer */
unsigned long badness(struct task_struct *p, unsigned long uptime);
static int proc_oom_score(struct task_struct *task, char *buffer)
};
#endif
+
#ifdef CONFIG_SCHED_DEBUG
/*
* Print out various scheduling related per-task fields:
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, pid_schedstat),
#endif
+#ifdef CONFIG_LATENCYTOP
+ REG("latency", S_IRUGO, lstats),
+#endif
#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, cpuset),
#endif
#ifdef CONFIG_SCHEDSTATS
INF("schedstat", S_IRUGO, pid_schedstat),
#endif
+#ifdef CONFIG_LATENCYTOP
+ REG("latency", S_IRUGO, lstats),
+#endif
#ifdef CONFIG_PROC_PID_CPUSET
REG("cpuset", S_IRUGO, cpuset),
#endif
#define NFS_INO_STALE (2) /* possible stale inode */
#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */
-static inline struct nfs_inode *NFS_I(struct inode *inode)
+static inline struct nfs_inode *NFS_I(const struct inode *inode)
{
return container_of(inode, struct nfs_inode, vfs_inode);
}
-#define NFS_SB(s) ((struct nfs_server *)(s->s_fs_info))
-#define NFS_FH(inode) (&NFS_I(inode)->fh)
-#define NFS_SERVER(inode) (NFS_SB(inode->i_sb))
-#define NFS_CLIENT(inode) (NFS_SERVER(inode)->client)
-#define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops)
-#define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf)
-#define NFS_MINATTRTIMEO(inode) \
- (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
- : NFS_SERVER(inode)->acregmin)
-#define NFS_MAXATTRTIMEO(inode) \
- (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
- : NFS_SERVER(inode)->acregmax)
+static inline struct nfs_server *NFS_SB(const struct super_block *s)
+{
+ return (struct nfs_server *)(s->s_fs_info);
+}
+
+static inline struct nfs_fh *NFS_FH(const struct inode *inode)
+{
+ return &NFS_I(inode)->fh;
+}
+
+static inline struct nfs_server *NFS_SERVER(const struct inode *inode)
+{
+ return NFS_SB(inode->i_sb);
+}
+
+static inline struct rpc_clnt *NFS_CLIENT(const struct inode *inode)
+{
+ return NFS_SERVER(inode)->client;
+}
+
+static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
+{
+ return NFS_SERVER(inode)->nfs_client->rpc_ops;
+}
+
+static inline __be32 *NFS_COOKIEVERF(const struct inode *inode)
+{
+ return NFS_I(inode)->cookieverf;
+}
+
+static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
+{
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ return S_ISDIR(inode->i_mode) ? nfss->acdirmin : nfss->acregmin;
+}
-#define NFS_FLAGS(inode) (NFS_I(inode)->flags)
-#define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
+static inline unsigned NFS_MAXATTRTIMEO(const struct inode *inode)
+{
+ struct nfs_server *nfss = NFS_SERVER(inode);
+ return S_ISDIR(inode->i_mode) ? nfss->acdirmax : nfss->acregmax;
+}
-#define NFS_FILEID(inode) (NFS_I(inode)->fileid)
+static inline int NFS_STALE(const struct inode *inode)
+{
+ return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+}
+
+static inline __u64 NFS_FILEID(const struct inode *inode)
+{
+ return NFS_I(inode)->fileid;
+}
+
+static inline void set_nfs_fileid(struct inode *inode, __u64 fileid)
+{
+ NFS_I(inode)->fileid = fileid;
+}
static inline void nfs_mark_for_revalidate(struct inode *inode)
{
static inline int NFS_USE_READDIRPLUS(struct inode *inode)
{
- return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+ return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
}
static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
extern const struct file_operations nfs_dir_operations;
extern struct dentry_operations nfs_dentry_operations;
+extern void nfs_force_lookup_revalidate(struct inode *dir);
extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
extern void nfs_access_zap_cache(struct inode *inode);
#define nfs_wait_event(clnt, wq, condition) \
({ \
- int __retval = 0; \
- if (clnt->cl_intr) { \
- sigset_t oldmask; \
- rpc_clnt_sigmask(clnt, &oldmask); \
- __retval = wait_event_interruptible(wq, condition); \
- rpc_clnt_sigunmask(clnt, &oldmask); \
- } else \
- wait_event(wq, condition); \
+ int __retval = wait_event_killable(wq, condition); \
__retval; \
})
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#define CLONE_IO 0x80000000 /* Clone io context */
/*
* Scheduling policies
#include <linux/proportions.h>
#include <linux/seccomp.h>
#include <linux/rcupdate.h>
-#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/task_io_accounting.h>
#include <linux/kobject.h>
+#include <linux/latencytop.h>
#include <asm/processor.h>
struct exec_domain;
struct futex_pi_state;
+struct robust_list_head;
struct bio;
/*
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
- #define TASK_STOPPED 4
- #define TASK_TRACED 8
+ #define __TASK_STOPPED 4
+ #define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
+ #define TASK_WAKEKILL 128
+
+ /* Convenience macros for the sake of set_task_state */
+ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+ #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
+ #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
+
+ /* Convenience macros for the sake of wake_up */
+ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+ #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+ /* get_task_state() */
+ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
+ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+ __TASK_TRACED)
+
+ #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
+ #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
+ #define task_is_stopped_or_traced(task) \
+ ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+ #define task_contributes_to_load(task) \
+ ((task->state & TASK_UNINTERRUPTIBLE) != 0)
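To make TASK_WAKEKILL concrete, a minimal sketch of the wait pattern this series builds on (simplified from wait_event_killable(); 'condition' is a placeholder):

	for (;;) {
		set_current_state(TASK_KILLABLE);
		if (condition)
			break;
		if (fatal_signal_pending(current))
			break;	/* only SIGKILL gets through */
		schedule();
	}
	__set_current_state(TASK_RUNNING);

The task sleeps as if TASK_UNINTERRUPTIBLE, but TASK_WAKEKILL lets a fatal signal wake it so it can exit instead of hanging forever.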
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
}
#endif
+extern unsigned long rt_needs_cpu(int cpu);
+
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
+extern void hrtick_resched(void);
+
+extern void sched_show_task(struct task_struct *p);
#ifdef CONFIG_DETECT_SOFTLOCKUP
extern void softlockup_tick(void);
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int softlockup_thresh;
+extern unsigned long softlockup_thresh;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
#else
static inline void softlockup_tick(void)
{
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
extern signed long FASTCALL(schedule_timeout(signed long timeout));
extern signed long schedule_timeout_interruptible(signed long timeout);
+ extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
#ifdef CONFIG_FAIR_USER_SCHED
struct task_group *tg;
#ifdef CONFIG_SYSFS
- struct kset kset;
- struct subsys_attribute user_attr;
+ struct kobject kobj;
struct work_struct work;
#endif
#endif
};
-#ifdef CONFIG_FAIR_USER_SCHED
-extern int uids_kobject_init(void);
-#else
-static inline int uids_kobject_init(void) { return 0; }
-#endif
+extern int uids_sysfs_init(void);
extern struct user_struct *find_user(uid_t);
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
void (*yield_task) (struct rq *rq);
+ int (*select_task_rq)(struct task_struct *p, int sync);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
int (*move_one_task) (struct rq *this_rq, int this_cpu,
struct rq *busiest, struct sched_domain *sd,
enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
#endif
void (*set_curr_task) (struct rq *rq);
- void (*task_tick) (struct rq *rq, struct task_struct *p);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_new) (struct rq *rq, struct task_struct *p);
+ void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
+
+ void (*join_domain)(struct rq *rq);
+ void (*leave_domain)(struct rq *rq);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
};
struct load_weight {
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
u64 sleep_start;
u64 sleep_max;
#endif
};
+struct sched_rt_entity {
+ struct list_head run_list;
+ unsigned int time_slice;
+ unsigned long timeout;
+ int nr_cpus_allowed;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct sched_rt_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct rt_rq *rt_rq;
+ /* rq "owned" by this entity/group: */
+ struct rt_rq *my_q;
+#endif
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
#endif
int prio, static_prio, normal_prio;
- struct list_head run_list;
const struct sched_class *sched_class;
struct sched_entity se;
+ struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
- unsigned short ioprio;
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
unsigned int policy;
cpumask_t cpus_allowed;
- unsigned int time_slice;
+
+#ifdef CONFIG_PREEMPT_RCU
+ int rcu_read_lock_nesting;
+ int rcu_flipctr_idx;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+/* hung task detection */
+ unsigned long last_switch_timestamp;
+ unsigned long last_switch_count;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
int make_it_fail;
#endif
struct prop_local_single dirties;
+#ifdef CONFIG_LATENCYTOP
+ int latency_record_count;
+ struct latency_record latency_record[LT_SAVECOUNT];
+#endif
};
/*
*
* set_task_vxid() : assigns a virtual id to a task;
*
- * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
- * the result depends on the namespace and whether the
- * task in question is the namespace's init. e.g. for the
- * namespace's init this will return 0 when called from
- * the namespace of this init, or appropriate id otherwise.
- *
- *
* see also pid_nr() etc in include/linux/pid.h
*/
}
-static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
-}
-
/**
* pid_alive - check that a task structure is not stale
* @p: Task structure to be checked.
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_sched_rt_period;
+extern unsigned int sysctl_sched_rt_ratio;
+#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+extern unsigned int sysctl_sched_min_bal_int_shares;
+extern unsigned int sysctl_sched_max_bal_int_shares;
+#endif
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-
+
+ extern int FASTCALL(__fatal_signal_pending(struct task_struct *p));
+
+ static inline int fatal_signal_pending(struct task_struct *p)
+ {
+ return signal_pending(p) && __fatal_signal_pending(p);
+ }
+
static inline int need_resched(void)
{
return unlikely(test_thread_flag(TIF_NEED_RESCHED));
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
-extern int cond_resched(void);
-extern int cond_resched_lock(spinlock_t * lock);
-extern int cond_resched_softirq(void);
-
-/*
- * Does a critical section need to be broken due to another
- * task waiting?:
- */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
-# define need_lockbreak(lock) ((lock)->break_lock)
+#ifdef CONFIG_PREEMPT
+static inline int cond_resched(void)
+{
+ return 0;
+}
#else
-# define need_lockbreak(lock) 0
+extern int _cond_resched(void);
+static inline int cond_resched(void)
+{
+ return _cond_resched();
+}
#endif
+extern int cond_resched_lock(spinlock_t * lock);
+extern int cond_resched_softirq(void);
/*
* Does a critical section need to be broken due to another
- * task waiting or preemption being signalled:
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * but a general need for low latency)
*/
-static inline int lock_need_resched(spinlock_t *lock)
+static inline int spin_needbreak(spinlock_t *lock)
{
- if (need_lockbreak(lock) || need_resched())
- return 1;
+#ifdef CONFIG_PREEMPT
+ return spin_is_contended(lock);
+#else
return 0;
+#endif
}
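Typical use is inside a loop that holds a spinlock for a long stretch (a sketch with a hypothetical queue; cond_resched_lock() itself checks both need_resched() and spin_needbreak()):

	spin_lock(&q->lock);
	while (!list_empty(&q->items)) {
		process_one_item(q);
		/* drop the lock briefly if a waiter or resched is pending */
		cond_resched_lock(&q->lock);
	}
	spin_unlock(&q->lock);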
/*
struct rpc_iostats * cl_metrics; /* per-client statistics */
unsigned int cl_softrtry : 1,/* soft timeouts */
- cl_intr : 1,/* interruptible */
cl_discrtry : 1,/* disconnect before retry */
cl_autobind : 1;/* use getport() */
struct rpc_rtt * cl_rtt; /* RTO estimator data */
+ const struct rpc_timeout *cl_timeout; /* Timeout strategy */
int cl_nodelen; /* nodename length */
char cl_nodename[UNX_MAXNODENAME];
struct dentry * cl_dentry; /* inode */
struct rpc_clnt * cl_parent; /* Points to parent of clones */
struct rpc_rtt cl_rtt_default;
+ struct rpc_timeout cl_timeout_default;
struct rpc_program * cl_program;
char cl_inline_name[32];
};
struct sockaddr *address;
size_t addrsize;
struct sockaddr *saddress;
- struct rpc_timeout *timeout;
+ const struct rpc_timeout *timeout;
char *servername;
struct rpc_program *program;
u32 version;
/* Values for "flags" field */
#define RPC_CLNT_CREATE_HARDRTRY (1UL << 0)
- #define RPC_CLNT_CREATE_INTR (1UL << 1)
#define RPC_CLNT_CREATE_AUTOBIND (1UL << 2)
#define RPC_CLNT_CREATE_NONPRIVPORT (1UL << 3)
#define RPC_CLNT_CREATE_NOPING (1UL << 4)
void rpc_release_client(struct rpc_clnt *);
int rpcb_register(u32, u32, int, unsigned short, int *);
-int rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
+int rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
void rpcb_getport_async(struct rpc_task *);
-void rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
-
+void rpc_call_start(struct rpc_task *);
int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
int flags, const struct rpc_call_ops *tk_ops,
void *calldata);
struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
int flags);
void rpc_restart_call(struct rpc_task *);
-void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
-void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
size_t rpc_max_payload(struct rpc_clnt *);
void rpc_force_rebind(struct rpc_clnt *);
size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
-char * rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
#endif /* __KERNEL__ */
#endif /* _LINUX_SUNRPC_CLNT_H */
__u8 tk_garb_retry;
__u8 tk_cred_retry;
- unsigned long tk_cookie; /* Cookie for batching tasks */
-
/*
* timeout_fn to be executed by timer bottom half
* callback to be executed after waking up
struct timer_list tk_timer; /* kernel timer */
unsigned long tk_timeout; /* timeout for rpc_sleep() */
unsigned short tk_flags; /* misc flags */
- unsigned char tk_priority : 2;/* Task priority */
unsigned long tk_runstate; /* Task run status */
struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
unsigned long tk_start; /* RPC task init timestamp */
long tk_rtt; /* round-trip time (jiffies) */
+ pid_t tk_owner; /* Process id for batching tasks */
+ unsigned char tk_priority : 2;/* Task priority */
+
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
void (*rpc_release)(void *);
};
+struct rpc_task_setup {
+ struct rpc_task *task;
+ struct rpc_clnt *rpc_client;
+ const struct rpc_message *rpc_message;
+ const struct rpc_call_ops *callback_ops;
+ void *callback_data;
+ unsigned short flags;
+ signed char priority;
+};
/*
* RPC task flags
#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
#define RPC_TASK_KILLED 0x0100 /* task was killed */
#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */
- #define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */
#define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC)
#define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER)
#define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED)
#define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL)
#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT)
- #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
* Note: if you change these, you must also change
* the task initialization definitions below.
*/
-#define RPC_PRIORITY_LOW 0
-#define RPC_PRIORITY_NORMAL 1
-#define RPC_PRIORITY_HIGH 2
-#define RPC_NR_PRIORITY (RPC_PRIORITY_HIGH+1)
+#define RPC_PRIORITY_LOW (-1)
+#define RPC_PRIORITY_NORMAL (0)
+#define RPC_PRIORITY_HIGH (1)
+#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
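Since RPC_PRIORITY_LOW is now negative, code indexing the per-priority task lists has to normalize first; the arithmetic amounts to (the helper name here is illustrative, not the scheduler's):

	/* maps -1..1 onto array index 0..2; RPC_NR_PRIORITY == 3 */
	static inline unsigned int rpc_priority_to_index(signed char priority)
	{
		return priority - RPC_PRIORITY_LOW;
	}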
/*
* RPC synchronization objects
struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
- unsigned long cookie; /* cookie of last task serviced */
+ pid_t owner; /* process id of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
unsigned char priority; /* current priority */
unsigned char count; /* # task groups remaining serviced so far */
* performance of NFS operations such as read/write.
*/
#define RPC_BATCH_COUNT 16
-
-#ifndef RPC_DEBUG
-# define RPC_WAITQ_INIT(var,qname) { \
- .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
- .tasks = { \
- [0] = LIST_HEAD_INIT(var.tasks[0]), \
- [1] = LIST_HEAD_INIT(var.tasks[1]), \
- [2] = LIST_HEAD_INIT(var.tasks[2]), \
- }, \
- }
-#else
-# define RPC_WAITQ_INIT(var,qname) { \
- .lock = __SPIN_LOCK_UNLOCKED(var.lock), \
- .tasks = { \
- [0] = LIST_HEAD_INIT(var.tasks[0]), \
- [1] = LIST_HEAD_INIT(var.tasks[1]), \
- [2] = LIST_HEAD_INIT(var.tasks[2]), \
- }, \
- .name = qname, \
- }
-#endif
-# define RPC_WAITQ(var,qname) struct rpc_wait_queue var = RPC_WAITQ_INIT(var,qname)
-
#define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0)
/*
* Function prototypes
*/
-struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
- const struct rpc_call_ops *ops, void *data);
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
- const struct rpc_call_ops *ops, void *data);
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
- int flags, const struct rpc_call_ops *ops,
- void *data);
+struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
+struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
void rpc_put_task(struct rpc_task *);
void rpc_exit_task(struct rpc_task *);
void rpc_release_calldata(const struct rpc_call_ops *, void *);
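The replacement pattern for the removed interfaces, condensed from the NFS write-path conversion earlier in this merge (clnt, msg, my_call_ops and data are caller-supplied placeholders; leaving .task NULL asks rpc_run_task() to allocate one):

	struct rpc_task_setup task_setup_data = {
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = &my_call_ops,
		.callback_data = data,
		.flags = RPC_TASK_ASYNC,
		.priority = RPC_PRIORITY_NORMAL,
	};
	struct rpc_task *task = rpc_run_task(&task_setup_data);

	if (!IS_ERR(task))
		rpc_put_task(task);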
void ptrace_untrace(struct task_struct *child)
{
spin_lock(&child->sighand->siglock);
- if (child->state == TASK_TRACED) {
+ if (task_is_traced(child)) {
if (child->signal->flags & SIGNAL_STOP_STOPPED) {
child->state = TASK_STOPPED;
} else {
add_parent(child);
}
- if (child->state == TASK_TRACED)
+ if (task_is_traced(child))
ptrace_untrace(child);
}
&& child->signal != NULL) {
ret = 0;
spin_lock_irq(&child->sighand->siglock);
- if (child->state == TASK_STOPPED) {
+ if (task_is_stopped(child)) {
child->state = TASK_TRACED;
- } else if (child->state != TASK_TRACED && !kill) {
+ } else if (!task_is_traced(child) && !kill) {
ret = -ESRCH;
}
spin_unlock_irq(&child->sighand->siglock);
return ret;
}
-static int may_attach(struct task_struct *task)
+int __ptrace_may_attach(struct task_struct *task)
{
/* May we inspect the given task?
* This check is used both for attaching with ptrace
{
int err;
task_lock(task);
- err = may_attach(task);
+ err = __ptrace_may_attach(task);
task_unlock(task);
return !err;
}
/* the same process cannot be attached many times */
if (task->ptrace & PT_PTRACED)
goto bad;
- retval = may_attach(task);
+ retval = __ptrace_may_attach(task);
if (retval)
goto bad;
return error;
}
+
+#ifdef PTRACE_SINGLESTEP
+#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
+#else
+#define is_singlestep(request) 0
+#endif
+
+#ifdef PTRACE_SINGLEBLOCK
+#define is_singleblock(request) ((request) == PTRACE_SINGLEBLOCK)
+#else
+#define is_singleblock(request) 0
+#endif
+
+#ifdef PTRACE_SYSEMU
+#define is_sysemu_singlestep(request) ((request) == PTRACE_SYSEMU_SINGLESTEP)
+#else
+#define is_sysemu_singlestep(request) 0
+#endif
+
+static int ptrace_resume(struct task_struct *child, long request, long data)
+{
+ if (!valid_signal(data))
+ return -EIO;
+
+ if (request == PTRACE_SYSCALL)
+ set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+
+#ifdef TIF_SYSCALL_EMU
+ if (request == PTRACE_SYSEMU || request == PTRACE_SYSEMU_SINGLESTEP)
+ set_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+ else
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
+
+ if (is_singleblock(request)) {
+ if (unlikely(!arch_has_block_step()))
+ return -EIO;
+ user_enable_block_step(child);
+ } else if (is_singlestep(request) || is_sysemu_singlestep(request)) {
+ if (unlikely(!arch_has_single_step()))
+ return -EIO;
+ user_enable_single_step(child);
+ } else {
+ user_disable_single_step(child);
+ }
+
+ child->exit_code = data;
+ wake_up_process(child);
+
+ return 0;
+}
+
int ptrace_request(struct task_struct *child, long request,
long addr, long data)
{
int ret = -EIO;
switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ return generic_ptrace_peekdata(child, addr, data);
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ return generic_ptrace_pokedata(child, addr, data);
+
#ifdef PTRACE_OLDSETOPTIONS
case PTRACE_OLDSETOPTIONS:
#endif
case PTRACE_DETACH: /* detach a process that was attached. */
ret = ptrace_detach(child, data);
break;
+
+#ifdef PTRACE_SINGLESTEP
+ case PTRACE_SINGLESTEP:
+#endif
+#ifdef PTRACE_SINGLEBLOCK
+ case PTRACE_SINGLEBLOCK:
+#endif
+#ifdef PTRACE_SYSEMU
+ case PTRACE_SYSEMU:
+ case PTRACE_SYSEMU_SINGLESTEP:
+#endif
+ case PTRACE_SYSCALL:
+ case PTRACE_CONT:
+ return ptrace_resume(child, request, data);
+
+ case PTRACE_KILL:
+ if (child->exit_state) /* already dead */
+ return 0;
+ return ptrace_resume(child, request, SIGKILL);
+
default:
break;
}
lock_kernel();
if (request == PTRACE_TRACEME) {
ret = ptrace_traceme();
+ if (!ret)
+ arch_ptrace_attach(current);
goto out;
}
copied = access_process_vm(tsk, addr, &data, sizeof(data), 1);
return (copied == sizeof(data)) ? 0 : -EIO;
}
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+
+int compat_ptrace_request(struct task_struct *child, compat_long_t request,
+ compat_ulong_t addr, compat_ulong_t data)
+{
+ compat_ulong_t __user *datap = compat_ptr(data);
+ compat_ulong_t word;
+ int ret;
+
+ switch (request) {
+ case PTRACE_PEEKTEXT:
+ case PTRACE_PEEKDATA:
+ ret = access_process_vm(child, addr, &word, sizeof(word), 0);
+ if (ret != sizeof(word))
+ ret = -EIO;
+ else
+ ret = put_user(word, datap);
+ break;
+
+ case PTRACE_POKETEXT:
+ case PTRACE_POKEDATA:
+ ret = access_process_vm(child, addr, &data, sizeof(data), 1);
+ ret = (ret != sizeof(data) ? -EIO : 0);
+ break;
+
+ case PTRACE_GETEVENTMSG:
+ ret = put_user((compat_ulong_t) child->ptrace_message, datap);
+ break;
+
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ }
+
+ return ret;
+}
+
+#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
+asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
+ compat_long_t addr, compat_long_t data)
+{
+ struct task_struct *child;
+ long ret;
+
+ /*
+ * This lock_kernel fixes a subtle race with suid exec
+ */
+ lock_kernel();
+ if (request == PTRACE_TRACEME) {
+ ret = ptrace_traceme();
+ goto out;
+ }
+
+ child = ptrace_get_task_struct(pid);
+ if (IS_ERR(child)) {
+ ret = PTR_ERR(child);
+ goto out;
+ }
+
+ if (request == PTRACE_ATTACH) {
+ ret = ptrace_attach(child);
+ /*
+ * Some architectures need to do book-keeping after
+ * a ptrace attach.
+ */
+ if (!ret)
+ arch_ptrace_attach(child);
+ goto out_put_task_struct;
+ }
+
+ ret = ptrace_check_attach(child, request == PTRACE_KILL);
+ if (!ret)
+ ret = compat_arch_ptrace(child, request, addr, data);
+
+ out_put_task_struct:
+ put_task_struct(child);
+ out:
+ unlock_kernel();
+ return ret;
+}
+#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
+
+#endif /* CONFIG_COMPAT */
* by Peter Williams
* 2007-05-06 Interactivity improvements to CFS by Mike Galbraith
* 2007-07-01 Group scheduling enhancements by Srivatsa Vaddagiri
+ * 2007-11-29 RT balancing improvements by Steven Rostedt, Gregory Haskins,
+ * Thomas Gleixner, Mike Kravetz
*/
#include <linux/mm.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>
#include <linux/pagemap.h>
+#include <linux/hrtimer.h>
#include <asm/tlb.h>
#include <asm/irq_regs.h>
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
/*
- * Some helpers for converting nanosecond timing to jiffy resolution
+ * Helpers for converting nanosecond timing to jiffy resolution
*/
#define NS_TO_JIFFIES(TIME) ((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))
-#define JIFFIES_TO_NS(TIME) ((TIME) * (NSEC_PER_SEC / HZ))
#define NICE_0_LOAD SCHED_LOAD_SCALE
#define NICE_0_SHIFT SCHED_LOAD_SHIFT
struct cfs_rq;
+static LIST_HEAD(task_groups);
+
/* task group related information */
struct task_group {
#ifdef CONFIG_FAIR_CGROUP_SCHED
struct sched_entity **se;
/* runqueue "owned" by this group on each cpu */
struct cfs_rq **cfs_rq;
+
+ struct sched_rt_entity **rt_se;
+ struct rt_rq **rt_rq;
+
+ unsigned int rt_ratio;
+
+ /*
+ * The shares assigned to a task group govern how much cpu bandwidth
+ * the group is allocated: the more shares a group has, the more cpu
+ * bandwidth it receives.
+ *
+ * For example, say there are three task groups, A, B and C, which
+ * have been assigned shares 1000, 2000 and 3000 respectively. The
+ * cpu bandwidth the scheduler allocates to task groups A, B and C
+ * should then be:
+ *
+ * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
+ * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
+ * Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
+ *
+ * The weight assigned to a task group's schedulable entities on every
+ * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
+ * group's shares. For example, say task group A has been assigned
+ * shares of 1000 and there are two CPUs in the system. Then,
+ *
+ * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
+ *
+ * Note: it is not necessary that each of a task group's schedulable
+ * entities have the same weight on all CPUs. If the group has 2 of
+ * its tasks on CPU0 and 1 task on CPU1, a better distribution of the
+ * total weight (2 * 1000) could be:
+ *
+ * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
+ * tg_A->se[1]->load.weight = 1/3 * 2000 = 667
+ *
+ * rebalance_shares() is responsible for distributing a task group's
+ * shares like this among the group's schedulable entities across
+ * cpus.
+ */
unsigned long shares;
- /* spinlock to serialize modification to shares */
- spinlock_t lock;
+
struct rcu_head rcu;
+ struct list_head list;
};
/* Default task group's sched entity on each cpu */
/* Default task group's cfs_rq on each cpu */
static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
+static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
+static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
+
static struct sched_entity *init_sched_entity_p[NR_CPUS];
static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
+static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS];
+static struct rt_rq *init_rt_rq_p[NR_CPUS];
+
+/* task_group_mutex serializes add/remove of task groups and also changes to
+ * a task group's cpu shares.
+ */
+static DEFINE_MUTEX(task_group_mutex);
+
+/* doms_cur_mutex serializes access to doms_cur[] array */
+static DEFINE_MUTEX(doms_cur_mutex);
+
+#ifdef CONFIG_SMP
+/* kernel thread that runs rebalance_shares() periodically */
+static struct task_struct *lb_monitor_task;
+static int load_balance_monitor(void *unused);
+#endif
+
+static void set_se_shares(struct sched_entity *se, unsigned long shares);
+
/* Default task group.
* Every task in system belong to this group at bootup.
*/
struct task_group init_task_group = {
- .se = init_sched_entity_p,
+ .se = init_sched_entity_p,
.cfs_rq = init_cfs_rq_p,
+
+ .rt_se = init_sched_rt_entity_p,
+ .rt_rq = init_rt_rq_p,
};
#ifdef CONFIG_FAIR_USER_SCHED
-# define INIT_TASK_GRP_LOAD 2*NICE_0_LOAD
+# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
#else
-# define INIT_TASK_GRP_LOAD NICE_0_LOAD
+# define INIT_TASK_GROUP_LOAD NICE_0_LOAD
#endif
-static int init_task_group_load = INIT_TASK_GRP_LOAD;
+#define MIN_GROUP_SHARES 2
+
+static int init_task_group_load = INIT_TASK_GROUP_LOAD;
/* return group to which a task belongs */
static inline struct task_group *task_group(struct task_struct *p)
}
/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
-static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu)
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
{
p->se.cfs_rq = task_group(p)->cfs_rq[cpu];
p->se.parent = task_group(p)->se[cpu];
+
+ p->rt.rt_rq = task_group(p)->rt_rq[cpu];
+ p->rt.parent = task_group(p)->rt_se[cpu];
+}
+
+static inline void lock_task_group_list(void)
+{
+ mutex_lock(&task_group_mutex);
+}
+
+static inline void unlock_task_group_list(void)
+{
+ mutex_unlock(&task_group_mutex);
+}
+
+static inline void lock_doms_cur(void)
+{
+ mutex_lock(&doms_cur_mutex);
+}
+
+static inline void unlock_doms_cur(void)
+{
+ mutex_unlock(&doms_cur_mutex);
}
#else
-static inline void set_task_cfs_rq(struct task_struct *p, unsigned int cpu) { }
+static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+static inline void lock_task_group_list(void) { }
+static inline void unlock_task_group_list(void) { }
+static inline void lock_doms_cur(void) { }
+static inline void unlock_doms_cur(void) { }
#endif /* CONFIG_FAIR_GROUP_SCHED */
/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
struct rt_prio_array active;
- int rt_load_balance_idx;
- struct list_head *rt_load_balance_head, *rt_load_balance_curr;
+ unsigned long rt_nr_running;
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+ int highest_prio; /* highest queued rt task prio */
+#endif
+#ifdef CONFIG_SMP
+ unsigned long rt_nr_migratory;
+ int overloaded;
+#endif
+ int rt_throttled;
+ u64 rt_time;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ struct rq *rq;
+ struct list_head leaf_rt_rq_list;
+ struct task_group *tg;
+ struct sched_rt_entity *rt_se;
+#endif
+};
+
+#ifdef CONFIG_SMP
+
+/*
+ * We add the notion of a root-domain which will be used to define per-domain
+ * variables. Each exclusive cpuset essentially defines an island domain by
+ * fully partitioning the member cpus from any other cpuset. Whenever a new
+ * exclusive cpuset is created, we also create and attach a new root-domain
+ * object.
+ *
+ */
+struct root_domain {
+ atomic_t refcount;
+ cpumask_t span;
+ cpumask_t online;
+
+ /*
+ * The "RT overload" flag: it gets set if a CPU has more than
+ * one runnable RT task.
+ */
+ cpumask_t rto_mask;
+ atomic_t rto_count;
};
+/*
+ * By default the system creates a single root-domain with all cpus as
+ * members (mimicking the global state we have today).
+ */
+static struct root_domain def_root_domain;
+
+#endif
+
/*
* This is the main, per-CPU runqueue data structure.
*
u64 nr_switches;
struct cfs_rq cfs;
+ struct rt_rq rt;
+ u64 rt_period_expire;
+ int rt_throttled;
+
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
struct list_head leaf_cfs_rq_list;
+ struct list_head leaf_rt_rq_list;
#endif
- struct rt_rq rt;
/*
* This is part of a global counter where only the total sum
u64 clock, prev_clock_raw;
s64 clock_max_delta;
- unsigned int clock_warps, clock_overflows;
+ unsigned int clock_warps, clock_overflows, clock_underflows;
u64 idle_clock;
unsigned int clock_deep_idle_events;
u64 tick_timestamp;
atomic_t nr_iowait;
#ifdef CONFIG_SMP
+ struct root_domain *rd;
struct sched_domain *sd;
/* For active balancing */
struct list_head migration_queue;
#endif
+#ifdef CONFIG_SCHED_HRTICK
+ unsigned long hrtick_flags;
+ ktime_t hrtick_expire;
+ struct hrtimer hrtick_timer;
+#endif
+
#ifdef CONFIG_SCHEDSTATS
/* latency stats */
struct sched_info rq_sched_info;
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-static DEFINE_MUTEX(sched_hotcpu_mutex);
static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
{
#define task_rq(p) cpu_rq(task_cpu(p))
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
+unsigned long rt_needs_cpu(int cpu)
+{
+ struct rq *rq = cpu_rq(cpu);
+ u64 delta;
+
+ if (!rq->rt_throttled)
+ return 0;
+
+ if (rq->clock > rq->rt_period_expire)
+ return 1;
+
+ delta = rq->rt_period_expire - rq->clock;
+ do_div(delta, NSEC_PER_SEC / HZ);
+
+ return (unsigned long)delta;
+}
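A worked example of the return value (hypothetical numbers): with HZ=1000 a jiffy is NSEC_PER_SEC/HZ = 1,000,000 ns, so a throttled runqueue whose period expires 4,500,000 ns from now reports 4 jiffies; 0 means the runqueue is not throttled, and 1 means the period has already expired.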
+
/*
* Tunables that become constants when CONFIG_SCHED_DEBUG is off:
*/
SCHED_FEAT_START_DEBIT = 4,
SCHED_FEAT_TREE_AVG = 8,
SCHED_FEAT_APPROX_AVG = 16,
+ SCHED_FEAT_HRTICK = 32,
+ SCHED_FEAT_DOUBLE_TICK = 64,
};
const_debug unsigned int sysctl_sched_features =
SCHED_FEAT_WAKEUP_PREEMPT * 1 |
SCHED_FEAT_START_DEBIT * 1 |
SCHED_FEAT_TREE_AVG * 0 |
- SCHED_FEAT_APPROX_AVG * 0;
+ SCHED_FEAT_APPROX_AVG * 0 |
+ SCHED_FEAT_HRTICK * 1 |
+ SCHED_FEAT_DOUBLE_TICK * 0;
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
*/
const_debug unsigned int sysctl_sched_nr_migrate = 32;
+/*
+ * period over which we measure -rt task cpu usage, in milliseconds.
+ * default: 1000ms (1s)
+ */
+const_debug unsigned int sysctl_sched_rt_period = 1000;
+
+#define SCHED_RT_FRAC_SHIFT 16
+#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT)
+
+/*
+ * ratio of time -rt tasks may consume.
+ * default: 95%
+ */
+const_debug unsigned int sysctl_sched_rt_ratio = 62259;
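The constant is a fixed-point fraction in units of SCHED_RT_FRAC = 1 << 16, so 95% works out to 0.95 * 65536 ~= 62259. Code applying the ratio would scale by the same shift, along these lines (a sketch, not the scheduler's actual throttling check):

	/* cpu time -rt tasks may consume per period, in the period's units */
	u64 rt_limit = ((u64)period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;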
+
/*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock():
local_irq_save(flags);
rq = cpu_rq(cpu);
- update_rq_clock(rq);
+ /*
+ * Only call sched_clock() if the scheduler has already been
+ * initialized (some code might call cpu_clock() very early):
+ */
+ if (rq->idle)
+ update_rq_clock(rq);
now = rq->clock;
local_irq_restore(flags);
# define finish_arch_switch(prev) do { } while (0)
#endif
+static inline int task_current(struct rq *rq, struct task_struct *p)
+{
+ return rq->curr == p;
+}
+
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
static inline int task_running(struct rq *rq, struct task_struct *p)
{
- return rq->curr == p;
+ return task_current(rq, p);
}
static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
#ifdef CONFIG_SMP
return p->oncpu;
#else
- return rq->curr == p;
+ return task_current(rq, p);
#endif
}
rq->prev_clock_raw = now;
rq->clock += delta_ns;
spin_unlock(&rq->lock);
+ touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+static void __resched_task(struct task_struct *p, int tif_bit);
+
+static inline void resched_task(struct task_struct *p)
+{
+ __resched_task(p, TIF_NEED_RESCHED);
+}
+
+#ifdef CONFIG_SCHED_HRTICK
+/*
+ * Use HR-timers to deliver accurate preemption points.
+ *
+ * It's all a bit involved since we cannot program an hrtimer while
+ * holding the rq->lock. So what we do is store a state in rq->hrtick_*
+ * and ask for a reschedule event.
+ *
+ * When we get rescheduled we reprogram the hrtick_timer outside of the
+ * rq->lock.
+ */
+static inline void resched_hrt(struct task_struct *p)
+{
+ __resched_task(p, TIF_HRTICK_RESCHED);
+}
+
+static inline void resched_rq(struct rq *rq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&rq->lock, flags);
+ resched_task(rq->curr);
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+enum {
+ HRTICK_SET, /* re-program hrtick_timer */
+ HRTICK_RESET, /* not a new slice */
+};
+
+/*
+ * Use hrtick when:
+ * - enabled by features
+ * - hrtimer is actually high res
+ */
+static inline int hrtick_enabled(struct rq *rq)
+{
+ if (!sched_feat(HRTICK))
+ return 0;
+ return hrtimer_is_hres_active(&rq->hrtick_timer);
+}
+
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay, int reset)
+{
+ assert_spin_locked(&rq->lock);
+
+ /*
+ * preempt at: now + delay
+ */
+ rq->hrtick_expire =
+ ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
+ /*
+ * indicate we need to program the timer
+ */
+ __set_bit(HRTICK_SET, &rq->hrtick_flags);
+ if (reset)
+ __set_bit(HRTICK_RESET, &rq->hrtick_flags);
+
+ /*
+ * New slices are called from the schedule path and don't need a
+ * forced reschedule.
+ */
+ if (!reset)
+ resched_hrt(rq->curr);
+}
+
+static void hrtick_clear(struct rq *rq)
+{
+ if (hrtimer_active(&rq->hrtick_timer))
+ hrtimer_cancel(&rq->hrtick_timer);
+}
+
+/*
+ * Update the timer from the possible pending state.
+ */
+static void hrtick_set(struct rq *rq)
+{
+ ktime_t time;
+ int set, reset;
+ unsigned long flags;
+
+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+
+ spin_lock_irqsave(&rq->lock, flags);
+ set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
+ reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
+ time = rq->hrtick_expire;
+ clear_thread_flag(TIF_HRTICK_RESCHED);
+ spin_unlock_irqrestore(&rq->lock, flags);
+
+ if (set) {
+ hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
+ if (reset && !hrtimer_active(&rq->hrtick_timer))
+ resched_rq(rq);
+ } else
+ hrtick_clear(rq);
+}
+
+/*
+ * High-resolution timer tick.
+ * Runs from hardirq context with interrupts disabled.
+ */
+static enum hrtimer_restart hrtick(struct hrtimer *timer)
+{
+ struct rq *rq = container_of(timer, struct rq, hrtick_timer);
+
+ WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+
+ spin_lock(&rq->lock);
+ __update_rq_clock(rq);
+ rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+ spin_unlock(&rq->lock);
+
+ return HRTIMER_NORESTART;
+}
+
+static inline void init_rq_hrtick(struct rq *rq)
+{
+ rq->hrtick_flags = 0;
+ hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ rq->hrtick_timer.function = hrtick;
+ rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
+}
+
+void hrtick_resched(void)
+{
+ struct rq *rq;
+ unsigned long flags;
+
+ if (!test_thread_flag(TIF_HRTICK_RESCHED))
+ return;
+
+ local_irq_save(flags);
+ rq = cpu_rq(smp_processor_id());
+ hrtick_set(rq);
+ local_irq_restore(flags);
+}
+#else
+static inline void hrtick_clear(struct rq *rq)
+{
+}
+
+static inline void hrtick_set(struct rq *rq)
+{
+}
+
+static inline void init_rq_hrtick(struct rq *rq)
+{
+}
+
+void hrtick_resched(void)
+{
+}
+#endif
+
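A sketch of how a scheduling class might arm the high-resolution preemption tick for the remainder of the current slice; the helper name and the slice argument are assumptions, not the actual per-class code:

/* Illustration only: arm the hrtick for the rest of a task's slice. */
static void hrtick_arm_slice(struct rq *rq, u64 slice_remaining_ns)
{
	if (!hrtick_enabled(rq))
		return;
	/* rq->lock is held; hrtick_set() programs the timer later. */
	hrtick_start(rq, slice_remaining_ns, 1);
}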
/*
* resched_task - mark a task 'to be rescheduled now'.
*
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif
-static void resched_task(struct task_struct *p)
+static void __resched_task(struct task_struct *p, int tif_bit)
{
int cpu;
assert_spin_locked(&task_rq(p)->lock);
- if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
+ if (unlikely(test_tsk_thread_flag(p, tif_bit)))
return;
- set_tsk_thread_flag(p, TIF_NEED_RESCHED);
+ set_tsk_thread_flag(p, tif_bit);
cpu = task_cpu(p);
if (cpu == smp_processor_id())
spin_unlock_irqrestore(&rq->lock, flags);
}
#else
-static inline void resched_task(struct task_struct *p)
+static void __resched_task(struct task_struct *p, int tif_bit)
{
assert_spin_locked(&task_rq(p)->lock);
- set_tsk_need_resched(p);
+ set_tsk_thread_flag(p, tif_bit);
}
#endif
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
#endif
+static inline void inc_cpu_load(struct rq *rq, unsigned long load)
+{
+ update_load_add(&rq->load, load);
+}
+
+static inline void dec_cpu_load(struct rq *rq, unsigned long load)
+{
+ update_load_sub(&rq->load, load);
+}
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static unsigned long cpu_avg_load_per_task(int cpu);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+#endif /* CONFIG_SMP */
+
#include "sched_stats.h"
#include "sched_idletask.c"
#include "sched_fair.c"
#define sched_class_highest (&rt_sched_class)
-/*
- * Update delta_exec, delta_fair fields for rq.
- *
- * delta_fair clock advances at a rate inversely proportional to
- * total load (rq->load.weight) on the runqueue, while
- * delta_exec advances at the same rate as wall-clock (provided
- * cpu is not idle).
- *
- * delta_exec / delta_fair is a measure of the (smoothened) load on this
- * runqueue over any given interval. This (smoothened) load is used
- * during load balance.
- *
- * This function is called /before/ updating rq->load
- * and when switching tasks.
- */
-static inline void inc_load(struct rq *rq, const struct task_struct *p)
-{
- update_load_add(&rq->load, p->se.load.weight);
-}
-
-static inline void dec_load(struct rq *rq, const struct task_struct *p)
-{
- update_load_sub(&rq->load, p->se.load.weight);
-}
-
-static void inc_nr_running(struct task_struct *p, struct rq *rq)
+static void inc_nr_running(struct rq *rq)
{
rq->nr_running++;
- inc_load(rq, p);
}
-static void dec_nr_running(struct task_struct *p, struct rq *rq)
+static void dec_nr_running(struct rq *rq)
{
rq->nr_running--;
- dec_load(rq, p);
}
static void set_load_weight(struct task_struct *p)
*/
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
- if (p->state == TASK_UNINTERRUPTIBLE)
+ if (task_contributes_to_load(p))
rq->nr_uninterruptible--;
enqueue_task(rq, p, wakeup);
- inc_nr_running(p, rq);
+ inc_nr_running(rq);
}
/*
*/
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
{
- if (p->state == TASK_UNINTERRUPTIBLE)
+ if (task_contributes_to_load(p))
rq->nr_uninterruptible++;
dequeue_task(rq, p, sleep);
- dec_nr_running(p, rq);
+ dec_nr_running(rq);
}
/**
static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
{
- set_task_cfs_rq(p, cpu);
+ set_task_rq(p, cpu);
#ifdef CONFIG_SMP
/*
* After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
#endif
}
+static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+{
+ if (prev_class != p->sched_class) {
+ if (prev_class->switched_from)
+ prev_class->switched_from(rq, p, running);
+ p->sched_class->switched_to(rq, p, running);
+ } else
+ p->sched_class->prio_changed(rq, p, oldprio, running);
+}
+
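For reference, a minimal sketch of the kind of hook check_class_changed() invokes; this example class is hypothetical:

/* Illustration only: preempt the current task when a class takes over. */
static void example_switched_to(struct rq *rq, struct task_struct *p,
				int running)
{
	if (!running && p->prio < rq->curr->prio)
		resched_task(rq->curr);
}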
#ifdef CONFIG_SMP
/*
* Is this task likely cache-hot:
*/
-static inline int
+static int
task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
{
s64 delta;
/*
* Return the average load per task on the cpu's run queue
*/
-static inline unsigned long cpu_avg_load_per_task(int cpu)
+static unsigned long cpu_avg_load_per_task(int cpu)
{
struct rq *rq = cpu_rq(cpu);
unsigned long total = weighted_cpuload(cpu);
#endif /* CONFIG_SMP */
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, struct task_struct *p)
-{
- cpumask_t tmp;
- struct sched_domain *sd;
- int i;
-
- /*
- * If it is idle, then it is the best cpu to run this task.
- *
- * This cpu is also the best, if it has more than one task already.
- * Siblings must be also busy(in most cases) as they didn't already
- * pickup the extra load from this cpu and hence we need not check
- * sibling runqueue info. This will avoid the checks and cache miss
- * penalities associated with that.
- */
- if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
- return cpu;
-
- for_each_domain(cpu, sd) {
- if (sd->flags & SD_WAKE_IDLE) {
- cpus_and(tmp, sd->span, p->cpus_allowed);
- for_each_cpu_mask(i, tmp) {
- if (idle_cpu(i)) {
- if (i != task_cpu(p)) {
- schedstat_inc(p,
- se.nr_wakeups_idle);
- }
- return i;
- }
- }
- } else {
- break;
- }
- }
- return cpu;
-}
-#else
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
- return cpu;
-}
-#endif
-
/***
* try_to_wake_up - wake up a thread
* @p: the to-be-woken-up thread
unsigned long flags;
long old_state;
struct rq *rq;
-#ifdef CONFIG_SMP
- struct sched_domain *sd, *this_sd = NULL;
- unsigned long load, this_load;
- int new_cpu;
-#endif
rq = task_rq_lock(p, &flags);
old_state = p->state;
if (unlikely(task_running(rq, p)))
goto out_activate;
- new_cpu = cpu;
-
- schedstat_inc(rq, ttwu_count);
- if (cpu == this_cpu) {
- schedstat_inc(rq, ttwu_local);
- goto out_set_cpu;
- }
-
- for_each_domain(this_cpu, sd) {
- if (cpu_isset(cpu, sd->span)) {
- schedstat_inc(sd, ttwu_wake_remote);
- this_sd = sd;
- break;
- }
- }
-
- if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
- goto out_set_cpu;
-
- /*
- * Check for affine wakeup and passive balancing possibilities.
- */
- if (this_sd) {
- int idx = this_sd->wake_idx;
- unsigned int imbalance;
-
- imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
- load = source_load(cpu, idx);
- this_load = target_load(this_cpu, idx);
-
- new_cpu = this_cpu; /* Wake to this CPU if we can */
-
- if (this_sd->flags & SD_WAKE_AFFINE) {
- unsigned long tl = this_load;
- unsigned long tl_per_task;
-
- /*
- * Attract cache-cold tasks on sync wakeups:
- */
- if (sync && !task_hot(p, rq->clock, this_sd))
- goto out_set_cpu;
-
- schedstat_inc(p, se.nr_wakeups_affine_attempts);
- tl_per_task = cpu_avg_load_per_task(this_cpu);
-
- /*
- * If sync wakeup then subtract the (maximum possible)
- * effect of the currently running task from the load
- * of the current CPU:
- */
- if (sync)
- tl -= current->se.load.weight;
-
- if ((tl <= load &&
- tl + target_load(cpu, idx) <= tl_per_task) ||
- 100*(tl + p->se.load.weight) <= imbalance*load) {
- /*
- * This domain has SD_WAKE_AFFINE and
- * p is cache cold in this domain, and
- * there is no bad imbalance.
- */
- schedstat_inc(this_sd, ttwu_move_affine);
- schedstat_inc(p, se.nr_wakeups_affine);
- goto out_set_cpu;
- }
- }
-
- /*
- * Start passive balancing when half the imbalance_pct
- * limit is reached.
- */
- if (this_sd->flags & SD_WAKE_BALANCE) {
- if (imbalance*this_load <= 100*load) {
- schedstat_inc(this_sd, ttwu_move_balance);
- schedstat_inc(p, se.nr_wakeups_passive);
- goto out_set_cpu;
- }
- }
- }
-
- new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */
-out_set_cpu:
- new_cpu = wake_idle(new_cpu, p);
- if (new_cpu != cpu) {
- set_task_cpu(p, new_cpu);
+ cpu = p->sched_class->select_task_rq(p, sync);
+ if (cpu != orig_cpu) {
+ set_task_cpu(p, cpu);
task_rq_unlock(rq, &flags);
/* might preempt at this point */
rq = task_rq_lock(p, &flags);
cpu = task_cpu(p);
}
+#ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+ if (cpu == this_cpu)
+ schedstat_inc(rq, ttwu_local);
+ else {
+ struct sched_domain *sd;
+ for_each_domain(this_cpu, sd) {
+ if (cpu_isset(cpu, sd->span)) {
+ schedstat_inc(sd, ttwu_wake_remote);
+ break;
+ }
+ }
+ }
+#endif
+
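The wake-up CPU choice now lives behind sched_class::select_task_rq(). A trivial sketch of the hook's shape (the real policies live in the per-class files; this version is an illustration only):

/* Illustration only: keep the task on the cpu it last ran on. */
static int example_select_task_rq(struct task_struct *p, int sync)
{
	return task_cpu(p);
}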
out_activate:
#endif /* CONFIG_SMP */
schedstat_inc(p, se.nr_wakeups);
out_running:
p->state = TASK_RUNNING;
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+#endif
out:
task_rq_unlock(rq, &flags);
int fastcall wake_up_process(struct task_struct *p)
{
- return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
- TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
+ return try_to_wake_up(p, TASK_ALL, 0);
}
EXPORT_SYMBOL(wake_up_process);
p->se.wait_max = 0;
#endif
- INIT_LIST_HEAD(&p->run_list);
+ INIT_LIST_HEAD(&p->rt.run_list);
p->se.on_rq = 0;
#ifdef CONFIG_PREEMPT_NOTIFIERS
* management (if any):
*/
p->sched_class->task_new(rq, p);
- inc_nr_running(p, rq);
+ inc_nr_running(rq);
}
check_preempt_curr(rq, p);
+#ifdef CONFIG_SMP
+ if (p->sched_class->task_wake_up)
+ p->sched_class->task_wake_up(rq, p);
+#endif
task_rq_unlock(rq, &flags);
}
prev_state = prev->state;
finish_arch_switch(prev);
finish_lock_switch(rq, prev);
+#ifdef CONFIG_SMP
+ if (current->sched_class->post_schedule)
+ current->sched_class->post_schedule(rq);
+#endif
+
fire_sched_in_preempt_notifiers(current);
if (mm)
mmdrop(mm);
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
*/
-static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
__releases(this_rq->lock)
__acquires(busiest->lock)
__acquires(this_rq->lock)
{
+ int ret = 0;
+
if (unlikely(!irqs_disabled())) {
/* printk() doesn't work well under rq->lock */
spin_unlock(&this_rq->lock);
spin_unlock(&this_rq->lock);
spin_lock(&busiest->lock);
spin_lock(&this_rq->lock);
+ ret = 1;
} else
spin_lock(&busiest->lock);
}
+ return ret;
}
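Because double_lock_balance() may drop and retake this_rq->lock, callers must treat a non-zero return as "state may have changed" and revalidate. A hedged sketch of a hypothetical caller:

/* Illustration only: revalidate after the lock may have been dropped. */
static void example_pull_task(struct rq *this_rq, struct rq *busiest,
			      struct task_struct *p)
{
	if (double_lock_balance(this_rq, busiest)) {
		/* this_rq->lock was released: p may have migrated away */
		if (task_rq(p) != busiest)
			goto out;
	}
	/* ... safe to pull p from busiest to this_rq here ... */
out:
	spin_unlock(&busiest->lock);
}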
/*
rq = task_rq_lock(p, &flags);
ns = p->se.sum_exec_runtime;
- if (rq->curr == p) {
+ if (task_current(rq, p)) {
update_rq_clock(rq);
delta_exec = rq->clock - p->se.exec_start;
if ((s64)delta_exec > 0)
/*
* Let rq->clock advance by at least TICK_NSEC:
*/
- if (unlikely(rq->clock < next_tick))
+ if (unlikely(rq->clock < next_tick)) {
rq->clock = next_tick;
+ rq->clock_underflows++;
+ }
rq->tick_timestamp = rq->clock;
update_cpu_load(rq);
- if (curr != rq->idle) /* FIXME: needed? */
- curr->sched_class->task_tick(rq, curr);
+ curr->sched_class->task_tick(rq, curr, 0);
+ update_sched_rt_period(rq);
spin_unlock(&rq->lock);
#ifdef CONFIG_SMP
schedule_debug(prev);
+ hrtick_clear(rq);
+
/*
* Do the rq-clock update outside the rq lock:
*/
switch_count = &prev->nvcsw;
}
+#ifdef CONFIG_SMP
+ if (prev->sched_class->pre_schedule)
+ prev->sched_class->pre_schedule(rq, prev);
+#endif
+
if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);
++*switch_count;
context_switch(rq, prev, next); /* unlocks the rq */
+ /*
+ * the context switch might have flipped the stack from under
+ * us, hence refresh the local variables.
+ */
+ cpu = smp_processor_id();
+ rq = cpu_rq(cpu);
} else
spin_unlock_irq(&rq->lock);
- if (unlikely(reacquire_kernel_lock(current) < 0)) {
- cpu = smp_processor_id();
- rq = cpu_rq(cpu);
+ hrtick_set(rq);
+
+ if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;
- }
+
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
asmlinkage void __sched preempt_schedule(void)
{
struct thread_info *ti = current_thread_info();
-#ifdef CONFIG_PREEMPT_BKL
struct task_struct *task = current;
int saved_lock_depth;
-#endif
+
/*
* If there is a non-zero preempt_count or interrupts are disabled,
* we do not want to preempt the current task. Just return..
* clear ->lock_depth so that schedule() doesn't
* auto-release the semaphore:
*/
-#ifdef CONFIG_PREEMPT_BKL
saved_lock_depth = task->lock_depth;
task->lock_depth = -1;
-#endif
schedule();
-#ifdef CONFIG_PREEMPT_BKL
task->lock_depth = saved_lock_depth;
-#endif
sub_preempt_count(PREEMPT_ACTIVE);
/*
asmlinkage void __sched preempt_schedule_irq(void)
{
struct thread_info *ti = current_thread_info();
-#ifdef CONFIG_PREEMPT_BKL
struct task_struct *task = current;
int saved_lock_depth;
-#endif
+
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
* clear ->lock_depth so that schedule() doesn't
* auto-release the semaphore:
*/
-#ifdef CONFIG_PREEMPT_BKL
saved_lock_depth = task->lock_depth;
task->lock_depth = -1;
-#endif
local_irq_enable();
schedule();
local_irq_disable();
-#ifdef CONFIG_PREEMPT_BKL
task->lock_depth = saved_lock_depth;
-#endif
sub_preempt_count(PREEMPT_ACTIVE);
/*
spin_lock_irqsave(&x->wait.lock, flags);
x->done++;
- __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
- 1, 0, NULL);
+ __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete);
spin_lock_irqsave(&x->wait.lock, flags);
x->done += UINT_MAX/2;
- __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
- 0, 0, NULL);
+ __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL);
spin_unlock_irqrestore(&x->wait.lock, flags);
}
EXPORT_SYMBOL(complete_all);
wait.flags |= WQ_FLAG_EXCLUSIVE;
__add_wait_queue_tail(&x->wait, &wait);
do {
- if (state == TASK_INTERRUPTIBLE &&
- signal_pending(current)) {
+ if ((state == TASK_INTERRUPTIBLE &&
+ signal_pending(current)) ||
+ (state == TASK_KILLABLE &&
+ fatal_signal_pending(current))) {
__remove_wait_queue(&x->wait, &wait);
return -ERESTARTSYS;
}
}
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
+int __sched wait_for_completion_killable(struct completion *x)
+{
+ long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
+ if (t == -ERESTARTSYS)
+ return t;
+ return 0;
+}
+EXPORT_SYMBOL(wait_for_completion_killable);
+
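Typical use of the killable variant, sketched for a hypothetical caller: the sleep behaves like an uninterruptible wait except that a fatal signal breaks it.

/* Illustration only: wait for an event, but let SIGKILL through. */
static int example_wait_for_fw(struct completion *fw_done)
{
	int err = wait_for_completion_killable(fw_done);

	if (err)	/* -ERESTARTSYS: the task got a fatal signal */
		return err;
	/* ... the event has completed ... */
	return 0;
}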
static long __sched
sleep_on_common(wait_queue_head_t *q, int state, long timeout)
{
unsigned long flags;
int oldprio, on_rq, running;
struct rq *rq;
+ const struct sched_class *prev_class = p->sched_class;
BUG_ON(prio < 0 || prio > MAX_PRIO);
oldprio = p->prio;
on_rq = p->se.on_rq;
- running = task_running(rq, p);
+ running = task_current(rq, p);
if (on_rq) {
dequeue_task(rq, p, 0);
if (running)
if (on_rq) {
if (running)
p->sched_class->set_curr_task(rq);
+
enqueue_task(rq, p, 0);
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else {
- check_preempt_curr(rq, p);
- }
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
}
task_rq_unlock(rq, &flags);
}
goto out_unlock;
}
on_rq = p->se.on_rq;
- if (on_rq) {
+ if (on_rq)
dequeue_task(rq, p, 0);
- dec_load(rq, p);
- }
p->static_prio = NICE_TO_PRIO(nice);
set_load_weight(p);
if (on_rq) {
enqueue_task(rq, p, 0);
- inc_load(rq, p);
/*
* If the task increased its priority or is running and
* lowered its priority, then reschedule its CPU:
{
int retval, oldprio, oldpolicy = -1, on_rq, running;
unsigned long flags;
+ const struct sched_class *prev_class = p->sched_class;
struct rq *rq;
/* may grab non-irq protected spin_locks */
}
update_rq_clock(rq);
on_rq = p->se.on_rq;
- running = task_running(rq, p);
+ running = task_current(rq, p);
if (on_rq) {
deactivate_task(rq, p, 0);
if (running)
if (on_rq) {
if (running)
p->sched_class->set_curr_task(rq);
+
activate_task(rq, p, 0);
- /*
- * Reschedule if we are currently running on this runqueue and
- * our priority decreased, or if we are not currently running on
- * this runqueue and our priority is higher than the current's
- */
- if (running) {
- if (p->prio > oldprio)
- resched_task(rq->curr);
- } else {
- check_preempt_curr(rq, p);
- }
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
}
__task_rq_unlock(rq);
spin_unlock_irqrestore(&p->pi_lock, flags);
struct task_struct *p;
int retval;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
read_lock(&tasklist_lock);
p = find_process_by_pid(pid);
if (!p) {
read_unlock(&tasklist_lock);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return -ESRCH;
}
}
out_unlock:
put_task_struct(p);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return retval;
}
struct task_struct *p;
int retval;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
read_lock(&tasklist_lock);
retval = -ESRCH;
out_unlock:
read_unlock(&tasklist_lock);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return retval;
}
} while (need_resched());
}
-int __sched cond_resched(void)
+#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
+int __sched _cond_resched(void)
{
if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
system_state == SYSTEM_RUNNING) {
}
return 0;
}
-EXPORT_SYMBOL(cond_resched);
+EXPORT_SYMBOL(_cond_resched);
+#endif
/*
* cond_resched_lock() - if a reschedule is pending, drop the given lock,
*/
int cond_resched_lock(spinlock_t *lock)
{
+ int resched = need_resched() && system_state == SYSTEM_RUNNING;
int ret = 0;
- if (need_lockbreak(lock)) {
+ if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
- cpu_relax();
- ret = 1;
- spin_lock(lock);
- }
- if (need_resched() && system_state == SYSTEM_RUNNING) {
- spin_release(&lock->dep_map, 1, _THIS_IP_);
- _raw_spin_unlock(lock);
- preempt_enable_no_resched();
- __cond_resched();
+ if (resched && need_resched())
+ __cond_resched();
+ else
+ cpu_relax();
ret = 1;
spin_lock(lock);
}
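A sketch of the usual pattern, assuming a hypothetical list walk: cond_resched_lock() now drops the lock both when a reschedule is due and when another CPU spins on the lock, so the caller must revalidate whenever it returns 1.

/* Illustration only: a long walk that yields the lock when contended. */
static void example_walk(spinlock_t *lock, struct list_head *head)
{
	struct list_head *pos;

restart:
	spin_lock(lock);
	list_for_each(pos, head) {
		/* ... process the entry at pos ... */
		if (cond_resched_lock(lock)) {
			/* lock was dropped: pos may be stale */
			spin_unlock(lock);
			goto restart;
		}
	}
	spin_unlock(lock);
}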
static const char stat_nam[] = "RSDTtZX";
-static void show_task(struct task_struct *p)
+void sched_show_task(struct task_struct *p)
{
unsigned long free = 0;
unsigned state;
}
#endif
printk(KERN_CONT "%5lu %5d %6d\n", free,
- task_pid_nr(p), task_pid_nr(p->parent));
+ task_pid_nr(p), task_pid_nr(p->real_parent));
- if (state != TASK_RUNNING)
- show_stack(p, NULL);
+ show_stack(p, NULL);
}
void show_state_filter(unsigned long state_filter)
*/
touch_nmi_watchdog();
if (!state_filter || (p->state & state_filter))
- show_task(p);
+ sched_show_task(p);
} while_each_thread(g, p);
touch_all_softlockup_watchdogs();
spin_unlock_irqrestore(&rq->lock, flags);
/* Set the preempt count _outside_ the spinlocks! */
-#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
- task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
-#else
task_thread_info(idle)->preempt_count = 0;
-#endif
+
/*
* The idle tasks have their own, simple scheduling class:
*/
goto out;
}
- p->cpus_allowed = new_mask;
+ if (p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, &new_mask);
+ else {
+ p->cpus_allowed = new_mask;
+ p->rt.nr_cpus_allowed = cpus_weight(new_mask);
+ }
+
/* Can the task run on the task's current CPU? If so, we're done */
if (cpu_isset(task_cpu(p), new_mask))
goto out;
struct rq *rq;
switch (action) {
- case CPU_LOCK_ACQUIRE:
- mutex_lock(&sched_hotcpu_mutex);
- break;
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
case CPU_ONLINE_FROZEN:
/* Strictly unnecessary, as first user will wake it. */
wake_up_process(cpu_rq(cpu)->migration_thread);
+
+ /* Update our root-domain */
+ rq = cpu_rq(cpu);
+ spin_lock_irqsave(&rq->lock, flags);
+ if (rq->rd) {
+ BUG_ON(!cpu_isset(cpu, rq->rd->span));
+ cpu_set(cpu, rq->rd->online);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
break;
#ifdef CONFIG_HOTPLUG_CPU
}
spin_unlock_irq(&rq->lock);
break;
-#endif
- case CPU_LOCK_RELEASE:
- mutex_unlock(&sched_hotcpu_mutex);
+
+ case CPU_DOWN_PREPARE:
+ /* Update our root-domain */
+ rq = cpu_rq(cpu);
+ spin_lock_irqsave(&rq->lock, flags);
+ if (rq->rd) {
+ BUG_ON(!cpu_isset(cpu, rq->rd->span));
+ cpu_clear(cpu, rq->rd->online);
+ }
+ spin_unlock_irqrestore(&rq->lock, flags);
break;
+#endif
}
return NOTIFY_OK;
}
return 1;
}
+static void rq_attach_root(struct rq *rq, struct root_domain *rd)
+{
+ unsigned long flags;
+ const struct sched_class *class;
+
+ spin_lock_irqsave(&rq->lock, flags);
+
+ if (rq->rd) {
+ struct root_domain *old_rd = rq->rd;
+
+ for (class = sched_class_highest; class; class = class->next) {
+ if (class->leave_domain)
+ class->leave_domain(rq);
+ }
+
+ cpu_clear(rq->cpu, old_rd->span);
+ cpu_clear(rq->cpu, old_rd->online);
+
+ if (atomic_dec_and_test(&old_rd->refcount))
+ kfree(old_rd);
+ }
+
+ atomic_inc(&rd->refcount);
+ rq->rd = rd;
+
+ cpu_set(rq->cpu, rd->span);
+ if (cpu_isset(rq->cpu, cpu_online_map))
+ cpu_set(rq->cpu, rd->online);
+
+ for (class = sched_class_highest; class; class = class->next) {
+ if (class->join_domain)
+ class->join_domain(rq);
+ }
+
+ spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void init_rootdomain(struct root_domain *rd)
+{
+ memset(rd, 0, sizeof(*rd));
+
+ cpus_clear(rd->span);
+ cpus_clear(rd->online);
+}
+
+static void init_defrootdomain(void)
+{
+ init_rootdomain(&def_root_domain);
+ atomic_set(&def_root_domain.refcount, 1);
+}
+
+static struct root_domain *alloc_rootdomain(void)
+{
+ struct root_domain *rd;
+
+ rd = kmalloc(sizeof(*rd), GFP_KERNEL);
+ if (!rd)
+ return NULL;
+
+ init_rootdomain(rd);
+
+ return rd;
+}
+
/*
- * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
+ * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
* hold the hotplug lock.
*/
-static void cpu_attach_domain(struct sched_domain *sd, int cpu)
+static void
+cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
{
struct rq *rq = cpu_rq(cpu);
struct sched_domain *tmp;
sched_domain_debug(sd, cpu);
+ rq_attach_root(rq, rd);
rcu_assign_pointer(rq->sd, sd);
}
static int build_sched_domains(const cpumask_t *cpu_map)
{
int i;
+ struct root_domain *rd;
#ifdef CONFIG_NUMA
struct sched_group **sched_group_nodes = NULL;
int sd_allnodes = 0;
sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif
+ rd = alloc_rootdomain();
+ if (!rd) {
+ printk(KERN_WARNING "Cannot alloc root domain\n");
+ return -ENOMEM;
+ }
+
/*
* Set up domains for cpus specified by the cpu_map.
*/
#else
sd = &per_cpu(phys_domains, i);
#endif
- cpu_attach_domain(sd, i);
+ cpu_attach_domain(sd, rd, i);
}
return 0;
unregister_sched_domain_sysctl();
for_each_cpu_mask(i, *cpu_map)
- cpu_attach_domain(NULL, i);
+ cpu_attach_domain(NULL, &def_root_domain, i);
synchronize_sched();
arch_destroy_sched_domains(cpu_map);
}
{
int i, j;
+ lock_doms_cur();
+
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
ndoms_cur = ndoms_new;
register_sched_domain_sysctl();
+
+ unlock_doms_cur();
}
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
{
int err;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
detach_destroy_domains(&cpu_online_map);
err = arch_init_sched_domains(&cpu_online_map);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
return err;
}
{
cpumask_t non_isolated_cpus;
- mutex_lock(&sched_hotcpu_mutex);
+ get_online_cpus();
arch_init_sched_domains(&cpu_online_map);
cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
if (cpus_empty(non_isolated_cpus))
cpu_set(smp_processor_id(), non_isolated_cpus);
- mutex_unlock(&sched_hotcpu_mutex);
+ put_online_cpus();
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
if (set_cpus_allowed(current, non_isolated_cpus) < 0)
BUG();
sched_init_granularity();
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (nr_cpu_ids == 1)
+ return;
+
+ lb_monitor_task = kthread_create(load_balance_monitor, NULL,
+ "group_balance");
+ if (!IS_ERR(lb_monitor_task)) {
+ lb_monitor_task->flags |= PF_NOFREEZE;
+ wake_up_process(lb_monitor_task);
+ } else {
+ printk(KERN_ERR "Could not create load balance monitor thread"
+ "(error = %ld) \n", PTR_ERR(lb_monitor_task));
+ }
+#endif
}
#else
void __init sched_init_smp(void)
cfs_rq->min_vruntime = (u64)(-(1LL << 20));
}
+static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+{
+ struct rt_prio_array *array;
+ int i;
+
+ array = &rt_rq->active;
+ for (i = 0; i < MAX_RT_PRIO; i++) {
+ INIT_LIST_HEAD(array->queue + i);
+ __clear_bit(i, array->bitmap);
+ }
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_RT_PRIO, array->bitmap);
+
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+ rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
+#ifdef CONFIG_SMP
+ rt_rq->rt_nr_migratory = 0;
+ rt_rq->overloaded = 0;
+#endif
+
+ rt_rq->rt_time = 0;
+ rt_rq->rt_throttled = 0;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ rt_rq->rq = rq;
+#endif
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg,
+ struct cfs_rq *cfs_rq, struct sched_entity *se,
+ int cpu, int add)
+{
+ tg->cfs_rq[cpu] = cfs_rq;
+ init_cfs_rq(cfs_rq, rq);
+ cfs_rq->tg = tg;
+ if (add)
+ list_add(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+
+ tg->se[cpu] = se;
+ se->cfs_rq = &rq->cfs;
+ se->my_q = cfs_rq;
+ se->load.weight = tg->shares;
+ se->load.inv_weight = div64_64(1ULL<<32, se->load.weight);
+ se->parent = NULL;
+}
+
+static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
+ struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
+ int cpu, int add)
+{
+ tg->rt_rq[cpu] = rt_rq;
+ init_rt_rq(rt_rq, rq);
+ rt_rq->tg = tg;
+ rt_rq->rt_se = rt_se;
+ if (add)
+ list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
+
+ tg->rt_se[cpu] = rt_se;
+ rt_se->rt_rq = &rq->rt;
+ rt_se->my_q = rt_rq;
+ rt_se->parent = NULL;
+ INIT_LIST_HEAD(&rt_se->run_list);
+}
+#endif
+
void __init sched_init(void)
{
int highest_cpu = 0;
int i, j;
+#ifdef CONFIG_SMP
+ init_defrootdomain();
+#endif
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ list_add(&init_task_group.list, &task_groups);
+#endif
+
for_each_possible_cpu(i) {
- struct rt_prio_array *array;
struct rq *rq;
rq = cpu_rq(i);
rq->nr_running = 0;
rq->clock = 1;
init_cfs_rq(&rq->cfs, rq);
+ init_rt_rq(&rq->rt, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
- INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
- {
- struct cfs_rq *cfs_rq = &per_cpu(init_cfs_rq, i);
- struct sched_entity *se =
- &per_cpu(init_sched_entity, i);
-
- init_cfs_rq_p[i] = cfs_rq;
- init_cfs_rq(cfs_rq, rq);
- cfs_rq->tg = &init_task_group;
- list_add(&cfs_rq->leaf_cfs_rq_list,
- &rq->leaf_cfs_rq_list);
-
- init_sched_entity_p[i] = se;
- se->cfs_rq = &rq->cfs;
- se->my_q = cfs_rq;
- se->load.weight = init_task_group_load;
- se->load.inv_weight =
- div64_64(1ULL<<32, init_task_group_load);
- se->parent = NULL;
- }
init_task_group.shares = init_task_group_load;
- spin_lock_init(&init_task_group.lock);
+ INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
+ init_tg_cfs_entry(rq, &init_task_group,
+ &per_cpu(init_cfs_rq, i),
+ &per_cpu(init_sched_entity, i), i, 1);
+
+ init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */
+ INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
+ init_tg_rt_entry(rq, &init_task_group,
+ &per_cpu(init_rt_rq, i),
+ &per_cpu(init_sched_rt_entity, i), i, 1);
#endif
+ rq->rt_period_expire = 0;
+ rq->rt_throttled = 0;
for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
rq->cpu_load[j] = 0;
#ifdef CONFIG_SMP
rq->sd = NULL;
+ rq->rd = NULL;
rq->active_balance = 0;
rq->next_balance = jiffies;
rq->push_cpu = 0;
rq->cpu = i;
rq->migration_thread = NULL;
INIT_LIST_HEAD(&rq->migration_queue);
+ rq_attach_root(rq, &def_root_domain);
#endif
+ init_rq_hrtick(rq);
atomic_set(&rq->nr_iowait, 0);
-
- array = &rq->rt.active;
- for (j = 0; j < MAX_RT_PRIO; j++) {
- INIT_LIST_HEAD(array->queue + j);
- __clear_bit(j, array->bitmap);
- }
highest_cpu = i;
- /* delimiter for bitsearch: */
- __set_bit(MAX_RT_PRIO, array->bitmap);
}
set_load_weight(&init_task);
#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_SMP
+/*
+ * distribute shares of all task groups among their schedulable entities,
+ * to reflect load distribution across cpus.
+ */
+static int rebalance_shares(struct sched_domain *sd, int this_cpu)
+{
+ struct cfs_rq *cfs_rq;
+ struct rq *rq = cpu_rq(this_cpu);
+ cpumask_t sdspan = sd->span;
+ int balanced = 1;
+
+ /* Walk through all the task groups that we have */
+ for_each_leaf_cfs_rq(rq, cfs_rq) {
+ int i;
+ unsigned long total_load = 0, total_shares;
+ struct task_group *tg = cfs_rq->tg;
+
+ /* Gather total task load of this group across cpus */
+ for_each_cpu_mask(i, sdspan)
+ total_load += tg->cfs_rq[i]->load.weight;
+
+ /* Nothing to do if this group has no load */
+ if (!total_load)
+ continue;
+
+ /*
+ * tg->shares represents the number of cpu shares the task group
+ * is eligible to hold on a single cpu. On N cpus, it is
+ * eligible to hold (N * tg->shares) cpu shares in total.
+ */
+ total_shares = tg->shares * cpus_weight(sdspan);
+
+ /*
+ * redistribute total_shares across cpus as per the task load
+ * distribution.
+ */
+ for_each_cpu_mask(i, sdspan) {
+ unsigned long local_load, local_shares;
+
+ local_load = tg->cfs_rq[i]->load.weight;
+ local_shares = (local_load * total_shares) / total_load;
+ if (!local_shares)
+ local_shares = MIN_GROUP_SHARES;
+ if (local_shares == tg->se[i]->load.weight)
+ continue;
+
+ spin_lock_irq(&cpu_rq(i)->lock);
+ set_se_shares(tg->se[i], local_shares);
+ spin_unlock_irq(&cpu_rq(i)->lock);
+ balanced = 0;
+ }
+ }
+
+ return balanced;
+}
+
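A worked example of the proportional redistribution above, using assumed numbers:

/*
 * Illustration only: tg->shares = 1024 on a 2-cpu domain gives
 * total_shares = 2048. If the group's load splits 300/100 across
 * the cpus, the per-cpu entity shares become
 *	2048 * 300 / 400 = 1536 and 2048 * 100 / 400 = 512,
 * matching the load split while preserving the group's total weight.
 */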
+/*
+ * How frequently should we rebalance_shares() across cpus?
+ *
+ * The more frequently we rebalance shares, the more accurate the fairness
+ * of cpu bandwidth distribution between task groups is. However, higher
+ * frequency also implies increased scheduling overhead.
+ *
+ * sysctl_sched_min_bal_int_shares represents the minimum interval between
+ * consecutive calls to rebalance_shares() in the same sched domain.
+ *
+ * sysctl_sched_max_bal_int_shares represents the maximum interval between
+ * consecutive calls to rebalance_shares() in the same sched domain.
+ *
+ * These settings allow for the appropriate trade-off between accuracy of
+ * fairness and the associated overhead.
+ */
+
+/* default: 8ms, units: milliseconds */
+const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
+
+/* default: 128ms, units: milliseconds */
+const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
+
+/* kernel thread that runs rebalance_shares() periodically */
+static int load_balance_monitor(void *unused)
+{
+ unsigned int timeout = sysctl_sched_min_bal_int_shares;
+ struct sched_param schedparm;
+ int ret;
+
+ /*
+ * We don't want this thread's execution to be limited by the shares
+ * assigned to the default group (init_task_group). Hence make it run
+ * as a SCHED_RR RT task at the lowest priority.
+ */
+ schedparm.sched_priority = 1;
+ ret = sched_setscheduler(current, SCHED_RR, &schedparm);
+ if (ret)
+ printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
+ " monitor thread (error = %d) \n", ret);
+
+ while (!kthread_should_stop()) {
+ int i, cpu, balanced = 1;
+
+ /* Prevent cpus going down or coming up */
+ get_online_cpus();
+ /* lock out changes to the doms_cur[] array */
+ lock_doms_cur();
+ /*
+ * Enter a rcu read-side critical section to safely walk rq->sd
+ * chain on various cpus and to walk task group list
+ * (rq->leaf_cfs_rq_list) in rebalance_shares().
+ */
+ rcu_read_lock();
+
+ for (i = 0; i < ndoms_cur; i++) {
+ cpumask_t cpumap = doms_cur[i];
+ struct sched_domain *sd = NULL, *sd_prev = NULL;
+
+ cpu = first_cpu(cpumap);
+
+ /* Find the highest domain at which to balance shares */
+ for_each_domain(cpu, sd) {
+ if (!(sd->flags & SD_LOAD_BALANCE))
+ continue;
+ sd_prev = sd;
+ }
+
+ sd = sd_prev;
+ /* sd == NULL? No load balancing required in this domain */
+ if (!sd)
+ continue;
+
+ balanced &= rebalance_shares(sd, cpu);
+ }
+
+ rcu_read_unlock();
+
+ unlock_doms_cur();
+ put_online_cpus();
+
+ if (!balanced)
+ timeout = sysctl_sched_min_bal_int_shares;
+ else if (timeout < sysctl_sched_max_bal_int_shares)
+ timeout *= 2;
+
+ msleep_interruptible(timeout);
+ }
+
+ return 0;
+}
+#endif /* CONFIG_SMP */
+
+static void free_sched_group(struct task_group *tg)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ if (tg->cfs_rq)
+ kfree(tg->cfs_rq[i]);
+ if (tg->se)
+ kfree(tg->se[i]);
+ if (tg->rt_rq)
+ kfree(tg->rt_rq[i]);
+ if (tg->rt_se)
+ kfree(tg->rt_se[i]);
+ }
+
+ kfree(tg->cfs_rq);
+ kfree(tg->se);
+ kfree(tg->rt_rq);
+ kfree(tg->rt_se);
+ kfree(tg);
+}
+
/* allocate runqueue etc for a new task group */
struct task_group *sched_create_group(void)
{
struct task_group *tg;
struct cfs_rq *cfs_rq;
struct sched_entity *se;
+ struct rt_rq *rt_rq;
+ struct sched_rt_entity *rt_se;
struct rq *rq;
int i;
tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
if (!tg->se)
goto err;
+ tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+ if (!tg->rt_rq)
+ goto err;
+ tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+ if (!tg->rt_se)
+ goto err;
+
+ tg->shares = NICE_0_LOAD;
+ tg->rt_ratio = 0; /* XXX */
for_each_possible_cpu(i) {
rq = cpu_rq(i);
- cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
- cpu_to_node(i));
+ cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
if (!cfs_rq)
goto err;
- se = kmalloc_node(sizeof(struct sched_entity), GFP_KERNEL,
- cpu_to_node(i));
+ se = kmalloc_node(sizeof(struct sched_entity),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
if (!se)
goto err;
- memset(cfs_rq, 0, sizeof(struct cfs_rq));
- memset(se, 0, sizeof(struct sched_entity));
+ rt_rq = kmalloc_node(sizeof(struct rt_rq),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ if (!rt_rq)
+ goto err;
- tg->cfs_rq[i] = cfs_rq;
- init_cfs_rq(cfs_rq, rq);
- cfs_rq->tg = tg;
+ rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
+ GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+ if (!rt_se)
+ goto err;
- tg->se[i] = se;
- se->cfs_rq = &rq->cfs;
- se->my_q = cfs_rq;
- se->load.weight = NICE_0_LOAD;
- se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
- se->parent = NULL;
+ init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+ init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
}
+ lock_task_group_list();
for_each_possible_cpu(i) {
rq = cpu_rq(i);
cfs_rq = tg->cfs_rq[i];
list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+ rt_rq = tg->rt_rq[i];
+ list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
}
-
- tg->shares = NICE_0_LOAD;
- spin_lock_init(&tg->lock);
+ list_add_rcu(&tg->list, &task_groups);
+ unlock_task_group_list();
return tg;
err:
- for_each_possible_cpu(i) {
- if (tg->cfs_rq)
- kfree(tg->cfs_rq[i]);
- if (tg->se)
- kfree(tg->se[i]);
- }
- kfree(tg->cfs_rq);
- kfree(tg->se);
- kfree(tg);
-
+ free_sched_group(tg);
return ERR_PTR(-ENOMEM);
}
/* rcu callback to free various structures associated with a task group */
-static void free_sched_group(struct rcu_head *rhp)
+static void free_sched_group_rcu(struct rcu_head *rhp)
{
- struct task_group *tg = container_of(rhp, struct task_group, rcu);
- struct cfs_rq *cfs_rq;
- struct sched_entity *se;
- int i;
-
/* now it should be safe to free those cfs_rqs */
- for_each_possible_cpu(i) {
- cfs_rq = tg->cfs_rq[i];
- kfree(cfs_rq);
-
- se = tg->se[i];
- kfree(se);
- }
-
- kfree(tg->cfs_rq);
- kfree(tg->se);
- kfree(tg);
+ free_sched_group(container_of(rhp, struct task_group, rcu));
}
/* Destroy runqueue etc associated with a task group */
void sched_destroy_group(struct task_group *tg)
{
struct cfs_rq *cfs_rq = NULL;
+ struct rt_rq *rt_rq = NULL;
int i;
+ lock_task_group_list();
for_each_possible_cpu(i) {
cfs_rq = tg->cfs_rq[i];
list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+ rt_rq = tg->rt_rq[i];
+ list_del_rcu(&rt_rq->leaf_rt_rq_list);
}
+ list_del_rcu(&tg->list);
+ unlock_task_group_list();
BUG_ON(!cfs_rq);
/* wait for possible concurrent references to cfs_rqs complete */
- call_rcu(&tg->rcu, free_sched_group);
+ call_rcu(&tg->rcu, free_sched_group_rcu);
}
/* change task's runqueue when it moves between groups.
rq = task_rq_lock(tsk, &flags);
- if (tsk->sched_class != &fair_sched_class) {
- set_task_cfs_rq(tsk, task_cpu(tsk));
- goto done;
- }
-
update_rq_clock(rq);
- running = task_running(rq, tsk);
+ running = task_current(rq, tsk);
on_rq = tsk->se.on_rq;
if (on_rq) {
tsk->sched_class->put_prev_task(rq, tsk);
}
- set_task_cfs_rq(tsk, task_cpu(tsk));
+ set_task_rq(tsk, task_cpu(tsk));
if (on_rq) {
if (unlikely(running))
enqueue_task(rq, tsk, 0);
}
-done:
task_rq_unlock(rq, &flags);
}
+/* rq->lock to be locked by caller */
static void set_se_shares(struct sched_entity *se, unsigned long shares)
{
struct cfs_rq *cfs_rq = se->cfs_rq;
struct rq *rq = cfs_rq->rq;
int on_rq;
- spin_lock_irq(&rq->lock);
+ if (!shares)
+ shares = MIN_GROUP_SHARES;
on_rq = se->on_rq;
- if (on_rq)
+ if (on_rq) {
dequeue_entity(cfs_rq, se, 0);
+ dec_cpu_load(rq, se->load.weight);
+ }
se->load.weight = shares;
se->load.inv_weight = div64_64((1ULL<<32), shares);
- if (on_rq)
+ if (on_rq) {
enqueue_entity(cfs_rq, se, 0);
-
- spin_unlock_irq(&rq->lock);
+ inc_cpu_load(rq, se->load.weight);
+ }
}
int sched_group_set_shares(struct task_group *tg, unsigned long shares)
{
int i;
+ struct cfs_rq *cfs_rq;
+ struct rq *rq;
- spin_lock(&tg->lock);
+ lock_task_group_list();
if (tg->shares == shares)
goto done;
+ if (shares < MIN_GROUP_SHARES)
+ shares = MIN_GROUP_SHARES;
+
+ /*
+ * Prevent any load balance activity (rebalance_shares,
+ * load_balance_fair) from referring to this group first,
+ * by taking it off the rq->leaf_cfs_rq_list on each cpu.
+ */
+ for_each_possible_cpu(i) {
+ cfs_rq = tg->cfs_rq[i];
+ list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+ }
+
+ /* wait for any ongoing reference to this group to finish */
+ synchronize_sched();
+
+ /*
+ * Now we are free to modify the group's share on each cpu
+ * w/o tripping rebalance_shares or load_balance_fair.
+ */
tg->shares = shares;
- for_each_possible_cpu(i)
+ for_each_possible_cpu(i) {
+ spin_lock_irq(&cpu_rq(i)->lock);
set_se_shares(tg->se[i], shares);
+ spin_unlock_irq(&cpu_rq(i)->lock);
+ }
+ /*
+ * Enable load balance activity on this group, by inserting it back on
+ * each cpu's rq->leaf_cfs_rq_list.
+ */
+ for_each_possible_cpu(i) {
+ rq = cpu_rq(i);
+ cfs_rq = tg->cfs_rq[i];
+ list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+ }
done:
- spin_unlock(&tg->lock);
+ unlock_task_group_list();
return 0;
}
return tg->shares;
}
+/*
+ * Ensure the total rt_ratio <= sysctl_sched_rt_ratio
+ */
+int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio)
+{
+ struct task_group *tgi;
+ unsigned long total = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(tgi, &task_groups, list)
+ total += tgi->rt_ratio;
+ rcu_read_unlock();
+
+ if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio)
+ return -EINVAL;
+
+ tg->rt_ratio = rt_ratio;
+ return 0;
+}
+
+unsigned long sched_group_rt_ratio(struct task_group *tg)
+{
+ return tg->rt_ratio;
+}
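A worked example of the admission check, with assumed values:

/*
 * Illustration only: with sysctl_sched_rt_ratio = 62259 (~95%), two
 * groups holding rt_ratio 30000 and 20000 leave 12259 (~18.7%) of the
 * fixed-point budget; asking for a third group's ratio above 12259
 * makes total + rt_ratio exceed the sysctl and returns -EINVAL.
 */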
+
#endif /* CONFIG_FAIR_GROUP_SCHED */
#ifdef CONFIG_FAIR_CGROUP_SCHED
return (u64) tg->shares;
}
+static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype,
+ u64 rt_ratio_val)
+{
+ return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val);
+}
+
+static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft)
+{
+ struct task_group *tg = cgroup_tg(cgrp);
+
+ return (u64) tg->rt_ratio;
+}
+
static struct cftype cpu_files[] = {
{
.name = "shares",
.read_uint = cpu_shares_read_uint,
.write_uint = cpu_shares_write_uint,
},
+ {
+ .name = "rt_ratio",
+ .read_uint = cpu_rt_ratio_read_uint,
+ .write_uint = cpu_rt_ratio_write_uint,
+ },
};
static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
set_tsk_thread_flag(t, TIF_SIGPENDING);
/*
- * For SIGKILL, we want to wake it up in the stopped/traced case.
- * We don't check t->state here because there is a race with it
+ * For SIGKILL, we want to wake it up in the stopped/traced/killable
+ * case. We don't check t->state here because there is a race with it
* executing on another processor and just now entering stopped state.
* By using wake_up_state, we ensure the process will wake up and
* handle its death signal.
*/
mask = TASK_INTERRUPTIBLE;
if (resume)
- mask |= TASK_STOPPED | TASK_TRACED;
+ mask |= TASK_WAKEKILL;
if (!wake_up_state(t, mask))
kick_process(t);
}
* Wake up the stopped thread _after_ setting
* TIF_SIGPENDING
*/
- state = TASK_STOPPED;
+ state = __TASK_STOPPED;
if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
state |= TASK_INTERRUPTIBLE;
current->comm, task_pid_nr(current), signr);
#if defined(__i386__) && !defined(__arch_um__)
- printk("code at %08lx: ", regs->eip);
+ printk("code at %08lx: ", regs->ip);
{
int i;
for (i = 0; i < 16; i++) {
unsigned char insn;
- __get_user(insn, (unsigned char *)(regs->eip + i));
+ __get_user(insn, (unsigned char *)(regs->ip + i));
printk("%02x ", insn);
}
}
return 0;
if (sig == SIGKILL)
return 1;
- if (p->state & (TASK_STOPPED | TASK_TRACED))
+ if (task_is_stopped_or_traced(p))
return 0;
return task_curr(p) || !signal_pending(p);
}
}
}
+int fastcall __fatal_signal_pending(struct task_struct *tsk)
+{
+ return sigismember(&tsk->pending.signal, SIGKILL);
+}
+
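__fatal_signal_pending() underpins fatal_signal_pending(), which killable sleeps test. A minimal sketch of a killable poll loop, with a hypothetical readiness helper:

/* Illustration only: an otherwise-uninterruptible wait that honours
 * SIGKILL. example_ready() is a stand-in for the real condition. */
static int example_killable_poll(int (*example_ready)(void))
{
	while (!example_ready()) {
		if (fatal_signal_pending(current))
			return -EINTR;
		schedule_timeout_killable(HZ / 10);
	}
	return 0;
}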
/*
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
*/
BUG_ON(sig == -1);
/* do_notify_parent_cldstop should have been called instead. */
- BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
+ BUG_ON(task_is_stopped_or_traced(tsk));
BUG_ON(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
* so this check has no races.
*/
if (!t->exit_state &&
- !(t->state & (TASK_STOPPED|TASK_TRACED))) {
+ !task_is_stopped_or_traced(t)) {
stop_count++;
signal_wake_up(t, 0);
}
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
-typedef struct tvec_s {
+struct tvec {
struct list_head vec[TVN_SIZE];
-} tvec_t;
+};
-typedef struct tvec_root_s {
+struct tvec_root {
struct list_head vec[TVR_SIZE];
-} tvec_root_t;
+};
-struct tvec_t_base_s {
+struct tvec_base {
spinlock_t lock;
struct timer_list *running_timer;
unsigned long timer_jiffies;
- tvec_root_t tv1;
- tvec_t tv2;
- tvec_t tv3;
- tvec_t tv4;
- tvec_t tv5;
+ struct tvec_root tv1;
+ struct tvec tv2;
+ struct tvec tv3;
+ struct tvec tv4;
+ struct tvec tv5;
} ____cacheline_aligned;
-typedef struct tvec_t_base_s tvec_base_t;
-
-tvec_base_t boot_tvec_bases;
+struct tvec_base boot_tvec_bases;
EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
+static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
/*
- * Note that all tvec_bases is 2 byte aligned and lower bit of
+ * Note that all tvec_bases are 2-byte aligned and the lower bit of
* base in timer_list is guaranteed to be zero. Use the LSB for
* the new flag to indicate whether the timer is deferrable
*/
#define TBASE_DEFERRABLE_FLAG (0x1)
/* Functions below help us manage 'deferrable' flag */
-static inline unsigned int tbase_get_deferrable(tvec_base_t *base)
+static inline unsigned int tbase_get_deferrable(struct tvec_base *base)
{
return ((unsigned int)(unsigned long)base & TBASE_DEFERRABLE_FLAG);
}
-static inline tvec_base_t *tbase_get_base(tvec_base_t *base)
+static inline struct tvec_base *tbase_get_base(struct tvec_base *base)
{
- return ((tvec_base_t *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
+ return ((struct tvec_base *)((unsigned long)base & ~TBASE_DEFERRABLE_FLAG));
}
static inline void timer_set_deferrable(struct timer_list *timer)
{
- timer->base = ((tvec_base_t *)((unsigned long)(timer->base) |
+ timer->base = ((struct tvec_base *)((unsigned long)(timer->base) |
TBASE_DEFERRABLE_FLAG));
}
static inline void
-timer_set_base(struct timer_list *timer, tvec_base_t *new_base)
+timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
{
- timer->base = (tvec_base_t *)((unsigned long)(new_base) |
+ timer->base = (struct tvec_base *)((unsigned long)(new_base) |
tbase_get_deferrable(timer->base));
}
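The deferrable flag rides in the pointer itself; a short illustration of the tagging, with an assumed address:

/*
 * Illustration only: ____cacheline_aligned keeps bit 0 of every
 * tvec_base address clear, so it is free to carry the flag:
 *
 *	base        = 0xffff880012345600	(bit 0 clear)
 *	timer->base = base | TBASE_DEFERRABLE_FLAG
 *	tbase_get_base() masks the flag off before dereferencing.
 */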
EXPORT_SYMBOL_GPL(round_jiffies_relative);
-static inline void set_running_timer(tvec_base_t *base,
+static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
{
#ifdef CONFIG_SMP
#endif
}
-static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
+static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
unsigned long idx = expires - base->timer_jiffies;
* possible to set timer->base = NULL and drop the lock: the timer remains
* locked.
*/
-static tvec_base_t *lock_timer_base(struct timer_list *timer,
+static struct tvec_base *lock_timer_base(struct timer_list *timer,
unsigned long *flags)
__acquires(timer->base->lock)
{
- tvec_base_t *base;
+ struct tvec_base *base;
for (;;) {
- tvec_base_t *prelock_base = timer->base;
+ struct tvec_base *prelock_base = timer->base;
base = tbase_get_base(prelock_base);
if (likely(base != NULL)) {
spin_lock_irqsave(&base->lock, *flags);
int __mod_timer(struct timer_list *timer, unsigned long expires)
{
- tvec_base_t *base, *new_base;
+ struct tvec_base *base, *new_base;
unsigned long flags;
int ret = 0;
*/
void add_timer_on(struct timer_list *timer, int cpu)
{
- tvec_base_t *base = per_cpu(tvec_bases, cpu);
+ struct tvec_base *base = per_cpu(tvec_bases, cpu);
unsigned long flags;
timer_stats_timer_set_start_info(timer);
*/
int del_timer(struct timer_list *timer)
{
- tvec_base_t *base;
+ struct tvec_base *base;
unsigned long flags;
int ret = 0;
*/
int try_to_del_timer_sync(struct timer_list *timer)
{
- tvec_base_t *base;
+ struct tvec_base *base;
unsigned long flags;
int ret = -1;
EXPORT_SYMBOL(del_timer_sync);
#endif
-static int cascade(tvec_base_t *base, tvec_t *tv, int index)
+static int cascade(struct tvec_base *base, struct tvec *tv, int index)
{
/* cascade all the timers from tv up one level */
struct timer_list *timer, *tmp;
* This function cascades all vectors and executes all expired timer
* vectors.
*/
-static inline void __run_timers(tvec_base_t *base)
+static inline void __run_timers(struct tvec_base *base)
{
struct timer_list *timer;
int preempt_count = preempt_count();
fn(data);
if (preempt_count != preempt_count()) {
- printk(KERN_WARNING "huh, entered %p "
+ printk(KERN_ERR "huh, entered %p "
"with preempt_count %08x, exited"
" with %08x?\n",
fn, preempt_count,
* is used on S/390 to stop all activity when a cpu is idle.
* This function needs to be called with interrupts disabled.
*/
-static unsigned long __next_timer_interrupt(tvec_base_t *base)
+static unsigned long __next_timer_interrupt(struct tvec_base *base)
{
unsigned long timer_jiffies = base->timer_jiffies;
unsigned long expires = timer_jiffies + NEXT_TIMER_MAX_DELTA;
int index, slot, array, found = 0;
struct timer_list *nte;
- tvec_t *varray[4];
+ struct tvec *varray[4];
/* Look for timer events in tv1. */
index = slot = timer_jiffies & TVR_MASK;
varray[3] = &base->tv5;
for (array = 0; array < 4; array++) {
- tvec_t *varp = varray[array];
+ struct tvec *varp = varray[array];
index = slot = timer_jiffies & TVN_MASK;
do {
*/
unsigned long get_next_timer_interrupt(unsigned long now)
{
- tvec_base_t *base = __get_cpu_var(tvec_bases);
+ struct tvec_base *base = __get_cpu_var(tvec_bases);
unsigned long expires;
spin_lock(&base->lock);
*/
static void run_timer_softirq(struct softirq_action *h)
{
- tvec_base_t *base = __get_cpu_var(tvec_bases);
+ struct tvec_base *base = __get_cpu_var(tvec_bases);
- hrtimer_run_queues();
+ hrtimer_run_pending();
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
*/
void run_local_timers(void)
{
+ hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
softlockup_tick();
}
int pid;
rcu_read_lock();
- pid = task_ppid_nr_ns(current, current->nsproxy->pid_ns);
+ pid = task_tgid_nr_ns(current->real_parent, current->nsproxy->pid_ns);
rcu_read_unlock();
return pid;
}
EXPORT_SYMBOL(schedule_timeout_interruptible);
+signed long __sched schedule_timeout_killable(signed long timeout)
+{
+ __set_current_state(TASK_KILLABLE);
+ return schedule_timeout(timeout);
+}
+EXPORT_SYMBOL(schedule_timeout_killable);
+
signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
__set_current_state(TASK_UNINTERRUPTIBLE);
*/
static struct lock_class_key base_lock_keys[NR_CPUS];
-static int __devinit init_timers_cpu(int cpu)
+static int __cpuinit init_timers_cpu(int cpu)
{
int j;
- tvec_base_t *base;
- static char __devinitdata tvec_base_done[NR_CPUS];
+ struct tvec_base *base;
+ static char __cpuinitdata tvec_base_done[NR_CPUS];
if (!tvec_base_done[cpu]) {
static char boot_done;
}
#ifdef CONFIG_HOTPLUG_CPU
-static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
+static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
{
struct timer_list *timer;
}
}
-static void __devinit migrate_timers(int cpu)
+static void __cpuinit migrate_timers(int cpu)
{
- tvec_base_t *old_base;
- tvec_base_t *new_base;
+ struct tvec_base *old_base;
+ struct tvec_base *new_base;
int i;
BUG_ON(cpu_online(cpu));
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
BUG_ON(page_mapped(page));
+
+ /*
+ * Some filesystems seem to re-dirty the page even after
+ * the VM has canceled the dirty bit (eg ext3 journaling).
+ *
+ * Fix it up by doing a final dirty accounting check after
+ * having removed the page entirely.
+ */
+ if (PageDirty(page) && mapping_cap_account_dirty(mapping)) {
+ dec_zone_page_state(page, NR_FILE_DIRTY);
+ dec_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
+ }
}
void remove_from_page_cache(struct page *page)
return 0;
}
+static int sync_page_killable(void *word)
+{
+ sync_page(word);
+ return fatal_signal_pending(current) ? -EINTR : 0;
+}
+
/**
* __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
* @mapping: address space structure to write
}
EXPORT_SYMBOL(__lock_page);
+int fastcall __lock_page_killable(struct page *page)
+{
+ DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
+
+ return __wait_on_bit_lock(page_waitqueue(page), &wait,
+ sync_page_killable, TASK_KILLABLE);
+}
+
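A sketch of the calling convention, assuming a hypothetical reader: a fatal signal aborts the wait and the caller maps the error as it sees fit (do_generic_mapping_read below turns it into -EIO).

/* Illustration only: lock a page but give up on SIGKILL. */
static int example_read_locked(struct page *page)
{
	if (lock_page_killable(page))
		return -EINTR;	/* fatal signal while waiting */
	/* ... page is locked here ... */
	unlock_page(page);
	return 0;
}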
/*
* Variant of lock_page that does not require the caller to hold a reference
* on the page's mapping.
page_not_up_to_date:
/* Get exclusive access to the page ... */
- lock_page(page);
+ if (lock_page_killable(page))
+ goto readpage_eio;
/* Did it get truncated before we got the lock? */
if (!page->mapping) {
}
if (!PageUptodate(page)) {
- lock_page(page);
+ if (lock_page_killable(page))
+ goto readpage_eio;
if (!PageUptodate(page)) {
if (page->mapping == NULL) {
/*
goto find_page;
}
unlock_page(page);
- error = -EIO;
shrink_readahead_size_eio(filp, ra);
- goto readpage_error;
+ goto readpage_eio;
}
unlock_page(page);
}
goto page_ok;
+readpage_eio:
+ error = -EIO;
readpage_error:
/* UHHUH! A synchronous read error occurred. Report it */
desc->error = error;
spin_unlock(&rpc_authflavor_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_register);
int
rpcauth_unregister(const struct rpc_authops *ops)
spin_unlock(&rpc_authflavor_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_unregister);
struct rpc_auth *
rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
out:
return auth;
}
+EXPORT_SYMBOL_GPL(rpcauth_create);
void
rpcauth_release(struct rpc_auth *auth)
auth->au_credcache = new;
return 0;
}
+EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
/*
* Destroy a list of credentials
kfree(cache);
}
}
+EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
/*
* Remove stale credentials. Avoid sleeping inside the loop.
out:
return cred;
}
+EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
struct rpc_cred *
rpcauth_lookupcred(struct rpc_auth *auth, int flags)
put_group_info(acred.group_info);
return ret;
}
+EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
#endif
cred->cr_uid = acred->uid;
}
-EXPORT_SYMBOL(rpcauth_init_cred);
+EXPORT_SYMBOL_GPL(rpcauth_init_cred);
struct rpc_cred *
rpcauth_bindcred(struct rpc_task *task)
.group_info = current->group_info,
};
struct rpc_cred *ret;
- sigset_t oldset;
int flags = 0;
dprintk("RPC: %5u looking up %s cred\n",
get_group_info(acred.group_info);
if (task->tk_flags & RPC_TASK_ROOTCREDS)
flags |= RPCAUTH_LOOKUP_ROOTCREDS;
- rpc_clnt_sigmask(task->tk_client, &oldset);
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
- rpc_clnt_sigunmask(task->tk_client, &oldset);
if (!IS_ERR(ret))
task->tk_msg.rpc_cred = ret;
else
out_destroy:
cred->cr_ops->crdestroy(cred);
}
+EXPORT_SYMBOL_GPL(put_rpccred);
void
rpcauth_unbindcred(struct rpc_task *task)
#include <linux/smp_lock.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>
+#include <linux/in6.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
}
}
-static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
+static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
{
+ struct rpc_program *program = args->program;
struct rpc_version *version;
struct rpc_clnt *clnt = NULL;
struct rpc_auth *auth;
/* sanity check the name before trying to print it */
err = -EINVAL;
- len = strlen(servname);
+ len = strlen(args->servername);
if (len > RPC_MAXNETNAMELEN)
goto out_no_rpciod;
len++;
dprintk("RPC: creating %s client for %s (xprt %p)\n",
- program->name, servname, xprt);
+ program->name, args->servername, xprt);
err = rpciod_up();
if (err)
err = -EINVAL;
if (!xprt)
goto out_no_xprt;
- if (vers >= program->nrvers || !(version = program->version[vers]))
+
+ if (args->version >= program->nrvers)
+ goto out_err;
+ version = program->version[args->version];
+ if (version == NULL)
goto out_err;
err = -ENOMEM;
clnt->cl_server = clnt->cl_inline_name;
if (len > sizeof(clnt->cl_inline_name)) {
char *buf = kmalloc(len, GFP_KERNEL);
- if (buf != 0)
+ if (buf != NULL)
clnt->cl_server = buf;
else
len = sizeof(clnt->cl_inline_name);
}
- strlcpy(clnt->cl_server, servname, len);
+ strlcpy(clnt->cl_server, args->servername, len);
clnt->cl_xprt = xprt;
clnt->cl_procinfo = version->procs;
if (!xprt_bound(clnt->cl_xprt))
clnt->cl_autobind = 1;
+ clnt->cl_timeout = xprt->timeout;
+ if (args->timeout != NULL) {
+ memcpy(&clnt->cl_timeout_default, args->timeout,
+ sizeof(clnt->cl_timeout_default));
+ clnt->cl_timeout = &clnt->cl_timeout_default;
+ }
+
clnt->cl_rtt = &clnt->cl_rtt_default;
- rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+ rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
kref_init(&clnt->cl_kref);
if (err < 0)
goto out_no_path;
- auth = rpcauth_create(flavor, clnt);
+ auth = rpcauth_create(args->authflavor, clnt);
if (IS_ERR(auth)) {
printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
- flavor);
+ args->authflavor);
err = PTR_ERR(auth);
goto out_no_auth;
}
.srcaddr = args->saddress,
.dstaddr = args->address,
.addrlen = args->addrsize,
- .timeout = args->timeout
};
- char servername[20];
+ char servername[48];
- xprt = xprt_create_transport(&xprtargs);
- if (IS_ERR(xprt))
- return (struct rpc_clnt *)xprt;
* up a string representation of the passed-in address.
*/
if (args->servername == NULL) {
- struct sockaddr_in *addr =
- (struct sockaddr_in *) args->address;
- snprintf(servername, sizeof(servername), NIPQUAD_FMT,
- NIPQUAD(addr->sin_addr.s_addr));
+ servername[0] = '\0';
+ switch (args->address->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin =
+ (struct sockaddr_in *)args->address;
+ snprintf(servername, sizeof(servername), NIPQUAD_FMT,
+ NIPQUAD(sin->sin_addr.s_addr));
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin =
+ (struct sockaddr_in6 *)args->address;
+ snprintf(servername, sizeof(servername), NIP6_FMT,
+ NIP6(sin->sin6_addr));
+ break;
+ }
+ default:
+ /* caller wants default server name, but
+ * address family isn't recognized. */
+ return ERR_PTR(-EINVAL);
+ }
args->servername = servername;
}
+ xprt = xprt_create_transport(&xprtargs);
+ if (IS_ERR(xprt))
+ return (struct rpc_clnt *)xprt;
+
/*
* By default, kernel RPC client connects from a reserved port.
* CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,
if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
xprt->resvport = 0;
- clnt = rpc_new_client(xprt, args->servername, args->program,
- args->version, args->authflavor);
+ clnt = rpc_new_client(args, xprt);
if (IS_ERR(clnt))
return clnt;
if (!(args->flags & RPC_CLNT_CREATE_NOPING)) {
- int err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+ int err = rpc_ping(clnt, RPC_TASK_SOFT);
if (err != 0) {
rpc_shutdown_client(clnt);
return ERR_PTR(err);
if (args->flags & RPC_CLNT_CREATE_HARDRTRY)
clnt->cl_softrtry = 0;
- if (args->flags & RPC_CLNT_CREATE_INTR)
- clnt->cl_intr = 1;
if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
clnt->cl_autobind = 1;
if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
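With the sa_family switch above, rpc_create() can now synthesize a default server name for AF_INET6 as well as AF_INET peers, so a caller may pass a bare sockaddr_in6 and leave servername NULL. A hedged caller-side sketch; my_program and the version number are hypothetical placeholders:

	/* Sketch: create a client over IPv6 and let rpc_create()
	 * derive the server name from the address (NIP6_FMT path). */
	struct sockaddr_in6 sin6 = {
		.sin6_family	= AF_INET6,
		/* .sin6_addr and .sin6_port filled in by the caller */
	};
	struct rpc_create_args args = {
		.protocol	= XPRT_TRANSPORT_TCP,
		.address	= (struct sockaddr *)&sin6,
		.addrsize	= sizeof(sin6),
		.servername	= NULL,
		.program	= &my_program,	/* hypothetical */
		.version	= 1,		/* hypothetical */
		.authflavor	= RPC_AUTH_UNIX,
		.flags		= RPC_CLNT_CREATE_NOPING,
	};
	struct rpc_clnt *clnt = rpc_create(&args);

	if (IS_ERR(clnt))
		return PTR_ERR(clnt);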
new->cl_autobind = 0;
INIT_LIST_HEAD(&new->cl_tasks);
spin_lock_init(&new->cl_lock);
- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
new->cl_metrics = rpc_alloc_iostats(clnt);
if (new->cl_metrics == NULL)
goto out_no_stats;
dprintk("RPC: %s: returned error %d\n", __FUNCTION__, err);
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(rpc_clone_client);
/*
* Properly shut down an RPC client, terminating all outstanding
rpc_release_client(clnt);
}
+EXPORT_SYMBOL_GPL(rpc_shutdown_client);
/*
* Free an RPC client
clnt->cl_prog = program->number;
clnt->cl_vers = version->number;
clnt->cl_stats = program->stats;
- err = rpc_ping(clnt, RPC_TASK_SOFT|RPC_TASK_NOINTR);
+ err = rpc_ping(clnt, RPC_TASK_SOFT);
if (err != 0) {
rpc_shutdown_client(clnt);
clnt = ERR_PTR(err);
out:
return clnt;
}
+EXPORT_SYMBOL_GPL(rpc_bind_new_program);
/*
* Default callback for async RPC calls
.rpc_call_done = rpc_default_callback,
};
- /*
- * Export the signal mask handling for synchronous code that
- * sleeps on RPC calls
- */
- #define RPC_INTR_SIGNALS (sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT) | sigmask(SIGTERM))
-
- static void rpc_save_sigmask(sigset_t *oldset, int intr)
- {
- unsigned long sigallow = sigmask(SIGKILL);
- sigset_t sigmask;
-
- /* Block all signals except those listed in sigallow */
- if (intr)
- sigallow |= RPC_INTR_SIGNALS;
- siginitsetinv(&sigmask, sigallow);
- sigprocmask(SIG_BLOCK, &sigmask, oldset);
- }
-
- static void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
- {
- rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
- }
-
- static void rpc_restore_sigmask(sigset_t *oldset)
- {
- sigprocmask(SIG_SETMASK, oldset, NULL);
- }
-
- void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
- {
- rpc_save_sigmask(oldset, clnt->cl_intr);
- }
- EXPORT_SYMBOL_GPL(rpc_clnt_sigmask);
-
- void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
- {
- rpc_restore_sigmask(oldset);
- }
- EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask);
-
-static
-struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
- struct rpc_message *msg,
- int flags,
- const struct rpc_call_ops *ops,
- void *data)
+/**
+ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+ * @task_setup_data: pointer to task initialisation data
+ */
+struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
{
struct rpc_task *task, *ret;
- sigset_t oldset;
- task = rpc_new_task(clnt, flags, ops, data);
+ task = rpc_new_task(task_setup_data);
if (task == NULL) {
- rpc_release_calldata(ops, data);
- return ERR_PTR(-ENOMEM);
+ rpc_release_calldata(task_setup_data->callback_ops,
+ task_setup_data->callback_data);
+ ret = ERR_PTR(-ENOMEM);
+ goto out;
}
- /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
- if (msg != NULL) {
- rpc_call_setup(task, msg, 0);
- if (task->tk_status != 0) {
- ret = ERR_PTR(task->tk_status);
- rpc_put_task(task);
- goto out;
- }
+ if (task->tk_status != 0) {
+ ret = ERR_PTR(task->tk_status);
+ rpc_put_task(task);
+ goto out;
}
atomic_inc(&task->tk_count);
- /* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
- if (!RPC_IS_ASYNC(task)) {
- rpc_task_sigmask(task, &oldset);
- rpc_execute(task);
- rpc_restore_sigmask(&oldset);
- } else
- rpc_execute(task);
+ rpc_execute(task);
ret = task;
out:
return ret;
}
+EXPORT_SYMBOL_GPL(rpc_run_task);
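rpc_run_task() now takes everything through struct rpc_task_setup and returns the task holding an extra reference, so the caller keeps a usable handle until it calls rpc_put_task(). A minimal async sketch, assuming an initialised clnt and msg; my_call_ops and my_data are hypothetical:

	/* Sketch: start an async call and keep the task handle. */
	struct rpc_task_setup setup = {
		.rpc_client	= clnt,
		.rpc_message	= &msg,
		.callback_ops	= &my_call_ops,	/* hypothetical ops */
		.callback_data	= my_data,
		.flags		= RPC_TASK_ASYNC,
	};
	struct rpc_task *task = rpc_run_task(&setup);

	if (IS_ERR(task))
		return PTR_ERR(task);
	/* ... task may be inspected or killed here ... */
	rpc_put_task(task);	/* drop the reference rpc_run_task took */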
/**
* rpc_call_sync - Perform a synchronous RPC call
int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
{
struct rpc_task *task;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = msg,
+ .callback_ops = &rpc_default_ops,
+ .flags = flags,
+ };
int status;
BUG_ON(flags & RPC_TASK_ASYNC);
- task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
status = task->tk_status;
rpc_put_task(task);
return status;
}
+EXPORT_SYMBOL_GPL(rpc_call_sync);
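Caller-side use of rpc_call_sync() is unchanged by this rewrite: the rpc_message still names the procedure and its argument and result buffers. A sketch with hypothetical procedure and buffer names:

	/* Sketch: a synchronous call through the rewritten helper. */
	struct rpc_message msg = {
		.rpc_proc	= &my_procedures[MYPROC_FOO],	/* hypothetical */
		.rpc_argp	= &foo_args,
		.rpc_resp	= &foo_res,
	};
	int status = rpc_call_sync(clnt, &msg, 0);

	if (status < 0)
		dprintk("RPC: foo call failed: %d\n", status);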
/**
* rpc_call_async - Perform an asynchronous RPC call
const struct rpc_call_ops *tk_ops, void *data)
{
struct rpc_task *task;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = msg,
+ .callback_ops = tk_ops,
+ .callback_data = data,
+ .flags = flags|RPC_TASK_ASYNC,
+ };
- task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
+ task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
return 0;
}
-
-/**
- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
- * @clnt: pointer to RPC client
- * @flags: RPC flags
- * @ops: RPC call ops
- * @data: user call data
- */
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
- const struct rpc_call_ops *tk_ops,
- void *data)
-{
- return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
-}
-EXPORT_SYMBOL(rpc_run_task);
+EXPORT_SYMBOL_GPL(rpc_call_async);
void
-rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+rpc_call_start(struct rpc_task *task)
{
- task->tk_msg = *msg;
- task->tk_flags |= flags;
- /* Bind the user cred */
- if (task->tk_msg.rpc_cred != NULL)
- rpcauth_holdcred(task);
- else
- rpcauth_bindcred(task);
-
- if (task->tk_status == 0)
- task->tk_action = call_start;
- else
- task->tk_action = rpc_exit_task;
+ task->tk_action = call_start;
}
+EXPORT_SYMBOL_GPL(rpc_call_start);
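rpc_call_start() replaces the task-side half of the old rpc_call_setup(): a ->rpc_call_prepare callback finishes its per-call setup and then points tk_action at call_start. A sketch of the expected pattern, with a hypothetical callback:

	/* Sketch: prepare callback that kicks off the call proper. */
	static void my_prepare(struct rpc_task *task, void *calldata)
	{
		/* ... per-call setup (sequence numbers, state, ...) ... */
		rpc_call_start(task);	/* sets tk_action = call_start */
	}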
/**
* rpc_peeraddr - extract remote peer address from clnt's xprt
* @format: address format
*
*/
-char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
+const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
+ enum rpc_display_format_t format)
{
struct rpc_xprt *xprt = clnt->cl_xprt;
if (xprt->ops->set_buffer_size)
xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
}
+EXPORT_SYMBOL_GPL(rpc_setbufsize);
/*
* Return size of largest payload RPC client can support, in bytes
task->tk_action = call_start;
}
+EXPORT_SYMBOL_GPL(rpc_restart_call);
/*
* 0. Initial state
case -ETIMEDOUT:
task->tk_action = call_timeout;
if (task->tk_client->cl_discrtry)
- xprt_disconnect(task->tk_xprt);
+ xprt_force_disconnect(task->tk_xprt);
break;
case -ECONNREFUSED:
case -ENOTCONN:
req->rq_received = req->rq_private_buf.len = 0;
task->tk_status = 0;
if (task->tk_client->cl_discrtry)
- xprt_disconnect(task->tk_xprt);
+ xprt_force_disconnect(task->tk_xprt);
}
/*
.rpc_proc = &rpcproc_null,
.rpc_cred = cred,
};
- return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpc_default_ops,
+ .flags = flags,
+ };
+ return rpc_run_task(&task_setup_data);
}
-EXPORT_SYMBOL(rpc_call_null);
+EXPORT_SYMBOL_GPL(rpc_call_null);
#ifdef RPC_DEBUG
void rpc_show_tasks(void)
#define RPCB_HIGHPROC_3 RPCBPROC_TADDR2UADDR
#define RPCB_HIGHPROC_4 RPCBPROC_GETSTAT
-/*
- * r_addr
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
- * US-ASCII string:
- *
- * h1.h2.h3.h4.p1.p2
- *
- * The prefix, "h1.h2.h3.h4", is the standard textual form for
- * representing an IPv4 address, which is always four octets long.
- * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
- * the first through fourth octets each converted to ASCII-decimal.
- * Assuming big-endian ordering, p1 and p2 are, respectively, the first
- * and second octets each converted to ASCII-decimal. For example, if a
- * host, in big-endian order, has an address of 0x0A010307 and there is
- * a service listening on, in big endian order, port 0x020F (decimal
- * 527), then the complete universal address is "10.1.3.7.2.15".
- *
- * ...
- *
- * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
- * US-ASCII string:
- *
- * x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
- *
- * The suffix "p1.p2" is the service port, and is computed the same way
- * as with universal addresses for TCP and UDP over IPv4. The prefix,
- * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
- * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
- * Additionally, the two alternative forms specified in Section 2.2 of
- * [RFC2373] are also acceptable.
- *
- * XXX: Currently this implementation does not explicitly convert the
- * stored address to US-ASCII on non-ASCII systems.
- */
-#define RPCB_MAXADDRLEN (128u)
-
/*
* r_owner
*
u32 r_vers;
u32 r_prot;
unsigned short r_port;
- char * r_netid;
- char r_addr[RPCB_MAXADDRLEN];
- char * r_owner;
+ const char * r_netid;
+ const char * r_addr;
+ const char * r_owner;
};
static struct rpc_procinfo rpcb_procedures2[];
static struct rpcb_info rpcb_next_version[];
static struct rpcb_info rpcb_next_version6[];
-static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
-{
- struct rpcbind_args *map = calldata;
- struct rpc_xprt *xprt = map->r_xprt;
- struct rpc_message msg = {
- .rpc_proc = rpcb_next_version[xprt->bind_index].rpc_proc,
- .rpc_argp = map,
- .rpc_resp = &map->r_port,
- };
-
- rpc_call_setup(task, &msg, 0);
-}
-
static void rpcb_map_release(void *data)
{
struct rpcbind_args *map = data;
}
static const struct rpc_call_ops rpcb_getport_ops = {
- .rpc_call_prepare = rpcb_getport_prepare,
.rpc_call_done = rpcb_getport_done,
.rpc_release = rpcb_map_release,
};
}
static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
- int proto, int version, int privileged)
+ size_t salen, int proto, u32 version,
+ int privileged)
{
struct rpc_create_args args = {
.protocol = proto,
.address = srvaddr,
- .addrsize = sizeof(struct sockaddr_in),
+ .addrsize = salen,
.servername = hostname,
.program = &rpcb_program,
.version = version,
.authflavor = RPC_AUTH_UNIX,
- .flags = (RPC_CLNT_CREATE_NOPING |
- RPC_CLNT_CREATE_INTR),
+ .flags = RPC_CLNT_CREATE_NOPING,
};
switch (srvaddr->sa_family) {
prog, vers, prot, port);
rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
- XPRT_TRANSPORT_UDP, 2, 1);
+ sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
* @vers: RPC version number to bind
* @prot: transport protocol to use to make this request
*
+ * Return value is the requested advertised port number,
+ * or a negative errno value.
+ *
* Called from outside the RPC client in a synchronous task context.
* Uses default timeout parameters specified by underlying transport.
*
- * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
+ * XXX: Needs to support IPv6
*/
-int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
- __u32 vers, int prot)
+int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
{
struct rpcbind_args map = {
.r_prog = prog,
.rpc_resp = &map.r_port,
};
struct rpc_clnt *rpcb_clnt;
- char hostname[40];
int status;
dprintk("RPC: %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
__FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
- sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
- rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
+ rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
+ sizeof(*sin), prot, 2, 0);
if (IS_ERR(rpcb_clnt))
return PTR_ERR(rpcb_clnt);
}
EXPORT_SYMBOL_GPL(rpcb_getport_sync);
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
+{
+ struct rpc_message msg = {
+ .rpc_proc = rpcb_next_version[version].rpc_proc,
+ .rpc_argp = map,
+ .rpc_resp = &map->r_port,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = rpcb_clnt,
+ .rpc_message = &msg,
+ .callback_ops = &rpcb_getport_ops,
+ .callback_data = map,
+ .flags = RPC_TASK_ASYNC,
+ };
+
+ return rpc_run_task(&task_setup_data);
+}
+
/**
* rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request
void rpcb_getport_async(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- int bind_version;
+ u32 bind_version;
struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_clnt *rpcb_clnt;
static struct rpcbind_args *map;
struct rpc_task *child;
- struct sockaddr addr;
+ struct sockaddr_storage addr;
+ struct sockaddr *sap = (struct sockaddr *)&addr;
+ size_t salen;
int status;
struct rpcb_info *info;
goto bailout_nofree;
}
- rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+ salen = rpc_peeraddr(clnt, sap, sizeof(addr));
/* Don't ever use rpcbind v2 for AF_INET6 requests */
- switch (addr.sa_family) {
+ switch (sap->sa_family) {
case AF_INET:
info = rpcb_next_version;
break;
dprintk("RPC: %5u %s: trying rpcbind version %u\n",
task->tk_pid, __FUNCTION__, bind_version);
- rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+ rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
bind_version, 0);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
map->r_port = 0;
map->r_xprt = xprt_get(xprt);
map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
- memcpy(map->r_addr,
- rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
- sizeof(map->r_addr));
+ map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
- child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
+ child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
rpc_release_client(rpcb_clnt);
if (IS_ERR(child)) {
status = -EIO;
* Simple sanity check. The smallest possible universal
* address is an IPv4 address string containing 11 bytes.
*/
- if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+ if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
goto out_err;
/*
#define RPCB_boolean_sz (1u)
#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
-#define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN))
+#define RPCB_addr_sz (1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
#define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
#define RPCB_mappingargs_sz RPCB_program_sz+RPCB_version_sz+ \
/*
* RPC tasks sit here while waiting for conditions to improve.
*/
-static RPC_WAITQ(delay_queue, "delayq");
+static struct rpc_wait_queue delay_queue;
/*
* rpciod-related stuff
if (unlikely(task->tk_priority > queue->maxpriority))
q = &queue->tasks[queue->maxpriority];
list_for_each_entry(t, q, u.tk_wait.list) {
- if (t->tk_cookie == task->tk_cookie) {
+ if (t->tk_owner == task->tk_owner) {
list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
return;
}
queue->count = 1 << (priority * 2);
}
-static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie)
+static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
{
- queue->cookie = cookie;
+ queue->owner = pid;
queue->nr = RPC_BATCH_COUNT;
}
static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
rpc_set_waitqueue_priority(queue, queue->maxpriority);
- rpc_set_waitqueue_cookie(queue, 0);
+ rpc_set_waitqueue_owner(queue, 0);
}
-static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio)
+static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
{
int i;
spin_lock_init(&queue->lock);
for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
INIT_LIST_HEAD(&queue->tasks[i]);
- queue->maxpriority = maxprio;
+ queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
#ifdef RPC_DEBUG
queue->name = qname;
void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
- __rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
+ __rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
}
void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
{
- __rpc_init_priority_wait_queue(queue, qname, 0);
+ __rpc_init_priority_wait_queue(queue, qname, 1);
}
-EXPORT_SYMBOL(rpc_init_wait_queue);
+EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
- static int rpc_wait_bit_interruptible(void *word)
+ static int rpc_wait_bit_killable(void *word)
{
- if (signal_pending(current))
+ if (fatal_signal_pending(current))
return -ERESTARTSYS;
schedule();
return 0;
int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
{
if (action == NULL)
- action = rpc_wait_bit_interruptible;
+ action = rpc_wait_bit_killable;
return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
- action, TASK_INTERRUPTIBLE);
+ action, TASK_KILLABLE);
}
-EXPORT_SYMBOL(__rpc_wait_for_completion_task);
+EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
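The killable-wait idiom above is generic: the bit-wait action returns -ERESTARTSYS only when a fatal signal is pending, and the sleep itself uses TASK_KILLABLE so ordinary signals cannot interrupt it. A sketch of the same pattern on a hypothetical flag bit MY_FLAG_BUSY:

	/* Sketch: wait killably for a flag bit to clear. */
	static int my_wait_bit_killable(void *word)
	{
		if (fatal_signal_pending(current))
			return -ERESTARTSYS;	/* only SIGKILL breaks out */
		schedule();
		return 0;
	}

	static int my_wait(unsigned long *flags)
	{
		return wait_on_bit(flags, MY_FLAG_BUSY,	/* hypothetical bit */
				   my_wait_bit_killable, TASK_KILLABLE);
	}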
/*
* Make an RPC task runnable.
__rpc_sleep_on(q, task, action, timer);
spin_unlock_bh(&q->lock);
}
+EXPORT_SYMBOL_GPL(rpc_sleep_on);
/**
* __rpc_do_wake_up_task - wake up a single rpc_task
}
rcu_read_unlock_bh();
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_task);
/*
* Wake up the next task on a priority queue.
struct rpc_task *task;
/*
- * Service a batch of tasks from a single cookie.
+ * Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
if (!list_empty(q)) {
task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
- if (queue->cookie == task->tk_cookie) {
+ if (queue->owner == task->tk_owner) {
if (--queue->nr)
goto out;
list_move_tail(&task->u.tk_wait.list, q);
* Check if we need to switch queues.
*/
if (--queue->count)
- goto new_cookie;
+ goto new_owner;
}
/*
new_queue:
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_cookie:
- rpc_set_waitqueue_cookie(queue, task->tk_cookie);
+new_owner:
+ rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
__rpc_wake_up_task(task);
return task;
return task;
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_next);
/**
* rpc_wake_up - wake up all rpc_tasks
spin_unlock(&queue->lock);
rcu_read_unlock_bh();
}
+EXPORT_SYMBOL_GPL(rpc_wake_up);
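Taken together, the newly exported queue primitives let a GPL module manage its own wait queue: initialise it once, park tasks on it, and wake them in bulk. A hedged sketch with a hypothetical embedding structure:

	/* Sketch: a module-private RPC wait queue. */
	struct my_ctx {
		struct rpc_wait_queue	wq;
	};

	static void my_ctx_init(struct my_ctx *ctx)
	{
		rpc_init_wait_queue(&ctx->wq, "my_ctx_wq");
	}

	/* A task parks itself (no timeout, no callbacks)... */
	static void my_block(struct my_ctx *ctx, struct rpc_task *task)
	{
		task->tk_timeout = 0;
		rpc_sleep_on(&ctx->wq, task, NULL, NULL);
	}

	/* ...and another context releases everything queued. */
	static void my_release(struct my_ctx *ctx)
	{
		rpc_wake_up(&ctx->wq);
	}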
/**
* rpc_wake_up_status - wake up all rpc_tasks and set their status value.
spin_unlock(&queue->lock);
rcu_read_unlock_bh();
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_status);
static void __rpc_atrun(struct rpc_task *task)
{
task->tk_timeout = delay;
rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
}
+EXPORT_SYMBOL_GPL(rpc_delay);
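rpc_delay() parks the task on the (now lazily initialised) delay_queue for the given number of jiffies. A typical use is backing off and retrying from the top of the state machine, sketched here with the exported rpc_restart_call():

	/* Sketch: back off for one second, then retry the call. */
	static void my_retry_later(struct rpc_task *task)
	{
		rpc_delay(task, HZ);	/* sleep on delay_queue */
		rpc_restart_call(task);	/* resume from call_start */
	}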
/*
* Helper to call task->tk_ops->rpc_call_prepare
}
}
}
-EXPORT_SYMBOL(rpc_exit_task);
+EXPORT_SYMBOL_GPL(rpc_exit_task);
void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
{
/* sync task: sleep here */
dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
- /* Note: Caller should be using rpc_clnt_sigmask() */
status = out_of_line_wait_on_bit(&task->tk_runstate,
- RPC_TASK_QUEUED, rpc_wait_bit_interruptible,
- TASK_INTERRUPTIBLE);
+ RPC_TASK_QUEUED, rpc_wait_bit_killable,
+ TASK_KILLABLE);
if (status == -ERESTARTSYS) {
/*
* When a sync task receives a signal, it exits with
/*
* Creation and deletion of RPC task structures
*/
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
{
memset(task, 0, sizeof(*task));
- init_timer(&task->tk_timer);
- task->tk_timer.data = (unsigned long) task;
- task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
+ setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
+ (unsigned long)task);
atomic_set(&task->tk_count, 1);
- task->tk_client = clnt;
- task->tk_flags = flags;
- task->tk_ops = tk_ops;
- if (tk_ops->rpc_call_prepare != NULL)
- task->tk_action = rpc_prepare_task;
- task->tk_calldata = calldata;
+ task->tk_flags = task_setup_data->flags;
+ task->tk_ops = task_setup_data->callback_ops;
+ task->tk_calldata = task_setup_data->callback_data;
INIT_LIST_HEAD(&task->tk_task);
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
- task->tk_priority = RPC_PRIORITY_NORMAL;
- task->tk_cookie = (unsigned long)current;
+ task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
+ task->tk_owner = current->tgid;
/* Initialize workqueue for async tasks */
task->tk_workqueue = rpciod_workqueue;
- if (clnt) {
- kref_get(&clnt->cl_kref);
- if (clnt->cl_softrtry)
+ task->tk_client = task_setup_data->rpc_client;
+ if (task->tk_client != NULL) {
+ kref_get(&task->tk_client->cl_kref);
+ if (task->tk_client->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
- if (!task->tk_client->cl_intr)
- task->tk_flags |= RPC_TASK_NOINTR;
}
- BUG_ON(task->tk_ops == NULL);
+ if (task->tk_ops->rpc_call_prepare != NULL)
+ task->tk_action = rpc_prepare_task;
+
+ if (task_setup_data->rpc_message != NULL) {
+ memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg));
+ /* Bind the user cred */
+ if (task->tk_msg.rpc_cred != NULL)
+ rpcauth_holdcred(task);
+ else
+ rpcauth_bindcred(task);
+ if (task->tk_action == NULL)
+ rpc_call_start(task);
+ }
/* starting timestamp */
task->tk_start = jiffies;
/*
* Create a new task for the specified client.
*/
-struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
{
- struct rpc_task *task;
-
- task = rpc_alloc_task();
- if (!task)
- goto out;
+ struct rpc_task *task = setup_data->task;
+ unsigned short flags = 0;
+
+ if (task == NULL) {
+ task = rpc_alloc_task();
+ if (task == NULL)
+ goto out;
+ flags = RPC_TASK_DYNAMIC;
+ }
- rpc_init_task(task, clnt, flags, tk_ops, calldata);
+ rpc_init_task(task, setup_data);
+ task->tk_flags |= flags;
dprintk("RPC: allocated task %p\n", task);
- task->tk_flags |= RPC_TASK_DYNAMIC;
out:
return task;
}
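Because rpc_new_task() now honours setup_data->task, a caller can embed the rpc_task in a larger structure and hand it in; only tasks the scheduler allocated itself are marked RPC_TASK_DYNAMIC and freed on release. A hedged sketch, with a hypothetical request structure:

	/* Sketch: run an RPC on a caller-provided, embedded task.
	 * The caller must keep my_req alive until the task completes,
	 * since RPC_TASK_DYNAMIC is not set and nothing is kfree'd. */
	struct my_req {
		struct rpc_task		task;
		/* ... caller state ... */
	};

	static struct rpc_task *my_run(struct my_req *req,
				       struct rpc_task_setup *setup)
	{
		setup->task = &req->task;	/* skip rpc_alloc_task() */
		return rpc_run_task(setup);
	}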
call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
rpc_release_calldata(tk_ops, calldata);
}
-EXPORT_SYMBOL(rpc_put_task);
+EXPORT_SYMBOL_GPL(rpc_put_task);
static void rpc_release_task(struct rpc_task *task)
{
}
spin_unlock(&clnt->cl_lock);
}
+EXPORT_SYMBOL_GPL(rpc_killall_tasks);
int rpciod_up(void)
{
goto err_nomem;
if (!rpciod_start())
goto err_nomem;
+ /*
+ * The following is not strictly a mempool initialisation,
+ * but there is no harm in doing it here
+ */
+ rpc_init_wait_queue(&delay_queue, "delayq");
return 0;
err_nomem:
rpc_destroy_mempool();