[patch 2/3] OCFS2 Configurable timeouts
[sfrench/cifs-2.6.git] / fs / ocfs2 / cluster / tcp.c
index 0f60cc0d3985d9e80596e3f63b7059cb594d7d97..ebbaee664c667d26c4e9562c49697c45885a0fe0 100644 (file)
            ##args);                                                    \
 } while (0)
 
-static rwlock_t o2net_handler_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(o2net_handler_lock);
 static struct rb_root o2net_handler_tree = RB_ROOT;
 
 static struct o2net_node o2net_nodes[O2NM_MAX_NODES];
@@ -140,13 +140,35 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] =
                 [O2NET_ERR_DIED]       = -EHOSTDOWN,};
 
 /* can't quite avoid *all* internal declarations :/ */
-static void o2net_sc_connect_completed(void *arg);
-static void o2net_rx_until_empty(void *arg);
-static void o2net_shutdown_sc(void *arg);
+static void o2net_sc_connect_completed(struct work_struct *work);
+static void o2net_rx_until_empty(struct work_struct *work);
+static void o2net_shutdown_sc(struct work_struct *work);
 static void o2net_listen_data_ready(struct sock *sk, int bytes);
-static void o2net_sc_send_keep_req(void *arg);
+static void o2net_sc_send_keep_req(struct work_struct *work);
 static void o2net_idle_timer(unsigned long data);
 static void o2net_sc_postpone_idle(struct o2net_sock_container *sc);
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc);
+
+/*
+ * 'node' is unused: each helper reads a cl_*_ms field of o2nm_single_cluster.
+ * FIXME: These should use to_o2nm_cluster_from_node(), but we end up losing
+ * our parent link to the cluster during shutdown. This can be solved by a
+ * configfs pre-removal callback, or passing the cluster with the node. -jeffm
+ */
+static inline int o2net_reconnect_delay(struct o2nm_node *node)
+{
+       return o2nm_single_cluster->cl_reconnect_delay_ms;
+}
+
+static inline int o2net_keepalive_delay(struct o2nm_node *node)
+{
+       return o2nm_single_cluster->cl_keepalive_delay_ms;
+}
+
+static inline int o2net_idle_timeout(struct o2nm_node *node)
+{
+       return o2nm_single_cluster->cl_idle_timeout_ms;
+}
 
 static inline int o2net_sys_err_to_errno(enum o2net_system_error err)
 {
@@ -271,6 +293,8 @@ static void sc_kref_release(struct kref *kref)
 {
        struct o2net_sock_container *sc = container_of(kref,
                                        struct o2net_sock_container, sc_kref);
+       BUG_ON(timer_pending(&sc->sc_idle_timeout));
+
        sclog(sc, "releasing\n");
 
        if (sc->sc_sock) {
@@ -308,10 +332,10 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node)
        o2nm_node_get(node);
        sc->sc_node = node;
 
-       INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed, sc);
-       INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty, sc);
-       INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc, sc);
-       INIT_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req, sc);
+       INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed);
+       INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty);
+       INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc);
+       INIT_DELAYED_WORK(&sc->sc_keepalive_work, o2net_sc_send_keep_req);
 
        init_timer(&sc->sc_idle_timeout);
        sc->sc_idle_timeout.function = o2net_idle_timer;
@@ -342,7 +366,7 @@ static void o2net_sc_queue_work(struct o2net_sock_container *sc,
                sc_put(sc);
 }
 static void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc,
-                                       struct work_struct *work,
+                                       struct delayed_work *work,
                                        int delay)
 {
        sc_get(sc);
@@ -350,7 +374,7 @@ static void o2net_sc_queue_delayed_work(struct o2net_sock_container *sc,
                sc_put(sc);
 }
 static void o2net_sc_cancel_delayed_work(struct o2net_sock_container *sc,
-                                        struct work_struct *work)
+                                        struct delayed_work *work)
 {
        if (cancel_delayed_work(work))
                sc_put(sc);
@@ -396,8 +420,8 @@ static void o2net_set_nn_state(struct o2net_node *nn,
        }
 
        if (was_valid && !valid) {
-               mlog(ML_NOTICE, "no longer connected to " SC_NODEF_FMT "\n",
-                    SC_NODEF_ARGS(old_sc));
+               printk(KERN_INFO "o2net: no longer connected to "
+                      SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
                o2net_complete_nodes_nsw(nn);
        }
 
@@ -409,10 +433,10 @@ static void o2net_set_nn_state(struct o2net_node *nn,
                 * the only way to start connecting again is to down
                 * heartbeat and bring it back up. */
                cancel_delayed_work(&nn->nn_connect_expired);
-               mlog(ML_NOTICE, "%s " SC_NODEF_FMT "\n", 
-                    o2nm_this_node() > sc->sc_node->nd_num ?
-                       "connected to" : "accepted connection from",
-                    SC_NODEF_ARGS(sc));
+               printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
+                      o2nm_this_node() > sc->sc_node->nd_num ?
+                               "connected to" : "accepted connection from",
+                      SC_NODEF_ARGS(sc));
        }
 
        /* trigger the connecting worker func as long as we're not valid,
@@ -424,9 +448,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
                /* delay if we're withing a RECONNECT_DELAY of the
                 * last attempt */
                delay = (nn->nn_last_connect_attempt +
-                        msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+                        msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
                        - jiffies;
-               if (delay > msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS))
+               if (delay > msecs_to_jiffies(o2net_reconnect_delay(sc->sc_node)))
                        delay = 0;
                mlog(ML_CONN, "queueing conn attempt in %lu jiffies\n", delay);
                queue_delayed_work(o2net_wq, &nn->nn_connect_work, delay);
@@ -564,9 +588,11 @@ static void o2net_ensure_shutdown(struct o2net_node *nn,
  * ourselves as state_change couldn't get the nn_lock and call set_nn_state
  * itself.
  */
-static void o2net_shutdown_sc(void *arg)
+static void o2net_shutdown_sc(struct work_struct *work)
 {
-       struct o2net_sock_container *sc = arg;
+       struct o2net_sock_container *sc =
+               container_of(work, struct o2net_sock_container,
+                            sc_shutdown_work);
        struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
 
        sclog(sc, "shutting down\n");
@@ -1103,7 +1129,7 @@ static int o2net_check_handshake(struct o2net_sock_container *sc)
        /* set valid and queue the idle timers only if it hasn't been
         * shut down already */
        if (nn->nn_sc == sc) {
-               o2net_sc_postpone_idle(sc);
+               o2net_sc_reset_idle_timer(sc);
                o2net_set_nn_state(nn, sc, 1, 0);
        }
        spin_unlock(&nn->nn_lock);
@@ -1201,9 +1227,10 @@ out:
 /* this work func is triggerd by data ready.  it reads until it can read no
  * more.  it interprets 0, eof, as fatal.  if data_ready hits while we're doing
  * our work the work struct will be marked and we'll be called again. */
-static void o2net_rx_until_empty(void *arg)
+static void o2net_rx_until_empty(struct work_struct *work)
 {
-       struct o2net_sock_container *sc = arg;
+       struct o2net_sock_container *sc =
+               container_of(work, struct o2net_sock_container, sc_rx_work);
        int ret;
 
        do {
@@ -1249,9 +1276,11 @@ static int o2net_set_nodelay(struct socket *sock)
 
 /* called when a connect completes and after a sock is accepted.  the
  * rx path will see the response and mark the sc valid */
-static void o2net_sc_connect_completed(void *arg)
+static void o2net_sc_connect_completed(struct work_struct *work)
 {
-       struct o2net_sock_container *sc = arg;
+       struct o2net_sock_container *sc =
+               container_of(work, struct o2net_sock_container,
+                            sc_connect_work);
 
        mlog(ML_MSG, "sc sending handshake with ver %llu id %llx\n",
               (unsigned long long)O2NET_PROTOCOL_VERSION,
@@ -1262,9 +1291,11 @@ static void o2net_sc_connect_completed(void *arg)
 }
 
 /* this is called as a work_struct func. */
-static void o2net_sc_send_keep_req(void *arg)
+static void o2net_sc_send_keep_req(struct work_struct *work)
 {
-       struct o2net_sock_container *sc = arg;
+       struct o2net_sock_container *sc =
+               container_of(work, struct o2net_sock_container,
+                            sc_keepalive_work.work);
 
        o2net_sendpage(sc, o2net_keep_req, sizeof(*o2net_keep_req));
        sc_put(sc);
@@ -1280,8 +1311,10 @@ static void o2net_idle_timer(unsigned long data)
 
        do_gettimeofday(&now);
 
-       mlog(ML_NOTICE, "connection to " SC_NODEF_FMT " has been idle for 10 "
-            "seconds, shutting it down.\n", SC_NODEF_ARGS(sc));
+       printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
+            "seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
+                    o2net_idle_timeout(sc->sc_node) / 1000,
+                    o2net_idle_timeout(sc->sc_node) % 1000);
        mlog(ML_NOTICE, "here are some times that might help debug the "
             "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
             "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
@@ -1299,14 +1332,21 @@ static void o2net_idle_timer(unsigned long data)
        o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
 }
 
-static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc)
 {
        o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work);
        o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work,
-                                   O2NET_KEEPALIVE_DELAY_SECS * HZ);
+                     msecs_to_jiffies(o2net_keepalive_delay(sc->sc_node)));
        do_gettimeofday(&sc->sc_tv_timer);
        mod_timer(&sc->sc_idle_timeout,
-                 jiffies + (O2NET_IDLE_TIMEOUT_SECS * HZ));
+              jiffies + msecs_to_jiffies(o2net_idle_timeout(sc->sc_node)));
+}
+
+static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
+{
+       /* Only push out an already-pending timer; never arm a new one */
+       if (timer_pending(&sc->sc_idle_timeout))
+               o2net_sc_reset_idle_timer(sc);
 }
 
 /* this work func is kicked whenever a path sets the nn state which doesn't
@@ -1314,14 +1354,15 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc)
  * having a connect attempt fail, etc. This centralizes the logic which decides
  * if a connect attempt should be made or if we should give up and all future
  * transmit attempts should fail */
-static void o2net_start_connect(void *arg)
+static void o2net_start_connect(struct work_struct *work)
 {
-       struct o2net_node *nn = arg;
+       struct o2net_node *nn =
+               container_of(work, struct o2net_node, nn_connect_work.work);
        struct o2net_sock_container *sc = NULL;
        struct o2nm_node *node = NULL, *mynode = NULL;
        struct socket *sock = NULL;
        struct sockaddr_in myaddr = {0, }, remoteaddr = {0, };
-       int ret = 0;
+       int ret = 0, stop;
 
        /* if we're greater we initiate tx, otherwise we accept */
        if (o2nm_this_node() <= o2net_num_from_nn(nn))
@@ -1342,10 +1383,9 @@ static void o2net_start_connect(void *arg)
 
        spin_lock(&nn->nn_lock);
        /* see if we already have one pending or have given up */
-       if (nn->nn_sc || nn->nn_persistent_error)
-               arg = NULL;
+       stop = (nn->nn_sc || nn->nn_persistent_error);
        spin_unlock(&nn->nn_lock);
-       if (arg == NULL) /* *shrug*, needed some indicator */
+       if (stop)
                goto out;
 
        nn->nn_last_connect_attempt = jiffies;
@@ -1421,24 +1461,29 @@ out:
        return;
 }
 
-static void o2net_connect_expired(void *arg)
+static void o2net_connect_expired(struct work_struct *work)
 {
-       struct o2net_node *nn = arg;
+       struct o2net_node *nn =
+               container_of(work, struct o2net_node, nn_connect_expired.work);
 
        spin_lock(&nn->nn_lock);
        if (!nn->nn_sc_valid) {
+               /* nn_sc may be NULL here; the timeout helper ignores 'node' */
                mlog(ML_ERROR, "no connection established with node %u after "
-                    "%u seconds, giving up and returning errors.\n",
-                    o2net_num_from_nn(nn), O2NET_IDLE_TIMEOUT_SECS);
+                    "%u.%03u seconds, giving up and returning errors.\n",
+                    o2net_num_from_nn(nn),
+                    o2net_idle_timeout(NULL) / 1000,
+                    o2net_idle_timeout(NULL) % 1000);
 
                o2net_set_nn_state(nn, NULL, 0, -ENOTCONN);
        }
        spin_unlock(&nn->nn_lock);
 }
 
-static void o2net_still_up(void *arg)
+static void o2net_still_up(struct work_struct *work)
 {
-       struct o2net_node *nn = arg;
+       struct o2net_node *nn = /* the o2net_node embedding 'work' */
+               container_of(work, struct o2net_node, nn_still_up.work);
 
        o2quo_hb_still_up(o2net_num_from_nn(nn));
 }
@@ -1480,14 +1525,14 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num,
 
        /* ensure an immediate connect attempt */
        nn->nn_last_connect_attempt = jiffies -
-               (msecs_to_jiffies(O2NET_RECONNECT_DELAY_MS) + 1);
+               (msecs_to_jiffies(o2net_reconnect_delay(node)) + 1);
 
        if (node_num != o2nm_this_node()) {
                /* heartbeat doesn't work unless a local node number is
                 * configured and doing so brings up the o2net_wq, so we can
                 * use it.. */
                queue_delayed_work(o2net_wq, &nn->nn_connect_expired,
-                                  O2NET_IDLE_TIMEOUT_SECS * HZ);
+                                  msecs_to_jiffies(o2net_idle_timeout(node)));
 
                /* believe it or not, accept and node hearbeating testing
                 * can succeed for this node before we got here.. so
@@ -1644,9 +1689,9 @@ out:
        return ret;
 }
 
-static void o2net_accept_many(void *arg)
+static void o2net_accept_many(struct work_struct *work)
 {
-       struct socket *sock = arg;
+       struct socket *sock = o2net_listen_sock; /* 'work' itself is unused */
        while (o2net_accept_one(sock) == 0)
                cond_resched();
 }
@@ -1700,7 +1745,7 @@ static int o2net_open_listening_sock(__be16 port)
        write_unlock_bh(&sock->sk->sk_callback_lock);
 
        o2net_listen_sock = sock;
-       INIT_WORK(&o2net_listen_work, o2net_accept_many, sock);
+       INIT_WORK(&o2net_listen_work, o2net_accept_many);
 
        sock->sk->sk_reuse = 1;
        ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
@@ -1819,9 +1864,10 @@ int o2net_init(void)
                struct o2net_node *nn = o2net_nn_from_num(i);
 
                spin_lock_init(&nn->nn_lock);
-               INIT_WORK(&nn->nn_connect_work, o2net_start_connect, nn);
-               INIT_WORK(&nn->nn_connect_expired, o2net_connect_expired, nn);
-               INIT_WORK(&nn->nn_still_up, o2net_still_up, nn);
+               INIT_DELAYED_WORK(&nn->nn_connect_work, o2net_start_connect);
+               INIT_DELAYED_WORK(&nn->nn_connect_expired,
+                                 o2net_connect_expired);
+               INIT_DELAYED_WORK(&nn->nn_still_up, o2net_still_up);
                /* until we see hb from a node we'll return einval */
                nn->nn_persistent_error = -ENOTCONN;
                init_waitqueue_head(&nn->nn_sc_wq);