Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/hid
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 93d1f9b469d4cce769a5923686bd96753969a491..be2d2da9cdba41db2c09f8bea3b1d8df0474f24e 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -684,6 +684,9 @@ static int is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
        else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
                rv = SS_NO_REMOTE_DISK;
 
+       else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
+               rv = SS_NO_UP_TO_DATE_DISK;
+
        else if ((ns.conn == C_CONNECTED ||
                  ns.conn == C_WF_BITMAP_S ||
                  ns.conn == C_SYNC_SOURCE ||
@@ -840,7 +843,12 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
                        break;
                case C_WF_BITMAP_S:
                case C_PAUSED_SYNC_S:
-                       ns.pdsk = D_OUTDATED;
+                       /* remap any consistent state to D_OUTDATED,
+                        * but disallow "upgrade" of not even consistent states.
+                        */
+                       ns.pdsk =
+                               (D_DISKLESS < os.pdsk && os.pdsk < D_OUTDATED)
+                               ? os.pdsk : D_OUTDATED;
                        break;
                case C_SYNC_SOURCE:
                        ns.pdsk = D_INCONSISTENT;
@@ -1205,21 +1213,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        &&  (ns.pdsk < D_INCONSISTENT ||
             ns.pdsk == D_UNKNOWN ||
             ns.pdsk == D_OUTDATED)) {
-               kfree(mdev->p_uuid);
-               mdev->p_uuid = NULL;
                if (get_ldev(mdev)) {
                        if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
-                           mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
-                               drbd_uuid_new_current(mdev);
-                               drbd_send_uuids(mdev);
-                       }
+                           mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE &&
+                           !atomic_read(&mdev->new_c_uuid))
+                               atomic_set(&mdev->new_c_uuid, 2);
                        put_ldev(mdev);
                }
        }
 
        if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
-               if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0)
-                       drbd_uuid_new_current(mdev);
+               /* Diskless peer becomes primary, or got connected to a diskless, primary peer. */
+               if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 &&
+                   !atomic_read(&mdev->new_c_uuid))
+                       atomic_set(&mdev->new_c_uuid, 2);
 
                /* D_DISKLESS Peer becomes secondary */
                if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1232,7 +1239,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
            os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
                kfree(mdev->p_uuid); /* We expect to receive up-to-date UUIDs soon. */
                mdev->p_uuid = NULL; /* ...to not use the old ones in the mean time */
-               drbd_send_sizes(mdev, 0);  /* to start sync... */
+               drbd_send_sizes(mdev, 0, 0);  /* to start sync... */
                drbd_send_uuids(mdev);
                drbd_send_state(mdev);
        }
@@ -1343,6 +1350,24 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
        drbd_md_sync(mdev);
 }
 
+static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+       if (get_ldev(mdev)) {
+               if (mdev->ldev->md.uuid[UI_BITMAP] == 0) {
+                       drbd_uuid_new_current(mdev);
+                       if (get_net_conf(mdev)) {
+                               drbd_send_uuids(mdev);
+                               put_net_conf(mdev);
+                       }
+                       drbd_md_sync(mdev);
+               }
+               put_ldev(mdev);
+       }
+       atomic_dec(&mdev->new_c_uuid);
+       wake_up(&mdev->misc_wait);
+
+       return 1;
+}
 
 static int drbd_thread_setup(void *arg)
 {
@@ -1755,7 +1780,7 @@ int drbd_send_sync_uuid(struct drbd_conf *mdev, u64 val)
                             (struct p_header *)&p, sizeof(p));
 }
 
-int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
+int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
 {
        struct p_sizes p;
        sector_t d_size, u_size;
@@ -1767,7 +1792,6 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
                d_size = drbd_get_max_capacity(mdev->ldev);
                u_size = mdev->ldev->dc.disk_size;
                q_order_type = drbd_queue_order_type(mdev);
-               p.queue_order_type = cpu_to_be32(drbd_queue_order_type(mdev));
                put_ldev(mdev);
        } else {
                d_size = 0;
@@ -1779,7 +1803,8 @@ int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply)
        p.u_size = cpu_to_be64(u_size);
        p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
        p.max_segment_size = cpu_to_be32(queue_max_segment_size(mdev->rq_queue));
-       p.queue_order_type = cpu_to_be32(q_order_type);
+       p.queue_order_type = cpu_to_be16(q_order_type);
+       p.dds_flags = cpu_to_be16(flags);
 
        ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
                           (struct p_header *)&p, sizeof(p));
@@ -2180,6 +2205,43 @@ int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
        return ok;
 }
 
+static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
+{
+       struct p_delay_probe dp;
+       int offset, ok = 0;
+       struct timeval now;
+
+       mutex_lock(&ds->mutex);
+       if (likely(ds->socket)) {
+               do_gettimeofday(&now);
+               offset = now.tv_usec - mdev->dps_time.tv_usec +
+                        (now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
+               dp.seq_num  = cpu_to_be32(mdev->delay_seq);
+               dp.offset   = cpu_to_be32(offset);
+
+               ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
+                                   (struct p_header *)&dp, sizeof(dp), 0);
+       }
+       mutex_unlock(&ds->mutex);
+
+       return ok;
+}
+
+static int drbd_send_delay_probes(struct drbd_conf *mdev)
+{
+       int ok;
+
+       mdev->delay_seq++;
+       do_gettimeofday(&mdev->dps_time);
+       ok = drbd_send_delay_probe(mdev, &mdev->meta);
+       ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
+
+       mdev->dp_volume_last = mdev->send_cnt;
+       mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
+
+       return ok;
+}
+
 /* called on sndtimeo
  * returns FALSE if we should retry,
  * TRUE if we think connection is dead
@@ -2309,6 +2371,44 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
        return 1;
 }
 
+static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
+{
+       struct page *page = e->pages;
+       unsigned len = e->size;
+       page_chain_for_each(page) {
+               unsigned l = min_t(unsigned, len, PAGE_SIZE);
+               if (!_drbd_send_page(mdev, page, 0, l))
+                       return 0;
+               len -= l;
+       }
+       return 1;
+}
+
+static void consider_delay_probes(struct drbd_conf *mdev)
+{
+       if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93)
+               return;
+
+       if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt)
+               drbd_send_delay_probes(mdev);
+}
+
+static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
+{
+       if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
+               drbd_send_delay_probes(mdev);
+
+       return 1;
+}
+
+static void delay_probe_timer_fn(unsigned long data)
+{
+       struct drbd_conf *mdev = (struct drbd_conf *) data;
+
+       if (list_empty(&mdev->delay_probe_work.list))
+               drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
+}
+
 /* Used to send write requests
  * R_PRIMARY -> Peer   (P_DATA)
  */
@@ -2360,7 +2460,7 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
                drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE));
        if (ok && dgs) {
                dgb = mdev->int_dig_out;
-               drbd_csum(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
+               drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
                ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
        }
        if (ok) {
@@ -2371,6 +2471,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
        }
 
        drbd_put_data_sock(mdev);
+
+       if (ok)
+               consider_delay_probes(mdev);
+
        return ok;
 }
 
@@ -2409,13 +2513,17 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
                                        sizeof(p), MSG_MORE);
        if (ok && dgs) {
                dgb = mdev->int_dig_out;
-               drbd_csum(mdev, mdev->integrity_w_tfm, e->private_bio, dgb);
+               drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
                ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE);
        }
        if (ok)
-               ok = _drbd_send_zc_bio(mdev, e->private_bio);
+               ok = _drbd_send_zc_ee(mdev, e);
 
        drbd_put_data_sock(mdev);
+
+       if (ok)
+               consider_delay_probes(mdev);
+
        return ok;
 }
 
@@ -2600,6 +2708,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        atomic_set(&mdev->net_cnt, 0);
        atomic_set(&mdev->packet_seq, 0);
        atomic_set(&mdev->pp_in_use, 0);
+       atomic_set(&mdev->new_c_uuid, 0);
 
        mutex_init(&mdev->md_io_mutex);
        mutex_init(&mdev->data.mutex);
@@ -2628,16 +2737,26 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
        INIT_LIST_HEAD(&mdev->unplug_work.list);
        INIT_LIST_HEAD(&mdev->md_sync_work.list);
        INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
+       INIT_LIST_HEAD(&mdev->delay_probes);
+       INIT_LIST_HEAD(&mdev->delay_probe_work.list);
+       INIT_LIST_HEAD(&mdev->uuid_work.list);
+
        mdev->resync_work.cb  = w_resync_inactive;
        mdev->unplug_work.cb  = w_send_write_hint;
        mdev->md_sync_work.cb = w_md_sync;
        mdev->bm_io_work.w.cb = w_bitmap_io;
+       mdev->delay_probe_work.cb = w_delay_probes;
+       mdev->uuid_work.cb = w_new_current_uuid;
        init_timer(&mdev->resync_timer);
        init_timer(&mdev->md_sync_timer);
+       init_timer(&mdev->delay_probe_timer);
        mdev->resync_timer.function = resync_timer_fn;
        mdev->resync_timer.data = (unsigned long) mdev;
        mdev->md_sync_timer.function = md_sync_timer_fn;
        mdev->md_sync_timer.data = (unsigned long) mdev;
+       mdev->delay_probe_timer.function = delay_probe_timer_fn;
+       mdev->delay_probe_timer.data = (unsigned long) mdev;
+
 
        init_waitqueue_head(&mdev->misc_wait);
        init_waitqueue_head(&mdev->state_wait);
@@ -2680,7 +2799,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
        drbd_set_my_capacity(mdev, 0);
        if (mdev->bitmap) {
                /* maybe never allocated. */
-               drbd_bm_resize(mdev, 0);
+               drbd_bm_resize(mdev, 0, 1);
                drbd_bm_cleanup(mdev);
        }
 
@@ -3129,7 +3248,7 @@ int __init drbd_init(void)
        if (err)
                goto Enomem;
 
-       drbd_proc = proc_create("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops);
+       drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
        if (!drbd_proc) {
                printk(KERN_ERR "drbd: unable to register proc file\n");
                goto Enomem;
@@ -3660,7 +3779,8 @@ _drbd_fault_str(unsigned int type) {
                [DRBD_FAULT_DT_RD] = "Data read",
                [DRBD_FAULT_DT_RA] = "Data read ahead",
                [DRBD_FAULT_BM_ALLOC] = "BM allocation",
-               [DRBD_FAULT_AL_EE] = "EE allocation"
+               [DRBD_FAULT_AL_EE] = "EE allocation",
+               [DRBD_FAULT_RECEIVE] = "receive data corruption",
        };
 
        return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";