Merge tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jan 2019 02:49:58 +0000 (18:49 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jan 2019 02:49:58 +0000 (18:49 -0800)
Pull more block updates from Jens Axboe:

 - Dead code removal for loop/sunvdc (Chengguang)

 - Mark BIDI support for bsg as deprecated, logging a single dmesg
   warning if anyone is actually using it (Christoph)

 - blkcg cleanup, killing a dead function and making the tryget_closest
   variant easier to read (Dennis)

 - Floppy fixes, one fixing a regression in swim3 (Finn)

 - lightnvm use-after-free fix (Gustavo)

 - gdrom leak fix (Wenwen)

 - a set of drbd updates (Lars, Luc, Nathan, Roland)

* tag 'for-4.21/block-20190102' of git://git.kernel.dk/linux-block: (28 commits)
  block/swim3: Fix regression on PowerBook G3
  block/swim3: Fix -EBUSY error when re-opening device after unmount
  block/swim3: Remove dead return statement
  block/amiflop: Don't log error message on invalid ioctl
  gdrom: fix a memory leak bug
  lightnvm: pblk: fix use-after-free bug
  block: sunvdc: remove redundant code
  block: loop: remove redundant code
  bsg: deprecate BIDI support in bsg
  blkcg: remove unused __blkg_release_rcu()
  blkcg: clean up blkg_tryget_closest()
  drbd: Change drbd_request_detach_interruptible's return type to int
  drbd: Avoid Clang warning about pointless switch statment
  drbd: introduce P_ZEROES (REQ_OP_WRITE_ZEROES on the "wire")
  drbd: skip spurious timeout (ping-timeo) when failing promote
  drbd: don't retry connection if peers do not agree on "authentication" settings
  drbd: fix print_st_err()'s prototype to match the definition
  drbd: avoid spurious self-outdating with concurrent disconnect / down
  drbd: do not block when adjusting "disk-options" while IO is frozen
  drbd: fix comment typos
  ...

23 files changed:
block/bio.c
block/blk-cgroup.c
block/bsg.c
drivers/block/amiflop.c
drivers/block/drbd/drbd_debugfs.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_nl.c
drivers/block/drbd/drbd_protocol.h
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_req.h
drivers/block/drbd/drbd_state.c
drivers/block/drbd/drbd_state.h
drivers/block/drbd/drbd_worker.c
drivers/block/loop.c
drivers/block/sunvdc.c
drivers/block/swim3.c
drivers/cdrom/gdrom.c
drivers/lightnvm/pblk-recovery.c
include/linux/blk-cgroup.h
include/linux/drbd.h
include/linux/genl_magic_struct.h

index 8281bfcbc265a211302e5cf0195002f35eab6c3c..4db1008309edd9c6ca8113afb553aad969de5b85 100644 (file)
@@ -2097,8 +2097,12 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg);
  */
 void bio_clone_blkg_association(struct bio *dst, struct bio *src)
 {
+       rcu_read_lock();
+
        if (src->bi_blkg)
                __bio_associate_blkg(dst, src->bi_blkg);
+
+       rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
 #endif /* CONFIG_BLK_CGROUP */
index c8cc1cbb637003d78e8b09577a0f49c0d9bfcb9a..2bed5725aa035e28c03225ffa4b8c4f8aeb9a85a 100644 (file)
@@ -438,29 +438,6 @@ static void blkg_destroy_all(struct request_queue *q)
        spin_unlock_irq(&q->queue_lock);
 }
 
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-void __blkg_release_rcu(struct rcu_head *rcu_head)
-{
-       struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
-
-       /* release the blkcg and parent blkg refs this blkg has been holding */
-       css_put(&blkg->blkcg->css);
-       if (blkg->parent)
-               blkg_put(blkg->parent);
-
-       wb_congested_put(blkg->wb_congested);
-
-       blkg_free(blkg);
-}
-EXPORT_SYMBOL_GPL(__blkg_release_rcu);
-
 static int blkcg_reset_stats(struct cgroup_subsys_state *css,
                             struct cftype *cftype, u64 val)
 {
index 44f6028b9567b46c50cbf3d9a57bade0b157e95a..50e5f8f666f2a907d99d17e2fb8ca311a8365a98 100644 (file)
@@ -177,6 +177,10 @@ bsg_map_hdr(struct request_queue *q, struct sg_io_v4 *hdr, fmode_t mode)
                        goto out;
                }
 
+               pr_warn_once(
+                       "BIDI support in bsg has been deprecated and might be removed. "
+                       "Please report your use case to linux-scsi@vger.kernel.org\n");
+
                next_rq = blk_get_request(q, REQ_OP_SCSI_IN, 0);
                if (IS_ERR(next_rq)) {
                        ret = PTR_ERR(next_rq);
index bf996bd44cfcda3a33105c29ad2ba44d45238605..0903e0803ec8e6aeaea3c99c455cea844ce7e661 100644 (file)
@@ -1601,8 +1601,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
                        return p->type->read_size;
 #endif
        default:
-               printk(KERN_DEBUG "fd_ioctl: unknown cmd %d for drive %d.",
-                      cmd, drive);
                return -ENOSYS;
        }
        return 0;
index 5d5e8d6a8a56bb30a6ff4cc2afb71f36910ca58f..f13b48ff5f43223bf6ea20584eb260fd8e995696 100644 (file)
@@ -237,6 +237,8 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
        seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
        seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
        seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
+       seq_print_rq_state_bit(m, f & EE_TRIM, &sep, "trim");
+       seq_print_rq_state_bit(m, f & EE_ZEROOUT, &sep, "zero-out");
        seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
        seq_putc(m, '\n');
 }
index 1e47db57b9d222b8f440aa3f8b21b943a18b1d9f..000a2f4c0e9283cada5a512cb1e4e7204080cde2 100644 (file)
@@ -430,7 +430,11 @@ enum {
        __EE_MAY_SET_IN_SYNC,
 
        /* is this a TRIM aka REQ_OP_DISCARD? */
-       __EE_IS_TRIM,
+       __EE_TRIM,
+       /* explicit zero-out requested, or
+        * our lower level cannot handle trim,
+        * and we want to fall back to zeroout instead */
+       __EE_ZEROOUT,
 
        /* In case a barrier failed,
         * we need to resubmit without the barrier flag. */
@@ -472,7 +476,8 @@ enum {
 };
 #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
 #define EE_MAY_SET_IN_SYNC     (1<<__EE_MAY_SET_IN_SYNC)
-#define EE_IS_TRIM             (1<<__EE_IS_TRIM)
+#define EE_TRIM                (1<<__EE_TRIM)
+#define EE_ZEROOUT             (1<<__EE_ZEROOUT)
 #define EE_RESUBMITTED         (1<<__EE_RESUBMITTED)
 #define EE_WAS_ERROR           (1<<__EE_WAS_ERROR)
 #define EE_HAS_DIGEST          (1<<__EE_HAS_DIGEST)
@@ -1556,6 +1561,8 @@ extern void start_resync_timer_fn(struct timer_list *t);
 extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
 
 /* drbd_receiver.c */
+extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
+               sector_t start, unsigned int nr_sectors, int flags);
 extern int drbd_receiver(struct drbd_thread *thi);
 extern int drbd_ack_receiver(struct drbd_thread *thi);
 extern void drbd_send_ping_wf(struct work_struct *ws);
@@ -1609,13 +1616,7 @@ static inline void drbd_tcp_quickack(struct socket *sock)
 }
 
 /* sets the number of 512 byte sectors of our virtual device */
-static inline void drbd_set_my_capacity(struct drbd_device *device,
-                                       sector_t size)
-{
-       /* set_capacity(device->this_bdev->bd_disk, size); */
-       set_capacity(device->vdisk, size);
-       device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
-}
+void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
 
 /*
  * used to submit our private bio
index f973a2a845c885f460c75bcc92f8cf770ffc903f..714eb64fabfd1286b49837dfb937870e7bba1cd6 100644 (file)
@@ -1668,7 +1668,11 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
                        (bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
                        (bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
                        (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
-                       (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0);
+                       (bio_op(bio) == REQ_OP_WRITE_ZEROES ?
+                         ((connection->agreed_features & DRBD_FF_WZEROES) ?
+                          (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0))
+                          : DP_DISCARD)
+                       : 0);
        else
                return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
 }
@@ -1712,10 +1716,11 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
        }
        p->dp_flags = cpu_to_be32(dp_flags);
 
-       if (dp_flags & DP_DISCARD) {
+       if (dp_flags & (DP_DISCARD|DP_ZEROES)) {
+               enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM;
                struct p_trim *t = (struct p_trim*)p;
                t->size = cpu_to_be32(req->i.size);
-               err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
+               err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
                goto out;
        }
        if (dp_flags & DP_WSAME) {
@@ -2034,6 +2039,21 @@ void drbd_init_set_defaults(struct drbd_device *device)
        device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
 }
 
+static void _drbd_set_my_capacity(struct drbd_device *device, sector_t size)
+{
+       /* set_capacity(device->this_bdev->bd_disk, size); */
+       set_capacity(device->vdisk, size);
+       device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
+{
+       char ppb[10];
+       _drbd_set_my_capacity(device, size);
+       drbd_info(device, "size = %s (%llu KB)\n",
+               ppsize(ppb, size>>1), (unsigned long long)size>>1);
+}
+
 void drbd_device_cleanup(struct drbd_device *device)
 {
        int i;
@@ -2059,7 +2079,7 @@ void drbd_device_cleanup(struct drbd_device *device)
        }
        D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
 
-       drbd_set_my_capacity(device, 0);
+       _drbd_set_my_capacity(device, 0);
        if (device->bitmap) {
                /* maybe never allocated. */
                drbd_bm_resize(device, 0, 1);
index d15703b1ffe843c6c05b01b0ec76425882324bf3..f2471172a96193fbcd5dc68adc4415286647290e 100644 (file)
@@ -127,6 +127,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
        return 0;
 }
 
+__printf(2, 3)
+static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
+{
+       va_list args;
+       struct nlattr *nla, *txt;
+       int err = -EMSGSIZE;
+       int len;
+
+       nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
+       if (!nla)
+               return err;
+
+       txt = nla_reserve(skb, T_info_text, 256);
+       if (!txt) {
+               nla_nest_cancel(skb, nla);
+               return err;
+       }
+       va_start(args, fmt);
+       len = vscnprintf(nla_data(txt), 256, fmt, args);
+       va_end(args);
+
+       /* maybe: retry with larger reserve, if truncated */
+       txt->nla_len = nla_attr_size(len+1);
+       nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
+       nla_nest_end(skb, nla);
+
+       return 0;
+}
+
 /* This would be a good candidate for a "pre_doit" hook,
  * and per-family private info->pointers.
  * But we need to stay compatible with older kernels.
@@ -668,14 +697,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for
                if (rv == SS_TWO_PRIMARIES) {
                        /* Maybe the peer is detected as dead very soon...
                           retry at most once more in this case. */
-                       int timeo;
-                       rcu_read_lock();
-                       nc = rcu_dereference(connection->net_conf);
-                       timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
-                       rcu_read_unlock();
-                       schedule_timeout_interruptible(timeo);
-                       if (try < max_tries)
+                       if (try < max_tries) {
+                               int timeo;
                                try = max_tries - 1;
+                               rcu_read_lock();
+                               nc = rcu_dereference(connection->net_conf);
+                               timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
+                               rcu_read_unlock();
+                               schedule_timeout_interruptible(timeo);
+                       }
                        continue;
                }
                if (rv < SS_SUCCESS) {
@@ -921,7 +951,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
        } prev;
        sector_t u_size, size;
        struct drbd_md *md = &device->ldev->md;
-       char ppb[10];
        void *buffer;
 
        int md_moved, la_size_changed;
@@ -999,8 +1028,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
                /* racy, see comments above. */
                drbd_set_my_capacity(device, size);
                md->la_size_sect = size;
-               drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
-                    (unsigned long long)size>>1);
        }
        if (rv <= DS_ERROR)
                goto err_out;
@@ -1234,6 +1261,21 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
        }
 }
 
+static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
+{
+       /* Fixup max_write_zeroes_sectors after blk_queue_stack_limits():
+        * if we can handle "zeroes" efficiently on the protocol,
+        * we want to do that, even if our backend does not announce
+        * max_write_zeroes_sectors itself. */
+       struct drbd_connection *connection = first_peer_device(device)->connection;
+       /* If the peer announces WZEROES support, use it.  Otherwise, rather
+        * send explicit zeroes than rely on some discard-zeroes-data magic. */
+       if (connection->agreed_features & DRBD_FF_WZEROES)
+               q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
+       else
+               q->limits.max_write_zeroes_sectors = 0;
+}
+
 static void decide_on_write_same_support(struct drbd_device *device,
                        struct request_queue *q,
                        struct request_queue *b, struct o_qlim *o,
@@ -1344,6 +1386,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
                }
        }
        fixup_discard_if_not_supported(q);
+       fixup_write_zeroes(device, q);
 }
 
 void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
@@ -1514,6 +1557,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
        }
 }
 
+static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
+{
+       int err = -EBUSY;
+
+       if (device->act_log &&
+           device->act_log->nr_elements == dc->al_extents)
+               return 0;
+
+       drbd_suspend_io(device);
+       /* If IO completion is currently blocked, we would likely wait
+        * "forever" for the activity log to become unused. So we don't. */
+       if (atomic_read(&device->ap_bio_cnt))
+               goto out;
+
+       wait_event(device->al_wait, lc_try_lock(device->act_log));
+       drbd_al_shrink(device);
+       err = drbd_check_al_size(device, dc);
+       lc_unlock(device->act_log);
+       wake_up(&device->al_wait);
+out:
+       drbd_resume_io(device);
+       return err;
+}
+
 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
 {
        struct drbd_config_context adm_ctx;
@@ -1576,15 +1643,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
                }
        }
 
-       drbd_suspend_io(device);
-       wait_event(device->al_wait, lc_try_lock(device->act_log));
-       drbd_al_shrink(device);
-       err = drbd_check_al_size(device, new_disk_conf);
-       lc_unlock(device->act_log);
-       wake_up(&device->al_wait);
-       drbd_resume_io(device);
-
+       err = disk_opts_check_al_size(device, new_disk_conf);
        if (err) {
+               /* Could be just "busy". Ignore?
+                * Introduce dedicated error code? */
+               drbd_msg_put_info(adm_ctx.reply_skb,
+                       "Try again without changing current al-extents setting");
                retcode = ERR_NOMEM;
                goto fail_unlock;
        }
@@ -1934,9 +1998,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
                }
        }
 
-       if (device->state.conn < C_CONNECTED &&
-           device->state.role == R_PRIMARY && device->ed_uuid &&
-           (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
+       if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
+           (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
+            (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
                drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
                    (unsigned long long)device->ed_uuid);
                retcode = ERR_DATA_NOT_CURRENT;
@@ -1950,11 +2014,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
        }
 
        /* Prevent shrinking of consistent devices ! */
-       if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
-           drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
-               drbd_warn(device, "refusing to truncate a consistent device\n");
-               retcode = ERR_DISK_TOO_SMALL;
-               goto force_diskless_dec;
+       {
+       unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
+       unsigned long long eff = nbc->md.la_size_sect;
+       if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
+               if (nsz == nbc->disk_conf->disk_size) {
+                       drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
+               } else {
+                       drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
+                       drbd_msg_sprintf_info(adm_ctx.reply_skb,
+                               "To-be-attached device has last effective > current size, and is consistent\n"
+                               "(%llu > %llu sectors). Refusing to attach.", eff, nsz);
+                       retcode = ERR_IMPLICIT_SHRINK;
+                       goto force_diskless_dec;
+               }
+       }
        }
 
        lock_all_resources();
@@ -2654,8 +2728,10 @@ out:
 
 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
 {
+       enum drbd_conns cstate;
        enum drbd_state_rv rv;
 
+repeat:
        rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
                        force ? CS_HARD : 0);
 
@@ -2673,6 +2749,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
 
                break;
        case SS_CW_FAILED_BY_PEER:
+               spin_lock_irq(&connection->resource->req_lock);
+               cstate = connection->cstate;
+               spin_unlock_irq(&connection->resource->req_lock);
+               if (cstate <= C_WF_CONNECTION)
+                       goto repeat;
                /* The peer probably wants to see us outdated. */
                rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
                                                        disk, D_OUTDATED), 0);
index 48dabbb21e116ec34db6b7e85a79f48944ac4256..e6fc5ad72501625ae23b7bf33856f97dbd312e47 100644 (file)
@@ -70,6 +70,11 @@ enum drbd_packet {
         * we may fall back to an opencoded loop instead. */
        P_WSAME               = 0x34,
 
+       /* 0x35 already claimed in DRBD 9 */
+       P_ZEROES              = 0x36, /* data sock: zero-out, WRITE_ZEROES */
+
+       /* 0x40 .. 0x48 already claimed in DRBD 9 */
+
        P_MAY_IGNORE          = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
        P_MAX_OPT_CMD         = 0x101,
 
@@ -130,6 +135,12 @@ struct p_header100 {
 #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
 #define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
 #define DP_WSAME            512 /* equiv. REQ_WRITE_SAME */
+#define DP_ZEROES          1024 /* equiv. REQ_OP_WRITE_ZEROES */
+
+/* possible combinations:
+ * REQ_OP_WRITE_ZEROES:  DP_DISCARD | DP_ZEROES
+ * REQ_OP_WRITE_ZEROES + REQ_NOUNMAP: DP_ZEROES
+ */
 
 struct p_data {
        u64         sector;    /* 64 bits sector number */
@@ -197,6 +208,42 @@ struct p_block_req {
  */
 #define DRBD_FF_WSAME 4
 
+/* supports REQ_OP_WRITE_ZEROES on the "wire" protocol.
+ *
+ * We used to map that to "discard" on the sending side, and if we cannot
+ * guarantee that discard zeroes data, the receiving side would map discard
+ * back to zero-out.
+ *
+ * With the introduction of REQ_OP_WRITE_ZEROES,
+ * we started to use that for both WRITE_ZEROES and DISCARDS,
+ * hoping that WRITE_ZEROES would "do what we want",
+ * UNMAP if possible, zero-out the rest.
+ *
+ * The example scenario is some LVM "thin" backend.
+ *
+ * While an un-allocated block on dm-thin reads as zeroes, on a dm-thin
+ * with "skip_block_zeroing=true", after a partial block write allocated
+ * that block, that same block may well map "undefined old garbage" from
+ * the backends on LBAs that have not yet been written to.
+ *
+ * If we cannot distinguish between zero-out and discard on the receiving
+ * side, to avoid "undefined old garbage" to pop up randomly at later times
+ * on supposedly zero-initialized blocks, we'd need to map all discards to
+ * zero-out on the receiving side.  But that would potentially do a full
+ * alloc on thinly provisioned backends, even when the expectation was to
+ * unmap/trim/discard/de-allocate.
+ *
+ * We need to distinguish on the protocol level, whether we need to guarantee
+ * zeroes (and thus use zero-out, potentially doing the mentioned full-alloc),
+ * or if we want to put the emphasis on discard, and only do a "best effort
+ * zeroing" (by "discarding" blocks aligned to discard-granularity, and zeroing
+ * only potential unaligned head and tail clippings), to at least *try* to
+ * avoid "false positives" in an online-verify later, hoping that someone
+ * set skip_block_zeroing=false.
+ */
+#define DRBD_FF_WZEROES 8
+
+
 struct p_connection_features {
        u32 protocol_min;
        u32 feature_flags;
index ccfcf00f2798d3c7cbad526517fcdd86c2f02314..c7ad88d91a09e7dd427be0792a48ef0f9989178b 100644 (file)
@@ -50,7 +50,7 @@
 #include "drbd_req.h"
 #include "drbd_vli.h"
 
-#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME)
+#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
 
 struct packet_info {
        enum drbd_packet cmd;
@@ -1490,14 +1490,129 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
                drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
 }
 
-static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
+/*
+ * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
+ * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
+ * will directly go to fallback mode, submitting normal writes, and
+ * never even try to UNMAP.
+ *
+ * And dm-thin does not do this (yet), mostly because in general it has
+ * to assume that "skip_block_zeroing" is set.  See also:
+ * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
+ * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
+ *
+ * We *may* ignore the discard-zeroes-data setting, if so configured.
+ *
+ * Assumption is that this "discard_zeroes_data=0" is only because the backend
+ * may ignore partial unaligned discards.
+ *
+ * LVM/DM thin as of at least
+ *   LVM version:     2.02.115(2)-RHEL7 (2015-01-28)
+ *   Library version: 1.02.93-RHEL7 (2015-01-28)
+ *   Driver version:  4.29.0
+ * still behaves this way.
+ *
+ * For unaligned (wrt. alignment and granularity) or too small discards,
+ * we zero-out the initial (and/or) trailing unaligned partial chunks,
+ * but discard all the aligned full chunks.
+ *
+ * At least for LVM/DM thin, with skip_block_zeroing=false,
+ * the result is effectively "discard_zeroes_data=1".
+ */
+/* flags: EE_TRIM|EE_ZEROOUT */
+int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
 {
        struct block_device *bdev = device->ldev->backing_bdev;
+       struct request_queue *q = bdev_get_queue(bdev);
+       sector_t tmp, nr;
+       unsigned int max_discard_sectors, granularity;
+       int alignment;
+       int err = 0;
 
-       if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
-                       GFP_NOIO, 0))
-               peer_req->flags |= EE_WAS_ERROR;
+       if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
+               goto zero_out;
+
+       /* Zero-sector (unknown) and one-sector granularities are the same.  */
+       granularity = max(q->limits.discard_granularity >> 9, 1U);
+       alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
+
+       max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
+       max_discard_sectors -= max_discard_sectors % granularity;
+       if (unlikely(!max_discard_sectors))
+               goto zero_out;
+
+       if (nr_sectors < granularity)
+               goto zero_out;
+
+       tmp = start;
+       if (sector_div(tmp, granularity) != alignment) {
+               if (nr_sectors < 2*granularity)
+                       goto zero_out;
+               /* start + gran - (start + gran - align) % gran */
+               tmp = start + granularity - alignment;
+               tmp = start + granularity - sector_div(tmp, granularity);
+
+               nr = tmp - start;
+               /* don't flag BLKDEV_ZERO_NOUNMAP, we don't know how many
+                * layers are below us, some may have smaller granularity */
+               err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
+               nr_sectors -= nr;
+               start = tmp;
+       }
+       while (nr_sectors >= max_discard_sectors) {
+               err |= blkdev_issue_discard(bdev, start, max_discard_sectors, GFP_NOIO, 0);
+               nr_sectors -= max_discard_sectors;
+               start += max_discard_sectors;
+       }
+       if (nr_sectors) {
+               /* max_discard_sectors is unsigned int (and a multiple of
+                * granularity, we made sure of that above already);
+                * nr is < max_discard_sectors;
+                * I don't need sector_div here, even though nr is sector_t */
+               nr = nr_sectors;
+               nr -= (unsigned int)nr % granularity;
+               if (nr) {
+                       err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
+                       nr_sectors -= nr;
+                       start += nr;
+               }
+       }
+ zero_out:
+       if (nr_sectors) {
+               err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO,
+                               (flags & EE_TRIM) ? 0 : BLKDEV_ZERO_NOUNMAP);
+       }
+       return err != 0;
+}
 
+static bool can_do_reliable_discards(struct drbd_device *device)
+{
+       struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
+       struct disk_conf *dc;
+       bool can_do;
+
+       if (!blk_queue_discard(q))
+               return false;
+
+       rcu_read_lock();
+       dc = rcu_dereference(device->ldev->disk_conf);
+       can_do = dc->discard_zeroes_if_aligned;
+       rcu_read_unlock();
+       return can_do;
+}
+
+static void drbd_issue_peer_discard_or_zero_out(struct drbd_device *device, struct drbd_peer_request *peer_req)
+{
+       /* If the backend cannot discard, or does not guarantee
+        * read-back zeroes in discarded ranges, we fall back to
+        * zero-out.  Unless configuration specifically requested
+        * otherwise. */
+       if (!can_do_reliable_discards(device))
+               peer_req->flags |= EE_ZEROOUT;
+
+       if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
+           peer_req->i.size >> 9, peer_req->flags & (EE_ZEROOUT|EE_TRIM)))
+               peer_req->flags |= EE_WAS_ERROR;
        drbd_endio_write_sec_final(peer_req);
 }
 
@@ -1550,7 +1665,7 @@ int drbd_submit_peer_request(struct drbd_device *device,
         * Correctness first, performance later.  Next step is to code an
         * asynchronous variant of the same.
         */
-       if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) {
+       if (peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) {
                /* wait for all pending IO completions, before we start
                 * zeroing things out. */
                conn_wait_active_ee_empty(peer_req->peer_device->connection);
@@ -1567,8 +1682,8 @@ int drbd_submit_peer_request(struct drbd_device *device,
                        spin_unlock_irq(&device->resource->req_lock);
                }
 
-               if (peer_req->flags & EE_IS_TRIM)
-                       drbd_issue_peer_discard(device, peer_req);
+               if (peer_req->flags & (EE_TRIM|EE_ZEROOUT))
+                       drbd_issue_peer_discard_or_zero_out(device, peer_req);
                else /* EE_WRITE_SAME */
                        drbd_issue_peer_wsame(device, peer_req);
                return 0;
@@ -1765,6 +1880,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
        void *dig_vv = peer_device->connection->int_dig_vv;
        unsigned long *data;
        struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
+       struct p_trim *zeroes = (pi->cmd == P_ZEROES) ? pi->data : NULL;
        struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
 
        digest_size = 0;
@@ -1786,6 +1902,10 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
                if (!expect(data_size == 0))
                        return NULL;
                ds = be32_to_cpu(trim->size);
+       } else if (zeroes) {
+               if (!expect(data_size == 0))
+                       return NULL;
+               ds = be32_to_cpu(zeroes->size);
        } else if (wsame) {
                if (data_size != queue_logical_block_size(device->rq_queue)) {
                        drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
@@ -1802,7 +1922,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 
        if (!expect(IS_ALIGNED(ds, 512)))
                return NULL;
-       if (trim || wsame) {
+       if (trim || wsame || zeroes) {
                if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
                        return NULL;
        } else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
@@ -1827,7 +1947,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
 
        peer_req->flags |= EE_WRITE;
        if (trim) {
-               peer_req->flags |= EE_IS_TRIM;
+               peer_req->flags |= EE_TRIM;
+               return peer_req;
+       }
+       if (zeroes) {
+               peer_req->flags |= EE_ZEROOUT;
                return peer_req;
        }
        if (wsame)
@@ -2326,8 +2450,12 @@ static unsigned long wire_flags_to_bio_flags(u32 dpf)
 
 static unsigned long wire_flags_to_bio_op(u32 dpf)
 {
-       if (dpf & DP_DISCARD)
+       if (dpf & DP_ZEROES)
                return REQ_OP_WRITE_ZEROES;
+       if (dpf & DP_DISCARD)
+               return REQ_OP_DISCARD;
+       if (dpf & DP_WSAME)
+               return REQ_OP_WRITE_SAME;
        else
                return REQ_OP_WRITE;
 }
@@ -2517,9 +2645,20 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
        op = wire_flags_to_bio_op(dp_flags);
        op_flags = wire_flags_to_bio_flags(dp_flags);
        if (pi->cmd == P_TRIM) {
+               D_ASSERT(peer_device, peer_req->i.size > 0);
+               D_ASSERT(peer_device, op == REQ_OP_DISCARD);
+               D_ASSERT(peer_device, peer_req->pages == NULL);
+               /* need to play safe: an older DRBD sender
+                * may mean zero-out while sending P_TRIM. */
+               if (0 == (connection->agreed_features & DRBD_FF_WZEROES))
+                       peer_req->flags |= EE_ZEROOUT;
+       } else if (pi->cmd == P_ZEROES) {
                D_ASSERT(peer_device, peer_req->i.size > 0);
                D_ASSERT(peer_device, op == REQ_OP_WRITE_ZEROES);
                D_ASSERT(peer_device, peer_req->pages == NULL);
+               /* Do (not) pass down BLKDEV_ZERO_NOUNMAP? */
+               if (dp_flags & DP_DISCARD)
+                       peer_req->flags |= EE_TRIM;
        } else if (peer_req->pages == NULL) {
                D_ASSERT(device, peer_req->i.size == 0);
                D_ASSERT(device, dp_flags & DP_FLUSH);
@@ -2587,7 +2726,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
         * we wait for all pending requests, respectively wait for
         * active_ee to become empty in drbd_submit_peer_request();
         * better not add ourselves here. */
-       if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0)
+       if ((peer_req->flags & (EE_TRIM|EE_WRITE_SAME|EE_ZEROOUT)) == 0)
                list_add_tail(&peer_req->w.list, &device->active_ee);
        spin_unlock_irq(&device->resource->req_lock);
 
@@ -3364,7 +3503,7 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
        enum drbd_conns rv = C_MASK;
        enum drbd_disk_state mydisk;
        struct net_conf *nc;
-       int hg, rule_nr, rr_conflict, tentative;
+       int hg, rule_nr, rr_conflict, tentative, always_asbp;
 
        mydisk = device->state.disk;
        if (mydisk == D_NEGOTIATING)
@@ -3415,8 +3554,12 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
 
        rcu_read_lock();
        nc = rcu_dereference(peer_device->connection->net_conf);
+       always_asbp = nc->always_asbp;
+       rr_conflict = nc->rr_conflict;
+       tentative = nc->tentative;
+       rcu_read_unlock();
 
-       if (hg == 100 || (hg == -100 && nc->always_asbp)) {
+       if (hg == 100 || (hg == -100 && always_asbp)) {
                int pcount = (device->state.role == R_PRIMARY)
                           + (peer_role == R_PRIMARY);
                int forced = (hg == -100);
@@ -3455,9 +3598,6 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
                             "Sync from %s node\n",
                             (hg < 0) ? "peer" : "this");
        }
-       rr_conflict = nc->rr_conflict;
-       tentative = nc->tentative;
-       rcu_read_unlock();
 
        if (hg == -100) {
                /* FIXME this log message is not correct if we end up here
@@ -3980,6 +4120,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
        struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
        enum determine_dev_size dd = DS_UNCHANGED;
        sector_t p_size, p_usize, p_csize, my_usize;
+       sector_t new_size, cur_size;
        int ldsc = 0; /* local disk size changed */
        enum dds_flags ddsf;
 
@@ -3987,6 +4128,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
        if (!peer_device)
                return config_unknown_volume(connection, pi);
        device = peer_device->device;
+       cur_size = drbd_get_capacity(device->this_bdev);
 
        p_size = be64_to_cpu(p->d_size);
        p_usize = be64_to_cpu(p->u_size);
@@ -3997,7 +4139,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
        device->p_size = p_size;
 
        if (get_ldev(device)) {
-               sector_t new_size, cur_size;
                rcu_read_lock();
                my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
                rcu_read_unlock();
@@ -4012,13 +4153,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
                if (device->state.conn == C_WF_REPORT_PARAMS)
                        p_usize = min_not_zero(my_usize, p_usize);
 
-               /* Never shrink a device with usable data during connect.
-                  But allow online shrinking if we are connected. */
+               /* Never shrink a device with usable data during connect,
+                * or "attach" on the peer.
+                * But allow online shrinking if we are connected. */
                new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
-               cur_size = drbd_get_capacity(device->this_bdev);
                if (new_size < cur_size &&
                    device->state.disk >= D_OUTDATED &&
-                   device->state.conn < C_CONNECTED) {
+                   (device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS)) {
                        drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
                                        (unsigned long long)new_size, (unsigned long long)cur_size);
                        conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
@@ -4046,8 +4187,8 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
                        synchronize_rcu();
                        kfree(old_disk_conf);
 
-                       drbd_info(device, "Peer sets u_size to %lu sectors\n",
-                                (unsigned long)my_usize);
+                       drbd_info(device, "Peer sets u_size to %lu sectors (old: %lu)\n",
+                                (unsigned long)p_usize, (unsigned long)my_usize);
                }
 
                put_ldev(device);
@@ -4080,9 +4221,36 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
                 *
                 * However, if he sends a zero current size,
                 * take his (user-capped or) backing disk size anyways.
+                *
+                * Unless of course he does not have a disk himself.
+                * In which case we ignore this completely.
                 */
+               sector_t new_size = p_csize ?: p_usize ?: p_size;
                drbd_reconsider_queue_parameters(device, NULL, o);
-               drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
+               if (new_size == 0) {
+                       /* Ignore, peer does not know anything. */
+               } else if (new_size == cur_size) {
+                       /* nothing to do */
+               } else if (cur_size != 0 && p_size == 0) {
+                       drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n",
+                                       (unsigned long long)new_size, (unsigned long long)cur_size);
+               } else if (new_size < cur_size && device->state.role == R_PRIMARY) {
+                       drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n",
+                                       (unsigned long long)new_size, (unsigned long long)cur_size);
+                       conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
+                       return -EIO;
+               } else {
+                       /* I believe the peer, if
+                        *  - I don't have a current size myself
+                        *  - we agree on the size anyways
+                        *  - I do have a current size, am Secondary,
+                        *    and he has the only disk
+                        *  - I do have a current size, am Primary,
+                        *    and he has the only disk,
+                        *    which is larger than my current size
+                        */
+                       drbd_set_my_capacity(device, new_size);
+               }
        }
 
        if (get_ldev(device)) {
@@ -4142,7 +4310,7 @@ static int receive_uuids(struct drbd_connection *connection, struct packet_info
        kfree(device->p_uuid);
        device->p_uuid = p_uuid;
 
-       if (device->state.conn < C_CONNECTED &&
+       if ((device->state.conn < C_CONNECTED || device->state.pdsk == D_DISKLESS) &&
            device->state.disk < D_INCONSISTENT &&
            device->state.role == R_PRIMARY &&
            (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
@@ -4368,6 +4536,25 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
        if (peer_state.conn == C_AHEAD)
                ns.conn = C_BEHIND;
 
+       /* TODO:
+        * if (primary and diskless and peer uuid != effective uuid)
+        *     abort attach on peer;
+        *
+        * If this node does not have good data, was already connected, but
+        * the peer did a late attach only now, trying to "negotiate" with me,
+        * AND I am currently Primary, possibly frozen, with some specific
+        * "effective" uuid, this should never be reached, really, because
+        * we first send the uuids, then the current state.
+        *
+        * In this scenario, we already dropped the connection hard
+        * when we received the unsuitable uuids (receive_uuids()).
+        *
+        * Should we want to change this, that is: not drop the connection in
+        * receive_uuids() already, then we would need to add a branch here
+        * that aborts the attach of "unsuitable uuids" on the peer in case
+        * this node is currently Diskless Primary.
+        */
+
        if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
            get_ldev_if_state(device, D_NEGOTIATING)) {
                int cr; /* consider resync */
@@ -4380,7 +4567,7 @@ static int receive_state(struct drbd_connection *connection, struct packet_info
                       (peer_state.disk == D_NEGOTIATING ||
                        os.disk == D_NEGOTIATING));
                /* if we have both been inconsistent, and the peer has been
-                * forced to be UpToDate with --overwrite-data */
+                * forced to be UpToDate with --force */
                cr |= test_bit(CONSIDER_RESYNC, &device->flags);
                /* if we had been plain connected, and the admin requested to
                 * start a sync by "invalidate" or "invalidate-remote" */
@@ -4845,7 +5032,7 @@ static int receive_rs_deallocated(struct drbd_connection *connection, struct pac
 
                peer_req->w.cb = e_end_resync_block;
                peer_req->submit_jif = jiffies;
-               peer_req->flags |= EE_IS_TRIM;
+               peer_req->flags |= EE_TRIM;
 
                spin_lock_irq(&device->resource->req_lock);
                list_add_tail(&peer_req->w.list, &device->sync_ee);
@@ -4913,6 +5100,7 @@ static struct data_cmd drbd_cmd_handler[] = {
        [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
        [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
        [P_TRIM]            = { 0, sizeof(struct p_trim), receive_Data },
+       [P_ZEROES]          = { 0, sizeof(struct p_trim), receive_Data },
        [P_RS_DEALLOCATED]  = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
        [P_WSAME]           = { 1, sizeof(struct p_wsame), receive_Data },
 };
@@ -5197,11 +5385,12 @@ static int drbd_do_features(struct drbd_connection *connection)
        drbd_info(connection, "Handshake successful: "
             "Agreed network protocol version %d\n", connection->agreed_pro_version);
 
-       drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n",
+       drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s%s.\n",
                  connection->agreed_features,
                  connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
                  connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
-                 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" :
+                 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" : "",
+                 connection->agreed_features & DRBD_FF_WZEROES ? " WRITE_ZEROES" :
                  connection->agreed_features ? "" : " none");
 
        return 1;
@@ -5284,7 +5473,7 @@ static int drbd_do_auth(struct drbd_connection *connection)
        if (pi.cmd != P_AUTH_CHALLENGE) {
                drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
-               rv = 0;
+               rv = -1;
                goto fail;
        }
 
index 1c4da17e902ec75861616f1c8ef784b8514fe99f..643a04af213bc86db9d2adcefbd92bea4d2d38a2 100644 (file)
@@ -63,7 +63,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
        drbd_req_make_private_bio(req, bio_src);
        req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
                      | (bio_op(bio_src) == REQ_OP_WRITE_SAME ? RQ_WSAME : 0)
-                     | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_UNMAP : 0)
+                     | (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
                      | (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
        req->device = device;
        req->master_bio = bio_src;
@@ -1155,12 +1155,11 @@ static int drbd_process_write_request(struct drbd_request *req)
        return remote;
 }
 
-static void drbd_process_discard_req(struct drbd_request *req)
+static void drbd_process_discard_or_zeroes_req(struct drbd_request *req, int flags)
 {
-       struct block_device *bdev = req->device->ldev->backing_bdev;
-
-       if (blkdev_issue_zeroout(bdev, req->i.sector, req->i.size >> 9,
-                       GFP_NOIO, 0))
+       int err = drbd_issue_discard_or_zero_out(req->device,
+                               req->i.sector, req->i.size >> 9, flags);
+       if (err)
                req->private_bio->bi_status = BLK_STS_IOERR;
        bio_endio(req->private_bio);
 }
@@ -1189,9 +1188,11 @@ drbd_submit_req_private_bio(struct drbd_request *req)
        if (get_ldev(device)) {
                if (drbd_insert_fault(device, type))
                        bio_io_error(bio);
-               else if (bio_op(bio) == REQ_OP_WRITE_ZEROES ||
-                        bio_op(bio) == REQ_OP_DISCARD)
-                       drbd_process_discard_req(req);
+               else if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
+                       drbd_process_discard_or_zeroes_req(req, EE_ZEROOUT |
+                           ((bio->bi_opf & REQ_NOUNMAP) ? 0 : EE_TRIM));
+               else if (bio_op(bio) == REQ_OP_DISCARD)
+                       drbd_process_discard_or_zeroes_req(req, EE_TRIM);
                else
                        generic_make_request(bio);
                put_ldev(device);
index 94c654020f0f258f2511a27712eb1acc45ef720a..c2f569d2661b656d6e6e077f182871682cb1e13e 100644 (file)
@@ -208,6 +208,7 @@ enum drbd_req_state_bits {
        __RQ_WRITE,
        __RQ_WSAME,
        __RQ_UNMAP,
+       __RQ_ZEROES,
 
        /* Should call drbd_al_complete_io() for this request... */
        __RQ_IN_ACT_LOG,
@@ -253,6 +254,7 @@ enum drbd_req_state_bits {
 #define RQ_WRITE           (1UL << __RQ_WRITE)
 #define RQ_WSAME           (1UL << __RQ_WSAME)
 #define RQ_UNMAP           (1UL << __RQ_UNMAP)
+#define RQ_ZEROES          (1UL << __RQ_ZEROES)
 #define RQ_IN_ACT_LOG      (1UL << __RQ_IN_ACT_LOG)
 #define RQ_UNPLUG          (1UL << __RQ_UNPLUG)
 #define RQ_POSTPONED      (1UL << __RQ_POSTPONED)
index 0813c654c89387e36e0174a453028e8c5b75f26b..2b4c0db5d867ccec3470bccc103f74d2d51e05a2 100644 (file)
@@ -688,11 +688,9 @@ request_detach(struct drbd_device *device)
                        CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
 }
 
-enum drbd_state_rv
-drbd_request_detach_interruptible(struct drbd_device *device)
+int drbd_request_detach_interruptible(struct drbd_device *device)
 {
-       enum drbd_state_rv rv;
-       int ret;
+       int ret, rv;
 
        drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
        wait_event_interruptible(device->state_wait,
@@ -1124,7 +1122,7 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st
                        ns.pdsk = D_UP_TO_DATE;
        }
 
-       /* Implications of the connection stat on the disk states */
+       /* Implications of the connection state on the disk states */
        disk_min = D_DISKLESS;
        disk_max = D_UP_TO_DATE;
        pdsk_min = D_INCONSISTENT;
@@ -2109,9 +2107,8 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
                        spin_unlock_irq(&connection->resource->req_lock);
                }
        }
-       kref_put(&connection->kref, drbd_destroy_connection);
-
        conn_md_sync(connection);
+       kref_put(&connection->kref, drbd_destroy_connection);
 
        return 0;
 }
index ea58301d0895c96212f2fdc38e39e9859d9c627d..f87371e55e682ac4deaa4ce69fb44765ceb8460b 100644 (file)
@@ -131,7 +131,7 @@ extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state
                                          enum chg_state_flags,
                                          struct completion *done);
 extern void print_st_err(struct drbd_device *, union drbd_state,
-                       union drbd_state, int);
+                       union drbd_state, enum drbd_state_rv);
 
 enum drbd_state_rv
 _conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
@@ -162,8 +162,7 @@ static inline int drbd_request_state(struct drbd_device *device,
 }
 
 /* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */
-enum drbd_state_rv
-drbd_request_detach_interruptible(struct drbd_device *device);
+int drbd_request_detach_interruptible(struct drbd_device *device);
 
 enum drbd_role conn_highest_role(struct drbd_connection *connection);
 enum drbd_role conn_highest_peer(struct drbd_connection *connection);
index 99255d0c9e2ffab9bde0500b14ed446c5bce281d..268ef0c5d4ab6b0eee19e7dab23fc8aeee29a1ca 100644 (file)
@@ -153,7 +153,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
        do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);
 
        /* FIXME do we want to detach for failed REQ_OP_DISCARD?
-        * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */
+        * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
        if (peer_req->flags & EE_WAS_ERROR)
                __drbd_chk_io_error(device, DRBD_WRITE_ERROR);
 
index 0939f36548c948b42dfd7f85e892a30236b17954..b8a0720d365309753093adcacc86996dae0000c5 100644 (file)
@@ -616,7 +616,6 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
        default:
                WARN_ON_ONCE(1);
                return -EIO;
-               break;
        }
 }
 
index 9c0553dd13e7c391763cbbaac989844ec9e87821..0ff27e2d98c46256cea2e491c92a0b1ac76aff83 100644 (file)
@@ -633,7 +633,6 @@ static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
        case VD_OP_GET_EFI:
        case VD_OP_SET_EFI:
                return -EOPNOTSUPP;
-               break;
        };
 
        map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
index c1c676a33e4a60d8a123daac45e7006129396cb6..1e2ae90d7715adfadd59a131f637f5c86667502a 100644 (file)
@@ -995,7 +995,11 @@ static void floppy_release(struct gendisk *disk, fmode_t mode)
        struct swim3 __iomem *sw = fs->swim3;
 
        mutex_lock(&swim3_mutex);
-       if (fs->ref_count > 0 && --fs->ref_count == 0) {
+       if (fs->ref_count > 0)
+               --fs->ref_count;
+       else if (fs->ref_count == -1)
+               fs->ref_count = 0;
+       if (fs->ref_count == 0) {
                swim3_action(fs, MOTOR_OFF);
                out_8(&sw->control_bic, 0xff);
                swim3_select(fs, RELAX);
@@ -1087,8 +1091,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
        struct floppy_state *fs = &floppy_states[index];
        int rc = -EBUSY;
 
-       /* Do this first for message macros */
-       memset(fs, 0, sizeof(*fs));
        fs->mdev = mdev;
        fs->index = index;
 
@@ -1151,7 +1153,6 @@ static int swim3_add_device(struct macio_dev *mdev, int index)
                swim3_err("%s", "Couldn't request interrupt\n");
                pmac_call_feature(PMAC_FTR_SWIM3_ENABLE, swim, 0, 0);
                goto out_unmap;
-               return -EBUSY;
        }
 
        timer_setup(&fs->timeout, NULL, 0);
@@ -1188,14 +1189,15 @@ static int swim3_attach(struct macio_dev *mdev,
                        return rc;
        }
 
-       fs = &floppy_states[floppy_count];
-
        disk = alloc_disk(1);
        if (disk == NULL) {
                rc = -ENOMEM;
                goto out_unregister;
        }
 
+       fs = &floppy_states[floppy_count];
+       memset(fs, 0, sizeof(*fs));
+
        disk->queue = blk_mq_init_sq_queue(&fs->tag_set, &swim3_mq_ops, 2,
                                                BLK_MQ_F_SHOULD_MERGE);
        if (IS_ERR(disk->queue)) {
index a5b8afe3609c10e3dc0394f07cea53c0e17e70a9..f8b7345fe1cb6071301e3df34305b1bf654d3b44 100644 (file)
@@ -873,6 +873,7 @@ static void __exit exit_gdrom(void)
        platform_device_unregister(pd);
        platform_driver_unregister(&gdrom_driver);
        kfree(gd.toc);
+       kfree(gd.cd_info);
 }
 
 module_init(init_gdrom);
index 3fcf062d752cfe3cfed93102a97ba9f78bc5c2c4..5ee20da7bdb3d7a0f5d52ebef06afb51d191f722 100644 (file)
@@ -418,7 +418,6 @@ retry_rq:
        if (ret) {
                pblk_err(pblk, "I/O submission failed: %d\n", ret);
                bio_put(bio);
-               bio_put(bio);
                return ret;
        }
 
index f025fd1e22e654bff0803b0a80599eafee196ae4..76c61318fda5b5c714cb8f2926b79c439fcb45f8 100644 (file)
@@ -499,22 +499,33 @@ static inline void blkg_get(struct blkcg_gq *blkg)
  */
 static inline bool blkg_tryget(struct blkcg_gq *blkg)
 {
-       return percpu_ref_tryget(&blkg->refcnt);
+       return blkg && percpu_ref_tryget(&blkg->refcnt);
 }
 
 /**
  * blkg_tryget_closest - try and get a blkg ref on the closet blkg
  * @blkg: blkg to get
  *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
+ * This needs to be called rcu protected.  As the failure mode here is to walk
+ * up the blkg tree, this ensures that the blkg->parent pointers are always
+ * valid.  This returns the blkg that it ended up taking a reference on or %NULL
+ * if no reference was taken.
  */
 static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
 {
-       while (blkg && !percpu_ref_tryget(&blkg->refcnt))
+       struct blkcg_gq *ret_blkg = NULL;
+
+       WARN_ON_ONCE(!rcu_read_lock_held());
+
+       while (blkg) {
+               if (blkg_tryget(blkg)) {
+                       ret_blkg = blkg;
+                       break;
+               }
                blkg = blkg->parent;
+       }
 
-       return blkg;
+       return ret_blkg;
 }
 
 /**
index 2d0259327721444b587e6ec6bd8388a01afc82db..a19d98367f089553b9c1e003021e79ea455831d0 100644 (file)
@@ -51,7 +51,7 @@
 #endif
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.4.10"
+#define REL_VERSION "8.4.11"
 #define API_VERSION 1
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 101
index 5972e4969197aeeab2114f452cfd44ab24b3ac25..eeae59d3ceb7401af5badf6297ee2fad3a388fd7 100644 (file)
@@ -191,6 +191,7 @@ static inline void ct_assert_unique_operations(void)
 {
        switch (0) {
 #include GENL_MAGIC_INCLUDE_FILE
+       case 0:
                ;
        }
 }
@@ -209,6 +210,7 @@ static inline void ct_assert_unique_top_level_attributes(void)
 {
        switch (0) {
 #include GENL_MAGIC_INCLUDE_FILE
+       case 0:
                ;
        }
 }
@@ -218,7 +220,8 @@ static inline void ct_assert_unique_top_level_attributes(void)
 static inline void ct_assert_unique_ ## s_name ## _attributes(void)    \
 {                                                                      \
        switch (0) {                                                    \
-               s_fields                                                \
+       s_fields                                                        \
+       case 0:                                                         \
                        ;                                               \
        }                                                               \
 }