Merge branch 'timers-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / drivers / block / drbd / drbd_nl.c
1 /*
2    drbd_nl.c
3
4    This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6    Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7    Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8    Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10    drbd is free software; you can redistribute it and/or modify
11    it under the terms of the GNU General Public License as published by
12    the Free Software Foundation; either version 2, or (at your option)
13    any later version.
14
15    drbd is distributed in the hope that it will be useful,
16    but WITHOUT ANY WARRANTY; without even the implied warranty of
17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18    GNU General Public License for more details.
19
20    You should have received a copy of the GNU General Public License
21    along with drbd; see the file COPYING.  If not, write to
22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24  */
25
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/in.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/slab.h>
32 #include <linux/blkpg.h>
33 #include <linux/cpumask.h>
34 #include "drbd_int.h"
35 #include "drbd_protocol.h"
36 #include "drbd_req.h"
37 #include <asm/unaligned.h>
38 #include <linux/drbd_limits.h>
39 #include <linux/kthread.h>
40
41 #include <net/genetlink.h>
42
/*
 * Generic netlink command handlers implemented in this file.  They are
 * declared here, ahead of the genl_magic includes that follow.
 */

/* .doit: resources and minors */
int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info);

/* .doit: role and backing disk */
int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);

/* .doit: network connection */
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);

/* .doit: resync, verify and I/O control */
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);

/* .doit: queries */
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);

/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76
77 #include <linux/drbd_genl_api.h>
78 #include "drbd_nla.h"
79 #include <linux/genl_magic_func.h>
80
/* used with blkdev_get_by_path(), to claim our meta data device(s) */
82 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
83
84 static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
85 {
86         genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
87         if (genlmsg_reply(skb, info))
88                 printk(KERN_ERR "drbd: error sending genl reply\n");
89 }
90
91 /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
92  * reason it could fail was no space in skb, and there are 4k available. */
93 int drbd_msg_put_info(struct sk_buff *skb, const char *info)
94 {
95         struct nlattr *nla;
96         int err = -EMSGSIZE;
97
98         if (!info || !info[0])
99                 return 0;
100
101         nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
102         if (!nla)
103                 return err;
104
105         err = nla_put_string(skb, T_info_text, info);
106         if (err) {
107                 nla_nest_cancel(skb, nla);
108                 return err;
109         } else
110                 nla_nest_end(skb, nla);
111         return 0;
112 }
113
114 /* This would be a good candidate for a "pre_doit" hook,
115  * and per-family private info->pointers.
116  * But we need to stay compatible with older kernels.
117  * If it returns successfully, adm_ctx members are valid.
118  *
119  * At this point, we still rely on the global genl_lock().
120  * If we want to avoid that, and allow "genl_family.parallel_ops", we may need
121  * to add additional synchronization against object destruction/modification.
122  */
123 #define DRBD_ADM_NEED_MINOR     1
124 #define DRBD_ADM_NEED_RESOURCE  2
125 #define DRBD_ADM_NEED_CONNECTION 4
126 static int drbd_adm_prepare(struct drbd_config_context *adm_ctx,
127         struct sk_buff *skb, struct genl_info *info, unsigned flags)
128 {
129         struct drbd_genlmsghdr *d_in = info->userhdr;
130         const u8 cmd = info->genlhdr->cmd;
131         int err;
132
133         memset(adm_ctx, 0, sizeof(*adm_ctx));
134
135         /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
136         if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN))
137                return -EPERM;
138
139         adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
140         if (!adm_ctx->reply_skb) {
141                 err = -ENOMEM;
142                 goto fail;
143         }
144
145         adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb,
146                                         info, &drbd_genl_family, 0, cmd);
147         /* put of a few bytes into a fresh skb of >= 4k will always succeed.
148          * but anyways */
149         if (!adm_ctx->reply_dh) {
150                 err = -ENOMEM;
151                 goto fail;
152         }
153
154         adm_ctx->reply_dh->minor = d_in->minor;
155         adm_ctx->reply_dh->ret_code = NO_ERROR;
156
157         adm_ctx->volume = VOLUME_UNSPECIFIED;
158         if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
159                 struct nlattr *nla;
160                 /* parse and validate only */
161                 err = drbd_cfg_context_from_attrs(NULL, info);
162                 if (err)
163                         goto fail;
164
165                 /* It was present, and valid,
166                  * copy it over to the reply skb. */
167                 err = nla_put_nohdr(adm_ctx->reply_skb,
168                                 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
169                                 info->attrs[DRBD_NLA_CFG_CONTEXT]);
170                 if (err)
171                         goto fail;
172
173                 /* and assign stuff to the adm_ctx */
174                 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
175                 if (nla)
176                         adm_ctx->volume = nla_get_u32(nla);
177                 nla = nested_attr_tb[__nla_type(T_ctx_resource_name)];
178                 if (nla)
179                         adm_ctx->resource_name = nla_data(nla);
180                 adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)];
181                 adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)];
182                 if ((adm_ctx->my_addr &&
183                      nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) ||
184                     (adm_ctx->peer_addr &&
185                      nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) {
186                         err = -EINVAL;
187                         goto fail;
188                 }
189         }
190
191         adm_ctx->minor = d_in->minor;
192         adm_ctx->device = minor_to_device(d_in->minor);
193
194         /* We are protected by the global genl_lock().
195          * But we may explicitly drop it/retake it in drbd_adm_set_role(),
196          * so make sure this object stays around. */
197         if (adm_ctx->device)
198                 kref_get(&adm_ctx->device->kref);
199
200         if (adm_ctx->resource_name) {
201                 adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name);
202         }
203
204         if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) {
205                 drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor");
206                 return ERR_MINOR_INVALID;
207         }
208         if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) {
209                 drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource");
210                 if (adm_ctx->resource_name)
211                         return ERR_RES_NOT_KNOWN;
212                 return ERR_INVALID_REQUEST;
213         }
214
215         if (flags & DRBD_ADM_NEED_CONNECTION) {
216                 if (adm_ctx->resource) {
217                         drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected");
218                         return ERR_INVALID_REQUEST;
219                 }
220                 if (adm_ctx->device) {
221                         drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected");
222                         return ERR_INVALID_REQUEST;
223                 }
224                 if (adm_ctx->my_addr && adm_ctx->peer_addr)
225                         adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr),
226                                                           nla_len(adm_ctx->my_addr),
227                                                           nla_data(adm_ctx->peer_addr),
228                                                           nla_len(adm_ctx->peer_addr));
229                 if (!adm_ctx->connection) {
230                         drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection");
231                         return ERR_INVALID_REQUEST;
232                 }
233         }
234
235         /* some more paranoia, if the request was over-determined */
236         if (adm_ctx->device && adm_ctx->resource &&
237             adm_ctx->device->resource != adm_ctx->resource) {
238                 pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n",
239                                 adm_ctx->minor, adm_ctx->resource->name,
240                                 adm_ctx->device->resource->name);
241                 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource");
242                 return ERR_INVALID_REQUEST;
243         }
244         if (adm_ctx->device &&
245             adm_ctx->volume != VOLUME_UNSPECIFIED &&
246             adm_ctx->volume != adm_ctx->device->vnr) {
247                 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
248                                 adm_ctx->minor, adm_ctx->volume,
249                                 adm_ctx->device->vnr,
250                                 adm_ctx->device->resource->name);
251                 drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume");
252                 return ERR_INVALID_REQUEST;
253         }
254
255         /* still, provide adm_ctx->resource always, if possible. */
256         if (!adm_ctx->resource) {
257                 adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource
258                         : adm_ctx->connection ? adm_ctx->connection->resource : NULL;
259                 if (adm_ctx->resource)
260                         kref_get(&adm_ctx->resource->kref);
261         }
262
263         return NO_ERROR;
264
265 fail:
266         nlmsg_free(adm_ctx->reply_skb);
267         adm_ctx->reply_skb = NULL;
268         return err;
269 }
270
271 static int drbd_adm_finish(struct drbd_config_context *adm_ctx,
272         struct genl_info *info, int retcode)
273 {
274         if (adm_ctx->device) {
275                 kref_put(&adm_ctx->device->kref, drbd_destroy_device);
276                 adm_ctx->device = NULL;
277         }
278         if (adm_ctx->connection) {
279                 kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection);
280                 adm_ctx->connection = NULL;
281         }
282         if (adm_ctx->resource) {
283                 kref_put(&adm_ctx->resource->kref, drbd_destroy_resource);
284                 adm_ctx->resource = NULL;
285         }
286
287         if (!adm_ctx->reply_skb)
288                 return -ENOMEM;
289
290         adm_ctx->reply_dh->ret_code = retcode;
291         drbd_adm_send_reply(adm_ctx->reply_skb, info);
292         return 0;
293 }
294
295 static void setup_khelper_env(struct drbd_connection *connection, char **envp)
296 {
297         char *afs;
298
299         /* FIXME: A future version will not allow this case. */
300         if (connection->my_addr_len == 0 || connection->peer_addr_len == 0)
301                 return;
302
303         switch (((struct sockaddr *)&connection->peer_addr)->sa_family) {
304         case AF_INET6:
305                 afs = "ipv6";
306                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
307                          &((struct sockaddr_in6 *)&connection->peer_addr)->sin6_addr);
308                 break;
309         case AF_INET:
310                 afs = "ipv4";
311                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
312                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
313                 break;
314         default:
315                 afs = "ssocks";
316                 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
317                          &((struct sockaddr_in *)&connection->peer_addr)->sin_addr);
318         }
319         snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
320 }
321
322 int drbd_khelper(struct drbd_device *device, char *cmd)
323 {
324         char *envp[] = { "HOME=/",
325                         "TERM=linux",
326                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
327                          (char[20]) { }, /* address family */
328                          (char[60]) { }, /* address */
329                         NULL };
330         char mb[12];
331         char *argv[] = {usermode_helper, cmd, mb, NULL };
332         struct drbd_connection *connection = first_peer_device(device)->connection;
333         struct sib_info sib;
334         int ret;
335
336         if (current == connection->worker.task)
337                 set_bit(CALLBACK_PENDING, &connection->flags);
338
339         snprintf(mb, 12, "minor-%d", device_to_minor(device));
340         setup_khelper_env(connection, envp);
341
342         /* The helper may take some time.
343          * write out any unsynced meta data changes now */
344         drbd_md_sync(device);
345
346         drbd_info(device, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
347         sib.sib_reason = SIB_HELPER_PRE;
348         sib.helper_name = cmd;
349         drbd_bcast_event(device, &sib);
350         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
351         if (ret)
352                 drbd_warn(device, "helper command: %s %s %s exit code %u (0x%x)\n",
353                                 usermode_helper, cmd, mb,
354                                 (ret >> 8) & 0xff, ret);
355         else
356                 drbd_info(device, "helper command: %s %s %s exit code %u (0x%x)\n",
357                                 usermode_helper, cmd, mb,
358                                 (ret >> 8) & 0xff, ret);
359         sib.sib_reason = SIB_HELPER_POST;
360         sib.helper_exit_code = ret;
361         drbd_bcast_event(device, &sib);
362
363         if (current == connection->worker.task)
364                 clear_bit(CALLBACK_PENDING, &connection->flags);
365
366         if (ret < 0) /* Ignore any ERRNOs we got. */
367                 ret = 0;
368
369         return ret;
370 }
371
372 static int conn_khelper(struct drbd_connection *connection, char *cmd)
373 {
374         char *envp[] = { "HOME=/",
375                         "TERM=linux",
376                         "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
377                          (char[20]) { }, /* address family */
378                          (char[60]) { }, /* address */
379                         NULL };
380         char *resource_name = connection->resource->name;
381         char *argv[] = {usermode_helper, cmd, resource_name, NULL };
382         int ret;
383
384         setup_khelper_env(connection, envp);
385         conn_md_sync(connection);
386
387         drbd_info(connection, "helper command: %s %s %s\n", usermode_helper, cmd, resource_name);
388         /* TODO: conn_bcast_event() ?? */
389
390         ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
391         if (ret)
392                 drbd_warn(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
393                           usermode_helper, cmd, resource_name,
394                           (ret >> 8) & 0xff, ret);
395         else
396                 drbd_info(connection, "helper command: %s %s %s exit code %u (0x%x)\n",
397                           usermode_helper, cmd, resource_name,
398                           (ret >> 8) & 0xff, ret);
399         /* TODO: conn_bcast_event() ?? */
400
401         if (ret < 0) /* Ignore any ERRNOs we got. */
402                 ret = 0;
403
404         return ret;
405 }
406
407 static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connection)
408 {
409         enum drbd_fencing_p fp = FP_NOT_AVAIL;
410         struct drbd_peer_device *peer_device;
411         int vnr;
412
413         rcu_read_lock();
414         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
415                 struct drbd_device *device = peer_device->device;
416                 if (get_ldev_if_state(device, D_CONSISTENT)) {
417                         struct disk_conf *disk_conf =
418                                 rcu_dereference(peer_device->device->ldev->disk_conf);
419                         fp = max_t(enum drbd_fencing_p, fp, disk_conf->fencing);
420                         put_ldev(device);
421                 }
422         }
423         rcu_read_unlock();
424
425         if (fp == FP_NOT_AVAIL) {
426                 /* IO Suspending works on the whole resource.
427                    Do it only for one device. */
428                 vnr = 0;
429                 peer_device = idr_get_next(&connection->peer_devices, &vnr);
430                 drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0));
431         }
432
433         return fp;
434 }
435
436 bool conn_try_outdate_peer(struct drbd_connection *connection)
437 {
438         unsigned int connect_cnt;
439         union drbd_state mask = { };
440         union drbd_state val = { };
441         enum drbd_fencing_p fp;
442         char *ex_to_string;
443         int r;
444
445         spin_lock_irq(&connection->resource->req_lock);
446         if (connection->cstate >= C_WF_REPORT_PARAMS) {
447                 drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n");
448                 spin_unlock_irq(&connection->resource->req_lock);
449                 return false;
450         }
451
452         connect_cnt = connection->connect_cnt;
453         spin_unlock_irq(&connection->resource->req_lock);
454
455         fp = highest_fencing_policy(connection);
456         switch (fp) {
457         case FP_NOT_AVAIL:
458                 drbd_warn(connection, "Not fencing peer, I'm not even Consistent myself.\n");
459                 goto out;
460         case FP_DONT_CARE:
461                 return true;
462         default: ;
463         }
464
465         r = conn_khelper(connection, "fence-peer");
466
467         switch ((r>>8) & 0xff) {
468         case 3: /* peer is inconsistent */
469                 ex_to_string = "peer is inconsistent or worse";
470                 mask.pdsk = D_MASK;
471                 val.pdsk = D_INCONSISTENT;
472                 break;
473         case 4: /* peer got outdated, or was already outdated */
474                 ex_to_string = "peer was fenced";
475                 mask.pdsk = D_MASK;
476                 val.pdsk = D_OUTDATED;
477                 break;
478         case 5: /* peer was down */
479                 if (conn_highest_disk(connection) == D_UP_TO_DATE) {
480                         /* we will(have) create(d) a new UUID anyways... */
481                         ex_to_string = "peer is unreachable, assumed to be dead";
482                         mask.pdsk = D_MASK;
483                         val.pdsk = D_OUTDATED;
484                 } else {
485                         ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
486                 }
487                 break;
488         case 6: /* Peer is primary, voluntarily outdate myself.
489                  * This is useful when an unconnected R_SECONDARY is asked to
490                  * become R_PRIMARY, but finds the other peer being active. */
491                 ex_to_string = "peer is active";
492                 drbd_warn(connection, "Peer is primary, outdating myself.\n");
493                 mask.disk = D_MASK;
494                 val.disk = D_OUTDATED;
495                 break;
496         case 7:
497                 if (fp != FP_STONITH)
498                         drbd_err(connection, "fence-peer() = 7 && fencing != Stonith !!!\n");
499                 ex_to_string = "peer was stonithed";
500                 mask.pdsk = D_MASK;
501                 val.pdsk = D_OUTDATED;
502                 break;
503         default:
504                 /* The script is broken ... */
505                 drbd_err(connection, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
506                 return false; /* Eventually leave IO frozen */
507         }
508
509         drbd_info(connection, "fence-peer helper returned %d (%s)\n",
510                   (r>>8) & 0xff, ex_to_string);
511
512  out:
513
514         /* Not using
515            conn_request_state(connection, mask, val, CS_VERBOSE);
516            here, because we might were able to re-establish the connection in the
517            meantime. */
518         spin_lock_irq(&connection->resource->req_lock);
519         if (connection->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &connection->flags)) {
520                 if (connection->connect_cnt != connect_cnt)
521                         /* In case the connection was established and droped
522                            while the fence-peer handler was running, ignore it */
523                         drbd_info(connection, "Ignoring fence-peer exit code\n");
524                 else
525                         _conn_request_state(connection, mask, val, CS_VERBOSE);
526         }
527         spin_unlock_irq(&connection->resource->req_lock);
528
529         return conn_highest_pdsk(connection) <= D_OUTDATED;
530 }
531
532 static int _try_outdate_peer_async(void *data)
533 {
534         struct drbd_connection *connection = (struct drbd_connection *)data;
535
536         conn_try_outdate_peer(connection);
537
538         kref_put(&connection->kref, drbd_destroy_connection);
539         return 0;
540 }
541
542 void conn_try_outdate_peer_async(struct drbd_connection *connection)
543 {
544         struct task_struct *opa;
545
546         kref_get(&connection->kref);
547         /* We may just have force_sig()'ed this thread
548          * to get it out of some blocking network function.
549          * Clear signals; otherwise kthread_run(), which internally uses
550          * wait_on_completion_killable(), will mistake our pending signal
551          * for a new fatal signal and fail. */
552         flush_signals(current);
553         opa = kthread_run(_try_outdate_peer_async, connection, "drbd_async_h");
554         if (IS_ERR(opa)) {
555                 drbd_err(connection, "out of mem, failed to invoke fence-peer helper\n");
556                 kref_put(&connection->kref, drbd_destroy_connection);
557         }
558 }
559
560 enum drbd_state_rv
561 drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
562 {
563         const int max_tries = 4;
564         enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
565         struct net_conf *nc;
566         int try = 0;
567         int forced = 0;
568         union drbd_state mask, val;
569
570         if (new_role == R_PRIMARY) {
571                 struct drbd_connection *connection;
572
573                 /* Detect dead peers as soon as possible.  */
574
575                 rcu_read_lock();
576                 for_each_connection(connection, device->resource)
577                         request_ping(connection);
578                 rcu_read_unlock();
579         }
580
581         mutex_lock(device->state_mutex);
582
583         mask.i = 0; mask.role = R_MASK;
584         val.i  = 0; val.role  = new_role;
585
586         while (try++ < max_tries) {
587                 rv = _drbd_request_state(device, mask, val, CS_WAIT_COMPLETE);
588
589                 /* in case we first succeeded to outdate,
590                  * but now suddenly could establish a connection */
591                 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
592                         val.pdsk = 0;
593                         mask.pdsk = 0;
594                         continue;
595                 }
596
597                 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
598                     (device->state.disk < D_UP_TO_DATE &&
599                      device->state.disk >= D_INCONSISTENT)) {
600                         mask.disk = D_MASK;
601                         val.disk  = D_UP_TO_DATE;
602                         forced = 1;
603                         continue;
604                 }
605
606                 if (rv == SS_NO_UP_TO_DATE_DISK &&
607                     device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
608                         D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
609
610                         if (conn_try_outdate_peer(first_peer_device(device)->connection)) {
611                                 val.disk = D_UP_TO_DATE;
612                                 mask.disk = D_MASK;
613                         }
614                         continue;
615                 }
616
617                 if (rv == SS_NOTHING_TO_DO)
618                         goto out;
619                 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
620                         if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) {
621                                 drbd_warn(device, "Forced into split brain situation!\n");
622                                 mask.pdsk = D_MASK;
623                                 val.pdsk  = D_OUTDATED;
624
625                         }
626                         continue;
627                 }
628                 if (rv == SS_TWO_PRIMARIES) {
629                         /* Maybe the peer is detected as dead very soon...
630                            retry at most once more in this case. */
631                         int timeo;
632                         rcu_read_lock();
633                         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
634                         timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
635                         rcu_read_unlock();
636                         schedule_timeout_interruptible(timeo);
637                         if (try < max_tries)
638                                 try = max_tries - 1;
639                         continue;
640                 }
641                 if (rv < SS_SUCCESS) {
642                         rv = _drbd_request_state(device, mask, val,
643                                                 CS_VERBOSE + CS_WAIT_COMPLETE);
644                         if (rv < SS_SUCCESS)
645                                 goto out;
646                 }
647                 break;
648         }
649
650         if (rv < SS_SUCCESS)
651                 goto out;
652
653         if (forced)
654                 drbd_warn(device, "Forced to consider local data as UpToDate!\n");
655
656         /* Wait until nothing is on the fly :) */
657         wait_event(device->misc_wait, atomic_read(&device->ap_pending_cnt) == 0);
658
659         /* FIXME also wait for all pending P_BARRIER_ACK? */
660
661         if (new_role == R_SECONDARY) {
662                 set_disk_ro(device->vdisk, true);
663                 if (get_ldev(device)) {
664                         device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
665                         put_ldev(device);
666                 }
667         } else {
668                 /* Called from drbd_adm_set_role only.
669                  * We are still holding the conf_update mutex. */
670                 nc = first_peer_device(device)->connection->net_conf;
671                 if (nc)
672                         nc->discard_my_data = 0; /* without copy; single bit op is atomic */
673
674                 set_disk_ro(device->vdisk, false);
675                 if (get_ldev(device)) {
676                         if (((device->state.conn < C_CONNECTED ||
677                                device->state.pdsk <= D_FAILED)
678                               && device->ldev->md.uuid[UI_BITMAP] == 0) || forced)
679                                 drbd_uuid_new_current(device);
680
681                         device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
682                         put_ldev(device);
683                 }
684         }
685
686         /* writeout of activity log covered areas of the bitmap
687          * to stable storage done in after state change already */
688
689         if (device->state.conn >= C_WF_REPORT_PARAMS) {
690                 /* if this was forced, we should consider sync */
691                 if (forced)
692                         drbd_send_uuids(first_peer_device(device));
693                 drbd_send_current_state(first_peer_device(device));
694         }
695
696         drbd_md_sync(device);
697
698         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
699 out:
700         mutex_unlock(device->state_mutex);
701         return rv;
702 }
703
/* Map the error code of a *_from_attrs() parser to a short text suitable
 * for the info text of a netlink reply.  Any code without a dedicated
 * text is reported as an invalid attribute value. */
static const char *from_attrs_err_to_txt(int err)
{
	switch (err) {
	case -ENOMSG:
		return "required attribute missing";
	case -EOPNOTSUPP:
		return "unknown mandatory attribute";
	case -EEXIST:
		return "can not change invariant setting";
	default:
		return "invalid attribute value";
	}
}
711
712 int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
713 {
714         struct drbd_config_context adm_ctx;
715         struct set_role_parms parms;
716         int err;
717         enum drbd_ret_code retcode;
718
719         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
720         if (!adm_ctx.reply_skb)
721                 return retcode;
722         if (retcode != NO_ERROR)
723                 goto out;
724
725         memset(&parms, 0, sizeof(parms));
726         if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
727                 err = set_role_parms_from_attrs(&parms, info);
728                 if (err) {
729                         retcode = ERR_MANDATORY_TAG;
730                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
731                         goto out;
732                 }
733         }
734         genl_unlock();
735         mutex_lock(&adm_ctx.resource->adm_mutex);
736
737         if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
738                 retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate);
739         else
740                 retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0);
741
742         mutex_unlock(&adm_ctx.resource->adm_mutex);
743         genl_lock();
744 out:
745         drbd_adm_finish(&adm_ctx, info, retcode);
746         return 0;
747 }
748
749 /* Initializes the md.*_offset members, so we are able to find
750  * the on disk meta data.
751  *
752  * We currently have two possible layouts:
753  * external:
754  *   |----------- md_size_sect ------------------|
755  *   [ 4k superblock ][ activity log ][  Bitmap  ]
756  *   | al_offset == 8 |
757  *   | bm_offset = al_offset + X      |
758  *  ==> bitmap sectors = md_size_sect - bm_offset
759  *
760  * internal:
761  *            |----------- md_size_sect ------------------|
762  * [data.....][  Bitmap  ][ activity log ][ 4k superblock ]
763  *                        | al_offset < 0 |
764  *            | bm_offset = al_offset - Y |
765  *  ==> bitmap sectors = Y = al_offset - bm_offset
766  *
767  *  Activity log size used to be fixed 32kB,
768  *  but is about to become configurable.
769  */
770 static void drbd_md_set_sector_offsets(struct drbd_device *device,
771                                        struct drbd_backing_dev *bdev)
772 {
773         sector_t md_size_sect = 0;
774         unsigned int al_size_sect = bdev->md.al_size_4k * 8;
775
776         bdev->md.md_offset = drbd_md_ss(bdev);
777
778         switch (bdev->md.meta_dev_idx) {
779         default:
780                 /* v07 style fixed size indexed meta data */
781                 bdev->md.md_size_sect = MD_128MB_SECT;
782                 bdev->md.al_offset = MD_4kB_SECT;
783                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
784                 break;
785         case DRBD_MD_INDEX_FLEX_EXT:
786                 /* just occupy the full device; unit: sectors */
787                 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
788                 bdev->md.al_offset = MD_4kB_SECT;
789                 bdev->md.bm_offset = MD_4kB_SECT + al_size_sect;
790                 break;
791         case DRBD_MD_INDEX_INTERNAL:
792         case DRBD_MD_INDEX_FLEX_INT:
793                 /* al size is still fixed */
794                 bdev->md.al_offset = -al_size_sect;
795                 /* we need (slightly less than) ~ this much bitmap sectors: */
796                 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
797                 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
798                 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
799                 md_size_sect = ALIGN(md_size_sect, 8);
800
801                 /* plus the "drbd meta data super block",
802                  * and the activity log; */
803                 md_size_sect += MD_4kB_SECT + al_size_sect;
804
805                 bdev->md.md_size_sect = md_size_sect;
806                 /* bitmap offset is adjusted by 'super' block size */
807                 bdev->md.bm_offset   = -md_size_sect + MD_4kB_SECT;
808                 break;
809         }
810 }
811
/* Pretty print a size given in KB into @buf, scaled to the largest unit
 * that keeps the number below 10000 (capped at EB), and return @buf.
 *
 * @buf has to provide room for at least 9 bytes including the
 * trailing NUL: -1ULL ==> "16384 EB" */
char *ppsize(char *buf, unsigned long long size)
{
	static const char prefix[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	const int last = sizeof(prefix) - 1;
	int idx;

	for (idx = 0; idx < last && size >= 10000; idx++) {
		/* divide by 1024, rounding up when the dropped half bit is set */
		size = (size >> 10) + ((size & (1 << 9)) ? 1 : 0);
	}

	sprintf(buf, "%u %cB", (unsigned)size, prefix[idx]);
	return buf;
}
828
/* There is still a theoretical deadlock when called from the receiver
 * on a D_INCONSISTENT R_PRIMARY:
 *  a remote READ does inc_ap_bio, and the receiver would need to receive
 *  the answer packet from the remote to dec_ap_bio again.
 *  The receiver, in receive_sizes(), comes here and
 *  waits for ap_bio_cnt == 0. -> deadlock.
 * But this cannot actually happen, because:
 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 *  (not connected, or bad/no disk on peer):
 *  see drbd_fail_request_early, ap_bio_cnt is zero.
 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 *  peer may not initiate a resize.
 */
/* Note: these are not to be confused with
 * drbd_adm_suspend_io/drbd_adm_resume_io,
 * which are (sub) state changes triggered by the admin (drbdsetup),
 * and can be long lived.
 * This changes a bit in device->flags, is triggered by drbd internals,
 * and should be short-lived. */
848 void drbd_suspend_io(struct drbd_device *device)
849 {
850         set_bit(SUSPEND_IO, &device->flags);
851         if (drbd_suspended(device))
852                 return;
853         wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
854 }
855
/* Counterpart of drbd_suspend_io(): clears the SUSPEND_IO bit in
 * device->flags again, then wakes up whoever sleeps on device->misc_wait. */
void drbd_resume_io(struct drbd_device *device)
{
        clear_bit(SUSPEND_IO, &device->flags);
        wake_up(&device->misc_wait);
}
861
862 /**
863  * drbd_determine_dev_size() -  Sets the right device size obeying all constraints
864  * @device:     DRBD device.
865  *
866  * Returns 0 on success, negative return values indicate errors.
867  * You should call drbd_md_sync() after calling this function.
868  */
869 enum determine_dev_size
870 drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
871 {
872         sector_t prev_first_sect, prev_size; /* previous meta location */
873         sector_t la_size_sect, u_size;
874         struct drbd_md *md = &device->ldev->md;
875         u32 prev_al_stripe_size_4k;
876         u32 prev_al_stripes;
877         sector_t size;
878         char ppb[10];
879         void *buffer;
880
881         int md_moved, la_size_changed;
882         enum determine_dev_size rv = DS_UNCHANGED;
883
884         /* race:
885          * application request passes inc_ap_bio,
886          * but then cannot get an AL-reference.
887          * this function later may wait on ap_bio_cnt == 0. -> deadlock.
888          *
889          * to avoid that:
890          * Suspend IO right here.
891          * still lock the act_log to not trigger ASSERTs there.
892          */
893         drbd_suspend_io(device);
894         buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */
895         if (!buffer) {
896                 drbd_resume_io(device);
897                 return DS_ERROR;
898         }
899
900         /* no wait necessary anymore, actually we could assert that */
901         wait_event(device->al_wait, lc_try_lock(device->act_log));
902
903         prev_first_sect = drbd_md_first_sector(device->ldev);
904         prev_size = device->ldev->md.md_size_sect;
905         la_size_sect = device->ldev->md.la_size_sect;
906
907         if (rs) {
908                 /* rs is non NULL if we should change the AL layout only */
909
910                 prev_al_stripes = md->al_stripes;
911                 prev_al_stripe_size_4k = md->al_stripe_size_4k;
912
913                 md->al_stripes = rs->al_stripes;
914                 md->al_stripe_size_4k = rs->al_stripe_size / 4;
915                 md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
916         }
917
918         drbd_md_set_sector_offsets(device, device->ldev);
919
920         rcu_read_lock();
921         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
922         rcu_read_unlock();
923         size = drbd_new_dev_size(device, device->ldev, u_size, flags & DDSF_FORCED);
924
925         if (size < la_size_sect) {
926                 if (rs && u_size == 0) {
927                         /* Remove "rs &&" later. This check should always be active, but
928                            right now the receiver expects the permissive behavior */
929                         drbd_warn(device, "Implicit shrink not allowed. "
930                                  "Use --size=%llus for explicit shrink.\n",
931                                  (unsigned long long)size);
932                         rv = DS_ERROR_SHRINK;
933                 }
934                 if (u_size > size)
935                         rv = DS_ERROR_SPACE_MD;
936                 if (rv != DS_UNCHANGED)
937                         goto err_out;
938         }
939
940         if (drbd_get_capacity(device->this_bdev) != size ||
941             drbd_bm_capacity(device) != size) {
942                 int err;
943                 err = drbd_bm_resize(device, size, !(flags & DDSF_NO_RESYNC));
944                 if (unlikely(err)) {
945                         /* currently there is only one error: ENOMEM! */
946                         size = drbd_bm_capacity(device)>>1;
947                         if (size == 0) {
948                                 drbd_err(device, "OUT OF MEMORY! "
949                                     "Could not allocate bitmap!\n");
950                         } else {
951                                 drbd_err(device, "BM resizing failed. "
952                                     "Leaving size unchanged at size = %lu KB\n",
953                                     (unsigned long)size);
954                         }
955                         rv = DS_ERROR;
956                 }
957                 /* racy, see comments above. */
958                 drbd_set_my_capacity(device, size);
959                 device->ldev->md.la_size_sect = size;
960                 drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
961                      (unsigned long long)size>>1);
962         }
963         if (rv <= DS_ERROR)
964                 goto err_out;
965
966         la_size_changed = (la_size_sect != device->ldev->md.la_size_sect);
967
968         md_moved = prev_first_sect != drbd_md_first_sector(device->ldev)
969                 || prev_size       != device->ldev->md.md_size_sect;
970
971         if (la_size_changed || md_moved || rs) {
972                 u32 prev_flags;
973
974                 drbd_al_shrink(device); /* All extents inactive. */
975
976                 prev_flags = md->flags;
977                 md->flags &= ~MDF_PRIMARY_IND;
978                 drbd_md_write(device, buffer);
979
980                 drbd_info(device, "Writing the whole bitmap, %s\n",
981                          la_size_changed && md_moved ? "size changed and md moved" :
982                          la_size_changed ? "size changed" : "md moved");
983                 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
984                 drbd_bitmap_io(device, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
985                                "size changed", BM_LOCKED_MASK);
986                 drbd_initialize_al(device, buffer);
987
988                 md->flags = prev_flags;
989                 drbd_md_write(device, buffer);
990
991                 if (rs)
992                         drbd_info(device, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
993                                   md->al_stripes, md->al_stripe_size_4k * 4);
994         }
995
996         if (size > la_size_sect)
997                 rv = la_size_sect ? DS_GREW : DS_GREW_FROM_ZERO;
998         if (size < la_size_sect)
999                 rv = DS_SHRUNK;
1000
1001         if (0) {
1002         err_out:
1003                 if (rs) {
1004                         md->al_stripes = prev_al_stripes;
1005                         md->al_stripe_size_4k = prev_al_stripe_size_4k;
1006                         md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
1007
1008                         drbd_md_set_sector_offsets(device, device->ldev);
1009                 }
1010         }
1011         lc_unlock(device->act_log);
1012         wake_up(&device->al_wait);
1013         drbd_md_put_buffer(device);
1014         drbd_resume_io(device);
1015
1016         return rv;
1017 }
1018
1019 sector_t
1020 drbd_new_dev_size(struct drbd_device *device, struct drbd_backing_dev *bdev,
1021                   sector_t u_size, int assume_peer_has_space)
1022 {
1023         sector_t p_size = device->p_size;   /* partner's disk size. */
1024         sector_t la_size_sect = bdev->md.la_size_sect; /* last agreed size. */
1025         sector_t m_size; /* my size */
1026         sector_t size = 0;
1027
1028         m_size = drbd_get_max_capacity(bdev);
1029
1030         if (device->state.conn < C_CONNECTED && assume_peer_has_space) {
1031                 drbd_warn(device, "Resize while not connected was forced by the user!\n");
1032                 p_size = m_size;
1033         }
1034
1035         if (p_size && m_size) {
1036                 size = min_t(sector_t, p_size, m_size);
1037         } else {
1038                 if (la_size_sect) {
1039                         size = la_size_sect;
1040                         if (m_size && m_size < size)
1041                                 size = m_size;
1042                         if (p_size && p_size < size)
1043                                 size = p_size;
1044                 } else {
1045                         if (m_size)
1046                                 size = m_size;
1047                         if (p_size)
1048                                 size = p_size;
1049                 }
1050         }
1051
1052         if (size == 0)
1053                 drbd_err(device, "Both nodes diskless!\n");
1054
1055         if (u_size) {
1056                 if (u_size > size)
1057                         drbd_err(device, "Requested disk size is too big (%lu > %lu)\n",
1058                             (unsigned long)u_size>>1, (unsigned long)size>>1);
1059                 else
1060                         size = u_size;
1061         }
1062
1063         return size;
1064 }
1065
1066 /**
1067  * drbd_check_al_size() - Ensures that the AL is of the right size
1068  * @device:     DRBD device.
1069  *
1070  * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
1071  * failed, and 0 on success. You should call drbd_md_sync() after you called
1072  * this function.
1073  */
1074 static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1075 {
1076         struct lru_cache *n, *t;
1077         struct lc_element *e;
1078         unsigned int in_use;
1079         int i;
1080
1081         if (device->act_log &&
1082             device->act_log->nr_elements == dc->al_extents)
1083                 return 0;
1084
1085         in_use = 0;
1086         t = device->act_log;
1087         n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
1088                 dc->al_extents, sizeof(struct lc_element), 0);
1089
1090         if (n == NULL) {
1091                 drbd_err(device, "Cannot allocate act_log lru!\n");
1092                 return -ENOMEM;
1093         }
1094         spin_lock_irq(&device->al_lock);
1095         if (t) {
1096                 for (i = 0; i < t->nr_elements; i++) {
1097                         e = lc_element_by_index(t, i);
1098                         if (e->refcnt)
1099                                 drbd_err(device, "refcnt(%d)==%d\n",
1100                                     e->lc_number, e->refcnt);
1101                         in_use += e->refcnt;
1102                 }
1103         }
1104         if (!in_use)
1105                 device->act_log = n;
1106         spin_unlock_irq(&device->al_lock);
1107         if (in_use) {
1108                 drbd_err(device, "Activity log still in use!\n");
1109                 lc_destroy(n);
1110                 return -EBUSY;
1111         } else {
1112                 if (t)
1113                         lc_destroy(t);
1114         }
1115         drbd_md_mark_dirty(device); /* we changed device->act_log->nr_elemens */
1116         return 0;
1117 }
1118
1119 static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size)
1120 {
1121         struct request_queue * const q = device->rq_queue;
1122         unsigned int max_hw_sectors = max_bio_size >> 9;
1123         unsigned int max_segments = 0;
1124         struct request_queue *b = NULL;
1125
1126         if (get_ldev_if_state(device, D_ATTACHING)) {
1127                 b = device->ldev->backing_bdev->bd_disk->queue;
1128
1129                 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1130                 rcu_read_lock();
1131                 max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs;
1132                 rcu_read_unlock();
1133
1134                 blk_set_stacking_limits(&q->limits);
1135                 blk_queue_max_write_same_sectors(q, 0);
1136         }
1137
1138         blk_queue_logical_block_size(q, 512);
1139         blk_queue_max_hw_sectors(q, max_hw_sectors);
1140         /* This is the workaround for "bio would need to, but cannot, be split" */
1141         blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
1142         blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
1143
1144         if (b) {
1145                 struct drbd_connection *connection = first_peer_device(device)->connection;
1146
1147                 if (blk_queue_discard(b) &&
1148                     (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) {
1149                         /* For now, don't allow more than one activity log extent worth of data
1150                          * to be discarded in one go. We may need to rework drbd_al_begin_io()
1151                          * to allow for even larger discard ranges */
1152                         q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS;
1153
1154                         queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
1155                         /* REALLY? Is stacking secdiscard "legal"? */
1156                         if (blk_queue_secdiscard(b))
1157                                 queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q);
1158                 } else {
1159                         q->limits.max_discard_sectors = 0;
1160                         queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q);
1161                         queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q);
1162                 }
1163
1164                 blk_queue_stack_limits(q, b);
1165
1166                 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
1167                         drbd_info(device, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
1168                                  q->backing_dev_info.ra_pages,
1169                                  b->backing_dev_info.ra_pages);
1170                         q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1171                 }
1172                 put_ldev(device);
1173         }
1174 }
1175
1176 void drbd_reconsider_max_bio_size(struct drbd_device *device)
1177 {
1178         unsigned int now, new, local, peer;
1179
1180         now = queue_max_hw_sectors(device->rq_queue) << 9;
1181         local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
1182         peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
1183
1184         if (get_ldev_if_state(device, D_ATTACHING)) {
1185                 local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9;
1186                 device->local_max_bio_size = local;
1187                 put_ldev(device);
1188         }
1189         local = min(local, DRBD_MAX_BIO_SIZE);
1190
1191         /* We may ignore peer limits if the peer is modern enough.
1192            Because new from 8.3.8 onwards the peer can use multiple
1193            BIOs for a single peer_request */
1194         if (device->state.conn >= C_WF_REPORT_PARAMS) {
1195                 if (first_peer_device(device)->connection->agreed_pro_version < 94)
1196                         peer = min(device->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
1197                         /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
1198                 else if (first_peer_device(device)->connection->agreed_pro_version == 94)
1199                         peer = DRBD_MAX_SIZE_H80_PACKET;
1200                 else if (first_peer_device(device)->connection->agreed_pro_version < 100)
1201                         peer = DRBD_MAX_BIO_SIZE_P95;  /* drbd 8.3.8 onwards, before 8.4.0 */
1202                 else
1203                         peer = DRBD_MAX_BIO_SIZE;
1204
1205                 /* We may later detach and re-attach on a disconnected Primary.
1206                  * Avoid this setting to jump back in that case.
1207                  * We want to store what we know the peer DRBD can handle,
1208                  * not what the peer IO backend can handle. */
1209                 if (peer > device->peer_max_bio_size)
1210                         device->peer_max_bio_size = peer;
1211         }
1212         new = min(local, peer);
1213
1214         if (device->state.role == R_PRIMARY && new < now)
1215                 drbd_err(device, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
1216
1217         if (new != now)
1218                 drbd_info(device, "max BIO size = %u\n", new);
1219
1220         drbd_setup_queue_param(device, new);
1221 }
1222
/* Starts the worker thread of @connection, then calls
 * drbd_flush_workqueue() on the connection's sender_work queue.
 * See conn_reconfig_done() for the matching "stop again if still
 * unconfigured" step. */
static void conn_reconfig_start(struct drbd_connection *connection)
{
        drbd_thread_start(&connection->worker);
        drbd_flush_workqueue(&connection->sender_work);
}
1229
1230 /* if still unconfigured, stops worker again. */
1231 static void conn_reconfig_done(struct drbd_connection *connection)
1232 {
1233         bool stop_threads;
1234         spin_lock_irq(&connection->resource->req_lock);
1235         stop_threads = conn_all_vols_unconf(connection) &&
1236                 connection->cstate == C_STANDALONE;
1237         spin_unlock_irq(&connection->resource->req_lock);
1238         if (stop_threads) {
1239                 /* asender is implicitly stopped by receiver
1240                  * in conn_disconnect() */
1241                 drbd_thread_stop(&connection->receiver);
1242                 drbd_thread_stop(&connection->worker);
1243         }
1244 }
1245
1246 /* Make sure IO is suspended before calling this function(). */
1247 static void drbd_suspend_al(struct drbd_device *device)
1248 {
1249         int s = 0;
1250
1251         if (!lc_try_lock(device->act_log)) {
1252                 drbd_warn(device, "Failed to lock al in drbd_suspend_al()\n");
1253                 return;
1254         }
1255
1256         drbd_al_shrink(device);
1257         spin_lock_irq(&device->resource->req_lock);
1258         if (device->state.conn < C_CONNECTED)
1259                 s = !test_and_set_bit(AL_SUSPENDED, &device->flags);
1260         spin_unlock_irq(&device->resource->req_lock);
1261         lc_unlock(device->act_log);
1262
1263         if (s)
1264                 drbd_info(device, "Suspended AL updates\n");
1265 }
1266
1267
1268 static bool should_set_defaults(struct genl_info *info)
1269 {
1270         unsigned flags = ((struct drbd_genlmsghdr*)info->userhdr)->flags;
1271         return 0 != (flags & DRBD_GENL_F_SET_DEFAULTS);
1272 }
1273
1274 static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1275 {
1276         /* This is limited by 16 bit "slot" numbers,
1277          * and by available on-disk context storage.
1278          *
1279          * Also (u16)~0 is special (denotes a "free" extent).
1280          *
1281          * One transaction occupies one 4kB on-disk block,
1282          * we have n such blocks in the on disk ring buffer,
1283          * the "current" transaction may fail (n-1),
1284          * and there is 919 slot numbers context information per transaction.
1285          *
1286          * 72 transaction blocks amounts to more than 2**16 context slots,
1287          * so cap there first.
1288          */
1289         const unsigned int max_al_nr = DRBD_AL_EXTENTS_MAX;
1290         const unsigned int sufficient_on_disk =
1291                 (max_al_nr + AL_CONTEXT_PER_TRANSACTION -1)
1292                 /AL_CONTEXT_PER_TRANSACTION;
1293
1294         unsigned int al_size_4k = bdev->md.al_size_4k;
1295
1296         if (al_size_4k > sufficient_on_disk)
1297                 return max_al_nr;
1298
1299         return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1300 }
1301
1302 int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1303 {
1304         struct drbd_config_context adm_ctx;
1305         enum drbd_ret_code retcode;
1306         struct drbd_device *device;
1307         struct disk_conf *new_disk_conf, *old_disk_conf;
1308         struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
1309         int err, fifo_size;
1310
1311         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1312         if (!adm_ctx.reply_skb)
1313                 return retcode;
1314         if (retcode != NO_ERROR)
1315                 goto finish;
1316
1317         device = adm_ctx.device;
1318         mutex_lock(&adm_ctx.resource->adm_mutex);
1319
1320         /* we also need a disk
1321          * to change the options on */
1322         if (!get_ldev(device)) {
1323                 retcode = ERR_NO_DISK;
1324                 goto out;
1325         }
1326
1327         new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
1328         if (!new_disk_conf) {
1329                 retcode = ERR_NOMEM;
1330                 goto fail;
1331         }
1332
1333         mutex_lock(&device->resource->conf_update);
1334         old_disk_conf = device->ldev->disk_conf;
1335         *new_disk_conf = *old_disk_conf;
1336         if (should_set_defaults(info))
1337                 set_disk_conf_defaults(new_disk_conf);
1338
1339         err = disk_conf_from_attrs_for_change(new_disk_conf, info);
1340         if (err && err != -ENOMSG) {
1341                 retcode = ERR_MANDATORY_TAG;
1342                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1343                 goto fail_unlock;
1344         }
1345
1346         if (!expect(new_disk_conf->resync_rate >= 1))
1347                 new_disk_conf->resync_rate = 1;
1348
1349         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1350                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1351         if (new_disk_conf->al_extents > drbd_al_extents_max(device->ldev))
1352                 new_disk_conf->al_extents = drbd_al_extents_max(device->ldev);
1353
1354         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1355                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1356
1357         fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1358         if (fifo_size != device->rs_plan_s->size) {
1359                 new_plan = fifo_alloc(fifo_size);
1360                 if (!new_plan) {
1361                         drbd_err(device, "kmalloc of fifo_buffer failed");
1362                         retcode = ERR_NOMEM;
1363                         goto fail_unlock;
1364                 }
1365         }
1366
1367         drbd_suspend_io(device);
1368         wait_event(device->al_wait, lc_try_lock(device->act_log));
1369         drbd_al_shrink(device);
1370         err = drbd_check_al_size(device, new_disk_conf);
1371         lc_unlock(device->act_log);
1372         wake_up(&device->al_wait);
1373         drbd_resume_io(device);
1374
1375         if (err) {
1376                 retcode = ERR_NOMEM;
1377                 goto fail_unlock;
1378         }
1379
1380         write_lock_irq(&global_state_lock);
1381         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1382         if (retcode == NO_ERROR) {
1383                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
1384                 drbd_resync_after_changed(device);
1385         }
1386         write_unlock_irq(&global_state_lock);
1387
1388         if (retcode != NO_ERROR)
1389                 goto fail_unlock;
1390
1391         if (new_plan) {
1392                 old_plan = device->rs_plan_s;
1393                 rcu_assign_pointer(device->rs_plan_s, new_plan);
1394         }
1395
1396         mutex_unlock(&device->resource->conf_update);
1397
1398         if (new_disk_conf->al_updates)
1399                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1400         else
1401                 device->ldev->md.flags |= MDF_AL_DISABLED;
1402
1403         if (new_disk_conf->md_flushes)
1404                 clear_bit(MD_NO_FUA, &device->flags);
1405         else
1406                 set_bit(MD_NO_FUA, &device->flags);
1407
1408         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1409
1410         drbd_md_sync(device);
1411
1412         if (device->state.conn >= C_CONNECTED) {
1413                 struct drbd_peer_device *peer_device;
1414
1415                 for_each_peer_device(peer_device, device)
1416                         drbd_send_sync_param(peer_device);
1417         }
1418
1419         synchronize_rcu();
1420         kfree(old_disk_conf);
1421         kfree(old_plan);
1422         mod_timer(&device->request_timer, jiffies + HZ);
1423         goto success;
1424
1425 fail_unlock:
1426         mutex_unlock(&device->resource->conf_update);
1427  fail:
1428         kfree(new_disk_conf);
1429         kfree(new_plan);
1430 success:
1431         put_ldev(device);
1432  out:
1433         mutex_unlock(&adm_ctx.resource->adm_mutex);
1434  finish:
1435         drbd_adm_finish(&adm_ctx, info, retcode);
1436         return 0;
1437 }
1438
1439 int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1440 {
1441         struct drbd_config_context adm_ctx;
1442         struct drbd_device *device;
1443         int err;
1444         enum drbd_ret_code retcode;
1445         enum determine_dev_size dd;
1446         sector_t max_possible_sectors;
1447         sector_t min_md_device_sectors;
1448         struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
1449         struct disk_conf *new_disk_conf = NULL;
1450         struct block_device *bdev;
1451         struct lru_cache *resync_lru = NULL;
1452         struct fifo_buffer *new_plan = NULL;
1453         union drbd_state ns, os;
1454         enum drbd_state_rv rv;
1455         struct net_conf *nc;
1456
1457         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1458         if (!adm_ctx.reply_skb)
1459                 return retcode;
1460         if (retcode != NO_ERROR)
1461                 goto finish;
1462
1463         device = adm_ctx.device;
1464         mutex_lock(&adm_ctx.resource->adm_mutex);
1465         conn_reconfig_start(first_peer_device(device)->connection);
1466
1467         /* if you want to reconfigure, please tear down first */
1468         if (device->state.disk > D_DISKLESS) {
1469                 retcode = ERR_DISK_CONFIGURED;
1470                 goto fail;
1471         }
1472         /* It may just now have detached because of IO error.  Make sure
1473          * drbd_ldev_destroy is done already, we may end up here very fast,
1474          * e.g. if someone calls attach from the on-io-error handler,
1475          * to realize a "hot spare" feature (not that I'd recommend that) */
1476         wait_event(device->misc_wait, !atomic_read(&device->local_cnt));
1477
1478         /* make sure there is no leftover from previous force-detach attempts */
1479         clear_bit(FORCE_DETACH, &device->flags);
1480         clear_bit(WAS_IO_ERROR, &device->flags);
1481         clear_bit(WAS_READ_ERROR, &device->flags);
1482
1483         /* and no leftover from previously aborted resync or verify, either */
1484         device->rs_total = 0;
1485         device->rs_failed = 0;
1486         atomic_set(&device->rs_pending_cnt, 0);
1487
1488         /* allocation not in the IO path, drbdsetup context */
1489         nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1490         if (!nbc) {
1491                 retcode = ERR_NOMEM;
1492                 goto fail;
1493         }
1494         spin_lock_init(&nbc->md.uuid_lock);
1495
1496         new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
1497         if (!new_disk_conf) {
1498                 retcode = ERR_NOMEM;
1499                 goto fail;
1500         }
1501         nbc->disk_conf = new_disk_conf;
1502
1503         set_disk_conf_defaults(new_disk_conf);
1504         err = disk_conf_from_attrs(new_disk_conf, info);
1505         if (err) {
1506                 retcode = ERR_MANDATORY_TAG;
1507                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1508                 goto fail;
1509         }
1510
1511         if (new_disk_conf->c_plan_ahead > DRBD_C_PLAN_AHEAD_MAX)
1512                 new_disk_conf->c_plan_ahead = DRBD_C_PLAN_AHEAD_MAX;
1513
1514         new_plan = fifo_alloc((new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ);
1515         if (!new_plan) {
1516                 retcode = ERR_NOMEM;
1517                 goto fail;
1518         }
1519
1520         if (new_disk_conf->meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
1521                 retcode = ERR_MD_IDX_INVALID;
1522                 goto fail;
1523         }
1524
1525         write_lock_irq(&global_state_lock);
1526         retcode = drbd_resync_after_valid(device, new_disk_conf->resync_after);
1527         write_unlock_irq(&global_state_lock);
1528         if (retcode != NO_ERROR)
1529                 goto fail;
1530
1531         rcu_read_lock();
1532         nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
1533         if (nc) {
1534                 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1535                         rcu_read_unlock();
1536                         retcode = ERR_STONITH_AND_PROT_A;
1537                         goto fail;
1538                 }
1539         }
1540         rcu_read_unlock();
1541
1542         bdev = blkdev_get_by_path(new_disk_conf->backing_dev,
1543                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL, device);
1544         if (IS_ERR(bdev)) {
1545                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->backing_dev,
1546                         PTR_ERR(bdev));
1547                 retcode = ERR_OPEN_DISK;
1548                 goto fail;
1549         }
1550         nbc->backing_bdev = bdev;
1551
1552         /*
1553          * meta_dev_idx >= 0: external fixed size, possibly multiple
1554          * drbd sharing one meta device.  TODO in that case, paranoia
1555          * check that [md_bdev, meta_dev_idx] is not yet used by some
1556          * other drbd minor!  (if you use drbd.conf + drbdadm, that
1557          * should check it for you already; but if you don't, or
1558          * someone fooled it, we need to double check here)
1559          */
1560         bdev = blkdev_get_by_path(new_disk_conf->meta_dev,
1561                                   FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1562                                   (new_disk_conf->meta_dev_idx < 0) ?
1563                                   (void *)device : (void *)drbd_m_holder);
1564         if (IS_ERR(bdev)) {
1565                 drbd_err(device, "open(\"%s\") failed with %ld\n", new_disk_conf->meta_dev,
1566                         PTR_ERR(bdev));
1567                 retcode = ERR_OPEN_MD_DISK;
1568                 goto fail;
1569         }
1570         nbc->md_bdev = bdev;
1571
1572         if ((nbc->backing_bdev == nbc->md_bdev) !=
1573             (new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1574              new_disk_conf->meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1575                 retcode = ERR_MD_IDX_INVALID;
1576                 goto fail;
1577         }
1578
1579         resync_lru = lc_create("resync", drbd_bm_ext_cache,
1580                         1, 61, sizeof(struct bm_extent),
1581                         offsetof(struct bm_extent, lce));
1582         if (!resync_lru) {
1583                 retcode = ERR_NOMEM;
1584                 goto fail;
1585         }
1586
1587         /* Read our meta data super block early.
1588          * This also sets other on-disk offsets. */
1589         retcode = drbd_md_read(device, nbc);
1590         if (retcode != NO_ERROR)
1591                 goto fail;
1592
1593         if (new_disk_conf->al_extents < DRBD_AL_EXTENTS_MIN)
1594                 new_disk_conf->al_extents = DRBD_AL_EXTENTS_MIN;
1595         if (new_disk_conf->al_extents > drbd_al_extents_max(nbc))
1596                 new_disk_conf->al_extents = drbd_al_extents_max(nbc);
1597
1598         if (drbd_get_max_capacity(nbc) < new_disk_conf->disk_size) {
1599                 drbd_err(device, "max capacity %llu smaller than disk size %llu\n",
1600                         (unsigned long long) drbd_get_max_capacity(nbc),
1601                         (unsigned long long) new_disk_conf->disk_size);
1602                 retcode = ERR_DISK_TOO_SMALL;
1603                 goto fail;
1604         }
1605
1606         if (new_disk_conf->meta_dev_idx < 0) {
1607                 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1608                 /* at least one MB, otherwise it does not make sense */
1609                 min_md_device_sectors = (2<<10);
1610         } else {
1611                 max_possible_sectors = DRBD_MAX_SECTORS;
1612                 min_md_device_sectors = MD_128MB_SECT * (new_disk_conf->meta_dev_idx + 1);
1613         }
1614
1615         if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1616                 retcode = ERR_MD_DISK_TOO_SMALL;
1617                 drbd_warn(device, "refusing attach: md-device too small, "
1618                      "at least %llu sectors needed for this meta-disk type\n",
1619                      (unsigned long long) min_md_device_sectors);
1620                 goto fail;
1621         }
1622
1623         /* Make sure the new disk is big enough
1624          * (we may currently be R_PRIMARY with no local disk...) */
1625         if (drbd_get_max_capacity(nbc) <
1626             drbd_get_capacity(device->this_bdev)) {
1627                 retcode = ERR_DISK_TOO_SMALL;
1628                 goto fail;
1629         }
1630
1631         nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1632
1633         if (nbc->known_size > max_possible_sectors) {
1634                 drbd_warn(device, "==> truncating very big lower level device "
1635                         "to currently maximum possible %llu sectors <==\n",
1636                         (unsigned long long) max_possible_sectors);
1637                 if (new_disk_conf->meta_dev_idx >= 0)
1638                         drbd_warn(device, "==>> using internal or flexible "
1639                                       "meta data may help <<==\n");
1640         }
1641
1642         drbd_suspend_io(device);
1643         /* also wait for the last barrier ack. */
1644         /* FIXME see also https://daiquiri.linbit/cgi-bin/bugzilla/show_bug.cgi?id=171
1645          * We need a way to either ignore barrier acks for barriers sent before a device
1646          * was attached, or a way to wait for all pending barrier acks to come in.
1647          * As barriers are counted per resource,
1648          * we'd need to suspend io on all devices of a resource.
1649          */
1650         wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1651         /* and for any other previously queued work */
1652         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
1653
1654         rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1655         retcode = rv;  /* FIXME: Type mismatch. */
1656         drbd_resume_io(device);
1657         if (rv < SS_SUCCESS)
1658                 goto fail;
1659
1660         if (!get_ldev_if_state(device, D_ATTACHING))
1661                 goto force_diskless;
1662
1663         if (!device->bitmap) {
1664                 if (drbd_bm_init(device)) {
1665                         retcode = ERR_NOMEM;
1666                         goto force_diskless_dec;
1667                 }
1668         }
1669
1670         if (device->state.conn < C_CONNECTED &&
1671             device->state.role == R_PRIMARY && device->ed_uuid &&
1672             (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1673                 drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
1674                     (unsigned long long)device->ed_uuid);
1675                 retcode = ERR_DATA_NOT_CURRENT;
1676                 goto force_diskless_dec;
1677         }
1678
1679         /* Since we are diskless, fix the activity log first... */
1680         if (drbd_check_al_size(device, new_disk_conf)) {
1681                 retcode = ERR_NOMEM;
1682                 goto force_diskless_dec;
1683         }
1684
1685         /* Prevent shrinking of consistent devices ! */
1686         if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1687             drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
1688                 drbd_warn(device, "refusing to truncate a consistent device\n");
1689                 retcode = ERR_DISK_TOO_SMALL;
1690                 goto force_diskless_dec;
1691         }
1692
1693         /* Reset the "barriers don't work" bits here, then force meta data to
1694          * be written, to ensure we determine if barriers are supported. */
1695         if (new_disk_conf->md_flushes)
1696                 clear_bit(MD_NO_FUA, &device->flags);
1697         else
1698                 set_bit(MD_NO_FUA, &device->flags);
1699
1700         /* Point of no return reached.
1701          * Devices and memory are no longer released by error cleanup below.
1702          * now device takes over responsibility, and the state engine should
1703          * clean it up somewhere.  */
1704         D_ASSERT(device, device->ldev == NULL);
1705         device->ldev = nbc;
1706         device->resync = resync_lru;
1707         device->rs_plan_s = new_plan;
1708         nbc = NULL;
1709         resync_lru = NULL;
1710         new_disk_conf = NULL;
1711         new_plan = NULL;
1712
1713         drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush);
1714
1715         if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1716                 set_bit(CRASHED_PRIMARY, &device->flags);
1717         else
1718                 clear_bit(CRASHED_PRIMARY, &device->flags);
1719
1720         if (drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1721             !(device->state.role == R_PRIMARY && device->resource->susp_nod))
1722                 set_bit(CRASHED_PRIMARY, &device->flags);
1723
1724         device->send_cnt = 0;
1725         device->recv_cnt = 0;
1726         device->read_cnt = 0;
1727         device->writ_cnt = 0;
1728
1729         drbd_reconsider_max_bio_size(device);
1730
1731         /* If I am currently not R_PRIMARY,
1732          * but meta data primary indicator is set,
1733          * I just now recover from a hard crash,
1734          * and have been R_PRIMARY before that crash.
1735          *
1736          * Now, if I had no connection before that crash
1737          * (have been degraded R_PRIMARY), chances are that
1738          * I won't find my peer now either.
1739          *
1740          * In that case, and _only_ in that case,
1741          * we use the degr-wfc-timeout instead of the default,
1742          * so we can automatically recover from a crash of a
1743          * degraded but active "cluster" after a certain timeout.
1744          */
1745         clear_bit(USE_DEGR_WFC_T, &device->flags);
1746         if (device->state.role != R_PRIMARY &&
1747              drbd_md_test_flag(device->ldev, MDF_PRIMARY_IND) &&
1748             !drbd_md_test_flag(device->ldev, MDF_CONNECTED_IND))
1749                 set_bit(USE_DEGR_WFC_T, &device->flags);
1750
1751         dd = drbd_determine_dev_size(device, 0, NULL);
1752         if (dd <= DS_ERROR) {
1753                 retcode = ERR_NOMEM_BITMAP;
1754                 goto force_diskless_dec;
1755         } else if (dd == DS_GREW)
1756                 set_bit(RESYNC_AFTER_NEG, &device->flags);
1757
1758         if (drbd_md_test_flag(device->ldev, MDF_FULL_SYNC) ||
1759             (test_bit(CRASHED_PRIMARY, &device->flags) &&
1760              drbd_md_test_flag(device->ldev, MDF_AL_DISABLED))) {
1761                 drbd_info(device, "Assuming that all blocks are out of sync "
1762                      "(aka FullSync)\n");
1763                 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
1764                         "set_n_write from attaching", BM_LOCKED_MASK)) {
1765                         retcode = ERR_IO_MD_DISK;
1766                         goto force_diskless_dec;
1767                 }
1768         } else {
1769                 if (drbd_bitmap_io(device, &drbd_bm_read,
1770                         "read from attaching", BM_LOCKED_MASK)) {
1771                         retcode = ERR_IO_MD_DISK;
1772                         goto force_diskless_dec;
1773                 }
1774         }
1775
1776         if (_drbd_bm_total_weight(device) == drbd_bm_bits(device))
1777                 drbd_suspend_al(device); /* IO is still suspended here... */
1778
1779         spin_lock_irq(&device->resource->req_lock);
1780         os = drbd_read_state(device);
1781         ns = os;
1782         /* If MDF_CONSISTENT is not set go into inconsistent state,
1783            otherwise investigate MDF_WasUpToDate...
1784            If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1785            otherwise into D_CONSISTENT state.
1786         */
1787         if (drbd_md_test_flag(device->ldev, MDF_CONSISTENT)) {
1788                 if (drbd_md_test_flag(device->ldev, MDF_WAS_UP_TO_DATE))
1789                         ns.disk = D_CONSISTENT;
1790                 else
1791                         ns.disk = D_OUTDATED;
1792         } else {
1793                 ns.disk = D_INCONSISTENT;
1794         }
1795
1796         if (drbd_md_test_flag(device->ldev, MDF_PEER_OUT_DATED))
1797                 ns.pdsk = D_OUTDATED;
1798
1799         rcu_read_lock();
1800         if (ns.disk == D_CONSISTENT &&
1801             (ns.pdsk == D_OUTDATED || rcu_dereference(device->ldev->disk_conf)->fencing == FP_DONT_CARE))
1802                 ns.disk = D_UP_TO_DATE;
1803
1804         /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1805            MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1806            this point, because drbd_request_state() modifies these
1807            flags. */
1808
1809         if (rcu_dereference(device->ldev->disk_conf)->al_updates)
1810                 device->ldev->md.flags &= ~MDF_AL_DISABLED;
1811         else
1812                 device->ldev->md.flags |= MDF_AL_DISABLED;
1813
1814         rcu_read_unlock();
1815
1816         /* In case we are C_CONNECTED postpone any decision on the new disk
1817            state after the negotiation phase. */
1818         if (device->state.conn == C_CONNECTED) {
1819                 device->new_state_tmp.i = ns.i;
1820                 ns.i = os.i;
1821                 ns.disk = D_NEGOTIATING;
1822
1823                 /* We expect to receive up-to-date UUIDs soon.
1824                    To avoid a race in receive_state, free p_uuid while
1825                    holding req_lock. I.e. atomic with the state change */
1826                 kfree(device->p_uuid);
1827                 device->p_uuid = NULL;
1828         }
1829
1830         rv = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
1831         spin_unlock_irq(&device->resource->req_lock);
1832
1833         if (rv < SS_SUCCESS)
1834                 goto force_diskless_dec;
1835
1836         mod_timer(&device->request_timer, jiffies + HZ);
1837
1838         if (device->state.role == R_PRIMARY)
1839                 device->ldev->md.uuid[UI_CURRENT] |=  (u64)1;
1840         else
1841                 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1842
1843         drbd_md_mark_dirty(device);
1844         drbd_md_sync(device);
1845
1846         kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1847         put_ldev(device);
1848         conn_reconfig_done(first_peer_device(device)->connection);
1849         mutex_unlock(&adm_ctx.resource->adm_mutex);
1850         drbd_adm_finish(&adm_ctx, info, retcode);
1851         return 0;
1852
1853  force_diskless_dec:
1854         put_ldev(device);
1855  force_diskless:
1856         drbd_force_state(device, NS(disk, D_DISKLESS));
1857         drbd_md_sync(device);
1858  fail:
1859         conn_reconfig_done(first_peer_device(device)->connection);
1860         if (nbc) {
1861                 if (nbc->backing_bdev)
1862                         blkdev_put(nbc->backing_bdev,
1863                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1864                 if (nbc->md_bdev)
1865                         blkdev_put(nbc->md_bdev,
1866                                    FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1867                 kfree(nbc);
1868         }
1869         kfree(new_disk_conf);
1870         lc_destroy(resync_lru);
1871         kfree(new_plan);
1872         mutex_unlock(&adm_ctx.resource->adm_mutex);
1873  finish:
1874         drbd_adm_finish(&adm_ctx, info, retcode);
1875         return 0;
1876 }
1877
1878 static int adm_detach(struct drbd_device *device, int force)
1879 {
1880         enum drbd_state_rv retcode;
1881         int ret;
1882
1883         if (force) {
1884                 set_bit(FORCE_DETACH, &device->flags);
1885                 drbd_force_state(device, NS(disk, D_FAILED));
1886                 retcode = SS_SUCCESS;
1887                 goto out;
1888         }
1889
1890         drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1891         drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */
1892         retcode = drbd_request_state(device, NS(disk, D_FAILED));
1893         drbd_md_put_buffer(device);
1894         /* D_FAILED will transition to DISKLESS. */
1895         ret = wait_event_interruptible(device->misc_wait,
1896                         device->state.disk != D_FAILED);
1897         drbd_resume_io(device);
1898         if ((int)retcode == (int)SS_IS_DISKLESS)
1899                 retcode = SS_NOTHING_TO_DO;
1900         if (ret)
1901                 retcode = ERR_INTR;
1902 out:
1903         return retcode;
1904 }
1905
1906 /* Detaching the disk is a process in multiple stages.  First we need to lock
1907  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1908  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1909  * internal references as well.
1910  * Only then we have finally detached. */
1911 int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
1912 {
1913         struct drbd_config_context adm_ctx;
1914         enum drbd_ret_code retcode;
1915         struct detach_parms parms = { };
1916         int err;
1917
1918         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
1919         if (!adm_ctx.reply_skb)
1920                 return retcode;
1921         if (retcode != NO_ERROR)
1922                 goto out;
1923
1924         if (info->attrs[DRBD_NLA_DETACH_PARMS]) {
1925                 err = detach_parms_from_attrs(&parms, info);
1926                 if (err) {
1927                         retcode = ERR_MANDATORY_TAG;
1928                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
1929                         goto out;
1930                 }
1931         }
1932
1933         mutex_lock(&adm_ctx.resource->adm_mutex);
1934         retcode = adm_detach(adm_ctx.device, parms.force_detach);
1935         mutex_unlock(&adm_ctx.resource->adm_mutex);
1936 out:
1937         drbd_adm_finish(&adm_ctx, info, retcode);
1938         return 0;
1939 }
1940
1941 static bool conn_resync_running(struct drbd_connection *connection)
1942 {
1943         struct drbd_peer_device *peer_device;
1944         bool rv = false;
1945         int vnr;
1946
1947         rcu_read_lock();
1948         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1949                 struct drbd_device *device = peer_device->device;
1950                 if (device->state.conn == C_SYNC_SOURCE ||
1951                     device->state.conn == C_SYNC_TARGET ||
1952                     device->state.conn == C_PAUSED_SYNC_S ||
1953                     device->state.conn == C_PAUSED_SYNC_T) {
1954                         rv = true;
1955                         break;
1956                 }
1957         }
1958         rcu_read_unlock();
1959
1960         return rv;
1961 }
1962
1963 static bool conn_ov_running(struct drbd_connection *connection)
1964 {
1965         struct drbd_peer_device *peer_device;
1966         bool rv = false;
1967         int vnr;
1968
1969         rcu_read_lock();
1970         idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1971                 struct drbd_device *device = peer_device->device;
1972                 if (device->state.conn == C_VERIFY_S ||
1973                     device->state.conn == C_VERIFY_T) {
1974                         rv = true;
1975                         break;
1976                 }
1977         }
1978         rcu_read_unlock();
1979
1980         return rv;
1981 }
1982
1983 static enum drbd_ret_code
1984 _check_net_options(struct drbd_connection *connection, struct net_conf *old_net_conf, struct net_conf *new_net_conf)
1985 {
1986         struct drbd_peer_device *peer_device;
1987         int i;
1988
1989         if (old_net_conf && connection->cstate == C_WF_REPORT_PARAMS && connection->agreed_pro_version < 100) {
1990                 if (new_net_conf->wire_protocol != old_net_conf->wire_protocol)
1991                         return ERR_NEED_APV_100;
1992
1993                 if (new_net_conf->two_primaries != old_net_conf->two_primaries)
1994                         return ERR_NEED_APV_100;
1995
1996                 if (strcmp(new_net_conf->integrity_alg, old_net_conf->integrity_alg))
1997                         return ERR_NEED_APV_100;
1998         }
1999
2000         if (!new_net_conf->two_primaries &&
2001             conn_highest_role(connection) == R_PRIMARY &&
2002             conn_highest_peer(connection) == R_PRIMARY)
2003                 return ERR_NEED_ALLOW_TWO_PRI;
2004
2005         if (new_net_conf->two_primaries &&
2006             (new_net_conf->wire_protocol != DRBD_PROT_C))
2007                 return ERR_NOT_PROTO_C;
2008
2009         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2010                 struct drbd_device *device = peer_device->device;
2011                 if (get_ldev(device)) {
2012                         enum drbd_fencing_p fp = rcu_dereference(device->ldev->disk_conf)->fencing;
2013                         put_ldev(device);
2014                         if (new_net_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH)
2015                                 return ERR_STONITH_AND_PROT_A;
2016                 }
2017                 if (device->state.role == R_PRIMARY && new_net_conf->discard_my_data)
2018                         return ERR_DISCARD_IMPOSSIBLE;
2019         }
2020
2021         if (new_net_conf->on_congestion != OC_BLOCK && new_net_conf->wire_protocol != DRBD_PROT_A)
2022                 return ERR_CONG_NOT_PROTO_A;
2023
2024         return NO_ERROR;
2025 }
2026
2027 static enum drbd_ret_code
2028 check_net_options(struct drbd_connection *connection, struct net_conf *new_net_conf)
2029 {
2030         static enum drbd_ret_code rv;
2031         struct drbd_peer_device *peer_device;
2032         int i;
2033
2034         rcu_read_lock();
2035         rv = _check_net_options(connection, rcu_dereference(connection->net_conf), new_net_conf);
2036         rcu_read_unlock();
2037
2038         /* connection->volumes protected by genl_lock() here */
2039         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2040                 struct drbd_device *device = peer_device->device;
2041                 if (!device->bitmap) {
2042                         if (drbd_bm_init(device))
2043                                 return ERR_NOMEM;
2044                 }
2045         }
2046
2047         return rv;
2048 }
2049
/*
 * Hash transforms allocated for a new net_conf by alloc_crypto() and
 * released by free_crypto().  A member stays NULL when the corresponding
 * algorithm name in the net_conf is empty.
 */
struct crypto {
	struct crypto_hash *verify_tfm;		/* from net_conf->verify_alg */
	struct crypto_hash *csums_tfm;		/* from net_conf->csums_alg */
	struct crypto_hash *cram_hmac_tfm;	/* "hmac(<net_conf->cram_hmac_alg>)" */
	struct crypto_hash *integrity_tfm;	/* from net_conf->integrity_alg */
};
2056
2057 static int
2058 alloc_hash(struct crypto_hash **tfm, char *tfm_name, int err_alg)
2059 {
2060         if (!tfm_name[0])
2061                 return NO_ERROR;
2062
2063         *tfm = crypto_alloc_hash(tfm_name, 0, CRYPTO_ALG_ASYNC);
2064         if (IS_ERR(*tfm)) {
2065                 *tfm = NULL;
2066                 return err_alg;
2067         }
2068
2069         return NO_ERROR;
2070 }
2071
2072 static enum drbd_ret_code
2073 alloc_crypto(struct crypto *crypto, struct net_conf *new_net_conf)
2074 {
2075         char hmac_name[CRYPTO_MAX_ALG_NAME];
2076         enum drbd_ret_code rv;
2077
2078         rv = alloc_hash(&crypto->csums_tfm, new_net_conf->csums_alg,
2079                        ERR_CSUMS_ALG);
2080         if (rv != NO_ERROR)
2081                 return rv;
2082         rv = alloc_hash(&crypto->verify_tfm, new_net_conf->verify_alg,
2083                        ERR_VERIFY_ALG);
2084         if (rv != NO_ERROR)
2085                 return rv;
2086         rv = alloc_hash(&crypto->integrity_tfm, new_net_conf->integrity_alg,
2087                        ERR_INTEGRITY_ALG);
2088         if (rv != NO_ERROR)
2089                 return rv;
2090         if (new_net_conf->cram_hmac_alg[0] != 0) {
2091                 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
2092                          new_net_conf->cram_hmac_alg);
2093
2094                 rv = alloc_hash(&crypto->cram_hmac_tfm, hmac_name,
2095                                ERR_AUTH_ALG);
2096         }
2097
2098         return rv;
2099 }
2100
2101 static void free_crypto(struct crypto *crypto)
2102 {
2103         crypto_free_hash(crypto->cram_hmac_tfm);
2104         crypto_free_hash(crypto->integrity_tfm);
2105         crypto_free_hash(crypto->csums_tfm);
2106         crypto_free_hash(crypto->verify_tfm);
2107 }
2108
2109 int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
2110 {
2111         struct drbd_config_context adm_ctx;
2112         enum drbd_ret_code retcode;
2113         struct drbd_connection *connection;
2114         struct net_conf *old_net_conf, *new_net_conf = NULL;
2115         int err;
2116         int ovr; /* online verify running */
2117         int rsr; /* re-sync running */
2118         struct crypto crypto = { };
2119
2120         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2121         if (!adm_ctx.reply_skb)
2122                 return retcode;
2123         if (retcode != NO_ERROR)
2124                 goto finish;
2125
2126         connection = adm_ctx.connection;
2127         mutex_lock(&adm_ctx.resource->adm_mutex);
2128
2129         new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
2130         if (!new_net_conf) {
2131                 retcode = ERR_NOMEM;
2132                 goto out;
2133         }
2134
2135         conn_reconfig_start(connection);
2136
2137         mutex_lock(&connection->data.mutex);
2138         mutex_lock(&connection->resource->conf_update);
2139         old_net_conf = connection->net_conf;
2140
2141         if (!old_net_conf) {
2142                 drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect");
2143                 retcode = ERR_INVALID_REQUEST;
2144                 goto fail;
2145         }
2146
2147         *new_net_conf = *old_net_conf;
2148         if (should_set_defaults(info))
2149                 set_net_conf_defaults(new_net_conf);
2150
2151         err = net_conf_from_attrs_for_change(new_net_conf, info);
2152         if (err && err != -ENOMSG) {
2153                 retcode = ERR_MANDATORY_TAG;
2154                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2155                 goto fail;
2156         }
2157
2158         retcode = check_net_options(connection, new_net_conf);
2159         if (retcode != NO_ERROR)
2160                 goto fail;
2161
2162         /* re-sync running */
2163         rsr = conn_resync_running(connection);
2164         if (rsr && strcmp(new_net_conf->csums_alg, old_net_conf->csums_alg)) {
2165                 retcode = ERR_CSUMS_RESYNC_RUNNING;
2166                 goto fail;
2167         }
2168
2169         /* online verify running */
2170         ovr = conn_ov_running(connection);
2171         if (ovr && strcmp(new_net_conf->verify_alg, old_net_conf->verify_alg)) {
2172                 retcode = ERR_VERIFY_RUNNING;
2173                 goto fail;
2174         }
2175
2176         retcode = alloc_crypto(&crypto, new_net_conf);
2177         if (retcode != NO_ERROR)
2178                 goto fail;
2179
2180         rcu_assign_pointer(connection->net_conf, new_net_conf);
2181
2182         if (!rsr) {
2183                 crypto_free_hash(connection->csums_tfm);
2184                 connection->csums_tfm = crypto.csums_tfm;
2185                 crypto.csums_tfm = NULL;
2186         }
2187         if (!ovr) {
2188                 crypto_free_hash(connection->verify_tfm);
2189                 connection->verify_tfm = crypto.verify_tfm;
2190                 crypto.verify_tfm = NULL;
2191         }
2192
2193         crypto_free_hash(connection->integrity_tfm);
2194         connection->integrity_tfm = crypto.integrity_tfm;
2195         if (connection->cstate >= C_WF_REPORT_PARAMS && connection->agreed_pro_version >= 100)
2196                 /* Do this without trying to take connection->data.mutex again.  */
2197                 __drbd_send_protocol(connection, P_PROTOCOL_UPDATE);
2198
2199         crypto_free_hash(connection->cram_hmac_tfm);
2200         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2201
2202         mutex_unlock(&connection->resource->conf_update);
2203         mutex_unlock(&connection->data.mutex);
2204         synchronize_rcu();
2205         kfree(old_net_conf);
2206
2207         if (connection->cstate >= C_WF_REPORT_PARAMS) {
2208                 struct drbd_peer_device *peer_device;
2209                 int vnr;
2210
2211                 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
2212                         drbd_send_sync_param(peer_device);
2213         }
2214
2215         goto done;
2216
2217  fail:
2218         mutex_unlock(&connection->resource->conf_update);
2219         mutex_unlock(&connection->data.mutex);
2220         free_crypto(&crypto);
2221         kfree(new_net_conf);
2222  done:
2223         conn_reconfig_done(connection);
2224  out:
2225         mutex_unlock(&adm_ctx.resource->adm_mutex);
2226  finish:
2227         drbd_adm_finish(&adm_ctx, info, retcode);
2228         return 0;
2229 }
2230
2231 int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
2232 {
2233         struct drbd_config_context adm_ctx;
2234         struct drbd_peer_device *peer_device;
2235         struct net_conf *old_net_conf, *new_net_conf = NULL;
2236         struct crypto crypto = { };
2237         struct drbd_resource *resource;
2238         struct drbd_connection *connection;
2239         enum drbd_ret_code retcode;
2240         int i;
2241         int err;
2242
2243         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2244
2245         if (!adm_ctx.reply_skb)
2246                 return retcode;
2247         if (retcode != NO_ERROR)
2248                 goto out;
2249         if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) {
2250                 drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing");
2251                 retcode = ERR_INVALID_REQUEST;
2252                 goto out;
2253         }
2254
2255         /* No need for _rcu here. All reconfiguration is
2256          * strictly serialized on genl_lock(). We are protected against
2257          * concurrent reconfiguration/addition/deletion */
2258         for_each_resource(resource, &drbd_resources) {
2259                 for_each_connection(connection, resource) {
2260                         if (nla_len(adm_ctx.my_addr) == connection->my_addr_len &&
2261                             !memcmp(nla_data(adm_ctx.my_addr), &connection->my_addr,
2262                                     connection->my_addr_len)) {
2263                                 retcode = ERR_LOCAL_ADDR;
2264                                 goto out;
2265                         }
2266
2267                         if (nla_len(adm_ctx.peer_addr) == connection->peer_addr_len &&
2268                             !memcmp(nla_data(adm_ctx.peer_addr), &connection->peer_addr,
2269                                     connection->peer_addr_len)) {
2270                                 retcode = ERR_PEER_ADDR;
2271                                 goto out;
2272                         }
2273                 }
2274         }
2275
2276         mutex_lock(&adm_ctx.resource->adm_mutex);
2277         connection = first_connection(adm_ctx.resource);
2278         conn_reconfig_start(connection);
2279
2280         if (connection->cstate > C_STANDALONE) {
2281                 retcode = ERR_NET_CONFIGURED;
2282                 goto fail;
2283         }
2284
2285         /* allocation not in the IO path, drbdsetup / netlink process context */
2286         new_net_conf = kzalloc(sizeof(*new_net_conf), GFP_KERNEL);
2287         if (!new_net_conf) {
2288                 retcode = ERR_NOMEM;
2289                 goto fail;
2290         }
2291
2292         set_net_conf_defaults(new_net_conf);
2293
2294         err = net_conf_from_attrs(new_net_conf, info);
2295         if (err && err != -ENOMSG) {
2296                 retcode = ERR_MANDATORY_TAG;
2297                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2298                 goto fail;
2299         }
2300
2301         retcode = check_net_options(connection, new_net_conf);
2302         if (retcode != NO_ERROR)
2303                 goto fail;
2304
2305         retcode = alloc_crypto(&crypto, new_net_conf);
2306         if (retcode != NO_ERROR)
2307                 goto fail;
2308
2309         ((char *)new_net_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
2310
2311         drbd_flush_workqueue(&connection->sender_work);
2312
2313         mutex_lock(&adm_ctx.resource->conf_update);
2314         old_net_conf = connection->net_conf;
2315         if (old_net_conf) {
2316                 retcode = ERR_NET_CONFIGURED;
2317                 mutex_unlock(&adm_ctx.resource->conf_update);
2318                 goto fail;
2319         }
2320         rcu_assign_pointer(connection->net_conf, new_net_conf);
2321
2322         conn_free_crypto(connection);
2323         connection->cram_hmac_tfm = crypto.cram_hmac_tfm;
2324         connection->integrity_tfm = crypto.integrity_tfm;
2325         connection->csums_tfm = crypto.csums_tfm;
2326         connection->verify_tfm = crypto.verify_tfm;
2327
2328         connection->my_addr_len = nla_len(adm_ctx.my_addr);
2329         memcpy(&connection->my_addr, nla_data(adm_ctx.my_addr), connection->my_addr_len);
2330         connection->peer_addr_len = nla_len(adm_ctx.peer_addr);
2331         memcpy(&connection->peer_addr, nla_data(adm_ctx.peer_addr), connection->peer_addr_len);
2332
2333         mutex_unlock(&adm_ctx.resource->conf_update);
2334
2335         rcu_read_lock();
2336         idr_for_each_entry(&connection->peer_devices, peer_device, i) {
2337                 struct drbd_device *device = peer_device->device;
2338                 device->send_cnt = 0;
2339                 device->recv_cnt = 0;
2340         }
2341         rcu_read_unlock();
2342
2343         retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2344
2345         conn_reconfig_done(connection);
2346         mutex_unlock(&adm_ctx.resource->adm_mutex);
2347         drbd_adm_finish(&adm_ctx, info, retcode);
2348         return 0;
2349
2350 fail:
2351         free_crypto(&crypto);
2352         kfree(new_net_conf);
2353
2354         conn_reconfig_done(connection);
2355         mutex_unlock(&adm_ctx.resource->adm_mutex);
2356 out:
2357         drbd_adm_finish(&adm_ctx, info, retcode);
2358         return 0;
2359 }
2360
2361 static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
2362 {
2363         enum drbd_state_rv rv;
2364
2365         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2366                         force ? CS_HARD : 0);
2367
2368         switch (rv) {
2369         case SS_NOTHING_TO_DO:
2370                 break;
2371         case SS_ALREADY_STANDALONE:
2372                 return SS_SUCCESS;
2373         case SS_PRIMARY_NOP:
2374                 /* Our state checking code wants to see the peer outdated. */
2375                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING, pdsk, D_OUTDATED), 0);
2376
2377                 if (rv == SS_OUTDATE_WO_CONN) /* lost connection before graceful disconnect succeeded */
2378                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_VERBOSE);
2379
2380                 break;
2381         case SS_CW_FAILED_BY_PEER:
2382                 /* The peer probably wants to see us outdated. */
2383                 rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
2384                                                         disk, D_OUTDATED), 0);
2385                 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2386                         rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
2387                                         CS_HARD);
2388                 }
2389                 break;
2390         default:;
2391                 /* no special handling necessary */
2392         }
2393
2394         if (rv >= SS_SUCCESS) {
2395                 enum drbd_state_rv rv2;
2396                 /* No one else can reconfigure the network while I am here.
2397                  * The state handling only uses drbd_thread_stop_nowait(),
2398                  * we want to really wait here until the receiver is no more.
2399                  */
2400                 drbd_thread_stop(&connection->receiver);
2401
2402                 /* Race breaker.  This additional state change request may be
2403                  * necessary, if this was a forced disconnect during a receiver
2404                  * restart.  We may have "killed" the receiver thread just
2405                  * after drbd_receiver() returned.  Typically, we should be
2406                  * C_STANDALONE already, now, and this becomes a no-op.
2407                  */
2408                 rv2 = conn_request_state(connection, NS(conn, C_STANDALONE),
2409                                 CS_VERBOSE | CS_HARD);
2410                 if (rv2 < SS_SUCCESS)
2411                         drbd_err(connection,
2412                                 "unexpected rv2=%d in conn_try_disconnect()\n",
2413                                 rv2);
2414         }
2415         return rv;
2416 }
2417
2418 int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
2419 {
2420         struct drbd_config_context adm_ctx;
2421         struct disconnect_parms parms;
2422         struct drbd_connection *connection;
2423         enum drbd_state_rv rv;
2424         enum drbd_ret_code retcode;
2425         int err;
2426
2427         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION);
2428         if (!adm_ctx.reply_skb)
2429                 return retcode;
2430         if (retcode != NO_ERROR)
2431                 goto fail;
2432
2433         connection = adm_ctx.connection;
2434         memset(&parms, 0, sizeof(parms));
2435         if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
2436                 err = disconnect_parms_from_attrs(&parms, info);
2437                 if (err) {
2438                         retcode = ERR_MANDATORY_TAG;
2439                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2440                         goto fail;
2441                 }
2442         }
2443
2444         mutex_lock(&adm_ctx.resource->adm_mutex);
2445         rv = conn_try_disconnect(connection, parms.force_disconnect);
2446         if (rv < SS_SUCCESS)
2447                 retcode = rv;  /* FIXME: Type mismatch. */
2448         else
2449                 retcode = NO_ERROR;
2450         mutex_unlock(&adm_ctx.resource->adm_mutex);
2451  fail:
2452         drbd_adm_finish(&adm_ctx, info, retcode);
2453         return 0;
2454 }
2455
2456 void resync_after_online_grow(struct drbd_device *device)
2457 {
2458         int iass; /* I am sync source */
2459
2460         drbd_info(device, "Resync of new storage after online grow\n");
2461         if (device->state.role != device->state.peer)
2462                 iass = (device->state.role == R_PRIMARY);
2463         else
2464                 iass = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
2465
2466         if (iass)
2467                 drbd_start_resync(device, C_SYNC_SOURCE);
2468         else
2469                 _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2470 }
2471
2472 int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
2473 {
2474         struct drbd_config_context adm_ctx;
2475         struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
2476         struct resize_parms rs;
2477         struct drbd_device *device;
2478         enum drbd_ret_code retcode;
2479         enum determine_dev_size dd;
2480         bool change_al_layout = false;
2481         enum dds_flags ddsf;
2482         sector_t u_size;
2483         int err;
2484
2485         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2486         if (!adm_ctx.reply_skb)
2487                 return retcode;
2488         if (retcode != NO_ERROR)
2489                 goto finish;
2490
2491         mutex_lock(&adm_ctx.resource->adm_mutex);
2492         device = adm_ctx.device;
2493         if (!get_ldev(device)) {
2494                 retcode = ERR_NO_DISK;
2495                 goto fail;
2496         }
2497
2498         memset(&rs, 0, sizeof(struct resize_parms));
2499         rs.al_stripes = device->ldev->md.al_stripes;
2500         rs.al_stripe_size = device->ldev->md.al_stripe_size_4k * 4;
2501         if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
2502                 err = resize_parms_from_attrs(&rs, info);
2503                 if (err) {
2504                         retcode = ERR_MANDATORY_TAG;
2505                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2506                         goto fail_ldev;
2507                 }
2508         }
2509
2510         if (device->state.conn > C_CONNECTED) {
2511                 retcode = ERR_RESIZE_RESYNC;
2512                 goto fail_ldev;
2513         }
2514
2515         if (device->state.role == R_SECONDARY &&
2516             device->state.peer == R_SECONDARY) {
2517                 retcode = ERR_NO_PRIMARY;
2518                 goto fail_ldev;
2519         }
2520
2521         if (rs.no_resync && first_peer_device(device)->connection->agreed_pro_version < 93) {
2522                 retcode = ERR_NEED_APV_93;
2523                 goto fail_ldev;
2524         }
2525
2526         rcu_read_lock();
2527         u_size = rcu_dereference(device->ldev->disk_conf)->disk_size;
2528         rcu_read_unlock();
2529         if (u_size != (sector_t)rs.resize_size) {
2530                 new_disk_conf = kmalloc(sizeof(struct disk_conf), GFP_KERNEL);
2531                 if (!new_disk_conf) {
2532                         retcode = ERR_NOMEM;
2533                         goto fail_ldev;
2534                 }
2535         }
2536
2537         if (device->ldev->md.al_stripes != rs.al_stripes ||
2538             device->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
2539                 u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
2540
2541                 if (al_size_k > (16 * 1024 * 1024)) {
2542                         retcode = ERR_MD_LAYOUT_TOO_BIG;
2543                         goto fail_ldev;
2544                 }
2545
2546                 if (al_size_k < MD_32kB_SECT/2) {
2547                         retcode = ERR_MD_LAYOUT_TOO_SMALL;
2548                         goto fail_ldev;
2549                 }
2550
2551                 if (device->state.conn != C_CONNECTED && !rs.resize_force) {
2552                         retcode = ERR_MD_LAYOUT_CONNECTED;
2553                         goto fail_ldev;
2554                 }
2555
2556                 change_al_layout = true;
2557         }
2558
2559         if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev))
2560                 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
2561
2562         if (new_disk_conf) {
2563                 mutex_lock(&device->resource->conf_update);
2564                 old_disk_conf = device->ldev->disk_conf;
2565                 *new_disk_conf = *old_disk_conf;
2566                 new_disk_conf->disk_size = (sector_t)rs.resize_size;
2567                 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
2568                 mutex_unlock(&device->resource->conf_update);
2569                 synchronize_rcu();
2570                 kfree(old_disk_conf);
2571         }
2572
2573         ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
2574         dd = drbd_determine_dev_size(device, ddsf, change_al_layout ? &rs : NULL);
2575         drbd_md_sync(device);
2576         put_ldev(device);
2577         if (dd == DS_ERROR) {
2578                 retcode = ERR_NOMEM_BITMAP;
2579                 goto fail;
2580         } else if (dd == DS_ERROR_SPACE_MD) {
2581                 retcode = ERR_MD_LAYOUT_NO_FIT;
2582                 goto fail;
2583         } else if (dd == DS_ERROR_SHRINK) {
2584                 retcode = ERR_IMPLICIT_SHRINK;
2585                 goto fail;
2586         }
2587
2588         if (device->state.conn == C_CONNECTED) {
2589                 if (dd == DS_GREW)
2590                         set_bit(RESIZE_PENDING, &device->flags);
2591
2592                 drbd_send_uuids(first_peer_device(device));
2593                 drbd_send_sizes(first_peer_device(device), 1, ddsf);
2594         }
2595
2596  fail:
2597         mutex_unlock(&adm_ctx.resource->adm_mutex);
2598  finish:
2599         drbd_adm_finish(&adm_ctx, info, retcode);
2600         return 0;
2601
2602  fail_ldev:
2603         put_ldev(device);
2604         goto fail;
2605 }
2606
2607 int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
2608 {
2609         struct drbd_config_context adm_ctx;
2610         enum drbd_ret_code retcode;
2611         struct res_opts res_opts;
2612         int err;
2613
2614         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
2615         if (!adm_ctx.reply_skb)
2616                 return retcode;
2617         if (retcode != NO_ERROR)
2618                 goto fail;
2619
2620         res_opts = adm_ctx.resource->res_opts;
2621         if (should_set_defaults(info))
2622                 set_res_opts_defaults(&res_opts);
2623
2624         err = res_opts_from_attrs(&res_opts, info);
2625         if (err && err != -ENOMSG) {
2626                 retcode = ERR_MANDATORY_TAG;
2627                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
2628                 goto fail;
2629         }
2630
2631         mutex_lock(&adm_ctx.resource->adm_mutex);
2632         err = set_resource_options(adm_ctx.resource, &res_opts);
2633         if (err) {
2634                 retcode = ERR_INVALID_REQUEST;
2635                 if (err == -ENOMEM)
2636                         retcode = ERR_NOMEM;
2637         }
2638         mutex_unlock(&adm_ctx.resource->adm_mutex);
2639
2640 fail:
2641         drbd_adm_finish(&adm_ctx, info, retcode);
2642         return 0;
2643 }
2644
2645 int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2646 {
2647         struct drbd_config_context adm_ctx;
2648         struct drbd_device *device;
2649         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2650
2651         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2652         if (!adm_ctx.reply_skb)
2653                 return retcode;
2654         if (retcode != NO_ERROR)
2655                 goto out;
2656
2657         mutex_lock(&adm_ctx.resource->adm_mutex);
2658         device = adm_ctx.device;
2659
2660         /* If there is still bitmap IO pending, probably because of a previous
2661          * resync just being finished, wait for it before requesting a new resync.
2662          * Also wait for it's after_state_ch(). */
2663         drbd_suspend_io(device);
2664         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2665         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2666
2667         /* If we happen to be C_STANDALONE R_SECONDARY, just change to
2668          * D_INCONSISTENT, and set all bits in the bitmap.  Otherwise,
2669          * try to start a resync handshake as sync target for full sync.
2670          */
2671         if (device->state.conn == C_STANDALONE && device->state.role == R_SECONDARY) {
2672                 retcode = drbd_request_state(device, NS(disk, D_INCONSISTENT));
2673                 if (retcode >= SS_SUCCESS) {
2674                         if (drbd_bitmap_io(device, &drbd_bmio_set_n_write,
2675                                 "set_n_write from invalidate", BM_LOCKED_MASK))
2676                                 retcode = ERR_IO_MD_DISK;
2677                 }
2678         } else
2679                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2680         drbd_resume_io(device);
2681         mutex_unlock(&adm_ctx.resource->adm_mutex);
2682 out:
2683         drbd_adm_finish(&adm_ctx, info, retcode);
2684         return 0;
2685 }
2686
2687 static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2688                 union drbd_state mask, union drbd_state val)
2689 {
2690         struct drbd_config_context adm_ctx;
2691         enum drbd_ret_code retcode;
2692
2693         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2694         if (!adm_ctx.reply_skb)
2695                 return retcode;
2696         if (retcode != NO_ERROR)
2697                 goto out;
2698
2699         mutex_lock(&adm_ctx.resource->adm_mutex);
2700         retcode = drbd_request_state(adm_ctx.device, mask, val);
2701         mutex_unlock(&adm_ctx.resource->adm_mutex);
2702 out:
2703         drbd_adm_finish(&adm_ctx, info, retcode);
2704         return 0;
2705 }
2706
/*
 * Bitmap IO callback: run drbd_bmio_set_n_write() and then suspend the
 * activity log.  The activity log is suspended regardless of the result,
 * which is passed on to the caller.
 */
static int drbd_bmio_set_susp_al(struct drbd_device *device)
{
	int bmio_result = drbd_bmio_set_n_write(device);

	drbd_suspend_al(device);

	return bmio_result;
}
2715
2716 int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2717 {
2718         struct drbd_config_context adm_ctx;
2719         int retcode; /* drbd_ret_code, drbd_state_rv */
2720         struct drbd_device *device;
2721
2722         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2723         if (!adm_ctx.reply_skb)
2724                 return retcode;
2725         if (retcode != NO_ERROR)
2726                 goto out;
2727
2728         mutex_lock(&adm_ctx.resource->adm_mutex);
2729         device = adm_ctx.device;
2730
2731         /* If there is still bitmap IO pending, probably because of a previous
2732          * resync just being finished, wait for it before requesting a new resync.
2733          * Also wait for it's after_state_ch(). */
2734         drbd_suspend_io(device);
2735         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
2736         drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work);
2737
2738         /* If we happen to be C_STANDALONE R_PRIMARY, just set all bits
2739          * in the bitmap.  Otherwise, try to start a resync handshake
2740          * as sync source for full sync.
2741          */
2742         if (device->state.conn == C_STANDALONE && device->state.role == R_PRIMARY) {
2743                 /* The peer will get a resync upon connect anyways. Just make that
2744                    into a full resync. */
2745                 retcode = drbd_request_state(device, NS(pdsk, D_INCONSISTENT));
2746                 if (retcode >= SS_SUCCESS) {
2747                         if (drbd_bitmap_io(device, &drbd_bmio_set_susp_al,
2748                                 "set_n_write from invalidate_peer",
2749                                 BM_LOCKED_SET_ALLOWED))
2750                                 retcode = ERR_IO_MD_DISK;
2751                 }
2752         } else
2753                 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2754         drbd_resume_io(device);
2755         mutex_unlock(&adm_ctx.resource->adm_mutex);
2756 out:
2757         drbd_adm_finish(&adm_ctx, info, retcode);
2758         return 0;
2759 }
2760
2761 int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2762 {
2763         struct drbd_config_context adm_ctx;
2764         enum drbd_ret_code retcode;
2765
2766         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2767         if (!adm_ctx.reply_skb)
2768                 return retcode;
2769         if (retcode != NO_ERROR)
2770                 goto out;
2771
2772         mutex_lock(&adm_ctx.resource->adm_mutex);
2773         if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2774                 retcode = ERR_PAUSE_IS_SET;
2775         mutex_unlock(&adm_ctx.resource->adm_mutex);
2776 out:
2777         drbd_adm_finish(&adm_ctx, info, retcode);
2778         return 0;
2779 }
2780
2781 int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
2782 {
2783         struct drbd_config_context adm_ctx;
2784         union drbd_dev_state s;
2785         enum drbd_ret_code retcode;
2786
2787         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2788         if (!adm_ctx.reply_skb)
2789                 return retcode;
2790         if (retcode != NO_ERROR)
2791                 goto out;
2792
2793         mutex_lock(&adm_ctx.resource->adm_mutex);
2794         if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2795                 s = adm_ctx.device->state;
2796                 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2797                         retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2798                                   s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2799                 } else {
2800                         retcode = ERR_PAUSE_IS_CLEAR;
2801                 }
2802         }
2803         mutex_unlock(&adm_ctx.resource->adm_mutex);
2804 out:
2805         drbd_adm_finish(&adm_ctx, info, retcode);
2806         return 0;
2807 }
2808
2809 int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
2810 {
2811         return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
2812 }
2813
2814 int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
2815 {
2816         struct drbd_config_context adm_ctx;
2817         struct drbd_device *device;
2818         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2819
2820         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
2821         if (!adm_ctx.reply_skb)
2822                 return retcode;
2823         if (retcode != NO_ERROR)
2824                 goto out;
2825
2826         mutex_lock(&adm_ctx.resource->adm_mutex);
2827         device = adm_ctx.device;
2828         if (test_bit(NEW_CUR_UUID, &device->flags)) {
2829                 drbd_uuid_new_current(device);
2830                 clear_bit(NEW_CUR_UUID, &device->flags);
2831         }
2832         drbd_suspend_io(device);
2833         retcode = drbd_request_state(device, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2834         if (retcode == SS_SUCCESS) {
2835                 if (device->state.conn < C_CONNECTED)
2836                         tl_clear(first_peer_device(device)->connection);
2837                 if (device->state.disk == D_DISKLESS || device->state.disk == D_FAILED)
2838                         tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO);
2839         }
2840         drbd_resume_io(device);
2841         mutex_unlock(&adm_ctx.resource->adm_mutex);
2842 out:
2843         drbd_adm_finish(&adm_ctx, info, retcode);
2844         return 0;
2845 }
2846
2847 int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
2848 {
2849         return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
2850 }
2851
2852 static int nla_put_drbd_cfg_context(struct sk_buff *skb,
2853                                     struct drbd_resource *resource,
2854                                     struct drbd_connection *connection,
2855                                     struct drbd_device *device)
2856 {
2857         struct nlattr *nla;
2858         nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2859         if (!nla)
2860                 goto nla_put_failure;
2861         if (device &&
2862             nla_put_u32(skb, T_ctx_volume, device->vnr))
2863                 goto nla_put_failure;
2864         if (nla_put_string(skb, T_ctx_resource_name, resource->name))
2865                 goto nla_put_failure;
2866         if (connection) {
2867                 if (connection->my_addr_len &&
2868                     nla_put(skb, T_ctx_my_addr, connection->my_addr_len, &connection->my_addr))
2869                         goto nla_put_failure;
2870                 if (connection->peer_addr_len &&
2871                     nla_put(skb, T_ctx_peer_addr, connection->peer_addr_len, &connection->peer_addr))
2872                         goto nla_put_failure;
2873         }
2874         nla_nest_end(skb, nla);
2875         return 0;
2876
2877 nla_put_failure:
2878         if (nla)
2879                 nla_nest_cancel(skb, nla);
2880         return -EMSGSIZE;
2881 }
2882
2883 /*
2884  * Return the connection of @resource if @resource has exactly one connection.
2885  */
2886 static struct drbd_connection *the_only_connection(struct drbd_resource *resource)
2887 {
2888         struct list_head *connections = &resource->connections;
2889
2890         if (list_empty(connections) || connections->next->next != connections)
2891                 return NULL;
2892         return list_first_entry(&resource->connections, struct drbd_connection, connections);
2893 }
2894
2895 int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2896                 const struct sib_info *sib)
2897 {
2898         struct drbd_resource *resource = device->resource;
2899         struct state_info *si = NULL; /* for sizeof(si->member); */
2900         struct nlattr *nla;
2901         int got_ldev;
2902         int err = 0;
2903         int exclude_sensitive;
2904
2905         /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2906          * to.  So we better exclude_sensitive information.
2907          *
2908          * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2909          * in the context of the requesting user process. Exclude sensitive
2910          * information, unless current has superuser.
2911          *
2912          * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2913          * relies on the current implementation of netlink_dump(), which
2914          * executes the dump callback successively from netlink_recvmsg(),
2915          * always in the context of the receiving process */
2916         exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2917
2918         got_ldev = get_ldev(device);
2919
2920         /* We need to add connection name and volume number information still.
2921          * Minor number is in drbd_genlmsghdr. */
2922         if (nla_put_drbd_cfg_context(skb, resource, the_only_connection(resource), device))
2923                 goto nla_put_failure;
2924
2925         if (res_opts_to_skb(skb, &device->resource->res_opts, exclude_sensitive))
2926                 goto nla_put_failure;
2927
2928         rcu_read_lock();
2929         if (got_ldev) {
2930                 struct disk_conf *disk_conf;
2931
2932                 disk_conf = rcu_dereference(device->ldev->disk_conf);
2933                 err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
2934         }
2935         if (!err) {
2936                 struct net_conf *nc;
2937
2938                 nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
2939                 if (nc)
2940                         err = net_conf_to_skb(skb, nc, exclude_sensitive);
2941         }
2942         rcu_read_unlock();
2943         if (err)
2944                 goto nla_put_failure;
2945
2946         nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2947         if (!nla)
2948                 goto nla_put_failure;
2949         if (nla_put_u32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY) ||
2950             nla_put_u32(skb, T_current_state, device->state.i) ||
2951             nla_put_u64(skb, T_ed_uuid, device->ed_uuid) ||
2952             nla_put_u64(skb, T_capacity, drbd_get_capacity(device->this_bdev)) ||
2953             nla_put_u64(skb, T_send_cnt, device->send_cnt) ||
2954             nla_put_u64(skb, T_recv_cnt, device->recv_cnt) ||
2955             nla_put_u64(skb, T_read_cnt, device->read_cnt) ||
2956             nla_put_u64(skb, T_writ_cnt, device->writ_cnt) ||
2957             nla_put_u64(skb, T_al_writ_cnt, device->al_writ_cnt) ||
2958             nla_put_u64(skb, T_bm_writ_cnt, device->bm_writ_cnt) ||
2959             nla_put_u32(skb, T_ap_bio_cnt, atomic_read(&device->ap_bio_cnt)) ||
2960             nla_put_u32(skb, T_ap_pending_cnt, atomic_read(&device->ap_pending_cnt)) ||
2961             nla_put_u32(skb, T_rs_pending_cnt, atomic_read(&device->rs_pending_cnt)))
2962                 goto nla_put_failure;
2963
2964         if (got_ldev) {
2965                 int err;
2966
2967                 spin_lock_irq(&device->ldev->md.uuid_lock);
2968                 err = nla_put(skb, T_uuids, sizeof(si->uuids), device->ldev->md.uuid);
2969                 spin_unlock_irq(&device->ldev->md.uuid_lock);
2970
2971                 if (err)
2972                         goto nla_put_failure;
2973
2974                 if (nla_put_u32(skb, T_disk_flags, device->ldev->md.flags) ||
2975                     nla_put_u64(skb, T_bits_total, drbd_bm_bits(device)) ||
2976                     nla_put_u64(skb, T_bits_oos, drbd_bm_total_weight(device)))
2977                         goto nla_put_failure;
2978                 if (C_SYNC_SOURCE <= device->state.conn &&
2979                     C_PAUSED_SYNC_T >= device->state.conn) {
2980                         if (nla_put_u64(skb, T_bits_rs_total, device->rs_total) ||
2981                             nla_put_u64(skb, T_bits_rs_failed, device->rs_failed))
2982                                 goto nla_put_failure;
2983                 }
2984         }
2985
2986         if (sib) {
2987                 switch(sib->sib_reason) {
2988                 case SIB_SYNC_PROGRESS:
2989                 case SIB_GET_STATUS_REPLY:
2990                         break;
2991                 case SIB_STATE_CHANGE:
2992                         if (nla_put_u32(skb, T_prev_state, sib->os.i) ||
2993                             nla_put_u32(skb, T_new_state, sib->ns.i))
2994                                 goto nla_put_failure;
2995                         break;
2996                 case SIB_HELPER_POST:
2997                         if (nla_put_u32(skb, T_helper_exit_code,
2998                                         sib->helper_exit_code))
2999                                 goto nla_put_failure;
3000                         /* fall through */
3001                 case SIB_HELPER_PRE:
3002                         if (nla_put_string(skb, T_helper, sib->helper_name))
3003                                 goto nla_put_failure;
3004                         break;
3005                 }
3006         }
3007         nla_nest_end(skb, nla);
3008
3009         if (0)
3010 nla_put_failure:
3011                 err = -EMSGSIZE;
3012         if (got_ldev)
3013                 put_ldev(device);
3014         return err;
3015 }
3016
3017 int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
3018 {
3019         struct drbd_config_context adm_ctx;
3020         enum drbd_ret_code retcode;
3021         int err;
3022
3023         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3024         if (!adm_ctx.reply_skb)
3025                 return retcode;
3026         if (retcode != NO_ERROR)
3027                 goto out;
3028
3029         err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.device, NULL);
3030         if (err) {
3031                 nlmsg_free(adm_ctx.reply_skb);
3032                 return err;
3033         }
3034 out:
3035         drbd_adm_finish(&adm_ctx, info, retcode);
3036         return 0;
3037 }
3038
3039 static int get_one_status(struct sk_buff *skb, struct netlink_callback *cb)
3040 {
3041         struct drbd_device *device;
3042         struct drbd_genlmsghdr *dh;
3043         struct drbd_resource *pos = (struct drbd_resource *)cb->args[0];
3044         struct drbd_resource *resource = NULL;
3045         struct drbd_resource *tmp;
3046         unsigned volume = cb->args[1];
3047
3048         /* Open coded, deferred, iteration:
3049          * for_each_resource_safe(resource, tmp, &drbd_resources) {
3050          *      connection = "first connection of resource or undefined";
3051          *      idr_for_each_entry(&resource->devices, device, i) {
3052          *        ...
3053          *      }
3054          * }
3055          * where resource is cb->args[0];
3056          * and i is cb->args[1];
3057          *
3058          * cb->args[2] indicates if we shall loop over all resources,
3059          * or just dump all volumes of a single resource.
3060          *
3061          * This may miss entries inserted after this dump started,
3062          * or entries deleted before they are reached.
3063          *
3064          * We need to make sure the device won't disappear while
3065          * we are looking at it, and revalidate our iterators
3066          * on each iteration.
3067          */
3068
3069         /* synchronize with conn_create()/drbd_destroy_connection() */
3070         rcu_read_lock();
3071         /* revalidate iterator position */
3072         for_each_resource_rcu(tmp, &drbd_resources) {
3073                 if (pos == NULL) {
3074                         /* first iteration */
3075                         pos = tmp;
3076                         resource = pos;
3077                         break;
3078                 }
3079                 if (tmp == pos) {
3080                         resource = pos;
3081                         break;
3082                 }
3083         }
3084         if (resource) {
3085 next_resource:
3086                 device = idr_get_next(&resource->devices, &volume);
3087                 if (!device) {
3088                         /* No more volumes to dump on this resource.
3089                          * Advance resource iterator. */
3090                         pos = list_entry_rcu(resource->resources.next,
3091                                              struct drbd_resource, resources);
3092                         /* Did we dump any volume of this resource yet? */
3093                         if (volume != 0) {
3094                                 /* If we reached the end of the list,
3095                                  * or only a single resource dump was requested,
3096                                  * we are done. */
3097                                 if (&pos->resources == &drbd_resources || cb->args[2])
3098                                         goto out;
3099                                 volume = 0;
3100                                 resource = pos;
3101                                 goto next_resource;
3102                         }
3103                 }
3104
3105                 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
3106                                 cb->nlh->nlmsg_seq, &drbd_genl_family,
3107                                 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
3108                 if (!dh)
3109                         goto out;
3110
3111                 if (!device) {
3112                         /* This is a connection without a single volume.
3113                          * Suprisingly enough, it may have a network
3114                          * configuration. */
3115                         struct drbd_connection *connection;
3116
3117                         dh->minor = -1U;
3118                         dh->ret_code = NO_ERROR;
3119                         connection = the_only_connection(resource);
3120                         if (nla_put_drbd_cfg_context(skb, resource, connection, NULL))
3121                                 goto cancel;
3122                         if (connection) {
3123                                 struct net_conf *nc;
3124
3125                                 nc = rcu_dereference(connection->net_conf);
3126                                 if (nc && net_conf_to_skb(skb, nc, 1) != 0)
3127                                         goto cancel;
3128                         }
3129                         goto done;
3130                 }
3131
3132                 D_ASSERT(device, device->vnr == volume);
3133                 D_ASSERT(device, device->resource == resource);
3134
3135                 dh->minor = device_to_minor(device);
3136                 dh->ret_code = NO_ERROR;
3137
3138                 if (nla_put_status_info(skb, device, NULL)) {
3139 cancel:
3140                         genlmsg_cancel(skb, dh);
3141                         goto out;
3142                 }
3143 done:
3144                 genlmsg_end(skb, dh);
3145         }
3146
3147 out:
3148         rcu_read_unlock();
3149         /* where to start the next iteration */
3150         cb->args[0] = (long)pos;
3151         cb->args[1] = (pos == resource) ? volume + 1 : 0;
3152
3153         /* No more resources/volumes/minors found results in an empty skb.
3154          * Which will terminate the dump. */
3155         return skb->len;
3156 }
3157
3158 /*
3159  * Request status of all resources, or of all volumes within a single resource.
3160  *
3161  * This is a dump, as the answer may not fit in a single reply skb otherwise.
3162  * Which means we cannot use the family->attrbuf or other such members, because
3163  * dump is NOT protected by the genl_lock().  During dump, we only have access
3164  * to the incoming skb, and need to opencode "parsing" of the nlattr payload.
3165  *
3166  * Once things are setup properly, we call into get_one_status().
3167  */
3168 int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
3169 {
3170         const unsigned hdrlen = GENL_HDRLEN + GENL_MAGIC_FAMILY_HDRSZ;
3171         struct nlattr *nla;
3172         const char *resource_name;
3173         struct drbd_resource *resource;
3174         int maxtype;
3175
3176         /* Is this a followup call? */
3177         if (cb->args[0]) {
3178                 /* ... of a single resource dump,
3179                  * and the resource iterator has been advanced already? */
3180                 if (cb->args[2] && cb->args[2] != cb->args[0])
3181                         return 0; /* DONE. */
3182                 goto dump;
3183         }
3184
3185         /* First call (from netlink_dump_start).  We need to figure out
3186          * which resource(s) the user wants us to dump. */
3187         nla = nla_find(nlmsg_attrdata(cb->nlh, hdrlen),
3188                         nlmsg_attrlen(cb->nlh, hdrlen),
3189                         DRBD_NLA_CFG_CONTEXT);
3190
3191         /* No explicit context given.  Dump all. */
3192         if (!nla)
3193                 goto dump;
3194         maxtype = ARRAY_SIZE(drbd_cfg_context_nl_policy) - 1;
3195         nla = drbd_nla_find_nested(maxtype, nla, __nla_type(T_ctx_resource_name));
3196         if (IS_ERR(nla))
3197                 return PTR_ERR(nla);
3198         /* context given, but no name present? */
3199         if (!nla)
3200                 return -EINVAL;
3201         resource_name = nla_data(nla);
3202         if (!*resource_name)
3203                 return -ENODEV;
3204         resource = drbd_find_resource(resource_name);
3205         if (!resource)
3206                 return -ENODEV;
3207
3208         kref_put(&resource->kref, drbd_destroy_resource); /* get_one_status() revalidates the resource */
3209
3210         /* prime iterators, and set "filter" mode mark:
3211          * only dump this connection. */
3212         cb->args[0] = (long)resource;
3213         /* cb->args[1] = 0; passed in this way. */
3214         cb->args[2] = (long)resource;
3215
3216 dump:
3217         return get_one_status(skb, cb);
3218 }
3219
3220 int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
3221 {
3222         struct drbd_config_context adm_ctx;
3223         enum drbd_ret_code retcode;
3224         struct timeout_parms tp;
3225         int err;
3226
3227         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3228         if (!adm_ctx.reply_skb)
3229                 return retcode;
3230         if (retcode != NO_ERROR)
3231                 goto out;
3232
3233         tp.timeout_type =
3234                 adm_ctx.device->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
3235                 test_bit(USE_DEGR_WFC_T, &adm_ctx.device->flags) ? UT_DEGRADED :
3236                 UT_DEFAULT;
3237
3238         err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
3239         if (err) {
3240                 nlmsg_free(adm_ctx.reply_skb);
3241                 return err;
3242         }
3243 out:
3244         drbd_adm_finish(&adm_ctx, info, retcode);
3245         return 0;
3246 }
3247
3248 int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
3249 {
3250         struct drbd_config_context adm_ctx;
3251         struct drbd_device *device;
3252         enum drbd_ret_code retcode;
3253         struct start_ov_parms parms;
3254
3255         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3256         if (!adm_ctx.reply_skb)
3257                 return retcode;
3258         if (retcode != NO_ERROR)
3259                 goto out;
3260
3261         device = adm_ctx.device;
3262
3263         /* resume from last known position, if possible */
3264         parms.ov_start_sector = device->ov_start_sector;
3265         parms.ov_stop_sector = ULLONG_MAX;
3266         if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
3267                 int err = start_ov_parms_from_attrs(&parms, info);
3268                 if (err) {
3269                         retcode = ERR_MANDATORY_TAG;
3270                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3271                         goto out;
3272                 }
3273         }
3274         mutex_lock(&adm_ctx.resource->adm_mutex);
3275
3276         /* w_make_ov_request expects position to be aligned */
3277         device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1);
3278         device->ov_stop_sector = parms.ov_stop_sector;
3279
3280         /* If there is still bitmap IO pending, e.g. previous resync or verify
3281          * just being finished, wait for it before requesting a new resync. */
3282         drbd_suspend_io(device);
3283         wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
3284         retcode = drbd_request_state(device, NS(conn, C_VERIFY_S));
3285         drbd_resume_io(device);
3286
3287         mutex_unlock(&adm_ctx.resource->adm_mutex);
3288 out:
3289         drbd_adm_finish(&adm_ctx, info, retcode);
3290         return 0;
3291 }
3292
3293
3294 int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
3295 {
3296         struct drbd_config_context adm_ctx;
3297         struct drbd_device *device;
3298         enum drbd_ret_code retcode;
3299         int skip_initial_sync = 0;
3300         int err;
3301         struct new_c_uuid_parms args;
3302
3303         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3304         if (!adm_ctx.reply_skb)
3305                 return retcode;
3306         if (retcode != NO_ERROR)
3307                 goto out_nolock;
3308
3309         device = adm_ctx.device;
3310         memset(&args, 0, sizeof(args));
3311         if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
3312                 err = new_c_uuid_parms_from_attrs(&args, info);
3313                 if (err) {
3314                         retcode = ERR_MANDATORY_TAG;
3315                         drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3316                         goto out_nolock;
3317                 }
3318         }
3319
3320         mutex_lock(&adm_ctx.resource->adm_mutex);
3321         mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */
3322
3323         if (!get_ldev(device)) {
3324                 retcode = ERR_NO_DISK;
3325                 goto out;
3326         }
3327
3328         /* this is "skip initial sync", assume to be clean */
3329         if (device->state.conn == C_CONNECTED &&
3330             first_peer_device(device)->connection->agreed_pro_version >= 90 &&
3331             device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
3332                 drbd_info(device, "Preparing to skip initial sync\n");
3333                 skip_initial_sync = 1;
3334         } else if (device->state.conn != C_STANDALONE) {
3335                 retcode = ERR_CONNECTED;
3336                 goto out_dec;
3337         }
3338
3339         drbd_uuid_set(device, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
3340         drbd_uuid_new_current(device); /* New current, previous to UI_BITMAP */
3341
3342         if (args.clear_bm) {
3343                 err = drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
3344                         "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
3345                 if (err) {
3346                         drbd_err(device, "Writing bitmap failed with %d\n", err);
3347                         retcode = ERR_IO_MD_DISK;
3348                 }
3349                 if (skip_initial_sync) {
3350                         drbd_send_uuids_skip_initial_sync(first_peer_device(device));
3351                         _drbd_uuid_set(device, UI_BITMAP, 0);
3352                         drbd_print_uuids(device, "cleared bitmap UUID");
3353                         spin_lock_irq(&device->resource->req_lock);
3354                         _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3355                                         CS_VERBOSE, NULL);
3356                         spin_unlock_irq(&device->resource->req_lock);
3357                 }
3358         }
3359
3360         drbd_md_sync(device);
3361 out_dec:
3362         put_ldev(device);
3363 out:
3364         mutex_unlock(device->state_mutex);
3365         mutex_unlock(&adm_ctx.resource->adm_mutex);
3366 out_nolock:
3367         drbd_adm_finish(&adm_ctx, info, retcode);
3368         return 0;
3369 }
3370
3371 static enum drbd_ret_code
3372 drbd_check_resource_name(struct drbd_config_context *adm_ctx)
3373 {
3374         const char *name = adm_ctx->resource_name;
3375         if (!name || !name[0]) {
3376                 drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing");
3377                 return ERR_MANDATORY_TAG;
3378         }
3379         /* if we want to use these in sysfs/configfs/debugfs some day,
3380          * we must not allow slashes */
3381         if (strchr(name, '/')) {
3382                 drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name");
3383                 return ERR_INVALID_REQUEST;
3384         }
3385         return NO_ERROR;
3386 }
3387
3388 int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info)
3389 {
3390         struct drbd_config_context adm_ctx;
3391         enum drbd_ret_code retcode;
3392         struct res_opts res_opts;
3393         int err;
3394
3395         retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0);
3396         if (!adm_ctx.reply_skb)
3397                 return retcode;
3398         if (retcode != NO_ERROR)
3399                 goto out;
3400
3401         set_res_opts_defaults(&res_opts);
3402         err = res_opts_from_attrs(&res_opts, info);
3403         if (err && err != -ENOMSG) {
3404                 retcode = ERR_MANDATORY_TAG;
3405                 drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err));
3406                 goto out;
3407         }
3408
3409         retcode = drbd_check_resource_name(&adm_ctx);
3410         if (retcode != NO_ERROR)
3411                 goto out;
3412
3413         if (adm_ctx.resource) {
3414                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
3415                         retcode = ERR_INVALID_REQUEST;
3416                         drbd_msg_put_info(adm_ctx.reply_skb, "resource exists");
3417                 }
3418                 /* else: still NO_ERROR */
3419                 goto out;
3420         }
3421
3422         /* not yet safe for genl_family.parallel_ops */
3423         if (!conn_create(adm_ctx.resource_name, &res_opts))
3424                 retcode = ERR_NOMEM;
3425 out:
3426         drbd_adm_finish(&adm_ctx, info, retcode);
3427         return 0;
3428 }
3429
3430 int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info)
3431 {
3432         struct drbd_config_context adm_ctx;
3433         struct drbd_genlmsghdr *dh = info->userhdr;
3434         enum drbd_ret_code retcode;
3435
3436         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3437         if (!adm_ctx.reply_skb)
3438                 return retcode;
3439         if (retcode != NO_ERROR)
3440                 goto out;
3441
3442         if (dh->minor > MINORMASK) {
3443                 drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range");
3444                 retcode = ERR_INVALID_REQUEST;
3445                 goto out;
3446         }
3447         if (adm_ctx.volume > DRBD_VOLUME_MAX) {
3448                 drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range");
3449                 retcode = ERR_INVALID_REQUEST;
3450                 goto out;
3451         }
3452
3453         /* drbd_adm_prepare made sure already
3454          * that first_peer_device(device)->connection and device->vnr match the request. */
3455         if (adm_ctx.device) {
3456                 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
3457                         retcode = ERR_MINOR_EXISTS;
3458                 /* else: still NO_ERROR */
3459                 goto out;
3460         }
3461
3462         mutex_lock(&adm_ctx.resource->adm_mutex);
3463         retcode = drbd_create_device(&adm_ctx, dh->minor);
3464         mutex_unlock(&adm_ctx.resource->adm_mutex);
3465 out:
3466         drbd_adm_finish(&adm_ctx, info, retcode);
3467         return 0;
3468 }
3469
3470 static enum drbd_ret_code adm_del_minor(struct drbd_device *device)
3471 {
3472         if (device->state.disk == D_DISKLESS &&
3473             /* no need to be device->state.conn == C_STANDALONE &&
3474              * we may want to delete a minor from a live replication group.
3475              */
3476             device->state.role == R_SECONDARY) {
3477                 _drbd_request_state(device, NS(conn, C_WF_REPORT_PARAMS),
3478                                     CS_VERBOSE + CS_WAIT_COMPLETE);
3479                 drbd_delete_device(device);
3480                 return NO_ERROR;
3481         } else
3482                 return ERR_MINOR_CONFIGURED;
3483 }
3484
3485 int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info)
3486 {
3487         struct drbd_config_context adm_ctx;
3488         enum drbd_ret_code retcode;
3489
3490         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR);
3491         if (!adm_ctx.reply_skb)
3492                 return retcode;
3493         if (retcode != NO_ERROR)
3494                 goto out;
3495
3496         mutex_lock(&adm_ctx.resource->adm_mutex);
3497         retcode = adm_del_minor(adm_ctx.device);
3498         mutex_unlock(&adm_ctx.resource->adm_mutex);
3499 out:
3500         drbd_adm_finish(&adm_ctx, info, retcode);
3501         return 0;
3502 }
3503
3504 int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3505 {
3506         struct drbd_config_context adm_ctx;
3507         struct drbd_resource *resource;
3508         struct drbd_connection *connection;
3509         struct drbd_device *device;
3510         int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
3511         unsigned i;
3512
3513         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3514         if (!adm_ctx.reply_skb)
3515                 return retcode;
3516         if (retcode != NO_ERROR)
3517                 goto finish;
3518
3519         resource = adm_ctx.resource;
3520         mutex_lock(&resource->adm_mutex);
3521         /* demote */
3522         for_each_connection(connection, resource) {
3523                 struct drbd_peer_device *peer_device;
3524
3525                 idr_for_each_entry(&connection->peer_devices, peer_device, i) {
3526                         retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0);
3527                         if (retcode < SS_SUCCESS) {
3528                                 drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote");
3529                                 goto out;
3530                         }
3531                 }
3532
3533                 retcode = conn_try_disconnect(connection, 0);
3534                 if (retcode < SS_SUCCESS) {
3535                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect");
3536                         goto out;
3537                 }
3538         }
3539
3540         /* detach */
3541         idr_for_each_entry(&resource->devices, device, i) {
3542                 retcode = adm_detach(device, 0);
3543                 if (retcode < SS_SUCCESS || retcode > NO_ERROR) {
3544                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach");
3545                         goto out;
3546                 }
3547         }
3548
3549         /* If we reach this, all volumes (of this connection) are Secondary,
3550          * Disconnected, Diskless, aka Unconfigured. Make sure all threads have
3551          * actually stopped, state handling only does drbd_thread_stop_nowait(). */
3552         for_each_connection(connection, resource)
3553                 drbd_thread_stop(&connection->worker);
3554
3555         /* Now, nothing can fail anymore */
3556
3557         /* delete volumes */
3558         idr_for_each_entry(&resource->devices, device, i) {
3559                 retcode = adm_del_minor(device);
3560                 if (retcode != NO_ERROR) {
3561                         /* "can not happen" */
3562                         drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume");
3563                         goto out;
3564                 }
3565         }
3566
3567         list_del_rcu(&resource->resources);
3568         synchronize_rcu();
3569         drbd_free_resource(resource);
3570         retcode = NO_ERROR;
3571 out:
3572         mutex_unlock(&resource->adm_mutex);
3573 finish:
3574         drbd_adm_finish(&adm_ctx, info, retcode);
3575         return 0;
3576 }
3577
3578 int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info)
3579 {
3580         struct drbd_config_context adm_ctx;
3581         struct drbd_resource *resource;
3582         struct drbd_connection *connection;
3583         enum drbd_ret_code retcode;
3584
3585         retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
3586         if (!adm_ctx.reply_skb)
3587                 return retcode;
3588         if (retcode != NO_ERROR)
3589                 goto finish;
3590
3591         resource = adm_ctx.resource;
3592         mutex_lock(&resource->adm_mutex);
3593         for_each_connection(connection, resource) {
3594                 if (connection->cstate > C_STANDALONE) {
3595                         retcode = ERR_NET_CONFIGURED;
3596                         goto out;
3597                 }
3598         }
3599         if (!idr_is_empty(&resource->devices)) {
3600                 retcode = ERR_RES_IN_USE;
3601                 goto out;
3602         }
3603
3604         list_del_rcu(&resource->resources);
3605         for_each_connection(connection, resource)
3606                 drbd_thread_stop(&connection->worker);
3607         synchronize_rcu();
3608         drbd_free_resource(resource);
3609         retcode = NO_ERROR;
3610 out:
3611         mutex_unlock(&resource->adm_mutex);
3612 finish:
3613         drbd_adm_finish(&adm_ctx, info, retcode);
3614         return 0;
3615 }
3616
3617 void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3618 {
3619         static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3620         struct sk_buff *msg;
3621         struct drbd_genlmsghdr *d_out;
3622         unsigned seq;
3623         int err = -ENOMEM;
3624
3625         if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3626                 if (time_after(jiffies, device->rs_last_bcast + HZ))
3627                         device->rs_last_bcast = jiffies;
3628                 else
3629                         return;
3630         }
3631
3632         seq = atomic_inc_return(&drbd_genl_seq);
3633         msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3634         if (!msg)
3635                 goto failed;
3636
3637         err = -EMSGSIZE;
3638         d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3639         if (!d_out) /* cannot happen, but anyways. */
3640                 goto nla_put_failure;
3641         d_out->minor = device_to_minor(device);
3642         d_out->ret_code = NO_ERROR;
3643
3644         if (nla_put_status_info(msg, device, sib))
3645                 goto nla_put_failure;
3646         genlmsg_end(msg, d_out);
3647         err = drbd_genl_multicast_events(msg, 0);
3648         /* msg has been consumed or freed in netlink_broadcast() */
3649         if (err && err != -ESRCH)
3650                 goto failed;
3651
3652         return;
3653
3654 nla_put_failure:
3655         nlmsg_free(msg);
3656 failed:
3657         drbd_err(device, "Error %d while broadcasting event. "
3658                         "Event seq:%u sib_reason:%u\n",
3659                         err, seq, sib->sib_reason);
3660 }