drbd: Moved susp, susp_nod and susp_fen to the connection object
[linux-2.6-block.git] / drivers / block / drbd / drbd_nl.c
CommitLineData
b411b363
PR
1/*
2 drbd_nl.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23
24 */
25
b411b363
PR
26#include <linux/module.h>
27#include <linux/drbd.h>
28#include <linux/in.h>
29#include <linux/fs.h>
30#include <linux/file.h>
31#include <linux/slab.h>
b411b363
PR
32#include <linux/blkpg.h>
33#include <linux/cpumask.h>
34#include "drbd_int.h"
265be2d0 35#include "drbd_req.h"
b411b363
PR
36#include "drbd_wrappers.h"
37#include <asm/unaligned.h>
b411b363 38#include <linux/drbd_limits.h>
87f7be4c 39#include <linux/kthread.h>
b411b363 40
3b98c0c2
LE
41#include <net/genetlink.h>
42
/*
 * Generic netlink handler prototypes.
 * The definitions are referenced by the family description generated
 * from the genl_magic headers included below.
 */

/* .doit */
/* resource create/delete handlers are currently commented out:
 * int drbd_adm_create_resource(struct sk_buff *skb, struct genl_info *info);
 * int drbd_adm_delete_resource(struct sk_buff *skb, struct genl_info *info);
 */

int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_down(struct sk_buff *skb, struct genl_info *info);

int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info);
int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info);

/* .dumpit */
int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb);
76
77#include <linux/drbd_genl_api.h>
78#include <linux/genl_magic_func.h>
79
/* Holder string used with blkdev_get_by_path() to claim our
 * meta data device(s). */
static char *drbd_m_holder =
	"Hands off! this is DRBD's meta data device.";
82
3b98c0c2
LE
/* Value of adm_ctx.volume when the request carried no volume number. */
#define VOLUME_UNSPECIFIED		(-1U)

/*
 * Generic netlink message processing is strictly serialized by the
 * genl_lock(), and therefore so is our configuration.
 * That is why one static, global drbd_config_context is sufficient.
 */
static struct drbd_config_context {
	unsigned int minor;	/* assigned from drbd_genlmsghdr */
	unsigned int volume;	/* from request attributes, if present */
	char *conn_name;	/* points into the request skb,
				 * limited lifetime! */

	struct sk_buff *reply_skb;		/* reply buffer */
	struct drbd_genlmsghdr *reply_dh;	/* points into reply buffer */

	/* resolved from attributes, if possible */
	struct drbd_conf *mdev;
	struct drbd_tconn *tconn;
} adm_ctx;
105
106static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
107{
108 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
109 if (genlmsg_reply(skb, info))
110 printk(KERN_ERR "drbd: error sending genl reply\n");
b411b363 111}
3b98c0c2
LE
112
113/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
114 * reason it could fail was no space in skb, and there are 4k available. */
8432b314 115int drbd_msg_put_info(const char *info)
3b98c0c2
LE
116{
117 struct sk_buff *skb = adm_ctx.reply_skb;
118 struct nlattr *nla;
119 int err = -EMSGSIZE;
120
121 if (!info || !info[0])
122 return 0;
123
124 nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
125 if (!nla)
126 return err;
127
128 err = nla_put_string(skb, T_info_text, info);
129 if (err) {
130 nla_nest_cancel(skb, nla);
131 return err;
132 } else
133 nla_nest_end(skb, nla);
134 return 0;
b411b363
PR
135}
136
3b98c0c2
LE
137/* This would be a good candidate for a "pre_doit" hook,
138 * and per-family private info->pointers.
139 * But we need to stay compatible with older kernels.
140 * If it returns successfully, adm_ctx members are valid.
141 */
142#define DRBD_ADM_NEED_MINOR 1
143#define DRBD_ADM_NEED_CONN 2
144static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info,
145 unsigned flags)
146{
147 struct drbd_genlmsghdr *d_in = info->userhdr;
148 const u8 cmd = info->genlhdr->cmd;
149 int err;
150
151 memset(&adm_ctx, 0, sizeof(adm_ctx));
152
153 /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */
154 if (cmd != DRBD_ADM_GET_STATUS
155 && security_netlink_recv(skb, CAP_SYS_ADMIN))
156 return -EPERM;
157
158 adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
159 if (!adm_ctx.reply_skb)
160 goto fail;
161
162 adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb,
163 info, &drbd_genl_family, 0, cmd);
164 /* put of a few bytes into a fresh skb of >= 4k will always succeed.
165 * but anyways */
166 if (!adm_ctx.reply_dh)
167 goto fail;
168
169 adm_ctx.reply_dh->minor = d_in->minor;
170 adm_ctx.reply_dh->ret_code = NO_ERROR;
171
172 if (info->attrs[DRBD_NLA_CFG_CONTEXT]) {
173 struct nlattr *nla;
174 /* parse and validate only */
f399002e 175 err = drbd_cfg_context_from_attrs(NULL, info);
3b98c0c2
LE
176 if (err)
177 goto fail;
178
179 /* It was present, and valid,
180 * copy it over to the reply skb. */
181 err = nla_put_nohdr(adm_ctx.reply_skb,
182 info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len,
183 info->attrs[DRBD_NLA_CFG_CONTEXT]);
184 if (err)
185 goto fail;
186
187 /* and assign stuff to the global adm_ctx */
188 nla = nested_attr_tb[__nla_type(T_ctx_volume)];
189 adm_ctx.volume = nla ? nla_get_u32(nla) : VOLUME_UNSPECIFIED;
190 nla = nested_attr_tb[__nla_type(T_ctx_conn_name)];
191 if (nla)
192 adm_ctx.conn_name = nla_data(nla);
193 } else
194 adm_ctx.volume = VOLUME_UNSPECIFIED;
195
196 adm_ctx.minor = d_in->minor;
197 adm_ctx.mdev = minor_to_mdev(d_in->minor);
198 adm_ctx.tconn = conn_by_name(adm_ctx.conn_name);
199
200 if (!adm_ctx.mdev && (flags & DRBD_ADM_NEED_MINOR)) {
201 drbd_msg_put_info("unknown minor");
202 return ERR_MINOR_INVALID;
203 }
204 if (!adm_ctx.tconn && (flags & DRBD_ADM_NEED_CONN)) {
205 drbd_msg_put_info("unknown connection");
206 return ERR_INVALID_REQUEST;
207 }
208
209 /* some more paranoia, if the request was over-determined */
527f4b24
LE
210 if (adm_ctx.mdev && adm_ctx.tconn &&
211 adm_ctx.mdev->tconn != adm_ctx.tconn) {
212 pr_warning("request: minor=%u, conn=%s; but that minor belongs to connection %s\n",
213 adm_ctx.minor, adm_ctx.conn_name, adm_ctx.mdev->tconn->name);
214 drbd_msg_put_info("minor exists in different connection");
215 return ERR_INVALID_REQUEST;
216 }
3b98c0c2
LE
217 if (adm_ctx.mdev &&
218 adm_ctx.volume != VOLUME_UNSPECIFIED &&
219 adm_ctx.volume != adm_ctx.mdev->vnr) {
220 pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n",
221 adm_ctx.minor, adm_ctx.volume,
222 adm_ctx.mdev->vnr, adm_ctx.mdev->tconn->name);
527f4b24 223 drbd_msg_put_info("minor exists as different volume");
3b98c0c2
LE
224 return ERR_INVALID_REQUEST;
225 }
cffec5b2
LE
226 if (adm_ctx.mdev && !adm_ctx.tconn)
227 adm_ctx.tconn = adm_ctx.mdev->tconn;
3b98c0c2
LE
228 return NO_ERROR;
229
230fail:
231 nlmsg_free(adm_ctx.reply_skb);
232 adm_ctx.reply_skb = NULL;
233 return -ENOMEM;
234}
235
236static int drbd_adm_finish(struct genl_info *info, int retcode)
237{
238 struct nlattr *nla;
239 const char *conn_name = NULL;
240
241 if (!adm_ctx.reply_skb)
242 return -ENOMEM;
243
244 adm_ctx.reply_dh->ret_code = retcode;
245
246 nla = info->attrs[DRBD_NLA_CFG_CONTEXT];
247 if (nla) {
248 nla = nla_find_nested(nla, __nla_type(T_ctx_conn_name));
249 if (nla)
250 conn_name = nla_data(nla);
251 }
252
253 drbd_adm_send_reply(adm_ctx.reply_skb, info);
254 return 0;
255}
b411b363 256
6b75dced 257static void setup_khelper_env(struct drbd_tconn *tconn, char **envp)
b411b363 258{
6b75dced 259 char *afs;
b411b363 260
6b75dced
PR
261 if (get_net_conf(tconn)) {
262 switch (((struct sockaddr *)tconn->net_conf->peer_addr)->sa_family) {
b411b363
PR
263 case AF_INET6:
264 afs = "ipv6";
6b75dced
PR
265 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI6",
266 &((struct sockaddr_in6 *)tconn->net_conf->peer_addr)->sin6_addr);
b411b363
PR
267 break;
268 case AF_INET:
269 afs = "ipv4";
6b75dced
PR
270 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
271 &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr);
b411b363
PR
272 break;
273 default:
274 afs = "ssocks";
6b75dced
PR
275 snprintf(envp[4], 60, "DRBD_PEER_ADDRESS=%pI4",
276 &((struct sockaddr_in *)tconn->net_conf->peer_addr)->sin_addr);
b411b363 277 }
6b75dced
PR
278 snprintf(envp[3], 20, "DRBD_PEER_AF=%s", afs);
279 put_net_conf(tconn);
b411b363 280 }
6b75dced
PR
281}
282
283int drbd_khelper(struct drbd_conf *mdev, char *cmd)
284{
285 char *envp[] = { "HOME=/",
286 "TERM=linux",
287 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
288 (char[20]) { }, /* address family */
289 (char[60]) { }, /* address */
290 NULL };
291 char mb[12];
292 char *argv[] = {usermode_helper, cmd, mb, NULL };
293 struct sib_info sib;
294 int ret;
295
296 snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
297 setup_khelper_env(mdev->tconn, envp);
b411b363 298
1090c056
LE
299 /* The helper may take some time.
300 * write out any unsynced meta data changes now */
301 drbd_md_sync(mdev);
302
b411b363 303 dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
3b98c0c2
LE
304 sib.sib_reason = SIB_HELPER_PRE;
305 sib.helper_name = cmd;
306 drbd_bcast_event(mdev, &sib);
b411b363
PR
307 ret = call_usermodehelper(usermode_helper, argv, envp, 1);
308 if (ret)
309 dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
310 usermode_helper, cmd, mb,
311 (ret >> 8) & 0xff, ret);
312 else
313 dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
314 usermode_helper, cmd, mb,
315 (ret >> 8) & 0xff, ret);
3b98c0c2
LE
316 sib.sib_reason = SIB_HELPER_POST;
317 sib.helper_exit_code = ret;
318 drbd_bcast_event(mdev, &sib);
b411b363
PR
319
320 if (ret < 0) /* Ignore any ERRNOs we got. */
321 ret = 0;
322
323 return ret;
324}
325
6b75dced
PR
326static void conn_md_sync(struct drbd_tconn *tconn)
327{
328 struct drbd_conf *mdev;
e90285e0 329 int vnr;
6b75dced 330
e90285e0 331 idr_for_each_entry(&tconn->volumes, mdev, vnr)
6b75dced
PR
332 drbd_md_sync(mdev);
333}
334
335int conn_khelper(struct drbd_tconn *tconn, char *cmd)
336{
337 char *envp[] = { "HOME=/",
338 "TERM=linux",
339 "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
340 (char[20]) { }, /* address family */
341 (char[60]) { }, /* address */
342 NULL };
343 char *argv[] = {usermode_helper, cmd, tconn->name, NULL };
344 int ret;
345
346 setup_khelper_env(tconn, envp);
347 conn_md_sync(tconn);
348
349 conn_info(tconn, "helper command: %s %s %s\n", usermode_helper, cmd, tconn->name);
350 /* TODO: conn_bcast_event() ?? */
351
352 ret = call_usermodehelper(usermode_helper, argv, envp, 1);
353 if (ret)
354 conn_warn(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
355 usermode_helper, cmd, tconn->name,
356 (ret >> 8) & 0xff, ret);
357 else
358 conn_info(tconn, "helper command: %s %s %s exit code %u (0x%x)\n",
359 usermode_helper, cmd, tconn->name,
360 (ret >> 8) & 0xff, ret);
361 /* TODO: conn_bcast_event() ?? */
362
363 if (ret < 0) /* Ignore any ERRNOs we got. */
364 ret = 0;
365
366 return ret;
367}
368
cb703454 369static enum drbd_fencing_p highest_fencing_policy(struct drbd_tconn *tconn)
b411b363 370{
cb703454
PR
371 enum drbd_fencing_p fp = FP_NOT_AVAIL;
372 struct drbd_conf *mdev;
373 int vnr;
374
375 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
376 if (get_ldev_if_state(mdev, D_CONSISTENT)) {
377 fp = max_t(enum drbd_fencing_p, fp, mdev->ldev->dc.fencing);
378 put_ldev(mdev);
379 }
380 }
381
382 return fp;
383}
384
385bool conn_try_outdate_peer(struct drbd_tconn *tconn)
386{
387 union drbd_state mask = { };
388 union drbd_state val = { };
389 enum drbd_fencing_p fp;
b411b363
PR
390 char *ex_to_string;
391 int r;
b411b363 392
cb703454
PR
393 if (tconn->cstate >= C_WF_REPORT_PARAMS) {
394 conn_err(tconn, "Expected cstate < C_WF_REPORT_PARAMS\n");
395 return false;
396 }
b411b363 397
cb703454
PR
398 fp = highest_fencing_policy(tconn);
399 switch (fp) {
400 case FP_NOT_AVAIL:
401 conn_warn(tconn, "Not fencing peer, I'm not even Consistent myself.\n");
fb22c402 402 goto out;
cb703454
PR
403 case FP_DONT_CARE:
404 return true;
405 default: ;
b411b363
PR
406 }
407
cb703454 408 r = conn_khelper(tconn, "fence-peer");
b411b363
PR
409
410 switch ((r>>8) & 0xff) {
411 case 3: /* peer is inconsistent */
412 ex_to_string = "peer is inconsistent or worse";
cb703454
PR
413 mask.pdsk = D_MASK;
414 val.pdsk = D_INCONSISTENT;
b411b363
PR
415 break;
416 case 4: /* peer got outdated, or was already outdated */
417 ex_to_string = "peer was fenced";
cb703454
PR
418 mask.pdsk = D_MASK;
419 val.pdsk = D_OUTDATED;
b411b363
PR
420 break;
421 case 5: /* peer was down */
cb703454 422 if (conn_highest_disk(tconn) == D_UP_TO_DATE) {
b411b363
PR
423 /* we will(have) create(d) a new UUID anyways... */
424 ex_to_string = "peer is unreachable, assumed to be dead";
cb703454
PR
425 mask.pdsk = D_MASK;
426 val.pdsk = D_OUTDATED;
b411b363
PR
427 } else {
428 ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
b411b363
PR
429 }
430 break;
431 case 6: /* Peer is primary, voluntarily outdate myself.
432 * This is useful when an unconnected R_SECONDARY is asked to
433 * become R_PRIMARY, but finds the other peer being active. */
434 ex_to_string = "peer is active";
cb703454
PR
435 conn_warn(tconn, "Peer is primary, outdating myself.\n");
436 mask.disk = D_MASK;
437 val.disk = D_OUTDATED;
b411b363
PR
438 break;
439 case 7:
440 if (fp != FP_STONITH)
cb703454 441 conn_err(tconn, "fence-peer() = 7 && fencing != Stonith !!!\n");
b411b363 442 ex_to_string = "peer was stonithed";
cb703454
PR
443 mask.pdsk = D_MASK;
444 val.pdsk = D_OUTDATED;
b411b363
PR
445 break;
446 default:
447 /* The script is broken ... */
cb703454
PR
448 conn_err(tconn, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
449 return false; /* Eventually leave IO frozen */
b411b363
PR
450 }
451
cb703454
PR
452 conn_info(tconn, "fence-peer helper returned %d (%s)\n",
453 (r>>8) & 0xff, ex_to_string);
fb22c402 454
cb703454 455 out:
fb22c402 456
cb703454
PR
457 /* Not using
458 conn_request_state(tconn, mask, val, CS_VERBOSE);
459 here, because we might were able to re-establish the connection in the
460 meantime. */
461 spin_lock_irq(&tconn->req_lock);
462 if (tconn->cstate < C_WF_REPORT_PARAMS)
463 _conn_request_state(tconn, mask, val, CS_VERBOSE);
464 spin_unlock_irq(&tconn->req_lock);
465
466 return conn_highest_pdsk(tconn) <= D_OUTDATED;
b411b363
PR
467}
468
87f7be4c
PR
/* kthread entry point: @data is the struct drbd_tconn to fence.
 * The result of the fencing attempt is not evaluated here. */
static int _try_outdate_peer_async(void *data)
{
	struct drbd_tconn *tconn = data;

	conn_try_outdate_peer(tconn);
	return 0;
}
477
cb703454 478void conn_try_outdate_peer_async(struct drbd_tconn *tconn)
87f7be4c
PR
479{
480 struct task_struct *opa;
481
cb703454 482 opa = kthread_run(_try_outdate_peer_async, tconn, "drbd_async_h");
87f7be4c 483 if (IS_ERR(opa))
cb703454 484 conn_err(tconn, "out of mem, failed to invoke fence-peer helper\n");
87f7be4c 485}
b411b363 486
bf885f8a
AG
487enum drbd_state_rv
488drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
b411b363
PR
489{
490 const int max_tries = 4;
bf885f8a 491 enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
b411b363
PR
492 int try = 0;
493 int forced = 0;
494 union drbd_state mask, val;
b411b363
PR
495
496 if (new_role == R_PRIMARY)
0625ac19 497 request_ping(mdev->tconn); /* Detect a dead peer ASAP */
b411b363 498
8410da8f 499 mutex_lock(mdev->state_mutex);
b411b363
PR
500
501 mask.i = 0; mask.role = R_MASK;
502 val.i = 0; val.role = new_role;
503
504 while (try++ < max_tries) {
bf885f8a 505 rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
b411b363
PR
506
507 /* in case we first succeeded to outdate,
508 * but now suddenly could establish a connection */
bf885f8a 509 if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
b411b363
PR
510 val.pdsk = 0;
511 mask.pdsk = 0;
512 continue;
513 }
514
bf885f8a 515 if (rv == SS_NO_UP_TO_DATE_DISK && force &&
d10a33c6
PR
516 (mdev->state.disk < D_UP_TO_DATE &&
517 mdev->state.disk >= D_INCONSISTENT)) {
b411b363
PR
518 mask.disk = D_MASK;
519 val.disk = D_UP_TO_DATE;
520 forced = 1;
521 continue;
522 }
523
bf885f8a 524 if (rv == SS_NO_UP_TO_DATE_DISK &&
b411b363
PR
525 mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
526 D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
b411b363 527
cb703454 528 if (conn_try_outdate_peer(mdev->tconn)) {
b411b363
PR
529 val.disk = D_UP_TO_DATE;
530 mask.disk = D_MASK;
531 }
b411b363
PR
532 continue;
533 }
534
bf885f8a 535 if (rv == SS_NOTHING_TO_DO)
3b98c0c2 536 goto out;
bf885f8a 537 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
cb703454 538 if (!conn_try_outdate_peer(mdev->tconn) && force) {
b411b363 539 dev_warn(DEV, "Forced into split brain situation!\n");
cb703454
PR
540 mask.pdsk = D_MASK;
541 val.pdsk = D_OUTDATED;
b411b363 542
cb703454 543 }
b411b363
PR
544 continue;
545 }
bf885f8a 546 if (rv == SS_TWO_PRIMARIES) {
b411b363
PR
547 /* Maybe the peer is detected as dead very soon...
548 retry at most once more in this case. */
89e58e75 549 schedule_timeout_interruptible((mdev->tconn->net_conf->ping_timeo+1)*HZ/10);
b411b363
PR
550 if (try < max_tries)
551 try = max_tries - 1;
552 continue;
553 }
bf885f8a
AG
554 if (rv < SS_SUCCESS) {
555 rv = _drbd_request_state(mdev, mask, val,
b411b363 556 CS_VERBOSE + CS_WAIT_COMPLETE);
bf885f8a 557 if (rv < SS_SUCCESS)
3b98c0c2 558 goto out;
b411b363
PR
559 }
560 break;
561 }
562
bf885f8a 563 if (rv < SS_SUCCESS)
3b98c0c2 564 goto out;
b411b363
PR
565
566 if (forced)
567 dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
568
569 /* Wait until nothing is on the fly :) */
570 wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
571
572 if (new_role == R_SECONDARY) {
81e84650 573 set_disk_ro(mdev->vdisk, true);
b411b363
PR
574 if (get_ldev(mdev)) {
575 mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
576 put_ldev(mdev);
577 }
578 } else {
b2fb6dbe 579 if (get_net_conf(mdev->tconn)) {
89e58e75 580 mdev->tconn->net_conf->want_lose = 0;
b2fb6dbe 581 put_net_conf(mdev->tconn);
b411b363 582 }
81e84650 583 set_disk_ro(mdev->vdisk, false);
b411b363
PR
584 if (get_ldev(mdev)) {
585 if (((mdev->state.conn < C_CONNECTED ||
586 mdev->state.pdsk <= D_FAILED)
587 && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
588 drbd_uuid_new_current(mdev);
589
590 mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
591 put_ldev(mdev);
592 }
593 }
594
19f843aa
LE
595 /* writeout of activity log covered areas of the bitmap
596 * to stable storage done in after state change already */
b411b363
PR
597
598 if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
599 /* if this was forced, we should consider sync */
600 if (forced)
601 drbd_send_uuids(mdev);
602 drbd_send_state(mdev);
603 }
604
605 drbd_md_sync(mdev);
606
607 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
3b98c0c2 608out:
8410da8f 609 mutex_unlock(mdev->state_mutex);
bf885f8a 610 return rv;
b411b363
PR
611}
612
/* Map the error code of a *_from_attrs() parser to a message suitable
 * for drbd_msg_put_info().  Unknown codes yield a generic message. */
static const char *from_attrs_err_to_txt(int err)
{
	switch (err) {
	case -ENOMSG:
		return "required attribute missing";
	case -EOPNOTSUPP:
		return "unknown mandatory attribute";
	case -EEXIST:
		return "can not change invariant setting";
	default:
		return "invalid attribute value";
	}
}
620
3b98c0c2 621int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info)
b411b363 622{
3b98c0c2
LE
623 struct set_role_parms parms;
624 int err;
625 enum drbd_ret_code retcode;
b411b363 626
3b98c0c2
LE
627 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
628 if (!adm_ctx.reply_skb)
629 return retcode;
630 if (retcode != NO_ERROR)
631 goto out;
632
633 memset(&parms, 0, sizeof(parms));
634 if (info->attrs[DRBD_NLA_SET_ROLE_PARMS]) {
f399002e 635 err = set_role_parms_from_attrs(&parms, info);
3b98c0c2
LE
636 if (err) {
637 retcode = ERR_MANDATORY_TAG;
638 drbd_msg_put_info(from_attrs_err_to_txt(err));
639 goto out;
640 }
641 }
642
643 if (info->genlhdr->cmd == DRBD_ADM_PRIMARY)
644 retcode = drbd_set_role(adm_ctx.mdev, R_PRIMARY, parms.assume_uptodate);
645 else
646 retcode = drbd_set_role(adm_ctx.mdev, R_SECONDARY, 0);
647out:
648 drbd_adm_finish(info, retcode);
b411b363
PR
649 return 0;
650}
651
652/* initializes the md.*_offset members, so we are able to find
653 * the on disk meta data */
654static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
655 struct drbd_backing_dev *bdev)
656{
657 sector_t md_size_sect = 0;
658 switch (bdev->dc.meta_dev_idx) {
659 default:
660 /* v07 style fixed size indexed meta data */
661 bdev->md.md_size_sect = MD_RESERVED_SECT;
662 bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
663 bdev->md.al_offset = MD_AL_OFFSET;
664 bdev->md.bm_offset = MD_BM_OFFSET;
665 break;
666 case DRBD_MD_INDEX_FLEX_EXT:
667 /* just occupy the full device; unit: sectors */
668 bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
669 bdev->md.md_offset = 0;
670 bdev->md.al_offset = MD_AL_OFFSET;
671 bdev->md.bm_offset = MD_BM_OFFSET;
672 break;
673 case DRBD_MD_INDEX_INTERNAL:
674 case DRBD_MD_INDEX_FLEX_INT:
675 bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
676 /* al size is still fixed */
7ad651b5 677 bdev->md.al_offset = -MD_AL_SECTORS;
b411b363
PR
678 /* we need (slightly less than) ~ this much bitmap sectors: */
679 md_size_sect = drbd_get_capacity(bdev->backing_bdev);
680 md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
681 md_size_sect = BM_SECT_TO_EXT(md_size_sect);
682 md_size_sect = ALIGN(md_size_sect, 8);
683
684 /* plus the "drbd meta data super block",
685 * and the activity log; */
686 md_size_sect += MD_BM_OFFSET;
687
688 bdev->md.md_size_sect = md_size_sect;
689 /* bitmap offset is adjusted by 'super' block size */
690 bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
691 break;
692 }
693}
694
/*
 * ppsize() - pretty print a size into @buf
 * @buf:  destination, at least 9 bytes including the trailing NUL
 *        (-1ULL ==> "16384 EB")
 * @size: the size to print; expected to be in KB
 *
 * The value is scaled by 1024 (with rounding) until it is below 10000 or
 * the largest known unit is reached, then printed as "<number> <unit>B".
 * Returns @buf.
 */
char *ppsize(char *buf, unsigned long long size)
{
	static const char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
	unsigned int base = 0;

	while (size >= 10000 && base < sizeof(units) - 1) {
		/* shift + round */
		size = (size >> 10) + !!(size & (1 << 9));
		base++;
	}
	sprintf(buf, "%u %cB", (unsigned)size, units[base]);

	return buf;
}
711
/* there is still a theoretical deadlock when called from receiver
 * on a D_INCONSISTENT R_PRIMARY:
 *  remote READ does inc_ap_bio, receiver would need to receive answer
 *  packet from remote to dec_ap_bio again.
 *  receiver receive_sizes(), comes here,
 *  waits for ap_bio_cnt == 0. -> deadlock.
 * but this cannot happen, actually, because:
 *  R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
 *  (not connected, or bad/no disk on peer):
 *  see drbd_fail_request_early, ap_bio_cnt is zero.
 *  R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
 *  peer may not initiate a resize.
 */
3b98c0c2
LE
725/* Note these are not to be confused with
726 * drbd_adm_suspend_io/drbd_adm_resume_io,
727 * which are (sub) state changes triggered by admin (drbdsetup),
728 * and can be long lived.
729 * This changes an mdev->flag, is triggered by drbd internals,
730 * and should be short-lived. */
b411b363
PR
731void drbd_suspend_io(struct drbd_conf *mdev)
732{
733 set_bit(SUSPEND_IO, &mdev->flags);
2aebfabb 734 if (drbd_suspended(mdev))
265be2d0 735 return;
b411b363
PR
736 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
737}
738
739void drbd_resume_io(struct drbd_conf *mdev)
740{
741 clear_bit(SUSPEND_IO, &mdev->flags);
742 wake_up(&mdev->misc_wait);
743}
744
745/**
746 * drbd_determine_dev_size() - Sets the right device size obeying all constraints
747 * @mdev: DRBD device.
748 *
749 * Returns 0 on success, negative return values indicate errors.
750 * You should call drbd_md_sync() after calling this function.
751 */
24c4830c 752enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
b411b363
PR
753{
754 sector_t prev_first_sect, prev_size; /* previous meta location */
755 sector_t la_size;
756 sector_t size;
757 char ppb[10];
758
759 int md_moved, la_size_changed;
760 enum determine_dev_size rv = unchanged;
761
762 /* race:
763 * application request passes inc_ap_bio,
764 * but then cannot get an AL-reference.
765 * this function later may wait on ap_bio_cnt == 0. -> deadlock.
766 *
767 * to avoid that:
768 * Suspend IO right here.
769 * still lock the act_log to not trigger ASSERTs there.
770 */
771 drbd_suspend_io(mdev);
772
773 /* no wait necessary anymore, actually we could assert that */
774 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
775
776 prev_first_sect = drbd_md_first_sector(mdev->ldev);
777 prev_size = mdev->ldev->md.md_size_sect;
778 la_size = mdev->ldev->md.la_size_sect;
779
780 /* TODO: should only be some assert here, not (re)init... */
781 drbd_md_set_sector_offsets(mdev, mdev->ldev);
782
d845030f 783 size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
b411b363
PR
784
785 if (drbd_get_capacity(mdev->this_bdev) != size ||
786 drbd_bm_capacity(mdev) != size) {
787 int err;
02d9a94b 788 err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC));
b411b363
PR
789 if (unlikely(err)) {
790 /* currently there is only one error: ENOMEM! */
791 size = drbd_bm_capacity(mdev)>>1;
792 if (size == 0) {
793 dev_err(DEV, "OUT OF MEMORY! "
794 "Could not allocate bitmap!\n");
795 } else {
796 dev_err(DEV, "BM resizing failed. "
797 "Leaving size unchanged at size = %lu KB\n",
798 (unsigned long)size);
799 }
800 rv = dev_size_error;
801 }
802 /* racy, see comments above. */
803 drbd_set_my_capacity(mdev, size);
804 mdev->ldev->md.la_size_sect = size;
805 dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
806 (unsigned long long)size>>1);
807 }
808 if (rv == dev_size_error)
809 goto out;
810
811 la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
812
813 md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
814 || prev_size != mdev->ldev->md.md_size_sect;
815
816 if (la_size_changed || md_moved) {
24dccabb
AG
817 int err;
818
b411b363
PR
819 drbd_al_shrink(mdev); /* All extents inactive. */
820 dev_info(DEV, "Writing the whole bitmap, %s\n",
821 la_size_changed && md_moved ? "size changed and md moved" :
822 la_size_changed ? "size changed" : "md moved");
20ceb2b2
LE
823 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
824 err = drbd_bitmap_io(mdev, &drbd_bm_write,
825 "size changed", BM_LOCKED_MASK);
24dccabb
AG
826 if (err) {
827 rv = dev_size_error;
828 goto out;
829 }
b411b363
PR
830 drbd_md_mark_dirty(mdev);
831 }
832
833 if (size > la_size)
834 rv = grew;
835 if (size < la_size)
836 rv = shrunk;
837out:
838 lc_unlock(mdev->act_log);
839 wake_up(&mdev->al_wait);
840 drbd_resume_io(mdev);
841
842 return rv;
843}
844
845sector_t
a393db6f 846drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
b411b363
PR
847{
848 sector_t p_size = mdev->p_size; /* partner's disk size. */
849 sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
850 sector_t m_size; /* my size */
851 sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
852 sector_t size = 0;
853
854 m_size = drbd_get_max_capacity(bdev);
855
a393db6f
PR
856 if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
857 dev_warn(DEV, "Resize while not connected was forced by the user!\n");
858 p_size = m_size;
859 }
860
b411b363
PR
861 if (p_size && m_size) {
862 size = min_t(sector_t, p_size, m_size);
863 } else {
864 if (la_size) {
865 size = la_size;
866 if (m_size && m_size < size)
867 size = m_size;
868 if (p_size && p_size < size)
869 size = p_size;
870 } else {
871 if (m_size)
872 size = m_size;
873 if (p_size)
874 size = p_size;
875 }
876 }
877
878 if (size == 0)
879 dev_err(DEV, "Both nodes diskless!\n");
880
881 if (u_size) {
882 if (u_size > size)
883 dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
884 (unsigned long)u_size>>1, (unsigned long)size>>1);
885 else
886 size = u_size;
887 }
888
889 return size;
890}
891
892/**
893 * drbd_check_al_size() - Ensures that the AL is of the right size
894 * @mdev: DRBD device.
895 *
896 * Returns -EBUSY if current al lru is still used, -ENOMEM when allocation
897 * failed, and 0 on success. You should call drbd_md_sync() after you called
898 * this function.
899 */
f399002e 900static int drbd_check_al_size(struct drbd_conf *mdev, struct disk_conf *dc)
b411b363
PR
901{
902 struct lru_cache *n, *t;
903 struct lc_element *e;
904 unsigned int in_use;
905 int i;
906
f399002e
LE
907 if (!expect(dc->al_extents >= DRBD_AL_EXTENTS_MIN))
908 dc->al_extents = DRBD_AL_EXTENTS_MIN;
b411b363
PR
909
910 if (mdev->act_log &&
f399002e 911 mdev->act_log->nr_elements == dc->al_extents)
b411b363
PR
912 return 0;
913
914 in_use = 0;
915 t = mdev->act_log;
7ad651b5 916 n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION,
f399002e 917 dc->al_extents, sizeof(struct lc_element), 0);
b411b363
PR
918
919 if (n == NULL) {
920 dev_err(DEV, "Cannot allocate act_log lru!\n");
921 return -ENOMEM;
922 }
923 spin_lock_irq(&mdev->al_lock);
924 if (t) {
925 for (i = 0; i < t->nr_elements; i++) {
926 e = lc_element_by_index(t, i);
927 if (e->refcnt)
928 dev_err(DEV, "refcnt(%d)==%d\n",
929 e->lc_number, e->refcnt);
930 in_use += e->refcnt;
931 }
932 }
933 if (!in_use)
934 mdev->act_log = n;
935 spin_unlock_irq(&mdev->al_lock);
936 if (in_use) {
937 dev_err(DEV, "Activity log still in use!\n");
938 lc_destroy(n);
939 return -EBUSY;
940 } else {
941 if (t)
942 lc_destroy(t);
943 }
944 drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elemens */
945 return 0;
946}
947
99432fcc 948static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
b411b363
PR
949{
950 struct request_queue * const q = mdev->rq_queue;
99432fcc
PR
951 int max_hw_sectors = max_bio_size >> 9;
952 int max_segments = 0;
953
954 if (get_ldev_if_state(mdev, D_ATTACHING)) {
955 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
956
957 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
958 max_segments = mdev->ldev->dc.max_bio_bvecs;
959 put_ldev(mdev);
960 }
b411b363 961
b411b363 962 blk_queue_logical_block_size(q, 512);
1816a2b4
LE
963 blk_queue_max_hw_sectors(q, max_hw_sectors);
964 /* This is the workaround for "bio would need to, but cannot, be split" */
965 blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
966 blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
b411b363 967
99432fcc
PR
968 if (get_ldev_if_state(mdev, D_ATTACHING)) {
969 struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
970
971 blk_queue_stack_limits(q, b);
972
973 if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
974 dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
975 q->backing_dev_info.ra_pages,
976 b->backing_dev_info.ra_pages);
977 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
978 }
979 put_ldev(mdev);
980 }
981}
982
983void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
984{
985 int now, new, local, peer;
986
987 now = queue_max_hw_sectors(mdev->rq_queue) << 9;
988 local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
989 peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
b411b363 990
99432fcc
PR
991 if (get_ldev_if_state(mdev, D_ATTACHING)) {
992 local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
993 mdev->local_max_bio_size = local;
994 put_ldev(mdev);
b411b363 995 }
99432fcc
PR
996
997 /* We may ignore peer limits if the peer is modern enough.
998 Because new from 8.3.8 onwards the peer can use multiple
999 BIOs for a single peer_request */
1000 if (mdev->state.conn >= C_CONNECTED) {
31890f4a 1001 if (mdev->tconn->agreed_pro_version < 94)
99432fcc 1002 peer = mdev->peer_max_bio_size;
31890f4a 1003 else if (mdev->tconn->agreed_pro_version == 94)
99432fcc
PR
1004 peer = DRBD_MAX_SIZE_H80_PACKET;
1005 else /* drbd 8.3.8 onwards */
1006 peer = DRBD_MAX_BIO_SIZE;
1007 }
1008
1009 new = min_t(int, local, peer);
1010
1011 if (mdev->state.role == R_PRIMARY && new < now)
1012 dev_err(DEV, "ASSERT FAILED new < now; (%d < %d)\n", new, now);
1013
1014 if (new != now)
1015 dev_info(DEV, "max BIO size = %u\n", new);
1016
1017 drbd_setup_queue_param(mdev, new);
b411b363
PR
1018}
1019
1020/* serialize deconfig (worker exiting, doing cleanup)
1021 * and reconfig (drbdsetup disk, drbdsetup net)
1022 *
c518d04f
LE
1023 * Wait for a potentially exiting worker, then restart it,
1024 * or start a new one. Flush any pending work, there may still be an
1025 * after_state_change queued.
b411b363 1026 */
0e29d163 1027static void conn_reconfig_start(struct drbd_tconn *tconn)
b411b363 1028{
0e29d163
PR
1029 wait_event(tconn->ping_wait, !test_and_set_bit(CONFIG_PENDING, &tconn->flags));
1030 wait_event(tconn->ping_wait, !test_bit(OBJECT_DYING, &tconn->flags));
1031 drbd_thread_start(&tconn->worker);
1032 conn_flush_workqueue(tconn);
b411b363
PR
1033}
1034
1035/* if still unconfigured, stops worker again.
1036 * if configured now, clears CONFIG_PENDING.
1037 * wakes potential waiters */
0e29d163 1038static void conn_reconfig_done(struct drbd_tconn *tconn)
b411b363 1039{
0e29d163
PR
1040 spin_lock_irq(&tconn->req_lock);
1041 if (conn_all_vols_unconf(tconn)) {
1042 set_bit(OBJECT_DYING, &tconn->flags);
1043 drbd_thread_stop_nowait(&tconn->worker);
b411b363 1044 } else
0e29d163
PR
1045 clear_bit(CONFIG_PENDING, &tconn->flags);
1046 spin_unlock_irq(&tconn->req_lock);
1047 wake_up(&tconn->ping_wait);
b411b363
PR
1048}
1049
0778286a
PR
1050/* Make sure IO is suspended before calling this function(). */
1051static void drbd_suspend_al(struct drbd_conf *mdev)
1052{
1053 int s = 0;
1054
61610420 1055 if (!lc_try_lock(mdev->act_log)) {
0778286a
PR
1056 dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
1057 return;
1058 }
1059
61610420 1060 drbd_al_shrink(mdev);
87eeee41 1061 spin_lock_irq(&mdev->tconn->req_lock);
0778286a
PR
1062 if (mdev->state.conn < C_CONNECTED)
1063 s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
87eeee41 1064 spin_unlock_irq(&mdev->tconn->req_lock);
61610420 1065 lc_unlock(mdev->act_log);
0778286a
PR
1066
1067 if (s)
1068 dev_info(DEV, "Suspended AL updates\n");
1069}
1070
f399002e
LE
1071int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1072{
1073 enum drbd_ret_code retcode;
1074 struct drbd_conf *mdev;
1075 struct disk_conf *ndc; /* new disk conf */
1076 int err, fifo_size;
1077 int *rs_plan_s = NULL;
1078
1079 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1080 if (!adm_ctx.reply_skb)
1081 return retcode;
1082 if (retcode != NO_ERROR)
1083 goto out;
1084
1085 mdev = adm_ctx.mdev;
1086
1087 /* we also need a disk
1088 * to change the options on */
1089 if (!get_ldev(mdev)) {
1090 retcode = ERR_NO_DISK;
1091 goto out;
1092 }
1093
1094/* FIXME freeze IO, cluster wide.
1095 *
1096 * We should make sure no-one uses
1097 * some half-updated struct when we
1098 * assign it later. */
1099
1100 ndc = kmalloc(sizeof(*ndc), GFP_KERNEL);
1101 if (!ndc) {
1102 retcode = ERR_NOMEM;
1103 goto fail;
1104 }
1105
1106 memcpy(ndc, &mdev->ldev->dc, sizeof(*ndc));
1107 err = disk_conf_from_attrs_for_change(ndc, info);
1108 if (err) {
1109 retcode = ERR_MANDATORY_TAG;
1110 drbd_msg_put_info(from_attrs_err_to_txt(err));
1111 }
1112
1113 if (!expect(ndc->resync_rate >= 1))
1114 ndc->resync_rate = 1;
1115
1116 /* clip to allowed range */
1117 if (!expect(ndc->al_extents >= DRBD_AL_EXTENTS_MIN))
1118 ndc->al_extents = DRBD_AL_EXTENTS_MIN;
1119 if (!expect(ndc->al_extents <= DRBD_AL_EXTENTS_MAX))
1120 ndc->al_extents = DRBD_AL_EXTENTS_MAX;
1121
1122 /* most sanity checks done, try to assign the new sync-after
1123 * dependency. need to hold the global lock in there,
1124 * to avoid a race in the dependency loop check. */
1125 retcode = drbd_alter_sa(mdev, ndc->resync_after);
1126 if (retcode != NO_ERROR)
1127 goto fail;
1128
1129 fifo_size = (ndc->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1130 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
1131 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
1132 if (!rs_plan_s) {
1133 dev_err(DEV, "kmalloc of fifo_buffer failed");
1134 retcode = ERR_NOMEM;
1135 goto fail;
1136 }
1137 }
1138
1139 if (fifo_size != mdev->rs_plan_s.size) {
1140 kfree(mdev->rs_plan_s.values);
1141 mdev->rs_plan_s.values = rs_plan_s;
1142 mdev->rs_plan_s.size = fifo_size;
1143 mdev->rs_planed = 0;
1144 rs_plan_s = NULL;
1145 }
1146
1147 wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
1148 drbd_al_shrink(mdev);
1149 err = drbd_check_al_size(mdev, ndc);
1150 lc_unlock(mdev->act_log);
1151 wake_up(&mdev->al_wait);
1152
1153 if (err) {
1154 retcode = ERR_NOMEM;
1155 goto fail;
1156 }
1157
1158 /* FIXME
1159 * To avoid someone looking at a half-updated struct, we probably
1160 * should have a rw-semaphor on net_conf and disk_conf.
1161 */
1162 mdev->ldev->dc = *ndc;
1163
1164 drbd_md_sync(mdev);
1165
1166
1167 if (mdev->state.conn >= C_CONNECTED)
1168 drbd_send_sync_param(mdev);
1169
1170 fail:
1171 put_ldev(mdev);
1172 kfree(ndc);
1173 kfree(rs_plan_s);
1174 out:
1175 drbd_adm_finish(info, retcode);
1176 return 0;
1177}
1178
3b98c0c2 1179int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
b411b363 1180{
3b98c0c2
LE
1181 struct drbd_conf *mdev;
1182 int err;
116676ca 1183 enum drbd_ret_code retcode;
b411b363
PR
1184 enum determine_dev_size dd;
1185 sector_t max_possible_sectors;
1186 sector_t min_md_device_sectors;
1187 struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
e525fd89 1188 struct block_device *bdev;
b411b363
PR
1189 struct lru_cache *resync_lru = NULL;
1190 union drbd_state ns, os;
f2024e7c 1191 enum drbd_state_rv rv;
b411b363 1192 int cp_discovered = 0;
b411b363 1193
3b98c0c2
LE
1194 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1195 if (!adm_ctx.reply_skb)
1196 return retcode;
1197 if (retcode != NO_ERROR)
40cbf085 1198 goto finish;
3b98c0c2
LE
1199
1200 mdev = adm_ctx.mdev;
0e29d163 1201 conn_reconfig_start(mdev->tconn);
b411b363
PR
1202
1203 /* if you want to reconfigure, please tear down first */
1204 if (mdev->state.disk > D_DISKLESS) {
1205 retcode = ERR_DISK_CONFIGURED;
1206 goto fail;
1207 }
82f59cc6
LE
1208 /* It may just now have detached because of IO error. Make sure
1209 * drbd_ldev_destroy is done already, we may end up here very fast,
1210 * e.g. if someone calls attach from the on-io-error handler,
1211 * to realize a "hot spare" feature (not that I'd recommend that) */
1212 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
b411b363 1213
3b98c0c2 1214 /* allocation not in the IO path, drbdsetup context */
b411b363
PR
1215 nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
1216 if (!nbc) {
1217 retcode = ERR_NOMEM;
1218 goto fail;
1219 }
1220
f399002e
LE
1221 nbc->dc = (struct disk_conf) {
1222 {}, 0, /* backing_dev */
1223 {}, 0, /* meta_dev */
1224 0, /* meta_dev_idx */
1225 DRBD_DISK_SIZE_SECT_DEF, /* disk_size */
1226 DRBD_MAX_BIO_BVECS_DEF, /* max_bio_bvecs */
1227 DRBD_ON_IO_ERROR_DEF, /* on_io_error */
1228 DRBD_FENCING_DEF, /* fencing */
1229 DRBD_RATE_DEF, /* resync_rate */
1230 DRBD_AFTER_DEF, /* resync_after */
1231 DRBD_AL_EXTENTS_DEF, /* al_extents */
1232 DRBD_C_PLAN_AHEAD_DEF, /* c_plan_ahead */
1233 DRBD_C_DELAY_TARGET_DEF, /* c_delay_target */
1234 DRBD_C_FILL_TARGET_DEF, /* c_fill_target */
1235 DRBD_C_MAX_RATE_DEF, /* c_max_rate */
1236 DRBD_C_MIN_RATE_DEF, /* c_min_rate */
1237 0, /* no_disk_barrier */
1238 0, /* no_disk_flush */
1239 0, /* no_disk_drain */
1240 0, /* no_md_flush */
1241 };
1242
1243 err = disk_conf_from_attrs(&nbc->dc, info);
3b98c0c2 1244 if (err) {
b411b363 1245 retcode = ERR_MANDATORY_TAG;
3b98c0c2 1246 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
1247 goto fail;
1248 }
1249
3b98c0c2 1250 if ((int)nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
b411b363
PR
1251 retcode = ERR_MD_IDX_INVALID;
1252 goto fail;
1253 }
1254
b2fb6dbe 1255 if (get_net_conf(mdev->tconn)) {
89e58e75 1256 int prot = mdev->tconn->net_conf->wire_protocol;
b2fb6dbe 1257 put_net_conf(mdev->tconn);
47ff2d0a
PR
1258 if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
1259 retcode = ERR_STONITH_AND_PROT_A;
1260 goto fail;
1261 }
1262 }
1263
d4d77629
TH
1264 bdev = blkdev_get_by_path(nbc->dc.backing_dev,
1265 FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
e525fd89 1266 if (IS_ERR(bdev)) {
b411b363 1267 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
e525fd89 1268 PTR_ERR(bdev));
b411b363
PR
1269 retcode = ERR_OPEN_DISK;
1270 goto fail;
1271 }
e525fd89
TH
1272 nbc->backing_bdev = bdev;
1273
1274 /*
1275 * meta_dev_idx >= 0: external fixed size, possibly multiple
1276 * drbd sharing one meta device. TODO in that case, paranoia
1277 * check that [md_bdev, meta_dev_idx] is not yet used by some
1278 * other drbd minor! (if you use drbd.conf + drbdadm, that
1279 * should check it for you already; but if you don't, or
1280 * someone fooled it, we need to double check here)
1281 */
d4d77629
TH
1282 bdev = blkdev_get_by_path(nbc->dc.meta_dev,
1283 FMODE_READ | FMODE_WRITE | FMODE_EXCL,
3b98c0c2 1284 ((int)nbc->dc.meta_dev_idx < 0) ?
d4d77629 1285 (void *)mdev : (void *)drbd_m_holder);
e525fd89 1286 if (IS_ERR(bdev)) {
b411b363 1287 dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
e525fd89 1288 PTR_ERR(bdev));
b411b363
PR
1289 retcode = ERR_OPEN_MD_DISK;
1290 goto fail;
1291 }
e525fd89 1292 nbc->md_bdev = bdev;
b411b363 1293
e525fd89
TH
1294 if ((nbc->backing_bdev == nbc->md_bdev) !=
1295 (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1296 nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1297 retcode = ERR_MD_IDX_INVALID;
b411b363
PR
1298 goto fail;
1299 }
1300
1301 resync_lru = lc_create("resync", drbd_bm_ext_cache,
46a15bc3 1302 1, 61, sizeof(struct bm_extent),
b411b363
PR
1303 offsetof(struct bm_extent, lce));
1304 if (!resync_lru) {
1305 retcode = ERR_NOMEM;
e525fd89 1306 goto fail;
b411b363
PR
1307 }
1308
1309 /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
1310 drbd_md_set_sector_offsets(mdev, nbc);
1311
1312 if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
1313 dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
1314 (unsigned long long) drbd_get_max_capacity(nbc),
1315 (unsigned long long) nbc->dc.disk_size);
1316 retcode = ERR_DISK_TO_SMALL;
e525fd89 1317 goto fail;
b411b363
PR
1318 }
1319
3b98c0c2 1320 if ((int)nbc->dc.meta_dev_idx < 0) {
b411b363
PR
1321 max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1322 /* at least one MB, otherwise it does not make sense */
1323 min_md_device_sectors = (2<<10);
1324 } else {
1325 max_possible_sectors = DRBD_MAX_SECTORS;
1326 min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
1327 }
1328
b411b363
PR
1329 if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1330 retcode = ERR_MD_DISK_TO_SMALL;
1331 dev_warn(DEV, "refusing attach: md-device too small, "
1332 "at least %llu sectors needed for this meta-disk type\n",
1333 (unsigned long long) min_md_device_sectors);
e525fd89 1334 goto fail;
b411b363
PR
1335 }
1336
1337 /* Make sure the new disk is big enough
1338 * (we may currently be R_PRIMARY with no local disk...) */
1339 if (drbd_get_max_capacity(nbc) <
1340 drbd_get_capacity(mdev->this_bdev)) {
1341 retcode = ERR_DISK_TO_SMALL;
e525fd89 1342 goto fail;
b411b363
PR
1343 }
1344
1345 nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1346
1352994b
LE
1347 if (nbc->known_size > max_possible_sectors) {
1348 dev_warn(DEV, "==> truncating very big lower level device "
1349 "to currently maximum possible %llu sectors <==\n",
1350 (unsigned long long) max_possible_sectors);
3b98c0c2 1351 if ((int)nbc->dc.meta_dev_idx >= 0)
1352994b
LE
1352 dev_warn(DEV, "==>> using internal or flexible "
1353 "meta data may help <<==\n");
1354 }
1355
b411b363
PR
1356 drbd_suspend_io(mdev);
1357 /* also wait for the last barrier ack. */
2aebfabb 1358 wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || drbd_suspended(mdev));
b411b363 1359 /* and for any other previously queued work */
a21e9298 1360 drbd_flush_workqueue(mdev);
b411b363 1361
f2024e7c
AG
1362 rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1363 retcode = rv; /* FIXME: Type mismatch. */
b411b363 1364 drbd_resume_io(mdev);
f2024e7c 1365 if (rv < SS_SUCCESS)
e525fd89 1366 goto fail;
b411b363
PR
1367
1368 if (!get_ldev_if_state(mdev, D_ATTACHING))
1369 goto force_diskless;
1370
1371 drbd_md_set_sector_offsets(mdev, nbc);
1372
1373 if (!mdev->bitmap) {
1374 if (drbd_bm_init(mdev)) {
1375 retcode = ERR_NOMEM;
1376 goto force_diskless_dec;
1377 }
1378 }
1379
1380 retcode = drbd_md_read(mdev, nbc);
1381 if (retcode != NO_ERROR)
1382 goto force_diskless_dec;
1383
1384 if (mdev->state.conn < C_CONNECTED &&
1385 mdev->state.role == R_PRIMARY &&
1386 (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1387 dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
1388 (unsigned long long)mdev->ed_uuid);
1389 retcode = ERR_DATA_NOT_CURRENT;
1390 goto force_diskless_dec;
1391 }
1392
1393 /* Since we are diskless, fix the activity log first... */
f399002e 1394 if (drbd_check_al_size(mdev, &nbc->dc)) {
b411b363
PR
1395 retcode = ERR_NOMEM;
1396 goto force_diskless_dec;
1397 }
1398
1399 /* Prevent shrinking of consistent devices ! */
1400 if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
a393db6f 1401 drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
b411b363
PR
1402 dev_warn(DEV, "refusing to truncate a consistent device\n");
1403 retcode = ERR_DISK_TO_SMALL;
1404 goto force_diskless_dec;
1405 }
1406
1407 if (!drbd_al_read_log(mdev, nbc)) {
1408 retcode = ERR_IO_MD_DISK;
1409 goto force_diskless_dec;
1410 }
1411
b411b363
PR
1412 /* Reset the "barriers don't work" bits here, then force meta data to
1413 * be written, to ensure we determine if barriers are supported. */
1414 if (nbc->dc.no_md_flush)
a8a4e51e 1415 set_bit(MD_NO_FUA, &mdev->flags);
b411b363 1416 else
a8a4e51e 1417 clear_bit(MD_NO_FUA, &mdev->flags);
b411b363
PR
1418
1419 /* Point of no return reached.
1420 * Devices and memory are no longer released by error cleanup below.
1421 * now mdev takes over responsibility, and the state engine should
1422 * clean it up somewhere. */
1423 D_ASSERT(mdev->ldev == NULL);
1424 mdev->ldev = nbc;
1425 mdev->resync = resync_lru;
1426 nbc = NULL;
1427 resync_lru = NULL;
1428
2451fc3b
PR
1429 mdev->write_ordering = WO_bdev_flush;
1430 drbd_bump_write_ordering(mdev, WO_bdev_flush);
b411b363
PR
1431
1432 if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
1433 set_bit(CRASHED_PRIMARY, &mdev->flags);
1434 else
1435 clear_bit(CRASHED_PRIMARY, &mdev->flags);
1436
894c6a94 1437 if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
fb22c402 1438 !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
b411b363
PR
1439 set_bit(CRASHED_PRIMARY, &mdev->flags);
1440 cp_discovered = 1;
1441 }
1442
1443 mdev->send_cnt = 0;
1444 mdev->recv_cnt = 0;
1445 mdev->read_cnt = 0;
1446 mdev->writ_cnt = 0;
1447
99432fcc 1448 drbd_reconsider_max_bio_size(mdev);
b411b363
PR
1449
1450 /* If I am currently not R_PRIMARY,
1451 * but meta data primary indicator is set,
1452 * I just now recover from a hard crash,
1453 * and have been R_PRIMARY before that crash.
1454 *
1455 * Now, if I had no connection before that crash
1456 * (have been degraded R_PRIMARY), chances are that
1457 * I won't find my peer now either.
1458 *
1459 * In that case, and _only_ in that case,
1460 * we use the degr-wfc-timeout instead of the default,
1461 * so we can automatically recover from a crash of a
1462 * degraded but active "cluster" after a certain timeout.
1463 */
1464 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
1465 if (mdev->state.role != R_PRIMARY &&
1466 drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1467 !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
1468 set_bit(USE_DEGR_WFC_T, &mdev->flags);
1469
24c4830c 1470 dd = drbd_determine_dev_size(mdev, 0);
b411b363
PR
1471 if (dd == dev_size_error) {
1472 retcode = ERR_NOMEM_BITMAP;
1473 goto force_diskless_dec;
1474 } else if (dd == grew)
1475 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
1476
1477 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1478 dev_info(DEV, "Assuming that all blocks are out of sync "
1479 "(aka FullSync)\n");
20ceb2b2
LE
1480 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
1481 "set_n_write from attaching", BM_LOCKED_MASK)) {
b411b363
PR
1482 retcode = ERR_IO_MD_DISK;
1483 goto force_diskless_dec;
1484 }
1485 } else {
20ceb2b2 1486 if (drbd_bitmap_io(mdev, &drbd_bm_read,
22ab6a30 1487 "read from attaching", BM_LOCKED_MASK)) {
b411b363
PR
1488 retcode = ERR_IO_MD_DISK;
1489 goto force_diskless_dec;
1490 }
1491 }
1492
1493 if (cp_discovered) {
1494 drbd_al_apply_to_bm(mdev);
20ceb2b2
LE
1495 if (drbd_bitmap_io(mdev, &drbd_bm_write,
1496 "crashed primary apply AL", BM_LOCKED_MASK)) {
19f843aa
LE
1497 retcode = ERR_IO_MD_DISK;
1498 goto force_diskless_dec;
1499 }
b411b363
PR
1500 }
1501
0778286a
PR
1502 if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
1503 drbd_suspend_al(mdev); /* IO is still suspended here... */
1504
87eeee41 1505 spin_lock_irq(&mdev->tconn->req_lock);
78bae59b
PR
1506 os = drbd_read_state(mdev);
1507 ns = os;
b411b363
PR
1508 /* If MDF_CONSISTENT is not set go into inconsistent state,
1509 otherwise investigate MDF_WasUpToDate...
1510 If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1511 otherwise into D_CONSISTENT state.
1512 */
1513 if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
1514 if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
1515 ns.disk = D_CONSISTENT;
1516 else
1517 ns.disk = D_OUTDATED;
1518 } else {
1519 ns.disk = D_INCONSISTENT;
1520 }
1521
1522 if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
1523 ns.pdsk = D_OUTDATED;
1524
1525 if ( ns.disk == D_CONSISTENT &&
1526 (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
1527 ns.disk = D_UP_TO_DATE;
1528
1529 /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1530 MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1531 this point, because drbd_request_state() modifies these
1532 flags. */
1533
1534 /* In case we are C_CONNECTED postpone any decision on the new disk
1535 state after the negotiation phase. */
1536 if (mdev->state.conn == C_CONNECTED) {
1537 mdev->new_state_tmp.i = ns.i;
1538 ns.i = os.i;
1539 ns.disk = D_NEGOTIATING;
dc66c74d
PR
1540
1541 /* We expect to receive up-to-date UUIDs soon.
1542 To avoid a race in receive_state, free p_uuid while
1543 holding req_lock. I.e. atomic with the state change */
1544 kfree(mdev->p_uuid);
1545 mdev->p_uuid = NULL;
b411b363
PR
1546 }
1547
1548 rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
87eeee41 1549 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
1550
1551 if (rv < SS_SUCCESS)
1552 goto force_diskless_dec;
1553
1554 if (mdev->state.role == R_PRIMARY)
1555 mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
1556 else
1557 mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1558
1559 drbd_md_mark_dirty(mdev);
1560 drbd_md_sync(mdev);
1561
1562 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1563 put_ldev(mdev);
0e29d163 1564 conn_reconfig_done(mdev->tconn);
3b98c0c2 1565 drbd_adm_finish(info, retcode);
b411b363
PR
1566 return 0;
1567
1568 force_diskless_dec:
1569 put_ldev(mdev);
1570 force_diskless:
82f59cc6 1571 drbd_force_state(mdev, NS(disk, D_FAILED));
b411b363 1572 drbd_md_sync(mdev);
b411b363 1573 fail:
40cbf085 1574 conn_reconfig_done(mdev->tconn);
b411b363 1575 if (nbc) {
e525fd89
TH
1576 if (nbc->backing_bdev)
1577 blkdev_put(nbc->backing_bdev,
1578 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1579 if (nbc->md_bdev)
1580 blkdev_put(nbc->md_bdev,
1581 FMODE_READ | FMODE_WRITE | FMODE_EXCL);
b411b363
PR
1582 kfree(nbc);
1583 }
1584 lc_destroy(resync_lru);
1585
40cbf085 1586 finish:
3b98c0c2 1587 drbd_adm_finish(info, retcode);
b411b363
PR
1588 return 0;
1589}
1590
85f75dd7
LE
1591static int adm_detach(struct drbd_conf *mdev)
1592{
1593 enum drbd_ret_code retcode;
1594 drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1595 retcode = drbd_request_state(mdev, NS(disk, D_DISKLESS));
1596 wait_event(mdev->misc_wait,
1597 mdev->state.disk != D_DISKLESS ||
1598 !atomic_read(&mdev->local_cnt));
1599 drbd_resume_io(mdev);
1600 return retcode;
1601}
1602
82f59cc6
LE
1603/* Detaching the disk is a process in multiple stages. First we need to lock
1604 * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1605 * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1606 * internal references as well.
1607 * Only then we have finally detached. */
3b98c0c2 1608int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info)
b411b363 1609{
9a0d9d03 1610 enum drbd_ret_code retcode;
3b98c0c2
LE
1611
1612 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
1613 if (!adm_ctx.reply_skb)
1614 return retcode;
1615 if (retcode != NO_ERROR)
1616 goto out;
1617
85f75dd7 1618 retcode = adm_detach(adm_ctx.mdev);
3b98c0c2
LE
1619out:
1620 drbd_adm_finish(info, retcode);
b411b363
PR
1621 return 0;
1622}
1623
f399002e
LE
1624static bool conn_resync_running(struct drbd_tconn *tconn)
1625{
1626 struct drbd_conf *mdev;
1627 int vnr;
1628
1629 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1630 if (mdev->state.conn == C_SYNC_SOURCE ||
1631 mdev->state.conn == C_SYNC_TARGET ||
1632 mdev->state.conn == C_PAUSED_SYNC_S ||
1633 mdev->state.conn == C_PAUSED_SYNC_T)
1634 return true;
1635 }
1636 return false;
1637}
1638
1639static bool conn_ov_running(struct drbd_tconn *tconn)
1640{
1641 struct drbd_conf *mdev;
1642 int vnr;
1643
1644 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1645 if (mdev->state.conn == C_VERIFY_S ||
1646 mdev->state.conn == C_VERIFY_T)
1647 return true;
1648 }
1649 return false;
1650}
1651
1652int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info)
1653{
1654 enum drbd_ret_code retcode;
1655 struct drbd_tconn *tconn;
1656 struct net_conf *new_conf = NULL;
1657 int err;
1658 int ovr; /* online verify running */
1659 int rsr; /* re-sync running */
1660 struct crypto_hash *verify_tfm = NULL;
1661 struct crypto_hash *csums_tfm = NULL;
1662
1663
1664 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
1665 if (!adm_ctx.reply_skb)
1666 return retcode;
1667 if (retcode != NO_ERROR)
1668 goto out;
1669
1670 tconn = adm_ctx.tconn;
1671
1672 new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
1673 if (!new_conf) {
1674 retcode = ERR_NOMEM;
1675 goto out;
1676 }
1677
1678 /* we also need a net config
1679 * to change the options on */
1680 if (!get_net_conf(tconn)) {
1681 drbd_msg_put_info("net conf missing, try connect");
1682 retcode = ERR_INVALID_REQUEST;
1683 goto out;
1684 }
1685
1686 conn_reconfig_start(tconn);
1687
1688 memcpy(new_conf, tconn->net_conf, sizeof(*new_conf));
1689 err = net_conf_from_attrs_for_change(new_conf, info);
1690 if (err) {
1691 retcode = ERR_MANDATORY_TAG;
1692 drbd_msg_put_info(from_attrs_err_to_txt(err));
1693 goto fail;
1694 }
1695
1696 /* re-sync running */
1697 rsr = conn_resync_running(tconn);
1698 if (rsr && strcmp(new_conf->csums_alg, tconn->net_conf->csums_alg)) {
1699 retcode = ERR_CSUMS_RESYNC_RUNNING;
1700 goto fail;
1701 }
1702
1703 if (!rsr && new_conf->csums_alg[0]) {
1704 csums_tfm = crypto_alloc_hash(new_conf->csums_alg, 0, CRYPTO_ALG_ASYNC);
1705 if (IS_ERR(csums_tfm)) {
1706 csums_tfm = NULL;
1707 retcode = ERR_CSUMS_ALG;
1708 goto fail;
1709 }
1710
1711 if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
1712 retcode = ERR_CSUMS_ALG_ND;
1713 goto fail;
1714 }
1715 }
1716
1717 /* online verify running */
1718 ovr = conn_ov_running(tconn);
1719 if (ovr) {
1720 if (strcmp(new_conf->verify_alg, tconn->net_conf->verify_alg)) {
1721 retcode = ERR_VERIFY_RUNNING;
1722 goto fail;
1723 }
1724 }
1725
1726 if (!ovr && new_conf->verify_alg[0]) {
1727 verify_tfm = crypto_alloc_hash(new_conf->verify_alg, 0, CRYPTO_ALG_ASYNC);
1728 if (IS_ERR(verify_tfm)) {
1729 verify_tfm = NULL;
1730 retcode = ERR_VERIFY_ALG;
1731 goto fail;
1732 }
1733
1734 if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
1735 retcode = ERR_VERIFY_ALG_ND;
1736 goto fail;
1737 }
1738 }
1739
1740
1741 /* For now, use struct assignment, not pointer assignment.
1742 * We don't have any means to determine who might still
1743 * keep a local alias into the struct,
1744 * so we cannot just free it and hope for the best :(
1745 * FIXME
1746 * To avoid someone looking at a half-updated struct, we probably
1747 * should have a rw-semaphor on net_conf and disk_conf.
1748 */
1749 *tconn->net_conf = *new_conf;
1750
1751 if (!rsr) {
1752 crypto_free_hash(tconn->csums_tfm);
1753 tconn->csums_tfm = csums_tfm;
1754 csums_tfm = NULL;
1755 }
1756 if (!ovr) {
1757 crypto_free_hash(tconn->verify_tfm);
1758 tconn->verify_tfm = verify_tfm;
1759 verify_tfm = NULL;
1760 }
1761
1762 if (tconn->cstate >= C_WF_REPORT_PARAMS)
1763 drbd_send_sync_param(minor_to_mdev(conn_lowest_minor(tconn)));
1764
1765 fail:
1766 crypto_free_hash(csums_tfm);
1767 crypto_free_hash(verify_tfm);
1768 kfree(new_conf);
1769 put_net_conf(tconn);
1770 conn_reconfig_done(tconn);
1771 out:
1772 drbd_adm_finish(info, retcode);
1773 return 0;
1774}
1775
3b98c0c2 1776int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info)
b411b363 1777{
3b98c0c2
LE
1778 char hmac_name[CRYPTO_MAX_ALG_NAME];
1779 struct drbd_conf *mdev;
b411b363
PR
1780 struct net_conf *new_conf = NULL;
1781 struct crypto_hash *tfm = NULL;
1782 struct crypto_hash *integrity_w_tfm = NULL;
1783 struct crypto_hash *integrity_r_tfm = NULL;
b411b363
PR
1784 void *int_dig_out = NULL;
1785 void *int_dig_in = NULL;
1786 void *int_dig_vv = NULL;
80883197 1787 struct drbd_tconn *oconn;
3b98c0c2 1788 struct drbd_tconn *tconn;
b411b363 1789 struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
3b98c0c2
LE
1790 enum drbd_ret_code retcode;
1791 int i;
1792 int err;
b411b363 1793
3b98c0c2
LE
1794 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
1795 if (!adm_ctx.reply_skb)
1796 return retcode;
1797 if (retcode != NO_ERROR)
1798 goto out;
1799
1800 tconn = adm_ctx.tconn;
80883197 1801 conn_reconfig_start(tconn);
b411b363 1802
80883197 1803 if (tconn->cstate > C_STANDALONE) {
b411b363
PR
1804 retcode = ERR_NET_CONFIGURED;
1805 goto fail;
1806 }
1807
1808 /* allocation not in the IO path, cqueue thread context */
f399002e 1809 new_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
b411b363
PR
1810 if (!new_conf) {
1811 retcode = ERR_NOMEM;
1812 goto fail;
1813 }
1814
f399002e
LE
1815 *new_conf = (struct net_conf) {
1816 {}, 0, /* my_addr */
1817 {}, 0, /* peer_addr */
1818 {}, 0, /* shared_secret */
1819 {}, 0, /* cram_hmac_alg */
1820 {}, 0, /* integrity_alg */
1821 {}, 0, /* verify_alg */
1822 {}, 0, /* csums_alg */
1823 DRBD_PROTOCOL_DEF, /* wire_protocol */
1824 DRBD_CONNECT_INT_DEF, /* try_connect_int */
1825 DRBD_TIMEOUT_DEF, /* timeout */
1826 DRBD_PING_INT_DEF, /* ping_int */
1827 DRBD_PING_TIMEO_DEF, /* ping_timeo */
1828 DRBD_SNDBUF_SIZE_DEF, /* sndbuf_size */
1829 DRBD_RCVBUF_SIZE_DEF, /* rcvbuf_size */
1830 DRBD_KO_COUNT_DEF, /* ko_count */
1831 DRBD_MAX_BUFFERS_DEF, /* max_buffers */
1832 DRBD_MAX_EPOCH_SIZE_DEF, /* max_epoch_size */
1833 DRBD_UNPLUG_WATERMARK_DEF, /* unplug_watermark */
1834 DRBD_AFTER_SB_0P_DEF, /* after_sb_0p */
1835 DRBD_AFTER_SB_1P_DEF, /* after_sb_1p */
1836 DRBD_AFTER_SB_2P_DEF, /* after_sb_2p */
1837 DRBD_RR_CONFLICT_DEF, /* rr_conflict */
1838 DRBD_ON_CONGESTION_DEF, /* on_congestion */
1839 DRBD_CONG_FILL_DEF, /* cong_fill */
1840 DRBD_CONG_EXTENTS_DEF, /* cong_extents */
1841 0, /* two_primaries */
1842 0, /* want_lose */
1843 0, /* no_cork */
1844 0, /* always_asbp */
1845 0, /* dry_run */
1846 0, /* use_rle */
1847 };
1848
1849 err = net_conf_from_attrs(new_conf, info);
3b98c0c2 1850 if (err) {
b411b363 1851 retcode = ERR_MANDATORY_TAG;
3b98c0c2 1852 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
1853 goto fail;
1854 }
1855
1856 if (new_conf->two_primaries
1857 && (new_conf->wire_protocol != DRBD_PROT_C)) {
1858 retcode = ERR_NOT_PROTO_C;
1859 goto fail;
47ff2d0a
PR
1860 }
1861
80883197
PR
1862 idr_for_each_entry(&tconn->volumes, mdev, i) {
1863 if (get_ldev(mdev)) {
1864 enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
1865 put_ldev(mdev);
1866 if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
1867 retcode = ERR_STONITH_AND_PROT_A;
1868 goto fail;
1869 }
1870 }
1871 if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
1872 retcode = ERR_DISCARD;
47ff2d0a
PR
1873 goto fail;
1874 }
80883197
PR
1875 if (!mdev->bitmap) {
1876 if(drbd_bm_init(mdev)) {
1877 retcode = ERR_NOMEM;
1878 goto fail;
1879 }
1880 }
47ff2d0a 1881 }
b411b363 1882
422028b1
PR
1883 if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
1884 retcode = ERR_CONG_NOT_PROTO_A;
1885 goto fail;
1886 }
1887
b411b363
PR
1888 retcode = NO_ERROR;
1889
1890 new_my_addr = (struct sockaddr *)&new_conf->my_addr;
1891 new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
543cc10b
LE
1892
1893 /* No need to take drbd_cfg_mutex here. All reconfiguration is
1894 * strictly serialized on genl_lock(). We are protected against
1895 * concurrent reconfiguration/addition/deletion */
80883197
PR
1896 list_for_each_entry(oconn, &drbd_tconns, all_tconn) {
1897 if (oconn == tconn)
b411b363 1898 continue;
80883197
PR
1899 if (get_net_conf(oconn)) {
1900 taken_addr = (struct sockaddr *)&oconn->net_conf->my_addr;
1901 if (new_conf->my_addr_len == oconn->net_conf->my_addr_len &&
b411b363
PR
1902 !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
1903 retcode = ERR_LOCAL_ADDR;
1904
80883197
PR
1905 taken_addr = (struct sockaddr *)&oconn->net_conf->peer_addr;
1906 if (new_conf->peer_addr_len == oconn->net_conf->peer_addr_len &&
b411b363
PR
1907 !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
1908 retcode = ERR_PEER_ADDR;
1909
80883197 1910 put_net_conf(oconn);
b411b363
PR
1911 if (retcode != NO_ERROR)
1912 goto fail;
1913 }
1914 }
1915
1916 if (new_conf->cram_hmac_alg[0] != 0) {
1917 snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
1918 new_conf->cram_hmac_alg);
1919 tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
1920 if (IS_ERR(tfm)) {
1921 tfm = NULL;
1922 retcode = ERR_AUTH_ALG;
1923 goto fail;
1924 }
1925
0798219f 1926 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
b411b363
PR
1927 retcode = ERR_AUTH_ALG_ND;
1928 goto fail;
1929 }
1930 }
1931
1932 if (new_conf->integrity_alg[0]) {
1933 integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1934 if (IS_ERR(integrity_w_tfm)) {
1935 integrity_w_tfm = NULL;
1936 retcode=ERR_INTEGRITY_ALG;
1937 goto fail;
1938 }
1939
1940 if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
1941 retcode=ERR_INTEGRITY_ALG_ND;
1942 goto fail;
1943 }
1944
1945 integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1946 if (IS_ERR(integrity_r_tfm)) {
1947 integrity_r_tfm = NULL;
1948 retcode=ERR_INTEGRITY_ALG;
1949 goto fail;
1950 }
1951 }
1952
b411b363
PR
1953 ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
1954
80883197 1955 /* allocation not in the IO path, cqueue thread context */
b411b363
PR
1956 if (integrity_w_tfm) {
1957 i = crypto_hash_digestsize(integrity_w_tfm);
1958 int_dig_out = kmalloc(i, GFP_KERNEL);
1959 if (!int_dig_out) {
1960 retcode = ERR_NOMEM;
1961 goto fail;
1962 }
1963 int_dig_in = kmalloc(i, GFP_KERNEL);
1964 if (!int_dig_in) {
1965 retcode = ERR_NOMEM;
1966 goto fail;
1967 }
1968 int_dig_vv = kmalloc(i, GFP_KERNEL);
1969 if (!int_dig_vv) {
1970 retcode = ERR_NOMEM;
1971 goto fail;
1972 }
1973 }
1974
80883197
PR
1975 conn_flush_workqueue(tconn);
1976 spin_lock_irq(&tconn->req_lock);
1977 if (tconn->net_conf != NULL) {
b411b363 1978 retcode = ERR_NET_CONFIGURED;
80883197 1979 spin_unlock_irq(&tconn->req_lock);
b411b363
PR
1980 goto fail;
1981 }
80883197 1982 tconn->net_conf = new_conf;
b411b363 1983
80883197
PR
1984 crypto_free_hash(tconn->cram_hmac_tfm);
1985 tconn->cram_hmac_tfm = tfm;
b411b363 1986
80883197
PR
1987 crypto_free_hash(tconn->integrity_w_tfm);
1988 tconn->integrity_w_tfm = integrity_w_tfm;
b411b363 1989
80883197
PR
1990 crypto_free_hash(tconn->integrity_r_tfm);
1991 tconn->integrity_r_tfm = integrity_r_tfm;
b411b363 1992
80883197
PR
1993 kfree(tconn->int_dig_out);
1994 kfree(tconn->int_dig_in);
1995 kfree(tconn->int_dig_vv);
1996 tconn->int_dig_out=int_dig_out;
1997 tconn->int_dig_in=int_dig_in;
1998 tconn->int_dig_vv=int_dig_vv;
1999 retcode = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
2000 spin_unlock_irq(&tconn->req_lock);
b411b363 2001
80883197
PR
2002 idr_for_each_entry(&tconn->volumes, mdev, i) {
2003 mdev->send_cnt = 0;
2004 mdev->recv_cnt = 0;
2005 kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
2006 }
80883197 2007 conn_reconfig_done(tconn);
3b98c0c2 2008 drbd_adm_finish(info, retcode);
b411b363
PR
2009 return 0;
2010
2011fail:
2012 kfree(int_dig_out);
2013 kfree(int_dig_in);
2014 kfree(int_dig_vv);
2015 crypto_free_hash(tfm);
2016 crypto_free_hash(integrity_w_tfm);
2017 crypto_free_hash(integrity_r_tfm);
b411b363
PR
2018 kfree(new_conf);
2019
80883197 2020 conn_reconfig_done(tconn);
3b98c0c2
LE
2021out:
2022 drbd_adm_finish(info, retcode);
b411b363
PR
2023 return 0;
2024}
2025
85f75dd7
LE
2026static enum drbd_state_rv conn_try_disconnect(struct drbd_tconn *tconn, bool force)
2027{
2028 enum drbd_state_rv rv;
2029 if (force) {
2030 spin_lock_irq(&tconn->req_lock);
2031 if (tconn->cstate >= C_WF_CONNECTION)
2032 _conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2033 spin_unlock_irq(&tconn->req_lock);
2034 return SS_SUCCESS;
2035 }
2036
2037 rv = conn_request_state(tconn, NS(conn, C_DISCONNECTING), 0);
2038
2039 switch (rv) {
2040 case SS_NOTHING_TO_DO:
2041 case SS_ALREADY_STANDALONE:
2042 return SS_SUCCESS;
2043 case SS_PRIMARY_NOP:
2044 /* Our state checking code wants to see the peer outdated. */
2045 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
2046 pdsk, D_OUTDATED), CS_VERBOSE);
2047 break;
2048 case SS_CW_FAILED_BY_PEER:
2049 /* The peer probably wants to see us outdated. */
2050 rv = conn_request_state(tconn, NS2(conn, C_DISCONNECTING,
2051 disk, D_OUTDATED), 0);
2052 if (rv == SS_IS_DISKLESS || rv == SS_LOWER_THAN_OUTDATED) {
2053 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
2054 rv = SS_SUCCESS;
2055 }
2056 break;
2057 default:;
2058 /* no special handling necessary */
2059 }
2060
2061 return rv;
2062}
2063
3b98c0c2 2064int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info)
b411b363 2065{
3b98c0c2
LE
2066 struct disconnect_parms parms;
2067 struct drbd_tconn *tconn;
85f75dd7 2068 enum drbd_state_rv rv;
3b98c0c2
LE
2069 enum drbd_ret_code retcode;
2070 int err;
2561b9c1 2071
3b98c0c2
LE
2072 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2073 if (!adm_ctx.reply_skb)
2074 return retcode;
2075 if (retcode != NO_ERROR)
2561b9c1 2076 goto fail;
3b98c0c2
LE
2077
2078 tconn = adm_ctx.tconn;
2079 memset(&parms, 0, sizeof(parms));
2080 if (info->attrs[DRBD_NLA_DISCONNECT_PARMS]) {
f399002e 2081 err = disconnect_parms_from_attrs(&parms, info);
3b98c0c2
LE
2082 if (err) {
2083 retcode = ERR_MANDATORY_TAG;
2084 drbd_msg_put_info(from_attrs_err_to_txt(err));
2085 goto fail;
2086 }
2561b9c1
PR
2087 }
2088
85f75dd7
LE
2089 rv = conn_try_disconnect(tconn, parms.force_disconnect);
2090 if (rv < SS_SUCCESS)
b411b363
PR
2091 goto fail;
2092
df24aa45
PR
2093 if (wait_event_interruptible(tconn->ping_wait,
2094 tconn->cstate != C_DISCONNECTING)) {
b411b363
PR
2095 /* Do not test for mdev->state.conn == C_STANDALONE, since
2096 someone else might connect us in the mean time! */
2097 retcode = ERR_INTR;
2098 goto fail;
2099 }
2100
b411b363
PR
2101 retcode = NO_ERROR;
2102 fail:
3b98c0c2 2103 drbd_adm_finish(info, retcode);
b411b363
PR
2104 return 0;
2105}
2106
2107void resync_after_online_grow(struct drbd_conf *mdev)
2108{
2109 int iass; /* I am sync source */
2110
2111 dev_info(DEV, "Resync of new storage after online grow\n");
2112 if (mdev->state.role != mdev->state.peer)
2113 iass = (mdev->state.role == R_PRIMARY);
2114 else
25703f83 2115 iass = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
b411b363
PR
2116
2117 if (iass)
2118 drbd_start_resync(mdev, C_SYNC_SOURCE);
2119 else
2120 _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
2121}
2122
3b98c0c2 2123int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
b411b363 2124{
3b98c0c2
LE
2125 struct resize_parms rs;
2126 struct drbd_conf *mdev;
2127 enum drbd_ret_code retcode;
b411b363 2128 enum determine_dev_size dd;
6495d2c6 2129 enum dds_flags ddsf;
3b98c0c2 2130 int err;
b411b363 2131
3b98c0c2
LE
2132 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2133 if (!adm_ctx.reply_skb)
2134 return retcode;
2135 if (retcode != NO_ERROR)
b411b363 2136 goto fail;
3b98c0c2
LE
2137
2138 memset(&rs, 0, sizeof(struct resize_parms));
2139 if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
f399002e 2140 err = resize_parms_from_attrs(&rs, info);
3b98c0c2
LE
2141 if (err) {
2142 retcode = ERR_MANDATORY_TAG;
2143 drbd_msg_put_info(from_attrs_err_to_txt(err));
2144 goto fail;
2145 }
b411b363
PR
2146 }
2147
3b98c0c2 2148 mdev = adm_ctx.mdev;
b411b363
PR
2149 if (mdev->state.conn > C_CONNECTED) {
2150 retcode = ERR_RESIZE_RESYNC;
2151 goto fail;
2152 }
2153
2154 if (mdev->state.role == R_SECONDARY &&
2155 mdev->state.peer == R_SECONDARY) {
2156 retcode = ERR_NO_PRIMARY;
2157 goto fail;
2158 }
2159
2160 if (!get_ldev(mdev)) {
2161 retcode = ERR_NO_DISK;
2162 goto fail;
2163 }
2164
31890f4a 2165 if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
6495d2c6
PR
2166 retcode = ERR_NEED_APV_93;
2167 goto fail;
2168 }
2169
087c2492 2170 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
b411b363 2171 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
b411b363
PR
2172
2173 mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
6495d2c6 2174 ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
24c4830c 2175 dd = drbd_determine_dev_size(mdev, ddsf);
b411b363
PR
2176 drbd_md_sync(mdev);
2177 put_ldev(mdev);
2178 if (dd == dev_size_error) {
2179 retcode = ERR_NOMEM_BITMAP;
2180 goto fail;
2181 }
2182
087c2492 2183 if (mdev->state.conn == C_CONNECTED) {
b411b363
PR
2184 if (dd == grew)
2185 set_bit(RESIZE_PENDING, &mdev->flags);
2186
2187 drbd_send_uuids(mdev);
6495d2c6 2188 drbd_send_sizes(mdev, 1, ddsf);
b411b363
PR
2189 }
2190
2191 fail:
3b98c0c2 2192 drbd_adm_finish(info, retcode);
b411b363
PR
2193 return 0;
2194}
2195
f399002e 2196int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info)
b411b363 2197{
3b98c0c2 2198 enum drbd_ret_code retcode;
b411b363 2199 cpumask_var_t new_cpu_mask;
f399002e 2200 struct drbd_tconn *tconn;
778f271d 2201 int *rs_plan_s = NULL;
f399002e
LE
2202 struct res_opts sc;
2203 int err;
b411b363 2204
f399002e 2205 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
3b98c0c2
LE
2206 if (!adm_ctx.reply_skb)
2207 return retcode;
2208 if (retcode != NO_ERROR)
2209 goto fail;
f399002e 2210 tconn = adm_ctx.tconn;
3b98c0c2 2211
b411b363
PR
2212 if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
2213 retcode = ERR_NOMEM;
3b98c0c2 2214 drbd_msg_put_info("unable to allocate cpumask");
b411b363
PR
2215 goto fail;
2216 }
2217
3b98c0c2
LE
2218 if (((struct drbd_genlmsghdr*)info->userhdr)->flags
2219 & DRBD_GENL_F_SET_DEFAULTS) {
f399002e 2220 memset(&sc, 0, sizeof(struct res_opts));
265be2d0 2221 sc.on_no_data = DRBD_ON_NO_DATA_DEF;
b411b363 2222 } else
f399002e 2223 sc = tconn->res_opts;
b411b363 2224
f399002e 2225 err = res_opts_from_attrs(&sc, info);
3b98c0c2 2226 if (err) {
b411b363 2227 retcode = ERR_MANDATORY_TAG;
3b98c0c2 2228 drbd_msg_put_info(from_attrs_err_to_txt(err));
b411b363
PR
2229 goto fail;
2230 }
2231
b411b363
PR
2232 /* silently ignore cpu mask on UP kernel */
2233 if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
2234 err = __bitmap_parse(sc.cpu_mask, 32, 0,
2235 cpumask_bits(new_cpu_mask), nr_cpu_ids);
2236 if (err) {
f399002e 2237 conn_warn(tconn, "__bitmap_parse() failed with %d\n", err);
b411b363
PR
2238 retcode = ERR_CPU_MASK_PARSE;
2239 goto fail;
2240 }
2241 }
2242
b411b363 2243
f399002e 2244 tconn->res_opts = sc;
b411b363 2245
f399002e
LE
2246 if (!cpumask_equal(tconn->cpu_mask, new_cpu_mask)) {
2247 cpumask_copy(tconn->cpu_mask, new_cpu_mask);
2248 drbd_calc_cpu_mask(tconn);
2249 tconn->receiver.reset_cpu_mask = 1;
2250 tconn->asender.reset_cpu_mask = 1;
2251 tconn->worker.reset_cpu_mask = 1;
b411b363
PR
2252 }
2253
b411b363 2254fail:
778f271d 2255 kfree(rs_plan_s);
b411b363 2256 free_cpumask_var(new_cpu_mask);
3b98c0c2
LE
2257
2258 drbd_adm_finish(info, retcode);
b411b363
PR
2259 return 0;
2260}
2261
3b98c0c2 2262int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
b411b363 2263{
3b98c0c2
LE
2264 struct drbd_conf *mdev;
2265 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2266
2267 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2268 if (!adm_ctx.reply_skb)
2269 return retcode;
2270 if (retcode != NO_ERROR)
2271 goto out;
2272
2273 mdev = adm_ctx.mdev;
b411b363 2274
194bfb32
LE
2275 /* If there is still bitmap IO pending, probably because of a previous
2276 * resync just being finished, wait for it before requesting a new resync. */
2277 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2278
b411b363
PR
2279 retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
2280
2281 if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
2282 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2283
2284 while (retcode == SS_NEED_CONNECTION) {
87eeee41 2285 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
2286 if (mdev->state.conn < C_CONNECTED)
2287 retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
87eeee41 2288 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
2289
2290 if (retcode != SS_NEED_CONNECTION)
2291 break;
2292
2293 retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2294 }
2295
3b98c0c2
LE
2296out:
2297 drbd_adm_finish(info, retcode);
b411b363
PR
2298 return 0;
2299}
2300
0778286a
PR
/*
 * Bitmap IO helper: run drbd_bmio_set_n_write(), then suspend the
 * activity log.  Returns whatever drbd_bmio_set_n_write() returned.
 */
static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
{
	const int write_rv = drbd_bmio_set_n_write(mdev);

	drbd_suspend_al(mdev);

	return write_rv;
}
2309
3b98c0c2
LE
2310static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info,
2311 union drbd_state mask, union drbd_state val)
b411b363 2312{
3b98c0c2 2313 enum drbd_ret_code retcode;
194bfb32 2314
3b98c0c2
LE
2315 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2316 if (!adm_ctx.reply_skb)
2317 return retcode;
2318 if (retcode != NO_ERROR)
2319 goto out;
b411b363 2320
3b98c0c2
LE
2321 retcode = drbd_request_state(adm_ctx.mdev, mask, val);
2322out:
2323 drbd_adm_finish(info, retcode);
b411b363
PR
2324 return 0;
2325}
2326
3b98c0c2 2327int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
b411b363 2328{
3b98c0c2
LE
2329 return drbd_adm_simple_request_state(skb, info, NS(conn, C_STARTING_SYNC_S));
2330}
b411b363 2331
3b98c0c2
LE
2332int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info)
2333{
2334 enum drbd_ret_code retcode;
2335
2336 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2337 if (!adm_ctx.reply_skb)
2338 return retcode;
2339 if (retcode != NO_ERROR)
2340 goto out;
b411b363 2341
3b98c0c2
LE
2342 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2343 retcode = ERR_PAUSE_IS_SET;
2344out:
2345 drbd_adm_finish(info, retcode);
b411b363
PR
2346 return 0;
2347}
2348
3b98c0c2 2349int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info)
b411b363 2350{
cd88d030 2351 union drbd_state s;
3b98c0c2
LE
2352 enum drbd_ret_code retcode;
2353
2354 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2355 if (!adm_ctx.reply_skb)
2356 return retcode;
2357 if (retcode != NO_ERROR)
2358 goto out;
b411b363 2359
3b98c0c2
LE
2360 if (drbd_request_state(adm_ctx.mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2361 s = adm_ctx.mdev->state;
cd88d030
PR
2362 if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2363 retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2364 s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2365 } else {
2366 retcode = ERR_PAUSE_IS_CLEAR;
2367 }
2368 }
b411b363 2369
3b98c0c2
LE
2370out:
2371 drbd_adm_finish(info, retcode);
b411b363
PR
2372 return 0;
2373}
2374
3b98c0c2 2375int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info)
b411b363 2376{
3b98c0c2 2377 return drbd_adm_simple_request_state(skb, info, NS(susp, 1));
b411b363
PR
2378}
2379
3b98c0c2 2380int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
b411b363 2381{
3b98c0c2
LE
2382 struct drbd_conf *mdev;
2383 int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */
2384
2385 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2386 if (!adm_ctx.reply_skb)
2387 return retcode;
2388 if (retcode != NO_ERROR)
2389 goto out;
2390
2391 mdev = adm_ctx.mdev;
43a5182c
PR
2392 if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
2393 drbd_uuid_new_current(mdev);
2394 clear_bit(NEW_CUR_UUID, &mdev->flags);
43a5182c 2395 }
265be2d0 2396 drbd_suspend_io(mdev);
3b98c0c2
LE
2397 retcode = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2398 if (retcode == SS_SUCCESS) {
265be2d0 2399 if (mdev->state.conn < C_CONNECTED)
2f5cdd0b 2400 tl_clear(mdev->tconn);
265be2d0 2401 if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
2f5cdd0b 2402 tl_restart(mdev->tconn, FAIL_FROZEN_DISK_IO);
265be2d0
PR
2403 }
2404 drbd_resume_io(mdev);
2405
3b98c0c2
LE
2406out:
2407 drbd_adm_finish(info, retcode);
b411b363
PR
2408 return 0;
2409}
2410
3b98c0c2 2411int drbd_adm_outdate(struct sk_buff *skb, struct genl_info *info)
b411b363 2412{
3b98c0c2 2413 return drbd_adm_simple_request_state(skb, info, NS(disk, D_OUTDATED));
b411b363
PR
2414}
2415
543cc10b
LE
2416int nla_put_drbd_cfg_context(struct sk_buff *skb, const char *conn_name, unsigned vnr)
2417{
2418 struct nlattr *nla;
2419 nla = nla_nest_start(skb, DRBD_NLA_CFG_CONTEXT);
2420 if (!nla)
2421 goto nla_put_failure;
2422 if (vnr != VOLUME_UNSPECIFIED)
2423 NLA_PUT_U32(skb, T_ctx_volume, vnr);
2424 NLA_PUT_STRING(skb, T_ctx_conn_name, conn_name);
2425 nla_nest_end(skb, nla);
2426 return 0;
2427
2428nla_put_failure:
2429 if (nla)
2430 nla_nest_cancel(skb, nla);
2431 return -EMSGSIZE;
2432}
2433
3b98c0c2
LE
2434int nla_put_status_info(struct sk_buff *skb, struct drbd_conf *mdev,
2435 const struct sib_info *sib)
b411b363 2436{
3b98c0c2
LE
2437 struct state_info *si = NULL; /* for sizeof(si->member); */
2438 struct nlattr *nla;
2439 int got_ldev;
2440 int got_net;
2441 int err = 0;
2442 int exclude_sensitive;
2443
2444 /* If sib != NULL, this is drbd_bcast_event, which anyone can listen
2445 * to. So we better exclude_sensitive information.
2446 *
2447 * If sib == NULL, this is drbd_adm_get_status, executed synchronously
2448 * in the context of the requesting user process. Exclude sensitive
2449 * information, unless current has superuser.
2450 *
2451 * NOTE: for drbd_adm_get_status_all(), this is a netlink dump, and
2452 * relies on the current implementation of netlink_dump(), which
2453 * executes the dump callback successively from netlink_recvmsg(),
2454 * always in the context of the receiving process */
2455 exclude_sensitive = sib || !capable(CAP_SYS_ADMIN);
2456
2457 got_ldev = get_ldev(mdev);
2458 got_net = get_net_conf(mdev->tconn);
2459
2460 /* We need to add connection name and volume number information still.
2461 * Minor number is in drbd_genlmsghdr. */
543cc10b 2462 if (nla_put_drbd_cfg_context(skb, mdev->tconn->name, mdev->vnr))
3b98c0c2 2463 goto nla_put_failure;
3b98c0c2 2464
f399002e
LE
2465 if (res_opts_to_skb(skb, &mdev->tconn->res_opts, exclude_sensitive))
2466 goto nla_put_failure;
2467
3b98c0c2
LE
2468 if (got_ldev)
2469 if (disk_conf_to_skb(skb, &mdev->ldev->dc, exclude_sensitive))
2470 goto nla_put_failure;
2471 if (got_net)
2472 if (net_conf_to_skb(skb, mdev->tconn->net_conf, exclude_sensitive))
2473 goto nla_put_failure;
2474
3b98c0c2
LE
2475 nla = nla_nest_start(skb, DRBD_NLA_STATE_INFO);
2476 if (!nla)
2477 goto nla_put_failure;
2478 NLA_PUT_U32(skb, T_sib_reason, sib ? sib->sib_reason : SIB_GET_STATUS_REPLY);
2479 NLA_PUT_U32(skb, T_current_state, mdev->state.i);
2480 NLA_PUT_U64(skb, T_ed_uuid, mdev->ed_uuid);
2481 NLA_PUT_U64(skb, T_capacity, drbd_get_capacity(mdev->this_bdev));
2482
2483 if (got_ldev) {
2484 NLA_PUT_U32(skb, T_disk_flags, mdev->ldev->md.flags);
2485 NLA_PUT(skb, T_uuids, sizeof(si->uuids), mdev->ldev->md.uuid);
2486 NLA_PUT_U64(skb, T_bits_total, drbd_bm_bits(mdev));
2487 NLA_PUT_U64(skb, T_bits_oos, drbd_bm_total_weight(mdev));
2488 if (C_SYNC_SOURCE <= mdev->state.conn &&
2489 C_PAUSED_SYNC_T >= mdev->state.conn) {
2490 NLA_PUT_U64(skb, T_bits_rs_total, mdev->rs_total);
2491 NLA_PUT_U64(skb, T_bits_rs_failed, mdev->rs_failed);
2492 }
b411b363
PR
2493 }
2494
3b98c0c2
LE
2495 if (sib) {
2496 switch(sib->sib_reason) {
2497 case SIB_SYNC_PROGRESS:
2498 case SIB_GET_STATUS_REPLY:
2499 break;
2500 case SIB_STATE_CHANGE:
2501 NLA_PUT_U32(skb, T_prev_state, sib->os.i);
2502 NLA_PUT_U32(skb, T_new_state, sib->ns.i);
2503 break;
2504 case SIB_HELPER_POST:
2505 NLA_PUT_U32(skb,
2506 T_helper_exit_code, sib->helper_exit_code);
2507 /* fall through */
2508 case SIB_HELPER_PRE:
2509 NLA_PUT_STRING(skb, T_helper, sib->helper_name);
2510 break;
2511 }
b411b363 2512 }
3b98c0c2 2513 nla_nest_end(skb, nla);
b411b363 2514
3b98c0c2
LE
2515 if (0)
2516nla_put_failure:
2517 err = -EMSGSIZE;
2518 if (got_ldev)
2519 put_ldev(mdev);
2520 if (got_net)
2521 put_net_conf(mdev->tconn);
2522 return err;
b411b363
PR
2523}
2524
3b98c0c2 2525int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info)
b411b363 2526{
3b98c0c2
LE
2527 enum drbd_ret_code retcode;
2528 int err;
b411b363 2529
3b98c0c2
LE
2530 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2531 if (!adm_ctx.reply_skb)
2532 return retcode;
2533 if (retcode != NO_ERROR)
2534 goto out;
b411b363 2535
3b98c0c2
LE
2536 err = nla_put_status_info(adm_ctx.reply_skb, adm_ctx.mdev, NULL);
2537 if (err) {
2538 nlmsg_free(adm_ctx.reply_skb);
2539 return err;
b411b363 2540 }
3b98c0c2
LE
2541out:
2542 drbd_adm_finish(info, retcode);
2543 return 0;
b411b363
PR
2544}
2545
3b98c0c2 2546int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb)
b411b363 2547{
3b98c0c2
LE
2548 struct drbd_conf *mdev;
2549 struct drbd_genlmsghdr *dh;
543cc10b
LE
2550 struct drbd_tconn *pos = (struct drbd_tconn*)cb->args[0];
2551 struct drbd_tconn *tconn = NULL;
2552 struct drbd_tconn *tmp;
2553 unsigned volume = cb->args[1];
2554
2555 /* Open coded, deferred, iteration:
2556 * list_for_each_entry_safe(tconn, tmp, &drbd_tconns, all_tconn) {
2557 * idr_for_each_entry(&tconn->volumes, mdev, i) {
2558 * ...
2559 * }
2560 * }
2561 * where tconn is cb->args[0];
2562 * and i is cb->args[1];
2563 *
3b98c0c2
LE
2564 * This may miss entries inserted after this dump started,
2565 * or entries deleted before they are reached.
543cc10b
LE
2566 *
2567 * We need to make sure the mdev won't disappear while
2568 * we are looking at it, and revalidate our iterators
2569 * on each iteration.
2570 */
3b98c0c2 2571
543cc10b
LE
2572 /* synchronize with drbd_new_tconn/drbd_free_tconn */
2573 mutex_lock(&drbd_cfg_mutex);
2574 /* synchronize with drbd_delete_device */
3b98c0c2 2575 rcu_read_lock();
543cc10b
LE
2576next_tconn:
2577 /* revalidate iterator position */
2578 list_for_each_entry(tmp, &drbd_tconns, all_tconn) {
2579 if (pos == NULL) {
2580 /* first iteration */
2581 pos = tmp;
2582 tconn = pos;
2583 break;
2584 }
2585 if (tmp == pos) {
2586 tconn = pos;
2587 break;
2588 }
2589 }
2590 if (tconn) {
2591 mdev = idr_get_next(&tconn->volumes, &volume);
2592 if (!mdev) {
2593 /* No more volumes to dump on this tconn.
2594 * Advance tconn iterator. */
2595 pos = list_entry(tconn->all_tconn.next,
2596 struct drbd_tconn, all_tconn);
2597 /* But, did we dump any volume on this tconn yet? */
2598 if (volume != 0) {
2599 tconn = NULL;
2600 volume = 0;
2601 goto next_tconn;
2602 }
2603 }
2604
3b98c0c2
LE
2605 dh = genlmsg_put(skb, NETLINK_CB(cb->skb).pid,
2606 cb->nlh->nlmsg_seq, &drbd_genl_family,
2607 NLM_F_MULTI, DRBD_ADM_GET_STATUS);
2608 if (!dh)
543cc10b
LE
2609 goto out;
2610
2611 if (!mdev) {
2612 /* this is a tconn without a single volume */
2613 dh->minor = -1U;
2614 dh->ret_code = NO_ERROR;
2615 if (nla_put_drbd_cfg_context(skb, tconn->name, VOLUME_UNSPECIFIED))
2616 genlmsg_cancel(skb, dh);
2617 else
2618 genlmsg_end(skb, dh);
2619 goto out;
2620 }
3b98c0c2 2621
543cc10b
LE
2622 D_ASSERT(mdev->vnr == volume);
2623 D_ASSERT(mdev->tconn == tconn);
3b98c0c2 2624
543cc10b 2625 dh->minor = mdev_to_minor(mdev);
3b98c0c2
LE
2626 dh->ret_code = NO_ERROR;
2627
2628 if (nla_put_status_info(skb, mdev, NULL)) {
2629 genlmsg_cancel(skb, dh);
543cc10b 2630 goto out;
3b98c0c2
LE
2631 }
2632 genlmsg_end(skb, dh);
2633 }
b411b363 2634
543cc10b 2635out:
3b98c0c2 2636 rcu_read_unlock();
543cc10b
LE
2637 mutex_unlock(&drbd_cfg_mutex);
2638 /* where to start the next iteration */
2639 cb->args[0] = (long)pos;
2640 cb->args[1] = (pos == tconn) ? volume + 1 : 0;
b411b363 2641
543cc10b
LE
2642 /* No more tconns/volumes/minors found results in an empty skb.
2643 * Which will terminate the dump. */
3b98c0c2 2644 return skb->len;
b411b363
PR
2645}
2646
3b98c0c2 2647int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info)
b411b363 2648{
3b98c0c2
LE
2649 enum drbd_ret_code retcode;
2650 struct timeout_parms tp;
2651 int err;
b411b363 2652
3b98c0c2
LE
2653 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2654 if (!adm_ctx.reply_skb)
2655 return retcode;
2656 if (retcode != NO_ERROR)
2657 goto out;
b411b363 2658
3b98c0c2
LE
2659 tp.timeout_type =
2660 adm_ctx.mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
2661 test_bit(USE_DEGR_WFC_T, &adm_ctx.mdev->flags) ? UT_DEGRADED :
2662 UT_DEFAULT;
b411b363 2663
3b98c0c2
LE
2664 err = timeout_parms_to_priv_skb(adm_ctx.reply_skb, &tp);
2665 if (err) {
2666 nlmsg_free(adm_ctx.reply_skb);
2667 return err;
2668 }
2669out:
2670 drbd_adm_finish(info, retcode);
2671 return 0;
b411b363
PR
2672}
2673
3b98c0c2 2674int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info)
b411b363 2675{
3b98c0c2
LE
2676 struct drbd_conf *mdev;
2677 enum drbd_ret_code retcode;
b411b363 2678
3b98c0c2
LE
2679 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2680 if (!adm_ctx.reply_skb)
2681 return retcode;
2682 if (retcode != NO_ERROR)
2683 goto out;
873b0d5f 2684
3b98c0c2
LE
2685 mdev = adm_ctx.mdev;
2686 if (info->attrs[DRBD_NLA_START_OV_PARMS]) {
2687 /* resume from last known position, if possible */
2688 struct start_ov_parms parms =
2689 { .ov_start_sector = mdev->ov_start_sector };
f399002e 2690 int err = start_ov_parms_from_attrs(&parms, info);
3b98c0c2
LE
2691 if (err) {
2692 retcode = ERR_MANDATORY_TAG;
2693 drbd_msg_put_info(from_attrs_err_to_txt(err));
2694 goto out;
2695 }
2696 /* w_make_ov_request expects position to be aligned */
2697 mdev->ov_start_sector = parms.ov_start_sector & ~BM_SECT_PER_BIT;
2698 }
873b0d5f
LE
2699 /* If there is still bitmap IO pending, e.g. previous resync or verify
2700 * just being finished, wait for it before requesting a new resync. */
2701 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3b98c0c2
LE
2702 retcode = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
2703out:
2704 drbd_adm_finish(info, retcode);
b411b363
PR
2705 return 0;
2706}
2707
2708
3b98c0c2 2709int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info)
b411b363 2710{
3b98c0c2
LE
2711 struct drbd_conf *mdev;
2712 enum drbd_ret_code retcode;
b411b363
PR
2713 int skip_initial_sync = 0;
2714 int err;
3b98c0c2 2715 struct new_c_uuid_parms args;
b411b363 2716
3b98c0c2
LE
2717 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2718 if (!adm_ctx.reply_skb)
2719 return retcode;
2720 if (retcode != NO_ERROR)
2721 goto out_nolock;
b411b363 2722
3b98c0c2
LE
2723 mdev = adm_ctx.mdev;
2724 memset(&args, 0, sizeof(args));
2725 if (info->attrs[DRBD_NLA_NEW_C_UUID_PARMS]) {
f399002e 2726 err = new_c_uuid_parms_from_attrs(&args, info);
3b98c0c2
LE
2727 if (err) {
2728 retcode = ERR_MANDATORY_TAG;
2729 drbd_msg_put_info(from_attrs_err_to_txt(err));
2730 goto out_nolock;
2731 }
b411b363
PR
2732 }
2733
8410da8f 2734 mutex_lock(mdev->state_mutex); /* Protects us against serialized state changes. */
b411b363
PR
2735
2736 if (!get_ldev(mdev)) {
2737 retcode = ERR_NO_DISK;
2738 goto out;
2739 }
2740
2741 /* this is "skip initial sync", assume to be clean */
31890f4a 2742 if (mdev->state.conn == C_CONNECTED && mdev->tconn->agreed_pro_version >= 90 &&
b411b363
PR
2743 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
2744 dev_info(DEV, "Preparing to skip initial sync\n");
2745 skip_initial_sync = 1;
2746 } else if (mdev->state.conn != C_STANDALONE) {
2747 retcode = ERR_CONNECTED;
2748 goto out_dec;
2749 }
2750
2751 drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
2752 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
2753
2754 if (args.clear_bm) {
20ceb2b2
LE
2755 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
2756 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
b411b363
PR
2757 if (err) {
2758 dev_err(DEV, "Writing bitmap failed with %d\n",err);
2759 retcode = ERR_IO_MD_DISK;
2760 }
2761 if (skip_initial_sync) {
2762 drbd_send_uuids_skip_initial_sync(mdev);
2763 _drbd_uuid_set(mdev, UI_BITMAP, 0);
62b0da3a 2764 drbd_print_uuids(mdev, "cleared bitmap UUID");
87eeee41 2765 spin_lock_irq(&mdev->tconn->req_lock);
b411b363
PR
2766 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
2767 CS_VERBOSE, NULL);
87eeee41 2768 spin_unlock_irq(&mdev->tconn->req_lock);
b411b363
PR
2769 }
2770 }
2771
2772 drbd_md_sync(mdev);
2773out_dec:
2774 put_ldev(mdev);
2775out:
8410da8f 2776 mutex_unlock(mdev->state_mutex);
3b98c0c2
LE
2777out_nolock:
2778 drbd_adm_finish(info, retcode);
774b3055
PR
2779 return 0;
2780}
2781
3b98c0c2
LE
2782static enum drbd_ret_code
2783drbd_check_conn_name(const char *name)
774b3055 2784{
3b98c0c2
LE
2785 if (!name || !name[0]) {
2786 drbd_msg_put_info("connection name missing");
2787 return ERR_MANDATORY_TAG;
774b3055 2788 }
3b98c0c2
LE
2789 /* if we want to use these in sysfs/configfs/debugfs some day,
2790 * we must not allow slashes */
2791 if (strchr(name, '/')) {
2792 drbd_msg_put_info("invalid connection name");
2793 return ERR_INVALID_REQUEST;
774b3055 2794 }
3b98c0c2 2795 return NO_ERROR;
774b3055
PR
2796}
2797
3b98c0c2 2798int drbd_adm_create_connection(struct sk_buff *skb, struct genl_info *info)
b411b363 2799{
3b98c0c2 2800 enum drbd_ret_code retcode;
9f5180e5 2801
3b98c0c2
LE
2802 retcode = drbd_adm_prepare(skb, info, 0);
2803 if (!adm_ctx.reply_skb)
2804 return retcode;
2805 if (retcode != NO_ERROR)
2806 goto out;
b411b363 2807
3b98c0c2
LE
2808 retcode = drbd_check_conn_name(adm_ctx.conn_name);
2809 if (retcode != NO_ERROR)
2810 goto out;
b411b363 2811
3b98c0c2 2812 if (adm_ctx.tconn) {
38f19616
LE
2813 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) {
2814 retcode = ERR_INVALID_REQUEST;
2815 drbd_msg_put_info("connection exists");
2816 }
2817 /* else: still NO_ERROR */
3b98c0c2 2818 goto out;
b411b363
PR
2819 }
2820
3b98c0c2 2821 if (!drbd_new_tconn(adm_ctx.conn_name))
b411b363 2822 retcode = ERR_NOMEM;
3b98c0c2
LE
2823out:
2824 drbd_adm_finish(info, retcode);
2825 return 0;
b411b363
PR
2826}
2827
3b98c0c2 2828int drbd_adm_add_minor(struct sk_buff *skb, struct genl_info *info)
b411b363 2829{
3b98c0c2
LE
2830 struct drbd_genlmsghdr *dh = info->userhdr;
2831 enum drbd_ret_code retcode;
b411b363 2832
3b98c0c2
LE
2833 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2834 if (!adm_ctx.reply_skb)
2835 return retcode;
2836 if (retcode != NO_ERROR)
2837 goto out;
b411b363 2838
3b98c0c2
LE
2839 /* FIXME drop minor_count parameter, limit to MINORMASK */
2840 if (dh->minor >= minor_count) {
2841 drbd_msg_put_info("requested minor out of range");
2842 retcode = ERR_INVALID_REQUEST;
2843 goto out;
b411b363 2844 }
3b98c0c2
LE
2845 /* FIXME we need a define here */
2846 if (adm_ctx.volume >= 256) {
2847 drbd_msg_put_info("requested volume id out of range");
2848 retcode = ERR_INVALID_REQUEST;
2849 goto out;
b411b363 2850 }
b411b363 2851
38f19616
LE
2852 /* drbd_adm_prepare made sure already
2853 * that mdev->tconn and mdev->vnr match the request. */
2854 if (adm_ctx.mdev) {
2855 if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
2856 retcode = ERR_MINOR_EXISTS;
2857 /* else: still NO_ERROR */
2858 goto out;
2859 }
2860
3b98c0c2
LE
2861 retcode = conn_new_minor(adm_ctx.tconn, dh->minor, adm_ctx.volume);
2862out:
2863 drbd_adm_finish(info, retcode);
2864 return 0;
b411b363
PR
2865}
2866
85f75dd7
LE
2867static enum drbd_ret_code adm_delete_minor(struct drbd_conf *mdev)
2868{
2869 if (mdev->state.disk == D_DISKLESS &&
2870 /* no need to be mdev->state.conn == C_STANDALONE &&
2871 * we may want to delete a minor from a live replication group.
2872 */
2873 mdev->state.role == R_SECONDARY) {
2874 drbd_delete_device(mdev_to_minor(mdev));
2875 return NO_ERROR;
2876 } else
2877 return ERR_MINOR_CONFIGURED;
2878}
2879
3b98c0c2 2880int drbd_adm_delete_minor(struct sk_buff *skb, struct genl_info *info)
b411b363 2881{
3b98c0c2 2882 enum drbd_ret_code retcode;
b411b363 2883
3b98c0c2
LE
2884 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR);
2885 if (!adm_ctx.reply_skb)
2886 return retcode;
2887 if (retcode != NO_ERROR)
2888 goto out;
b411b363 2889
85f75dd7
LE
2890 mutex_lock(&drbd_cfg_mutex);
2891 retcode = adm_delete_minor(adm_ctx.mdev);
2892 mutex_unlock(&drbd_cfg_mutex);
2893 /* if this was the last volume of this connection,
2894 * this will terminate all threads */
2895 if (retcode == NO_ERROR)
cffec5b2 2896 conn_reconfig_done(adm_ctx.tconn);
85f75dd7
LE
2897out:
2898 drbd_adm_finish(info, retcode);
2899 return 0;
2900}
2901
2902int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
2903{
2904 enum drbd_ret_code retcode;
2905 enum drbd_state_rv rv;
2906 struct drbd_conf *mdev;
2907 unsigned i;
2908
2909 retcode = drbd_adm_prepare(skb, info, 0);
2910 if (!adm_ctx.reply_skb)
2911 return retcode;
2912 if (retcode != NO_ERROR)
2913 goto out;
2914
2915 if (!adm_ctx.tconn) {
2916 retcode = ERR_CONN_NOT_KNOWN;
2917 goto out;
2918 }
2919
2920 mutex_lock(&drbd_cfg_mutex);
2921 /* demote */
2922 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2923 retcode = drbd_set_role(mdev, R_SECONDARY, 0);
2924 if (retcode < SS_SUCCESS) {
2925 drbd_msg_put_info("failed to demote");
2926 goto out_unlock;
2927 }
2928 }
2929
2930 /* disconnect */
2931 rv = conn_try_disconnect(adm_ctx.tconn, 0);
2932 if (rv < SS_SUCCESS) {
2933 retcode = rv; /* enum type mismatch! */
2934 drbd_msg_put_info("failed to disconnect");
2935 goto out_unlock;
2936 }
2937
2938 /* detach */
2939 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2940 rv = adm_detach(mdev);
2941 if (rv < SS_SUCCESS) {
2942 retcode = rv; /* enum type mismatch! */
2943 drbd_msg_put_info("failed to detach");
2944 goto out_unlock;
2945 }
2946 }
2947
2948 /* delete volumes */
2949 idr_for_each_entry(&adm_ctx.tconn->volumes, mdev, i) {
2950 retcode = adm_delete_minor(mdev);
2951 if (retcode != NO_ERROR) {
2952 /* "can not happen" */
2953 drbd_msg_put_info("failed to delete volume");
2954 goto out_unlock;
2955 }
2956 }
2957
2958 /* stop all threads */
2959 conn_reconfig_done(adm_ctx.tconn);
2960
2961 /* delete connection */
2962 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2963 drbd_free_tconn(adm_ctx.tconn);
2964 retcode = NO_ERROR;
2965 } else {
2966 /* "can not happen" */
2967 retcode = ERR_CONN_IN_USE;
2968 drbd_msg_put_info("failed to delete connection");
2969 goto out_unlock;
2970 }
2971out_unlock:
2972 mutex_unlock(&drbd_cfg_mutex);
3b98c0c2
LE
2973out:
2974 drbd_adm_finish(info, retcode);
2975 return 0;
b411b363
PR
2976}
2977
3b98c0c2 2978int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
b411b363 2979{
3b98c0c2 2980 enum drbd_ret_code retcode;
b411b363 2981
3b98c0c2
LE
2982 retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONN);
2983 if (!adm_ctx.reply_skb)
2984 return retcode;
2985 if (retcode != NO_ERROR)
2986 goto out;
2987
85f75dd7 2988 mutex_lock(&drbd_cfg_mutex);
3b98c0c2
LE
2989 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
2990 drbd_free_tconn(adm_ctx.tconn);
2991 retcode = NO_ERROR;
2992 } else {
2993 retcode = ERR_CONN_IN_USE;
b411b363 2994 }
85f75dd7 2995 mutex_unlock(&drbd_cfg_mutex);
b411b363 2996
3b98c0c2
LE
2997out:
2998 drbd_adm_finish(info, retcode);
b411b363
PR
2999 return 0;
3000}
3001
3b98c0c2 3002void drbd_bcast_event(struct drbd_conf *mdev, const struct sib_info *sib)
b411b363 3003{
3b98c0c2
LE
3004 static atomic_t drbd_genl_seq = ATOMIC_INIT(2); /* two. */
3005 struct sk_buff *msg;
3006 struct drbd_genlmsghdr *d_out;
3007 unsigned seq;
3008 int err = -ENOMEM;
3009
3010 seq = atomic_inc_return(&drbd_genl_seq);
3011 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3012 if (!msg)
3013 goto failed;
3014
3015 err = -EMSGSIZE;
3016 d_out = genlmsg_put(msg, 0, seq, &drbd_genl_family, 0, DRBD_EVENT);
3017 if (!d_out) /* cannot happen, but anyways. */
3018 goto nla_put_failure;
3019 d_out->minor = mdev_to_minor(mdev);
3020 d_out->ret_code = 0;
3021
3022 if (nla_put_status_info(msg, mdev, sib))
3023 goto nla_put_failure;
3024 genlmsg_end(msg, d_out);
3025 err = drbd_genl_multicast_events(msg, 0);
3026 /* msg has been consumed or freed in netlink_broadcast() */
3027 if (err && err != -ESRCH)
3028 goto failed;
b411b363 3029
3b98c0c2 3030 return;
b411b363 3031
3b98c0c2
LE
3032nla_put_failure:
3033 nlmsg_free(msg);
3034failed:
3035 dev_err(DEV, "Error %d while broadcasting event. "
3036 "Event seq:%u sib_reason:%u\n",
3037 err, seq, sib->sib_reason);
b411b363 3038}