summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2015-03-18 17:13:26 +0100
committerJens Axboe <axboe@fb.com>2015-11-25 09:22:02 -0700
commit9fa48269197f7fcb8ccfbef03c3f3a8616872579 (patch)
tree7b8062af32ffd98faf1113894b5eba1a7c121e01
parent668700b40a7c8727bbd2b3fd4fd22e0ce3f1aeb6 (diff)
drbd: prevent NULL pointer deref when resuming diskless primary
In a multiple error scenario, we may end up with a "frozen" Primary, that has no access to any data (no local disk, no replication link). If we then resume-io, we try to generate a new data generation id, which will fail if there is no longer a local disk. Double check for available local data, which prevents the NULL pointer deref. If we are diskless, turn the resume-io in this situation into the first stage of a "force down", by bumping the "effective" data gen id, which will prevent later attach or connect to the former data set without first being demoted (deconfigured). Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--drivers/block/drbd/drbd_nl.c25
1 files changed, 24 insertions, 1 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index f35cefb20e25..5e4adff91d1d 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -2920,7 +2920,30 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info)
mutex_lock(&adm_ctx.resource->adm_mutex);
device = adm_ctx.device;
if (test_bit(NEW_CUR_UUID, &device->flags)) {
- drbd_uuid_new_current(device);
+ if (get_ldev_if_state(device, D_ATTACHING)) {
+ drbd_uuid_new_current(device);
+ put_ldev(device);
+ } else {
+ /* This is effectively a multi-stage "forced down".
+ * The NEW_CUR_UUID bit is supposedly only set, if we
+ * lost the replication connection, and are configured
+ * to freeze IO and wait for some fence-peer handler.
+ * So we still don't have a replication connection.
+ * And now we don't have a local disk either. After
+ * resume, we will fail all pending and new IO, because
+ * we don't have any data anymore. Which means we will
+ * eventually be able to terminate all users of this
+ * device, and then take it down. By bumping the
+ * "effective" data uuid, we make sure that you really
+ * need to tear down before you reconfigure, we will
+ * the refuse to re-connect or re-attach (because no
+ * matching real data uuid exists).
+ */
+ u64 val;
+ get_random_bytes(&val, sizeof(u64));
+ drbd_set_ed_uuid(device, val);
+ drbd_warn(device, "Resumed without access to data; please tear down before attempting to re-configure.\n");
+ }
clear_bit(NEW_CUR_UUID, &device->flags);
}
drbd_suspend_io(device);