int err;
};
-static int al_write_transaction(struct drbd_conf *mdev);
+static int al_write_transaction(struct drbd_conf *mdev, bool delegate);
void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw;
- if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */
+ if (!(rw & WRITE) && mdev->state.disk == D_DISKLESS && mdev->ldev == NULL)
+ /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
+ ;
+ else if (!get_ldev_if_state(mdev, D_ATTACHING)) {
+ /* Corresponding put_ldev in drbd_md_io_complete() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
err = -ENODEV;
goto out;
BUG_ON(!bdev->md_bdev);
- dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
+ dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
current->comm, current->pid, __func__,
- (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+ (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
+ (void*)_RET_IP_ );
if (sector < drbd_md_first_sector(bdev) ||
sector + 7 > drbd_md_last_sector(bdev))
current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
- err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
+ /* we do all our meta data IO in aligned 4k blocks. */
+ err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, 4096);
if (err) {
dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
return al_ext;
}
-void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
+/*
+ * @delegate: delegate activity log I/O to the worker thread
+ */
+void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i, bool delegate)
{
/* for bios crossing activity log extent boundaries,
* we may need to activate two extents in one go */
unsigned enr;
bool locked = false;
+ /* When called through generic_make_request(), we must delegate
+ * activity log I/O to the worker thread: a further request
+ * submitted via generic_make_request() within the same task
+ * would be queued on current->bio_list, and would only start
+ * after this function returns (see generic_make_request()).
+ *
+ * However, if we *are* the worker, we must not delegate to ourselves.
+ */
+
+ if (delegate)
+ BUG_ON(current == mdev->tconn->worker.task);
D_ASSERT(first <= last);
D_ASSERT(atomic_read(&mdev->local_cnt) > 0);
(locked = lc_try_lock_for_transaction(mdev->act_log)));
if (locked) {
- /* drbd_al_write_transaction(mdev,al_ext,enr);
- * recurses into generic_make_request(), which
- * disallows recursion, bios being serialized on the
- * current->bio_tail list now.
- * we have to delegate updates to the activity log
- * to the worker thread. */
-
/* Double check: it may have been committed by someone else,
* while we have been waiting for the lock. */
if (mdev->act_log->pending_changes) {
rcu_read_unlock();
if (write_al_updates) {
- al_write_transaction(mdev);
+ al_write_transaction(mdev, delegate);
mdev->al_writ_cnt++;
}
(BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}
+static sector_t al_tr_number_to_on_disk_sector(struct drbd_conf *mdev)
+{
+ const unsigned int stripes = mdev->ldev->md.al_stripes;
+ const unsigned int stripe_size_4kB = mdev->ldev->md.al_stripe_size_4k;
+
+ /* transaction number, modulo on-disk ring buffer wrap around */
+ unsigned int t = mdev->al_tr_number % (mdev->ldev->md.al_size_4k);
+
+ /* ... to aligned 4k on disk block */
+ t = ((t % stripes) * stripe_size_4kB) + t/stripes;
+
+ /* ... to 512 byte sector in activity log */
+ t *= 8;
+
+ /* ... plus offset to the on disk position */
+ return mdev->ldev->md.md_offset + mdev->ldev->md.al_offset + t;
+}
+
static int
_al_write_transaction(struct drbd_conf *mdev)
{
if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
mdev->al_tr_cycle = 0;
- sector = mdev->ldev->md.md_offset
- + mdev->ldev->md.al_offset
- + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);
+ sector = al_tr_number_to_on_disk_sector(mdev);
crc = crc32c(0, buffer, 4096);
buffer->crc32c = cpu_to_be32(crc);
+ /* normal execution path goes through all three branches */
if (drbd_bm_write_hinted(mdev))
err = -EIO;
/* drbd_chk_io_error done already */
err = -EIO;
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
} else {
- /* advance ringbuffer position and transaction counter */
- mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
mdev->al_tr_number++;
}
/* Calls from worker context (see w_restart_disk_io()) need to write the
transaction directly. Others came through generic_make_request(),
those need to delegate it to the worker. */
-static int al_write_transaction(struct drbd_conf *mdev)
+static int al_write_transaction(struct drbd_conf *mdev, bool delegate)
{
- struct update_al_work al_work;
-
- if (current == mdev->tconn->worker.task)
+ if (delegate) {
+ struct update_al_work al_work;
+ init_completion(&al_work.event);
+ al_work.w.cb = w_al_write_transaction;
+ al_work.w.mdev = mdev;
+ drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
+ wait_for_completion(&al_work.event);
+ return al_work.err;
+ } else
return _al_write_transaction(mdev);
-
- init_completion(&al_work.event);
- al_work.w.cb = w_al_write_transaction;
- al_work.w.mdev = mdev;
- drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
- wait_for_completion(&al_work.event);
-
- return al_work.err;
}
static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)