/*
   drbd_actlog.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2003-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 2003-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2003-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/drbd.h>
#include <linux/drbd_limits.h>
#include <linux/dynamic_debug.h>
#include "drbd_int.h"
#include "drbd_wrappers.h"


enum al_transaction_types {
	AL_TR_UPDATE = 0,
	AL_TR_INITIALIZED = 0xffff
};
/* all fields on disc in big endian */
struct __packed al_transaction_on_disk {
	/* don't we all like magic */
	__be32	magic;

	/* to identify the most recent transaction block
	 * in the on disk ring buffer */
	__be32	tr_number;

	/* checksum on the full 4k block, with this field set to 0. */
	__be32	crc32c;

	/* type of transaction, special transaction types like:
	 * purge-all, set-all-idle, set-all-active, ... to-be-defined
	 * see also enum al_transaction_types */
	__be16	transaction_type;

	/* we currently allow only a few thousand extents,
	 * so 16bit will be enough for the slot number. */

	/* how many updates in this transaction */
	__be16	n_updates;

	/* maximum slot number, "al-extents" in drbd.conf speak.
	 * Having this in each transaction should make reconfiguration
	 * of that parameter easier. */
	__be16	context_size;

	/* slot number the context starts with */
	__be16	context_start_slot_nr;

	/* Some reserved bytes.  Expected usage is a 64bit counter of
	 * sectors-written since device creation, and other data generation tag
	 * supporting usage */
	__be32	__reserved[4];

	/* --- 36 byte used --- */

	/* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes
	 * in one transaction, then use the remaining byte in the 4k block for
	 * context information.  "Flexible" number of updates per transaction
	 * does not help, as we have to account for the case when all update
	 * slots are used anyways, so it would only complicate code without
	 * additional benefit.
	 */
	__be16	update_slot_nr[AL_UPDATES_PER_TRANSACTION];

	/* but the extent number is 32bit, which at an extent size of 4 MiB
	 * allows to cover device sizes of up to 2**54 Byte (16 PiB) */
	__be32	update_extent_nr[AL_UPDATES_PER_TRANSACTION];

	/* --- 420 bytes used (36 + 64*6) --- */

	/* 4096 - 420 = 3676 = 919 * 4 */
	__be32	context[AL_CONTEXT_PER_TRANSACTION];
};

struct update_odbm_work {
	struct drbd_work w;
	unsigned int enr;
};

struct update_al_work {
	struct drbd_work w;
	struct completion event;
	int err;
};

static int al_write_transaction(struct drbd_conf *mdev);

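/* Get exclusive access to the one per-device 4k meta-data IO buffer page.
 * Waits until the buffer is free or the disk state drops to D_FAILED or
 * below; returns NULL in the latter case.  Release the buffer again with
 * drbd_md_put_buffer(). */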
void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
	int r;

	wait_event(mdev->misc_wait,
		   (r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
		   mdev->state.disk <= D_FAILED);

	return r ? NULL : page_address(mdev->md_io_page);
}

void drbd_md_put_buffer(struct drbd_conf *mdev)
{
	if (atomic_dec_and_test(&mdev->md_io_in_use))
		wake_up(&mdev->misc_wait);
}

void wait_until_done_or_force_detached(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
				       unsigned int *done)
{
	long dt;

	rcu_read_lock();
	dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
	rcu_read_unlock();
	dt = dt * HZ / 10;
	if (dt == 0)
		dt = MAX_SCHEDULE_TIMEOUT;

	dt = wait_event_timeout(mdev->misc_wait,
			*done || test_bit(FORCE_DETACH, &mdev->flags), dt);
	if (dt == 0) {
		dev_err(DEV, "meta-data IO operation timed out\n");
		drbd_chk_io_error(mdev, 1, DRBD_FORCE_DETACH);
	}
}

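/* Issue one synchronous bio of @size bytes at @sector against the meta-data
 * block device and wait for it to complete (or for a forced detach).
 * Returns 0 on success, a negative error code otherwise. */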
static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
				 struct drbd_backing_dev *bdev,
				 struct page *page, sector_t sector,
				 int rw, int size)
{
	struct bio *bio;
	int err;

	mdev->md_io.done = 0;
	mdev->md_io.error = -ENODEV;

	if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
		rw |= REQ_FUA | REQ_FLUSH;
	rw |= REQ_SYNC;

	bio = bio_alloc_drbd(GFP_NOIO);
	bio->bi_bdev = bdev->md_bdev;
	bio->bi_sector = sector;
	err = -EIO;
	if (bio_add_page(bio, page, size, 0) != size)
		goto out;
	bio->bi_private = &mdev->md_io;
	bio->bi_end_io = drbd_md_io_complete;
	bio->bi_rw = rw;

	if (!get_ldev_if_state(mdev, D_ATTACHING)) {  /* Corresponding put_ldev in drbd_md_io_complete() */
		dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
		err = -ENODEV;
		goto out;
	}

	bio_get(bio); /* one bio_put() is in the completion handler */
	atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
	if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
		bio_endio(bio, -EIO);
	else
		submit_bio(rw, bio);
	wait_until_done_or_force_detached(mdev, bdev, &mdev->md_io.done);
	if (bio_flagged(bio, BIO_UPTODATE))
		err = mdev->md_io.error;

 out:
	bio_put(bio);
	return err;
}

int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
			 sector_t sector, int rw)
{
	int err;
	struct page *iop = mdev->md_io_page;

	D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);

	BUG_ON(!bdev->md_bdev);

	dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n",
	     current->comm, current->pid, __func__,
	     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	if (sector < drbd_md_first_sector(bdev) ||
	    sector + 7 > drbd_md_last_sector(bdev))
		dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
		     current->comm, current->pid, __func__,
		     (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");

	err = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE);
	if (err) {
		dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
		    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
	}
	return err;
}

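/* Try to grab a reference on activity log extent @enr.  If the covering
 * resync extent is currently locked against application writes
 * (BME_NO_WRITES), flag it BME_PRIORITY and return NULL so the caller
 * retries; otherwise return the element obtained via lc_get(). */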
static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *al_ext;
	struct lc_element *tmp;
	int wake;

	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, enr/AL_EXT_PER_BM_SECT);
	if (unlikely(tmp != NULL)) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_NO_WRITES, &bm_ext->flags)) {
			wake = !test_and_set_bit(BME_PRIORITY, &bm_ext->flags);
			spin_unlock_irq(&mdev->al_lock);
			if (wake)
				wake_up(&mdev->al_wait);
			return NULL;
		}
	}
	al_ext = lc_get(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);
	return al_ext;
}

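/* Mark the activity log extent(s) covering interval @i as active before the
 * application IO is submitted.  If that introduces pending changes, write an
 * AL transaction (unless al_updates is disabled) and commit the changes. */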
void drbd_al_begin_io(struct drbd_conf *mdev, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	bool locked = false;

	D_ASSERT(first <= last);
	D_ASSERT(atomic_read(&mdev->local_cnt) > 0);

	for (enr = first; enr <= last; enr++)
		wait_event(mdev->al_wait, _al_get(mdev, enr) != NULL);

	/* Serialize multiple transactions.
	 * This uses test_and_set_bit, memory barrier is implicit.
	 */
	wait_event(mdev->al_wait,
			mdev->act_log->pending_changes == 0 ||
			(locked = lc_try_lock_for_transaction(mdev->act_log)));

	if (locked) {
		/* drbd_al_write_transaction(mdev,al_ext,enr);
		 * recurses into generic_make_request(), which
		 * disallows recursion, bios being serialized on the
		 * current->bio_tail list now.
		 * we have to delegate updates to the activity log
		 * to the worker thread. */

		/* Double check: it may have been committed by someone else,
		 * while we have been waiting for the lock. */
		if (mdev->act_log->pending_changes) {
			bool write_al_updates;

			rcu_read_lock();
			write_al_updates = rcu_dereference(mdev->ldev->disk_conf)->al_updates;
			rcu_read_unlock();

			if (write_al_updates) {
				al_write_transaction(mdev);
				mdev->al_writ_cnt++;
			}

			spin_lock_irq(&mdev->al_lock);
			/* FIXME
			if (err)
				we need an "lc_cancel" here;
			*/
			lc_committed(mdev->act_log);
			spin_unlock_irq(&mdev->al_lock);
		}
		lc_unlock(mdev->act_log);
		wake_up(&mdev->al_wait);
	}
}

void drbd_al_complete_io(struct drbd_conf *mdev, struct drbd_interval *i)
{
	/* for bios crossing activity log extent boundaries,
	 * we may need to activate two extents in one go */
	unsigned first = i->sector >> (AL_EXTENT_SHIFT-9);
	unsigned last = i->size == 0 ? first : (i->sector + (i->size >> 9) - 1) >> (AL_EXTENT_SHIFT-9);
	unsigned enr;
	struct lc_element *extent;
	unsigned long flags;

	D_ASSERT(first <= last);
	spin_lock_irqsave(&mdev->al_lock, flags);

	for (enr = first; enr <= last; enr++) {
		extent = lc_find(mdev->act_log, enr);
		if (!extent) {
			dev_err(DEV, "al_complete_io() called on inactive extent %u\n", enr);
			continue;
		}
		lc_put(mdev->act_log, extent);
	}
	spin_unlock_irqrestore(&mdev->al_lock, flags);
	wake_up(&mdev->al_wait);
}

#if (PAGE_SHIFT + 3) < (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)
/* Currently BM_BLOCK_SHIFT, BM_EXT_SHIFT and AL_EXTENT_SHIFT
 * are still coupled, or assume too much about their relation.
 * Code below will not work if this is violated.
 * Will be cleaned up with some followup patch.
 */
# error FIXME
#endif

static unsigned int al_extent_to_bm_page(unsigned int al_enr)
{
	return al_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* al extent number to bit */
		 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
}

static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
{
	return rs_enr >>
		/* bit to page */
		((PAGE_SHIFT + 3) -
		/* resync extent number to bit */
		 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
}

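/* Fill one al_transaction_on_disk block with the currently pending changes
 * plus a cyclic slice of the activity log context, checksum it with crc32c,
 * and write it synchronously to the current slot of the on-disk AL ring
 * buffer. */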
static int
_al_write_transaction(struct drbd_conf *mdev)
{
	struct al_transaction_on_disk *buffer;
	struct lc_element *e;
	sector_t sector;
	int i, mx;
	unsigned extent_nr;
	unsigned crc = 0;
	int err = 0;

	if (!get_ldev(mdev)) {
		dev_err(DEV, "disk is %s, cannot start al transaction\n",
			drbd_disk_str(mdev->state.disk));
		return -EIO;
	}

	/* The bitmap write may have failed, causing a state change. */
	if (mdev->state.disk < D_INCONSISTENT) {
		dev_err(DEV,
			"disk is %s, cannot write al transaction\n",
			drbd_disk_str(mdev->state.disk));
		put_ldev(mdev);
		return -EIO;
	}

	buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
	if (!buffer) {
		dev_err(DEV, "disk failed while waiting for md_io buffer\n");
		put_ldev(mdev);
		return -ENODEV;
	}

	memset(buffer, 0, sizeof(*buffer));
	buffer->magic = cpu_to_be32(DRBD_AL_MAGIC);
	buffer->tr_number = cpu_to_be32(mdev->al_tr_number);

	i = 0;

	/* Even though no one can start to change this list
	 * once we set the LC_LOCKED -- from drbd_al_begin_io(),
	 * lc_try_lock_for_transaction() --, someone may still
	 * be in the process of changing it. */
	spin_lock_irq(&mdev->al_lock);
	list_for_each_entry(e, &mdev->act_log->to_be_changed, list) {
		if (i == AL_UPDATES_PER_TRANSACTION) {
			i++;
			break;
		}
		buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index);
		buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number);
		if (e->lc_number != LC_FREE)
			drbd_bm_mark_for_writeout(mdev,
					al_extent_to_bm_page(e->lc_number));
		i++;
	}
	spin_unlock_irq(&mdev->al_lock);
	BUG_ON(i > AL_UPDATES_PER_TRANSACTION);

	buffer->n_updates = cpu_to_be16(i);
	for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) {
		buffer->update_slot_nr[i] = cpu_to_be16(-1);
		buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE);
	}

	buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements);
	buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle);

	mx = min_t(int, AL_CONTEXT_PER_TRANSACTION,
		   mdev->act_log->nr_elements - mdev->al_tr_cycle);
	for (i = 0; i < mx; i++) {
		unsigned idx = mdev->al_tr_cycle + i;
		extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number;
		buffer->context[i] = cpu_to_be32(extent_nr);
	}
	for (; i < AL_CONTEXT_PER_TRANSACTION; i++)
		buffer->context[i] = cpu_to_be32(LC_FREE);

	mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION;
	if (mdev->al_tr_cycle >= mdev->act_log->nr_elements)
		mdev->al_tr_cycle = 0;

	sector =  mdev->ldev->md.md_offset
		+ mdev->ldev->md.al_offset
		+ mdev->al_tr_pos * (MD_BLOCK_SIZE>>9);

	crc = crc32c(0, buffer, 4096);
	buffer->crc32c = cpu_to_be32(crc);

	if (drbd_bm_write_hinted(mdev))
		err = -EIO;
		/* drbd_chk_io_error done already */
	else if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
		err = -EIO;
		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
	} else {
		/* advance ringbuffer position and transaction counter */
		mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE);
		mdev->al_tr_number++;
	}

	drbd_md_put_buffer(mdev);
	put_ldev(mdev);

	return err;
}


static int w_al_write_transaction(struct drbd_work *w, int unused)
{
	struct update_al_work *aw = container_of(w, struct update_al_work, w);
	struct drbd_conf *mdev = w->mdev;
	int err;

	err = _al_write_transaction(mdev);
	aw->err = err;
	complete(&aw->event);

	return err != -EIO ? err : 0;
}

/* Calls from worker context (see w_restart_disk_io()) need to write the
   transaction directly. Others came through generic_make_request(),
   those need to delegate it to the worker. */
static int al_write_transaction(struct drbd_conf *mdev)
{
	struct update_al_work al_work;

	if (current == mdev->tconn->worker.task)
		return _al_write_transaction(mdev);

	init_completion(&al_work.event);
	al_work.w.cb = w_al_write_transaction;
	al_work.w.mdev = mdev;
	drbd_queue_work_front(&mdev->tconn->sender_work, &al_work.w);
	wait_for_completion(&al_work.event);

	return al_work.err;
}

static int _try_lc_del(struct drbd_conf *mdev, struct lc_element *al_ext)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = (al_ext->refcnt == 0);
	if (likely(rv))
		lc_del(mdev->act_log, al_ext);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_al_shrink() - Removes all active extents from the activity log
 * @mdev:	DRBD device.
 *
 * Removes all active extents from the activity log, waiting until
 * the reference count of each entry has dropped to 0 first, of course.
 *
 * You need to lock mdev->act_log with lc_try_lock() / lc_unlock()
 */
void drbd_al_shrink(struct drbd_conf *mdev)
{
	struct lc_element *al_ext;
	int i;

	D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags));

	for (i = 0; i < mdev->act_log->nr_elements; i++) {
		al_ext = lc_element_by_index(mdev->act_log, i);
		if (al_ext->lc_number == LC_FREE)
			continue;
		wait_event(mdev->al_wait, _try_lc_del(mdev, al_ext));
	}

	wake_up(&mdev->al_wait);
}

static int w_update_odbm(struct drbd_work *w, int unused)
{
	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
	struct drbd_conf *mdev = w->mdev;
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };

	if (!get_ldev(mdev)) {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_warn(DEV, "Can not update on disk bitmap, local IO disabled.\n");
		kfree(udw);
		return 0;
	}

	drbd_bm_write_page(mdev, rs_extent_to_bm_page(udw->enr));
	put_ldev(mdev);

	kfree(udw);

	if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) {
		switch (mdev->state.conn) {
		case C_SYNC_SOURCE:  case C_SYNC_TARGET:
		case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
			drbd_resync_finished(mdev);
		default:
			/* nothing to do */
			break;
		}
	}
	drbd_bcast_event(mdev, &sib);

	return 0;
}


/* ATTENTION. The AL's extents are 4MB each, while the extents in the
 * resync LRU-cache are 16MB each.
 * The caller of this function has to hold a get_ldev() reference.
 *
 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
 */
static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
				      int count, int success)
{
	struct lc_element *e;
	struct update_odbm_work *udw;

	unsigned int enr;

	D_ASSERT(atomic_read(&mdev->local_cnt));

	/* I simply assume that a sector/size pair never crosses
	 * a 16 MB extent border. (Currently this is true...) */
	enr = BM_SECT_TO_EXT(sector);

	e = lc_get(mdev->resync, enr);
	if (e) {
		struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
		if (ext->lce.lc_number == enr) {
			if (success)
				ext->rs_left -= count;
			else
				ext->rs_failed += count;
			if (ext->rs_left < ext->rs_failed) {
				dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
				    "rs_failed=%d count=%d cstate=%s\n",
				     (unsigned long long)sector,
				     ext->lce.lc_number, ext->rs_left,
				     ext->rs_failed, count,
				     drbd_conn_str(mdev->state.conn));

				/* We don't expect to be able to clear more bits
				 * than have been set when we originally counted
				 * the set bits to cache that value in ext->rs_left.
				 * Whatever the reason (disconnect during resync,
				 * delayed local completion of an application write),
				 * try to fix it up by recounting here. */
				ext->rs_left = drbd_bm_e_weight(mdev, enr);
			}
		} else {
			/* Normally this element should be in the cache,
			 * since drbd_rs_begin_io() pulled it already in.
			 *
			 * But maybe an application write finished, and we set
			 * something outside the resync lru_cache in sync.
			 */
			int rs_left = drbd_bm_e_weight(mdev, enr);
			if (ext->flags != 0) {
				dev_warn(DEV, "changing resync lce: %d[%u;%02lx]"
				     " -> %d[%u;00]\n",
				     ext->lce.lc_number, ext->rs_left,
				     ext->flags, enr, rs_left);
				ext->flags = 0;
			}
			if (ext->rs_failed) {
				dev_warn(DEV, "Kicking resync_lru element enr=%u "
				     "out with rs_failed=%d\n",
				     ext->lce.lc_number, ext->rs_failed);
			}
			ext->rs_left = rs_left;
			ext->rs_failed = success ? 0 : count;
			/* we don't keep a persistent log of the resync lru,
			 * we can commit any change right away. */
			lc_committed(mdev->resync);
		}
		lc_put(mdev->resync, &ext->lce);
		/* no race, we are within the al_lock! */

		if (ext->rs_left == ext->rs_failed) {
			ext->rs_failed = 0;

			udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
			if (udw) {
				udw->enr = ext->lce.lc_number;
				udw->w.cb = w_update_odbm;
				udw->w.mdev = mdev;
				drbd_queue_work_front(&mdev->tconn->sender_work, &udw->w);
			} else {
				dev_warn(DEV, "Could not kmalloc an udw\n");
			}
		}
	} else {
		dev_err(DEV, "lc_get() failed! locked=%d/%d flags=%lu\n",
		    mdev->resync_locked,
		    mdev->resync->nr_elements,
		    mdev->resync->flags);
	}
}

void drbd_advance_rs_marks(struct drbd_conf *mdev, unsigned long still_to_go)
{
	unsigned long now = jiffies;
	unsigned long last = mdev->rs_mark_time[mdev->rs_last_mark];
	int next = (mdev->rs_last_mark + 1) % DRBD_SYNC_MARKS;
	if (time_after_eq(now, last + DRBD_SYNC_MARK_STEP)) {
		if (mdev->rs_mark_left[mdev->rs_last_mark] != still_to_go &&
		    mdev->state.conn != C_PAUSED_SYNC_T &&
		    mdev->state.conn != C_PAUSED_SYNC_S) {
			mdev->rs_mark_time[next] = now;
			mdev->rs_mark_left[next] = still_to_go;
			mdev->rs_last_mark = next;
		}
	}
}

/* clear the bit corresponding to the piece of storage in question:
 * size byte of data starting from sector.  Only clear bits of the affected
 * one or more _aligned_ BM_BLOCK_SIZE blocks.
 *
 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
 *
 */
void __drbd_set_in_sync(struct drbd_conf *mdev, sector_t sector, int size,
		       const char *file, const unsigned int line)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count = 0;
	sector_t esector, nr_sectors;
	int wake_up = 0;
	unsigned long flags;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}

	if (!get_ldev(mdev))
		return; /* no disk, no metadata, no bitmap to clear bits in */

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/* we clear it (in sync).
	 * round up start sector, round down end sector.  we make sure we only
	 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		goto out;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		goto out;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	count = drbd_bm_clear_bits(mdev, sbnr, ebnr);
	if (count) {
		drbd_advance_rs_marks(mdev, drbd_bm_total_weight(mdev));
		spin_lock_irqsave(&mdev->al_lock, flags);
		drbd_try_clear_on_disk_bm(mdev, sector, count, true);
		spin_unlock_irqrestore(&mdev->al_lock, flags);

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
out:
	put_ldev(mdev);
	if (wake_up)
		wake_up(&mdev->al_wait);
}

/*
 * this is intended to set one request worth of data out of sync.
 * affects at least 1 bit,
 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits.
 *
 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
 * so this can be _any_ process.
 */
int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector, int size,
			    const char *file, const unsigned int line)
{
	unsigned long sbnr, ebnr, flags;
	sector_t esector, nr_sectors;
	unsigned int enr, count = 0;
	struct lc_element *e;

	/* this should be an empty REQ_FLUSH */
	if (size == 0)
		return 0;

	if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "sector: %llus, size: %d\n",
			(unsigned long long)sector, size);
		return 0;
	}

	if (!get_ldev(mdev))
		return 0; /* no disk, no metadata, no bitmap to set bits in */

	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		goto out;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	/* we set it out of sync,
	 * we do not need to round anything here */
	sbnr = BM_SECT_TO_BIT(sector);
	ebnr = BM_SECT_TO_BIT(esector);

	/* ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors. */
	spin_lock_irqsave(&mdev->al_lock, flags);
	count = drbd_bm_set_bits(mdev, sbnr, ebnr);

	enr = BM_SECT_TO_EXT(sector);
	e = lc_find(mdev->resync, enr);
	if (e)
		lc_entry(e, struct bm_extent, lce)->rs_left += count;
	spin_unlock_irqrestore(&mdev->al_lock, flags);

out:
	put_ldev(mdev);

	return count;
}

static
struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int wakeup = 0;
	unsigned long rs_flags;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_locked > mdev->resync->nr_elements/2) {
		spin_unlock_irq(&mdev->al_lock);
		return NULL;
	}
	e = lc_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_committed(mdev->resync);
			wakeup = 1;
		}
		if (bm_ext->lce.refcnt == 1)
			mdev->resync_locked++;
		set_bit(BME_NO_WRITES, &bm_ext->flags);
	}
	rs_flags = mdev->resync->flags;
	spin_unlock_irq(&mdev->al_lock);
	if (wakeup)
		wake_up(&mdev->al_wait);

	if (!bm_ext) {
		if (rs_flags & LC_STARVING)
			dev_warn(DEV, "Have to wait for element"
			     " (resync LRU too small?)\n");
		BUG_ON(rs_flags & LC_LOCKED);
	}

	return bm_ext;
}

static int _is_in_al(struct drbd_conf *mdev, unsigned int enr)
{
	int rv;

	spin_lock_irq(&mdev->al_lock);
	rv = lc_is_used(mdev->act_log, enr);
	spin_unlock_irq(&mdev->al_lock);

	return rv;
}

/**
 * drbd_rs_begin_io() - Gets an extent in the resync LRU cache and sets it to BME_LOCKED
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * This function sleeps on al_wait. Returns 0 on success, -EINTR if interrupted.
 */
int drbd_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct bm_extent *bm_ext;
	int i, sig;
	int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait.
			 200 times -> 20 seconds. */

retry:
	sig = wait_event_interruptible(mdev->al_wait,
			(bm_ext = _bme_get(mdev, enr)));
	if (sig)
		return -EINTR;

	if (test_bit(BME_LOCKED, &bm_ext->flags))
		return 0;

	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		sig = wait_event_interruptible(mdev->al_wait,
					       !_is_in_al(mdev, enr * AL_EXT_PER_BM_SECT + i) ||
					       test_bit(BME_PRIORITY, &bm_ext->flags));

		if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) {
			spin_lock_irq(&mdev->al_lock);
			if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
				bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */
				mdev->resync_locked--;
				wake_up(&mdev->al_wait);
			}
			spin_unlock_irq(&mdev->al_lock);
			if (sig)
				return -EINTR;
			if (schedule_timeout_interruptible(HZ/10))
				return -EINTR;
			if (sa && --sa == 0)
				dev_warn(DEV, "drbd_rs_begin_io() stepped aside for 20sec."
					 "Resync stalled?\n");
			goto retry;
		}
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
	return 0;
}

/**
 * drbd_try_rs_begin_io() - Gets an extent in the resync LRU cache, does not sleep
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 *
 * Gets an extent in the resync LRU cache, sets it to BME_NO_WRITES, then
 * tries to set it to BME_LOCKED. Returns 0 upon success, and -EAGAIN
 * if there is still application IO going on in this area.
 */
int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	const unsigned int al_enr = enr*AL_EXT_PER_BM_SECT;
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);
	if (mdev->resync_wenr != LC_FREE && mdev->resync_wenr != enr) {
		/* in case you have very heavy scattered io, it may
		 * stall the syncer undefined if we give up the ref count
		 * when we try again and requeue.
		 *
		 * if we don't give up the refcount, but the next time
		 * we are scheduled this extent has been "synced" by new
		 * application writes, we'd miss the lc_put on the
		 * extent we keep the refcount on.
		 * so we remembered which extent we had to try again, and
		 * if the next requested one is something else, we do
		 * the lc_put here...
		 * we also have to wake_up
		 */
		e = lc_find(mdev->resync, mdev->resync_wenr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (bm_ext) {
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
			clear_bit(BME_NO_WRITES, &bm_ext->flags);
			mdev->resync_wenr = LC_FREE;
			if (lc_put(mdev->resync, &bm_ext->lce) == 0)
				mdev->resync_locked--;
			wake_up(&mdev->al_wait);
		} else {
			dev_alert(DEV, "LOGIC BUG\n");
		}
	}
	/* TRY. */
	e = lc_try_get(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (bm_ext) {
		if (test_bit(BME_LOCKED, &bm_ext->flags))
			goto proceed;
		if (!test_and_set_bit(BME_NO_WRITES, &bm_ext->flags)) {
			mdev->resync_locked++;
		} else {
			/* we did set the BME_NO_WRITES,
			 * but then could not set BME_LOCKED,
			 * so we tried again.
			 * drop the extra reference. */
			bm_ext->lce.refcnt--;
			D_ASSERT(bm_ext->lce.refcnt > 0);
		}
		goto check_al;
	} else {
		/* do we rather want to try later? */
		if (mdev->resync_locked > mdev->resync->nr_elements-3)
			goto try_again;
		/* Do or do not. There is no try. -- Yoda */
		e = lc_get(mdev->resync, enr);
		bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
		if (!bm_ext) {
			const unsigned long rs_flags = mdev->resync->flags;
			if (rs_flags & LC_STARVING)
				dev_warn(DEV, "Have to wait for element"
				     " (resync LRU too small?)\n");
			BUG_ON(rs_flags & LC_LOCKED);
			goto try_again;
		}
		if (bm_ext->lce.lc_number != enr) {
			bm_ext->rs_left = drbd_bm_e_weight(mdev, enr);
			bm_ext->rs_failed = 0;
			lc_committed(mdev->resync);
			wake_up(&mdev->al_wait);
			D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0);
		}
		set_bit(BME_NO_WRITES, &bm_ext->flags);
		D_ASSERT(bm_ext->lce.refcnt == 1);
		mdev->resync_locked++;
		goto check_al;
	}
check_al:
	for (i = 0; i < AL_EXT_PER_BM_SECT; i++) {
		if (lc_is_used(mdev->act_log, al_enr+i))
			goto try_again;
	}
	set_bit(BME_LOCKED, &bm_ext->flags);
proceed:
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	return 0;

try_again:
	if (bm_ext)
		mdev->resync_wenr = enr;
	spin_unlock_irq(&mdev->al_lock);
	return -EAGAIN;
}

void drbd_rs_complete_io(struct drbd_conf *mdev, sector_t sector)
{
	unsigned int enr = BM_SECT_TO_EXT(sector);
	struct lc_element *e;
	struct bm_extent *bm_ext;
	unsigned long flags;

	spin_lock_irqsave(&mdev->al_lock, flags);
	e = lc_find(mdev->resync, enr);
	bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
	if (!bm_ext) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "drbd_rs_complete_io() called, but extent not found\n");
		return;
	}

	if (bm_ext->lce.refcnt == 0) {
		spin_unlock_irqrestore(&mdev->al_lock, flags);
		dev_err(DEV, "drbd_rs_complete_io(,%llu [=%u]) called, "
		    "but refcnt is 0!?\n",
		    (unsigned long long)sector, enr);
		return;
	}

	if (lc_put(mdev->resync, &bm_ext->lce) == 0) {
		bm_ext->flags = 0; /* clear BME_LOCKED, BME_NO_WRITES and BME_PRIORITY */
		mdev->resync_locked--;
		wake_up(&mdev->al_wait);
	}

	spin_unlock_irqrestore(&mdev->al_lock, flags);
}

/**
 * drbd_rs_cancel_all() - Removes all extents from the resync LRU (even BME_LOCKED)
 * @mdev:	DRBD device.
 */
void drbd_rs_cancel_all(struct drbd_conf *mdev)
{
	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) { /* Makes sure ->resync is there. */
		lc_reset(mdev->resync);
		put_ldev(mdev);
	}
	mdev->resync_locked = 0;
	mdev->resync_wenr = LC_FREE;
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);
}

/**
 * drbd_rs_del_all() - Gracefully remove all extents from the resync LRU
 * @mdev:	DRBD device.
 *
 * Returns 0 upon success, -EAGAIN if at least one reference count was
 * not zero.
 */
int drbd_rs_del_all(struct drbd_conf *mdev)
{
	struct lc_element *e;
	struct bm_extent *bm_ext;
	int i;

	spin_lock_irq(&mdev->al_lock);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		/* ok, ->resync is there. */
		for (i = 0; i < mdev->resync->nr_elements; i++) {
			e = lc_element_by_index(mdev->resync, i);
			bm_ext = lc_entry(e, struct bm_extent, lce);
			if (bm_ext->lce.lc_number == LC_FREE)
				continue;
			if (bm_ext->lce.lc_number == mdev->resync_wenr) {
				dev_info(DEV, "dropping %u in drbd_rs_del_all, apparently"
				     " got 'synced' by application io\n",
				     mdev->resync_wenr);
				D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
				D_ASSERT(test_bit(BME_NO_WRITES, &bm_ext->flags));
				clear_bit(BME_NO_WRITES, &bm_ext->flags);
				mdev->resync_wenr = LC_FREE;
				lc_put(mdev->resync, &bm_ext->lce);
			}
			if (bm_ext->lce.refcnt != 0) {
				dev_info(DEV, "Retrying drbd_rs_del_all() later. "
				     "refcnt=%d\n", bm_ext->lce.refcnt);
				put_ldev(mdev);
				spin_unlock_irq(&mdev->al_lock);
				return -EAGAIN;
			}
			D_ASSERT(!test_bit(BME_LOCKED, &bm_ext->flags));
			D_ASSERT(!test_bit(BME_NO_WRITES, &bm_ext->flags));
			lc_del(mdev->resync, &bm_ext->lce);
		}
		D_ASSERT(mdev->resync->used == 0);
		put_ldev(mdev);
	}
	spin_unlock_irq(&mdev->al_lock);
	wake_up(&mdev->al_wait);

	return 0;
}

/**
 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
 * @mdev:	DRBD device.
 * @sector:	The sector number.
 * @size:	Size of failed IO operation, in bytes.
 */
void drbd_rs_failed_io(struct drbd_conf *mdev, sector_t sector, int size)
{
	/* Is called from worker and receiver context _only_ */
	unsigned long sbnr, ebnr, lbnr;
	unsigned long count;
	sector_t esector, nr_sectors;
	int wake_up = 0;

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
				(unsigned long long)sector, size);
		return;
	}
	nr_sectors = drbd_get_capacity(mdev->this_bdev);
	esector = sector + (size >> 9) - 1;

	if (!expect(sector < nr_sectors))
		return;
	if (!expect(esector < nr_sectors))
		esector = nr_sectors - 1;

	lbnr = BM_SECT_TO_BIT(nr_sectors-1);

	/*
	 * round up start sector, round down end sector.  we make sure we only
	 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
	if (unlikely(esector < BM_SECT_PER_BIT-1))
		return;
	if (unlikely(esector == (nr_sectors-1)))
		ebnr = lbnr;
	else
		ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
	sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);

	if (sbnr > ebnr)
		return;

	/*
	 * ok, (capacity & 7) != 0 sometimes, but who cares...
	 * we count rs_{total,left} in bits, not sectors.
	 */
	spin_lock_irq(&mdev->al_lock);
	count = drbd_bm_count_bits(mdev, sbnr, ebnr);
	if (count) {
		mdev->rs_failed += count;

		if (get_ldev(mdev)) {
			drbd_try_clear_on_disk_bm(mdev, sector, count, false);
			put_ldev(mdev);
		}

		/* just wake_up unconditional now, various lc_changed(),
		 * lc_put() in drbd_try_clear_on_disk_bm(). */
		wake_up = 1;
	}
	spin_unlock_irq(&mdev->al_lock);
	if (wake_up)
		wake_up(&mdev->al_wait);
}