// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_worker.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.


*/

#include <linux/module.h>
#include <linux/drbd.h>
#include <linux/sched/signal.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include <linux/part_stat.h>

#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

static int make_ov_request(struct drbd_device *, int);
static int make_resync_request(struct drbd_device *, int);

/* endio handlers:
 * drbd_md_endio (defined here)
 * drbd_request_endio (defined here)
 * drbd_peer_request_endio (defined here)
 * drbd_bm_endio (defined in drbd_bitmap.c)
 *
 * For all these callbacks, note the following:
 * The callbacks will be called in irq context by the IDE drivers,
 * and in Softirqs/Tasklets/BH context by the SCSI drivers.
 * Try to get the locking right :)
 *
 */

/* used for synchronous meta data and bitmap IO
 * submitted by drbd_md_sync_page_io()
 */
void drbd_md_endio(struct bio *bio)
{
	struct drbd_device *device;

	device = bio->bi_private;
	device->md_io.error = blk_status_to_errno(bio->bi_status);

	/* special case: drbd_md_read() during drbd_adm_attach() */
	if (device->ldev)
		put_ldev(device);
	bio_put(bio);

	/* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
	 * to timeout on the lower level device, and eventually detach from it.
	 * If this io completion runs after that timeout expired, this
	 * drbd_md_put_buffer() may allow us to finally try and re-attach.
	 * During normal operation, this only puts that extra reference
	 * down to 1 again.
	 * Make sure we first drop the reference, and only then signal
	 * completion, or we may (in drbd_al_read_log()) cycle so fast into the
	 * next drbd_md_sync_page_io(), that we trigger the
	 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
	 */
	drbd_md_put_buffer(device);
	device->md_io.done = 1;
	wake_up(&device->misc_wait);
}

/* reads on behalf of the partner,
 * "submitted" by the receiver
 */
static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->read_cnt += peer_req->i.size >> 9;
	list_del(&peer_req->w.list);
	if (list_empty(&device->read_ee))
		wake_up(&device->ee_wait);
	if (test_bit(__EE_WAS_ERROR, &peer_req->flags))
		__drbd_chk_io_error(device, DRBD_READ_ERROR);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	drbd_queue_work(&peer_device->connection->sender_work, &peer_req->w);
	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver, final stage. */
void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local)
{
	unsigned long flags = 0;
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct drbd_connection *connection = peer_device->connection;
	struct drbd_interval i;
	int do_wake;
	u64 block_id;
	int do_al_complete_io;

	/* after we moved peer_req to done_ee,
	 * we may no longer access it,
	 * it may be freed/reused already!
	 * (as soon as we release the req_lock) */
	i = peer_req->i;
	do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
	block_id = peer_req->block_id;
	peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;

	if (peer_req->flags & EE_WAS_ERROR) {
		/* In protocol != C, we usually do not send write acks.
		 * In case of a write error, send the neg ack anyways. */
		if (!__test_and_set_bit(__EE_SEND_WRITE_ACK, &peer_req->flags))
			inc_unacked(device);
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
	}

	spin_lock_irqsave(&device->resource->req_lock, flags);
	device->writ_cnt += peer_req->i.size >> 9;
	list_move_tail(&peer_req->w.list, &device->done_ee);

	/*
	 * Do not remove from the write_requests tree here: we did not send the
	 * Ack yet and did not wake possibly waiting conflicting requests.
	 * Removed from the tree from "drbd_process_done_ee" within the
	 * appropriate dw.cb (e_end_block/e_end_resync_block) or from
	 * _drbd_clear_done_ee.
	 */

	do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee);

	/* FIXME do we want to detach for failed REQ_OP_DISCARD?
	 * ((peer_req->flags & (EE_WAS_ERROR|EE_TRIM)) == EE_WAS_ERROR) */
	if (peer_req->flags & EE_WAS_ERROR)
		__drbd_chk_io_error(device, DRBD_WRITE_ERROR);

	if (connection->cstate >= C_WF_REPORT_PARAMS) {
		kref_get(&device->kref); /* put is in drbd_send_acks_wf() */
		if (!queue_work(connection->ack_sender, &peer_device->send_acks_work))
			kref_put(&device->kref, drbd_destroy_device);
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (block_id == ID_SYNCER)
		drbd_rs_complete_io(device, i.sector);

	if (do_wake)
		wake_up(&device->ee_wait);

	if (do_al_complete_io)
		drbd_al_complete_io(device, &i);

	put_ldev(device);
}

/* writes on behalf of the partner, or resync writes,
 * "submitted" by the receiver.
 */
void drbd_peer_request_endio(struct bio *bio)
{
	struct drbd_peer_request *peer_req = bio->bi_private;
	struct drbd_device *device = peer_req->peer_device->device;
	bool is_write = bio_data_dir(bio) == WRITE;
	bool is_discard = bio_op(bio) == REQ_OP_WRITE_ZEROES ||
			  bio_op(bio) == REQ_OP_DISCARD;

	if (bio->bi_status && __ratelimit(&drbd_ratelimit_state))
		drbd_warn(device, "%s: error=%d s=%llus\n",
				is_write ? (is_discard ? "discard" : "write")
					 : "read", bio->bi_status,
				(unsigned long long)peer_req->i.sector);

	if (bio->bi_status)
		set_bit(__EE_WAS_ERROR, &peer_req->flags);

	bio_put(bio); /* no need for the bio anymore */
	if (atomic_dec_and_test(&peer_req->pending_bios)) {
		if (is_write)
			drbd_endio_write_sec_final(peer_req);
		else
			drbd_endio_read_sec_final(peer_req);
	}
}

static void
drbd_panic_after_delayed_completion_of_aborted_request(struct drbd_device *device)
{
	panic("drbd%u %s/%u potential random memory corruption caused by delayed completion of aborted local request\n",
		device->minor, device->resource->name, device->vnr);
}

/* read, readA or write requests on R_PRIMARY coming from drbd_make_request
 */
void drbd_request_endio(struct bio *bio)
{
	unsigned long flags;
	struct drbd_request *req = bio->bi_private;
	struct drbd_device *device = req->device;
	struct bio_and_error m;
	enum drbd_req_event what;

	/* If this request was aborted locally before,
	 * but now was completed "successfully",
	 * chances are that this caused arbitrary data corruption.
	 *
	 * "aborting" requests, or force-detaching the disk, is intended for
	 * completely blocked/hung local backing devices which do no longer
	 * complete requests at all, not even do error completions. In this
	 * situation, usually a hard-reset and failover is the only way out.
	 *
	 * By "aborting", basically faking a local error-completion,
	 * we allow for a more graceful switchover by cleanly migrating services.
	 * Still the affected node has to be rebooted "soon".
	 *
	 * By completing these requests, we allow the upper layers to re-use
	 * the associated data pages.
	 *
	 * If later the local backing device "recovers", and now DMAs some data
	 * from disk into the original request pages, in the best case it will
	 * just put random data into unused pages; but typically it will corrupt
	 * meanwhile completely unrelated data, causing all sorts of damage.
	 *
	 * Which means delayed successful completion,
	 * especially for READ requests,
	 * is a reason to panic().
	 *
	 * We assume that a delayed *error* completion is OK,
	 * though we still will complain noisily about it.
	 */
	if (unlikely(req->rq_state & RQ_LOCAL_ABORTED)) {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_emerg(device, "delayed completion of aborted local request; disk-timeout may be too aggressive\n");

		if (!bio->bi_status)
			drbd_panic_after_delayed_completion_of_aborted_request(device);
	}

	/* to avoid recursion in __req_mod */
	if (unlikely(bio->bi_status)) {
		switch (bio_op(bio)) {
		case REQ_OP_WRITE_ZEROES:
		case REQ_OP_DISCARD:
			if (bio->bi_status == BLK_STS_NOTSUPP)
				what = DISCARD_COMPLETED_NOTSUPP;
			else
				what = DISCARD_COMPLETED_WITH_ERROR;
			break;
		case REQ_OP_READ:
			if (bio->bi_opf & REQ_RAHEAD)
				what = READ_AHEAD_COMPLETED_WITH_ERROR;
			else
				what = READ_COMPLETED_WITH_ERROR;
			break;
		default:
			what = WRITE_COMPLETED_WITH_ERROR;
			break;
		}
	} else {
		what = COMPLETED_OK;
	}

	req->private_bio = ERR_PTR(blk_status_to_errno(bio->bi_status));
	bio_put(bio);

	/* not req_mod(), we need irqsave here! */
	spin_lock_irqsave(&device->resource->req_lock, flags);
	__req_mod(req, what, &m);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);
	put_ldev(device);

	if (m.bio)
		complete_master_bio(device, &m);
}

void drbd_csum_ee(struct crypto_shash *tfm, struct drbd_peer_request *peer_req, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct page *page = peer_req->pages;
	struct page *tmp;
	unsigned len;
	void *src;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	src = kmap_atomic(page);
	while ((tmp = page_chain_next(page))) {
		/* all but the last page will be fully used */
		crypto_shash_update(desc, src, PAGE_SIZE);
		kunmap_atomic(src);
		page = tmp;
		src = kmap_atomic(page);
	}
	/* and now the last, possibly only partially used page */
	len = peer_req->i.size & (PAGE_SIZE - 1);
	crypto_shash_update(desc, src, len ?: PAGE_SIZE);
	kunmap_atomic(src);

	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}

void drbd_csum_bio(struct crypto_shash *tfm, struct bio *bio, void *digest)
{
	SHASH_DESC_ON_STACK(desc, tfm);
	struct bio_vec bvec;
	struct bvec_iter iter;

	desc->tfm = tfm;

	crypto_shash_init(desc);

	bio_for_each_segment(bvec, bio, iter) {
		u8 *src;

		src = bvec_kmap_local(&bvec);
		crypto_shash_update(desc, src, bvec.bv_len);
		kunmap_local(src);
	}
	crypto_shash_final(desc, digest);
	shash_desc_zero(desc);
}
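
/*
 * Note (added for clarity): the two helpers above produce the digests used
 * by the resync and online-verify paths later in this file -- drbd_csum_ee()
 * is called with csums_tfm for checksum-based resync (P_CSUM_RS_REQUEST /
 * P_RS_IS_IN_SYNC) and with verify_tfm for online verify (P_OV_REPLY),
 * while drbd_csum_bio() is the equivalent for a locally submitted bio.
 */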

/* MAYBE merge common code with w_e_end_ov_req */
static int w_e_send_csum(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	if (unlikely((peer_req->flags & EE_WAS_ERROR) != 0))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (digest) {
		sector_t sector = peer_req->i.sector;
		unsigned int size = peer_req->i.size;
		drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
		/* Free peer_req and pages before send.
		 * In case we block on congestion, we could otherwise run into
		 * some distributed deadlock, if the other side blocks on
		 * congestion as well, because our receiver blocks in
		 * drbd_alloc_pages due to pp_in_use > max_buffers. */
		drbd_free_peer_req(device, peer_req);
		peer_req = NULL;
		inc_rs_pending(device);
		err = drbd_send_drequest_csum(peer_device, sector, size,
					      digest, digest_size,
					      P_CSUM_RS_REQUEST);
		kfree(digest);
	} else {
		drbd_err(device, "kmalloc() of digest failed.\n");
		err = -ENOMEM;
	}

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_drequest(..., csum) failed\n");
	return err;
}

#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, int size)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	if (!get_ldev(device))
		return -EIO;

	/* GFP_TRY, because if there is no memory available right now, this may
	 * be rescheduled for later. It is "only" background resync, after all. */
	peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
				       size, size, GFP_TRY);
	if (!peer_req)
		goto defer;

	peer_req->w.cb = w_e_send_csum;
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
				     DRBD_FAULT_RS_RD) == 0)
		return 0;

	/* If it failed because of ENOMEM, retry should help. If it failed
	 * because bio_add_page failed (probably broken lower level driver),
	 * retry may or may not help.
	 * If it does not, you may need to force disconnect. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
defer:
	put_ldev(device);
	return -EAGAIN;
}

int w_resync_timer(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, resync_work);

	switch (device->state.conn) {
	case C_VERIFY_S:
		make_ov_request(device, cancel);
		break;
	case C_SYNC_TARGET:
		make_resync_request(device, cancel);
		break;
	}

	return 0;
}

void resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, resync_timer);

	drbd_queue_work_if_unqueued(
		&first_peer_device(device)->connection->sender_work,
		&device->resync_work);
}

static void fifo_set(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] = value;
}

static int fifo_push(struct fifo_buffer *fb, int value)
{
	int ov;

	ov = fb->values[fb->head_index];
	fb->values[fb->head_index++] = value;

	if (fb->head_index >= fb->size)
		fb->head_index = 0;

	return ov;
}

static void fifo_add_val(struct fifo_buffer *fb, int value)
{
	int i;

	for (i = 0; i < fb->size; i++)
		fb->values[i] += value;
}

struct fifo_buffer *fifo_alloc(unsigned int fifo_size)
{
	struct fifo_buffer *fb;

	fb = kzalloc(struct_size(fb, values, fifo_size), GFP_NOIO);
	if (!fb)
		return NULL;

	fb->head_index = 0;
	fb->size = fifo_size;
	fb->total = 0;

	return fb;
}

static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
{
	struct disk_conf *dc;
	unsigned int want;	/* The number of sectors we want in-flight */
	int req_sect;		/* Number of sectors to request in this turn */
	int correction;		/* Number of sectors more we need in-flight */
	int cps;		/* correction per invocation of drbd_rs_controller() */
	int steps;		/* Number of time steps to plan ahead */
	int curr_corr;
	int max_sect;
	struct fifo_buffer *plan;

	dc = rcu_dereference(device->ldev->disk_conf);
	plan = rcu_dereference(device->rs_plan_s);

	steps = plan->size; /* (dc->c_plan_ahead * 10 * SLEEP_TIME) / HZ; */

	if (device->rs_in_flight + sect_in == 0) { /* At start of resync */
		want = ((dc->resync_rate * 2 * SLEEP_TIME) / HZ) * steps;
	} else { /* normal path */
		want = dc->c_fill_target ? dc->c_fill_target :
			sect_in * dc->c_delay_target * HZ / (SLEEP_TIME * 10);
	}

	correction = want - device->rs_in_flight - plan->total;

	/* Plan ahead */
	cps = correction / steps;
	fifo_add_val(plan, cps);
	plan->total += cps * steps;

	/* What we do in this step */
	curr_corr = fifo_push(plan, 0);
	plan->total -= curr_corr;

	req_sect = sect_in + curr_corr;
	if (req_sect < 0)
		req_sect = 0;

	max_sect = (dc->c_max_rate * 2 * SLEEP_TIME) / HZ;
	if (req_sect > max_sect)
		req_sect = max_sect;

	/*
	drbd_warn(device, "si=%u if=%d wa=%u co=%d st=%d cps=%d pl=%d cc=%d rs=%d\n",
		 sect_in, device->rs_in_flight, want, correction,
		 steps, cps, device->rs_planed, curr_corr, req_sect);
	*/

	return req_sect;
}
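
/*
 * Illustration of the controller above (added for clarity; the numbers are
 * an example, assuming SLEEP_TIME is one 100ms polling interval and
 * c_delay_target is given in 0.1s units, as the surrounding comments suggest):
 * with c_fill_target unset and c_delay_target = 10, the target becomes
 *   want = sect_in * c_delay_target * HZ / (SLEEP_TIME * 10) = 10 * sect_in,
 * i.e. keep roughly ten intervals worth of the currently observed resync
 * throughput in flight.  "correction" is the distance to that target, spread
 * over "steps" future invocations through the plan fifo, and the final
 * req_sect is clamped to c_max_rate worth of sectors per interval.
 */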

static int drbd_rs_number_requests(struct drbd_device *device)
{
	unsigned int sect_in;  /* Number of sectors that came in since the last turn */
	int number, mxb;

	sect_in = atomic_xchg(&device->rs_sect_in, 0);
	device->rs_in_flight -= sect_in;

	rcu_read_lock();
	mxb = drbd_get_max_buffers(device) / 2;
	if (rcu_dereference(device->rs_plan_s)->size) {
		number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9);
		device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME;
	} else {
		device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate;
		number = SLEEP_TIME * device->c_sync_rate / ((BM_BLOCK_SIZE / 1024) * HZ);
	}
	rcu_read_unlock();

	/* Don't have more than "max-buffers"/2 in-flight.
	 * Otherwise we may cause the remote site to stall on drbd_alloc_pages(),
	 * potentially causing a distributed deadlock on congestion during
	 * online-verify or (checksum-based) resync, if max-buffers,
	 * socket buffer sizes and resync rate settings are mis-configured. */

	/* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
	 * mxb (as used here, and in drbd_alloc_pages on the peer) is
	 * "number of pages" (typically also 4k),
	 * but "rs_in_flight" is in "sectors" (512 Byte). */
	if (mxb - device->rs_in_flight/8 < number)
		number = mxb - device->rs_in_flight/8;

	return number;
}
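
/*
 * Worked example for the fixed-rate branch above (illustrative only,
 * assuming SLEEP_TIME is HZ/10 and resync_rate is configured in KiB/s,
 * matching the "* 2" KiB-to-sector conversion used in drbd_rs_controller()):
 * resync_rate = 40960 KiB/s gives
 *   number = SLEEP_TIME * 40960 / ((4096/1024) * HZ) = 40960 / 40 = 1024
 * resync requests of BM_BLOCK_SIZE (4 KiB) each per 100ms tick, i.e. about
 * 40 MiB/s, before the max-buffers/2 clamp at the end of the function.
 */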

static int make_resync_request(struct drbd_device *const device, int cancel)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	unsigned long bit;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	int max_bio_size;
	int number, rollback_i, size;
	int align, requeue = 0;
	int i = 0;
	int discard_granularity = 0;

	if (unlikely(cancel))
		return 0;

	if (device->rs_total == 0) {
		/* empty resync? */
		drbd_resync_finished(device);
		return 0;
	}

	if (!get_ldev(device)) {
		/* Since we only need to access device->rsync a
		   get_ldev_if_state(device,D_FAILED) would be sufficient, but
		   to continue resync with a broken disk makes no sense at
		   all */
		drbd_err(device, "Disk broke down during resync!\n");
		return 0;
	}

	if (connection->agreed_features & DRBD_FF_THIN_RESYNC) {
		rcu_read_lock();
		discard_granularity = rcu_dereference(device->ldev->disk_conf)->rs_discard_granularity;
		rcu_read_unlock();
	}

	max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9;
	number = drbd_rs_number_requests(device);
	if (number <= 0)
		goto requeue;

	for (i = 0; i < number; i++) {
		/* Stop generating RS requests when half of the send buffer is filled,
		 * but notify TCP that we'd like to have more space. */
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket) {
			struct sock *sk = connection->data.socket->sk;
			int queued = sk->sk_wmem_queued;
			int sndbuf = sk->sk_sndbuf;
			if (queued > sndbuf / 2) {
				requeue = 1;
				if (sk->sk_socket)
					set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
			}
		} else
			requeue = 1;
		mutex_unlock(&connection->data.mutex);
		if (requeue)
			goto requeue;

next_sector:
		size = BM_BLOCK_SIZE;
		bit = drbd_bm_find_next(device, device->bm_resync_fo);

		if (bit == DRBD_END_OF_BITMAP) {
			device->bm_resync_fo = drbd_bm_bits(device);
			put_ldev(device);
			return 0;
		}

		sector = BM_BIT_TO_SECT(bit);

		if (drbd_try_rs_begin_io(device, sector)) {
			device->bm_resync_fo = bit;
			goto requeue;
		}
		device->bm_resync_fo = bit + 1;

		if (unlikely(drbd_bm_test_bit(device, bit) == 0)) {
			drbd_rs_complete_io(device, sector);
			goto next_sector;
		}

#if DRBD_MAX_BIO_SIZE > BM_BLOCK_SIZE
		/* try to find some adjacent bits.
		 * we stop if we have already the maximum req size.
		 *
		 * Additionally always align bigger requests, in order to
		 * be prepared for all stripe sizes of software RAIDs.
		 */
		align = 1;
		rollback_i = i;
		while (i < number) {
			if (size + BM_BLOCK_SIZE > max_bio_size)
				break;

			/* Be always aligned */
			if (sector & ((1<<(align+3))-1))
				break;

			if (discard_granularity && size == discard_granularity)
				break;

			/* do not cross extent boundaries */
			if (((bit+1) & BM_BLOCKS_PER_BM_EXT_MASK) == 0)
				break;
			/* now, is it actually dirty, after all?
			 * caution, drbd_bm_test_bit is tri-state for some
			 * obscure reason; ( b == 0 ) would get the out-of-band
			 * only accidentally right because of the "oddly sized"
			 * adjustment below */
			if (drbd_bm_test_bit(device, bit+1) != 1)
				break;
			bit++;
			size += BM_BLOCK_SIZE;
			if ((BM_BLOCK_SIZE << align) <= size)
				align++;
			i++;
		}
		/* if we merged some,
		 * reset the offset to start the next drbd_bm_find_next from */
		if (size > BM_BLOCK_SIZE)
			device->bm_resync_fo = bit + 1;
#endif

		/* adjust very last sectors, in case we are oddly sized */
		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		if (device->use_csums) {
			switch (read_for_csum(peer_device, sector, size)) {
			case -EIO: /* Disk failure */
				put_ldev(device);
				return -EIO;
			case -EAGAIN: /* allocation failed, or ldev busy */
				drbd_rs_complete_io(device, sector);
				device->bm_resync_fo = BM_SECT_TO_BIT(sector);
				i = rollback_i;
				goto requeue;
			case 0:
				/* everything ok */
				break;
			default:
				BUG();
			}
		} else {
			int err;

			inc_rs_pending(device);
			err = drbd_send_drequest(peer_device,
						 size == discard_granularity ? P_RS_THIN_REQ : P_RS_DATA_REQUEST,
						 sector, size, ID_SYNCER);
			if (err) {
				drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
				dec_rs_pending(device);
				put_ldev(device);
				return err;
			}
		}
	}

	if (device->bm_resync_fo >= drbd_bm_bits(device)) {
		/* last syncer _request_ was sent,
		 * but the P_RS_DATA_REPLY not yet received.  sync will end (and
		 * next sync group will resume), as soon as we receive the last
		 * resync data block, and the last bit is cleared.
		 * until then resync "work" is "inactive" ...
		 */
		put_ldev(device);
		return 0;
	}

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	put_ldev(device);
	return 0;
}

static int make_ov_request(struct drbd_device *device, int cancel)
{
	int number, i, size;
	sector_t sector;
	const sector_t capacity = get_capacity(device->vdisk);
	bool stop_sector_reached = false;

	if (unlikely(cancel))
		return 1;

	number = drbd_rs_number_requests(device);

	sector = device->ov_position;
	for (i = 0; i < number; i++) {
		if (sector >= capacity)
			return 1;

		/* We check for "finished" only in the reply path:
		 * w_e_end_ov_reply().
		 * We need to send at least one request out. */
		stop_sector_reached = i > 0
			&& verify_can_do_stop_sector(device)
			&& sector >= device->ov_stop_sector;
		if (stop_sector_reached)
			break;

		size = BM_BLOCK_SIZE;

		if (drbd_try_rs_begin_io(device, sector)) {
			device->ov_position = sector;
			goto requeue;
		}

		if (sector + (size>>9) > capacity)
			size = (capacity-sector)<<9;

		inc_rs_pending(device);
		if (drbd_send_ov_request(first_peer_device(device), sector, size)) {
			dec_rs_pending(device);
			return 0;
		}
		sector += BM_SECT_PER_BIT;
	}
	device->ov_position = sector;

 requeue:
	device->rs_in_flight += (i << (BM_BLOCK_SHIFT - 9));
	if (i == 0 || !stop_sector_reached)
		mod_timer(&device->resync_timer, jiffies + SLEEP_TIME);
	return 1;
}

int w_ov_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);
	ov_out_of_sync_print(device);
	drbd_resync_finished(device);

	return 0;
}

static int w_resync_finished(struct drbd_work *w, int cancel)
{
	struct drbd_device_work *dw =
		container_of(w, struct drbd_device_work, w);
	struct drbd_device *device = dw->device;
	kfree(dw);

	drbd_resync_finished(device);

	return 0;
}

static void ping_peer(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;

	clear_bit(GOT_PING_ACK, &connection->flags);
	request_ping(connection);
	wait_event(connection->ping_wait,
		   test_bit(GOT_PING_ACK, &connection->flags) || device->state.conn < C_CONNECTED);
}

int drbd_resync_finished(struct drbd_device *device)
{
	struct drbd_connection *connection = first_peer_device(device)->connection;
	unsigned long db, dt, dbdt;
	unsigned long n_oos;
	union drbd_state os, ns;
	struct drbd_device_work *dw;
	char *khelper_cmd = NULL;
	int verify_done = 0;

	/* Remove all elements from the resync LRU. Since future actions
	 * might set bits in the (main) bitmap, then the entries in the
	 * resync LRU would be wrong. */
	if (drbd_rs_del_all(device)) {
		/* In case this is not possible now, most probably because
		 * there are P_RS_DATA_REPLY Packets lingering on the worker's
		 * queue (or even the read operations for those packets
		 * is not finished by now). Retry in 100ms. */

		schedule_timeout_interruptible(HZ / 10);
		dw = kmalloc(sizeof(struct drbd_device_work), GFP_ATOMIC);
		if (dw) {
			dw->w.cb = w_resync_finished;
			dw->device = device;
			drbd_queue_work(&connection->sender_work, &dw->w);
			return 1;
		}
		drbd_err(device, "Warn failed to drbd_rs_del_all() and to kmalloc(dw).\n");
	}

	dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
	if (dt <= 0)
		dt = 1;

	db = device->rs_total;
	/* adjust for verify start and stop sectors, respective reached position */
	if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
		db -= device->ov_left;

	dbdt = Bit2KB(db/dt);
	device->rs_paused /= HZ;

	if (!get_ldev(device))
		goto out;

	ping_peer(device);

	spin_lock_irq(&device->resource->req_lock);
	os = drbd_read_state(device);

	verify_done = (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T);

	/* This protects us against multiple calls (that can happen in the presence
	   of application IO), and against connectivity loss just before we arrive here. */
	if (os.conn <= C_CONNECTED)
		goto out_unlock;

	ns = os;
	ns.conn = C_CONNECTED;

	drbd_info(device, "%s done (total %lu sec; paused %lu sec; %lu K/sec)\n",
	     verify_done ? "Online verify" : "Resync",
	     dt + device->rs_paused, device->rs_paused, dbdt);

	n_oos = drbd_bm_total_weight(device);

	if (os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) {
		if (n_oos) {
			drbd_alert(device, "Online verify found %lu %dk block out of sync!\n",
			      n_oos, Bit2KB(1));
			khelper_cmd = "out-of-sync";
		}
	} else {
		D_ASSERT(device, (n_oos - device->rs_failed) == 0);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
			khelper_cmd = "after-resync-target";

		if (device->use_csums && device->rs_total) {
			const unsigned long s = device->rs_same_csum;
			const unsigned long t = device->rs_total;
			const int ratio =
				(t == 0)     ? 0 :
				(t < 100000) ? ((s*100)/t) : (s/(t/100));
			drbd_info(device, "%u %% had equal checksums, eliminated: %luK; "
			     "transferred %luK total %luK\n",
			     ratio,
			     Bit2KB(device->rs_same_csum),
			     Bit2KB(device->rs_total - device->rs_same_csum),
			     Bit2KB(device->rs_total));
		}
	}

	if (device->rs_failed) {
		drbd_info(device, " %lu failed blocks\n", device->rs_failed);

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			ns.disk = D_INCONSISTENT;
			ns.pdsk = D_UP_TO_DATE;
		} else {
			ns.disk = D_UP_TO_DATE;
			ns.pdsk = D_INCONSISTENT;
		}
	} else {
		ns.disk = D_UP_TO_DATE;
		ns.pdsk = D_UP_TO_DATE;

		if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) {
			if (device->p_uuid) {
				int i;
				for (i = UI_BITMAP ; i <= UI_HISTORY_END ; i++)
					_drbd_uuid_set(device, i, device->p_uuid[i]);
				drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_CURRENT]);
				_drbd_uuid_set(device, UI_CURRENT, device->p_uuid[UI_CURRENT]);
			} else {
				drbd_err(device, "device->p_uuid is NULL! BUG\n");
			}
		}

		if (!(os.conn == C_VERIFY_S || os.conn == C_VERIFY_T)) {
			/* for verify runs, we don't update uuids here,
			 * so there would be nothing to report. */
			drbd_uuid_set_bm(device, 0UL);
			drbd_print_uuids(device, "updated UUIDs");
			if (device->p_uuid) {
				/* Now the two UUID sets are equal, update what we
				 * know of the peer. */
				int i;
				for (i = UI_CURRENT ; i <= UI_HISTORY_END ; i++)
					device->p_uuid[i] = device->ldev->md.uuid[i];
			}
		}
	}

	_drbd_set_state(device, ns, CS_VERBOSE, NULL);
out_unlock:
	spin_unlock_irq(&device->resource->req_lock);

	/* If we have been sync source, and have an effective fencing-policy,
	 * once *all* volumes are back in sync, call "unfence". */
	if (os.conn == C_SYNC_SOURCE) {
		enum drbd_disk_state disk_state = D_MASK;
		enum drbd_disk_state pdsk_state = D_MASK;
		enum drbd_fencing_p fp = FP_DONT_CARE;

		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		if (fp != FP_DONT_CARE) {
			struct drbd_peer_device *peer_device;
			int vnr;
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
				pdsk_state = min_t(enum drbd_disk_state, pdsk_state, device->state.pdsk);
			}
		}
		rcu_read_unlock();
		if (disk_state == D_UP_TO_DATE && pdsk_state == D_UP_TO_DATE)
			conn_khelper(connection, "unfence-peer");
	}

	put_ldev(device);
out:
	device->rs_total  = 0;
	device->rs_failed = 0;
	device->rs_paused = 0;

	/* reset start sector, if we reached end of device */
	if (verify_done && device->ov_left == 0)
		device->ov_start_sector = 0;

	drbd_md_sync(device);

	if (khelper_cmd)
		drbd_khelper(device, khelper_cmd);

	return 1;
}

/* helper */
static void move_to_net_ee_or_free(struct drbd_device *device, struct drbd_peer_request *peer_req)
{
	if (drbd_peer_req_has_active_page(peer_req)) {
		/* This might happen if sendpage() has not finished */
		int i = PFN_UP(peer_req->i.size);
		atomic_add(i, &device->pp_in_use_by_net);
		atomic_sub(i, &device->pp_in_use);
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->net_ee);
		spin_unlock_irq(&device->resource->req_lock);
		wake_up(&drbd_pp_wait);
	} else
		drbd_free_peer_req(device, peer_req);
}

/**
 * w_e_end_data_req() - Worker callback, to send a P_DATA_REPLY packet in response to a P_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_data_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		err = drbd_send_block(peer_device, P_DATA_REPLY, peer_req);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. sector=%llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_DREPLY, peer_req);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

static bool all_zero(struct drbd_peer_request *peer_req)
{
	struct page *page = peer_req->pages;
	unsigned int len = peer_req->i.size;

	page_chain_for_each(page) {
		unsigned int l = min_t(unsigned int, len, PAGE_SIZE);
		unsigned int i, words = l / sizeof(long);
		unsigned long *d;

		d = kmap_atomic(page);
		for (i = 0; i < words; i++) {
			if (d[i]) {
				kunmap_atomic(d);
				return false;
			}
		}
		kunmap_atomic(d);
		len -= l;
	}

	return true;
}

/**
 * w_e_end_rsdata_req() - Worker callback to send a P_RS_DATA_REPLY packet in response to a P_RS_DATA_REQUEST
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_e_end_rsdata_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	int err;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	if (device->state.conn == C_AHEAD) {
		err = drbd_send_ack(peer_device, P_RS_CANCEL, peer_req);
	} else if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		if (likely(device->state.pdsk >= D_INCONSISTENT)) {
			inc_rs_pending(device);
			if (peer_req->flags & EE_RS_THIN_REQ && all_zero(peer_req))
				err = drbd_send_rs_deallocated(peer_device, peer_req);
			else
				err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		} else {
			if (__ratelimit(&drbd_ratelimit_state))
				drbd_err(device, "Not sending RSDataReply, "
				    "partner DISKLESS!\n");
			err = 0;
		}
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegRSDReply. sector %llus.\n",
			    (unsigned long long)peer_req->i.sector);

		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);

		/* update resync data with failure */
		drbd_rs_failed_io(device, peer_req->i.sector, peer_req->i.size);
	}

	dec_unacked(device);

	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block() failed\n");
	return err;
}

int w_e_end_csum_rs_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	int digest_size;
	void *digest = NULL;
	int err, eq = 0;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		/* quick hack to try to avoid a race against reconfiguration.
		 * a real fix would be much more involved,
		 * introducing more locking mechanisms */
		if (peer_device->connection->csums_tfm) {
			digest_size = crypto_shash_digestsize(peer_device->connection->csums_tfm);
			D_ASSERT(device, digest_size == di->digest_size);
			digest = kmalloc(digest_size, GFP_NOIO);
		}
		if (digest) {
			drbd_csum_ee(peer_device->connection->csums_tfm, peer_req, digest);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}

		if (eq) {
			drbd_set_in_sync(device, peer_req->i.sector, peer_req->i.size);
			/* rs_same_csums unit is BM_BLOCK_SIZE */
			device->rs_same_csum += peer_req->i.size >> BM_BLOCK_SHIFT;
			err = drbd_send_ack(peer_device, P_RS_IS_IN_SYNC, peer_req);
		} else {
			inc_rs_pending(device);
			peer_req->block_id = ID_SYNCER; /* By setting block_id, digest pointer becomes invalid! */
			peer_req->flags &= ~EE_HAS_DIGEST; /* This peer request no longer has a digest pointer */
			kfree(di);
			err = drbd_send_block(peer_device, P_RS_DATA_REPLY, peer_req);
		}
	} else {
		err = drbd_send_ack(peer_device, P_NEG_RS_DREPLY, peer_req);
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Sending NegDReply. I guess it gets messy.\n");
	}

	dec_unacked(device);
	move_to_net_ee_or_free(device, peer_req);

	if (unlikely(err))
		drbd_err(device, "drbd_send_block/ack() failed\n");
	return err;
}

int w_e_end_ov_req(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	void *digest;
	int err = 0;

	if (unlikely(cancel))
		goto out;

	digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
	digest = kmalloc(digest_size, GFP_NOIO);
	if (!digest) {
		err = 1;	/* terminate the connection in case the allocation failed */
		goto out;
	}

	if (likely(!(peer_req->flags & EE_WAS_ERROR)))
		drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);
	else
		memset(digest, 0, digest_size);

	/* Free e and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	peer_req = NULL;
	inc_rs_pending(device);
	err = drbd_send_drequest_csum(peer_device, sector, size, digest, digest_size, P_OV_REPLY);
	if (err)
		dec_rs_pending(device);
	kfree(digest);

out:
	if (peer_req)
		drbd_free_peer_req(device, peer_req);
	dec_unacked(device);
	return err;
}

void drbd_ov_out_of_sync_found(struct drbd_device *device, sector_t sector, int size)
{
	if (device->ov_last_oos_start + device->ov_last_oos_size == sector) {
		device->ov_last_oos_size += size>>9;
	} else {
		device->ov_last_oos_start = sector;
		device->ov_last_oos_size = size>>9;
	}
	drbd_set_out_of_sync(device, sector, size);
}

int w_e_end_ov_reply(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	struct digest_info *di;
	void *digest;
	sector_t sector = peer_req->i.sector;
	unsigned int size = peer_req->i.size;
	int digest_size;
	int err, eq = 0;
	bool stop_sector_reached = false;

	if (unlikely(cancel)) {
		drbd_free_peer_req(device, peer_req);
		dec_unacked(device);
		return 0;
	}

	/* after "cancel", because after drbd_disconnect/drbd_rs_cancel_all
	 * the resync lru has been cleaned up already */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, peer_req->i.sector);
		put_ldev(device);
	}

	di = peer_req->digest;

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		digest_size = crypto_shash_digestsize(peer_device->connection->verify_tfm);
		digest = kmalloc(digest_size, GFP_NOIO);
		if (digest) {
			drbd_csum_ee(peer_device->connection->verify_tfm, peer_req, digest);

			D_ASSERT(device, digest_size == di->digest_size);
			eq = !memcmp(digest, di->digest, digest_size);
			kfree(digest);
		}
	}

	/* Free peer_req and pages before send.
	 * In case we block on congestion, we could otherwise run into
	 * some distributed deadlock, if the other side blocks on
	 * congestion as well, because our receiver blocks in
	 * drbd_alloc_pages due to pp_in_use > max_buffers. */
	drbd_free_peer_req(device, peer_req);
	if (!eq)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	err = drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size,
			       eq ? ID_IN_SYNC : ID_OUT_OF_SYNC);

	dec_unacked(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	stop_sector_reached = verify_can_do_stop_sector(device) &&
		(sector + (size>>9)) >= device->ov_stop_sector;

	if (device->ov_left == 0 || stop_sector_reached) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
	}

	return err;
}

/* FIXME
 * We need to track the number of pending barrier acks,
 * and to be able to wait for them.
 * See also comment in drbd_adm_attach before drbd_suspend_io.
 */
static int drbd_send_barrier(struct drbd_connection *connection)
{
	struct p_barrier *p;
	struct drbd_socket *sock;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	p->barrier = connection->send.current_epoch_nr;
	p->pad = 0;
	connection->send.current_epoch_writes = 0;
	connection->send.last_sent_barrier_jif = jiffies;

	return conn_send_command(connection, sock, P_BARRIER, sizeof(*p), NULL, 0);
}

static int pd_send_unplug_remote(struct drbd_peer_device *pd)
{
	struct drbd_socket *sock = &pd->connection->data;
	if (!drbd_prepare_command(pd, sock))
		return -EIO;
	return drbd_send_command(pd, sock, P_UNPLUG_REMOTE, 0, NULL, 0);
}

int w_send_write_hint(struct drbd_work *w, int cancel)
{
	struct drbd_device *device =
		container_of(w, struct drbd_device, unplug_work);

	if (cancel)
		return 0;
	return pd_send_unplug_remote(first_peer_device(device));
}

static void re_init_if_first_write(struct drbd_connection *connection, unsigned int epoch)
{
	if (!connection->send.seen_any_write_yet) {
		connection->send.seen_any_write_yet = true;
		connection->send.current_epoch_nr = epoch;
		connection->send.current_epoch_writes = 0;
		connection->send.last_sent_barrier_jif = jiffies;
	}
}

static void maybe_send_barrier(struct drbd_connection *connection, unsigned int epoch)
{
	/* re-init if first write on this connection */
	if (!connection->send.seen_any_write_yet)
		return;
	if (connection->send.current_epoch_nr != epoch) {
		if (connection->send.current_epoch_writes)
			drbd_send_barrier(connection);
		connection->send.current_epoch_nr = epoch;
	}
}

int w_send_out_of_sync(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device->connection;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* this time, no connection->send.current_epoch_writes++;
	 * If it was sent, it was the closing barrier for the last
	 * replicated epoch, before we went into AHEAD mode.
	 * No more barriers will be sent, until we leave AHEAD mode again. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_out_of_sync(peer_device, req);
	req_mod(req, OOS_HANDED_TO_NETWORK);

	return err;
}
1444
b411b363
PR
1445/**
1446 * w_send_dblock() - Worker callback to send a P_DATA packet in order to mirror a write request
b411b363
PR
1447 * @w: work object.
1448 * @cancel: The connection will be closed anyways
1449 */
99920dc5 1450int w_send_dblock(struct drbd_work *w, int cancel)
b411b363
PR
1451{
1452 struct drbd_request *req = container_of(w, struct drbd_request, w);
84b8c06b 1453 struct drbd_device *device = req->device;
44a4d551
LE
1454 struct drbd_peer_device *const peer_device = first_peer_device(device);
1455 struct drbd_connection *connection = peer_device->connection;
c51a0ef3 1456 bool do_send_unplug = req->rq_state & RQ_UNPLUG;
99920dc5 1457 int err;
b411b363
PR
1458
1459 if (unlikely(cancel)) {
8554df1c 1460 req_mod(req, SEND_CANCELED);
99920dc5 1461 return 0;
b411b363 1462 }
e5f891b2 1463 req->pre_send_jif = jiffies;
b411b363 1464
bde89a9e
AG
1465 re_init_if_first_write(connection, req->epoch);
1466 maybe_send_barrier(connection, req->epoch);
1467 connection->send.current_epoch_writes++;
b6dd1a89 1468
44a4d551 1469 err = drbd_send_dblock(peer_device, req);
99920dc5 1470 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
b411b363 1471
c51a0ef3
LE
1472 if (do_send_unplug && !err)
1473 pd_send_unplug_remote(peer_device);
1474
99920dc5 1475 return err;
b411b363
PR
1476}
1477
/**
 * w_send_read_req() - Worker callback to send a read request (P_DATA_REQUEST) packet
 * @w:		work object.
 * @cancel:	The connection will be closed anyways
 */
int w_send_read_req(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device->connection;
	bool do_send_unplug = req->rq_state & RQ_UNPLUG;
	int err;

	if (unlikely(cancel)) {
		req_mod(req, SEND_CANCELED);
		return 0;
	}
	req->pre_send_jif = jiffies;

	/* Even read requests may close a write epoch,
	 * if there was one already. */
	maybe_send_barrier(connection, req->epoch);

	err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
				 (unsigned long)req);

	req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);

	if (do_send_unplug && !err)
		pd_send_unplug_remote(peer_device);

	return err;
}

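/* Re-issue a request to the local backing device: re-activate its activity
 * log extent for writes if needed, clone the master bio against the current
 * backing bdev, and hand it to the block layer. */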
int w_restart_disk_io(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_device *device = req->device;

	if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
		drbd_al_begin_io(device, &req->i);

	req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
					   req->master_bio, GFP_NOIO,
					   &drbd_io_bio_set);
	req->private_bio->bi_private = req;
	req->private_bio->bi_end_io = drbd_request_endio;
	submit_bio_noacct(req->private_bio);

	return 0;
}

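/* Walk the resync-after dependency chain; returns 0 if a device we depend on
 * is itself resyncing or has its resync paused, 1 if we may resync now. */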
static int _drbd_may_sync_now(struct drbd_device *device)
{
	struct drbd_device *odev = device;
	int resync_after;

	while (1) {
		if (!odev->ldev || odev->state.disk == D_DISKLESS)
			return 1;
		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		if (resync_after == -1)
			return 1;
		odev = minor_to_device(resync_after);
		if (!odev)
			return 1;
		if ((odev->state.conn >= C_SYNC_SOURCE &&
		     odev->state.conn <= C_PAUSED_SYNC_T) ||
		    odev->state.aftr_isp || odev->state.peer_isp ||
		    odev->state.user_isp)
			return 0;
	}
}

/**
 * drbd_pause_after() - Pause resync on all devices that may not resync now
 * @device:	DRBD device.
 *
 * Called from process context only (admin command and after_state_ch).
 */
static bool drbd_pause_after(struct drbd_device *device)
{
	bool changed = false;
	struct drbd_device *odev;
	int i;

	rcu_read_lock();
	idr_for_each_entry(&drbd_devices, odev, i) {
		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
			continue;
		if (!_drbd_may_sync_now(odev) &&
		    _drbd_set_state(_NS(odev, aftr_isp, 1),
				    CS_HARD, NULL) != SS_NOTHING_TO_DO)
			changed = true;
	}
	rcu_read_unlock();

	return changed;
}

/**
 * drbd_resume_next() - Resume resync on all devices that may resync now
 * @device:	DRBD device.
 *
 * Called from process context only (admin command and worker).
 */
static bool drbd_resume_next(struct drbd_device *device)
{
	bool changed = false;
	struct drbd_device *odev;
	int i;

	rcu_read_lock();
	idr_for_each_entry(&drbd_devices, odev, i) {
		if (odev->state.conn == C_STANDALONE && odev->state.disk == D_DISKLESS)
			continue;
		if (odev->state.aftr_isp) {
			if (_drbd_may_sync_now(odev) &&
			    _drbd_set_state(_NS(odev, aftr_isp, 0),
					    CS_HARD, NULL) != SS_NOTHING_TO_DO)
				changed = true;
		}
	}
	rcu_read_unlock();
	return changed;
}

void resume_next_sg(struct drbd_device *device)
{
	lock_all_resources();
	drbd_resume_next(device);
	unlock_all_resources();
}

void suspend_other_sg(struct drbd_device *device)
{
	lock_all_resources();
	drbd_pause_after(device);
	unlock_all_resources();
}

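/* Validate a new resync-after dependency: minor @o_minor must be in range and
 * must not introduce a dependency cycle back to @device. */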
/* caller must lock_all_resources() */
enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor)
{
	struct drbd_device *odev;
	int resync_after;

	if (o_minor == -1)
		return NO_ERROR;
	if (o_minor < -1 || o_minor > MINORMASK)
		return ERR_RESYNC_AFTER;

	/* check for loops */
	odev = minor_to_device(o_minor);
	while (1) {
		if (odev == device)
			return ERR_RESYNC_AFTER_CYCLE;

		/* You are free to depend on diskless, non-existing,
		 * or not yet/no longer existing minors.
		 * We only reject dependency loops.
		 * We cannot follow the dependency chain beyond a detached or
		 * missing minor.
		 */
		if (!odev || !odev->ldev || odev->state.disk == D_DISKLESS)
			return NO_ERROR;

		rcu_read_lock();
		resync_after = rcu_dereference(odev->ldev->disk_conf)->resync_after;
		rcu_read_unlock();
		/* dependency chain ends here, no cycles. */
		if (resync_after == -1)
			return NO_ERROR;

		/* follow the dependency chain */
		odev = minor_to_device(resync_after);
	}
}

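/* Re-run the pause/resume arbitration for all devices until no further state
 * change results from it. */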
/* caller must lock_all_resources() */
void drbd_resync_after_changed(struct drbd_device *device)
{
	int changed;

	do {
		changed  = drbd_pause_after(device);
		changed |= drbd_resume_next(device);
	} while (changed);
}

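/* Reset the resync rate controller: clear the in-flight and event counters,
 * take a fresh baseline of the backing disk statistics, and empty the
 * RCU-protected fifo plan in place. */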
void drbd_rs_controller_reset(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_disk;
	struct fifo_buffer *plan;

	atomic_set(&device->rs_sect_in, 0);
	atomic_set(&device->rs_sect_ev, 0);
	device->rs_in_flight = 0;
	device->rs_last_events =
		(int)part_stat_read_accum(disk->part0, sectors);

	/* Updating the RCU protected object in place is necessary since
	   this function gets called from atomic context.
	   It is valid since all other updates also lead to a completely
	   empty fifo */
	rcu_read_lock();
	plan = rcu_dereference(device->rs_plan_s);
	plan->total = 0;
	fifo_set(plan, 0);
	rcu_read_unlock();
}

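/* Timer callback: defer the actual resync start to the worker by posting
 * RS_START device work. */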
void start_resync_timer_fn(struct timer_list *t)
{
	struct drbd_device *device = from_timer(device, t, start_resync_timer);
	drbd_device_post_work(device, RS_START);
}

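/* Worker side of RS_START: while acks or resync replies are still outstanding,
 * re-arm the timer and retry later; otherwise become sync source and leave
 * Ahead/Behind mode. */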
static void do_start_resync(struct drbd_device *device)
{
	if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
		drbd_warn(device, "postponing start_resync ...\n");
		device->start_resync_timer.expires = jiffies + HZ/10;
		add_timer(&device->start_resync_timer);
		return;
	}

	drbd_start_resync(device, C_SYNC_SOURCE);
	clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
}

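/* Checksum based resync requires protocol >= 89 and a configured csums
 * algorithm; it may additionally be restricted to resyncs after a crashed
 * primary. */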
static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
{
	bool csums_after_crash_only;
	rcu_read_lock();
	csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
	rcu_read_unlock();
	return connection->agreed_pro_version >= 89 &&		/* supported? */
		connection->csums_tfm &&			/* configured? */
		(csums_after_crash_only == false		/* use for each resync? */
		 || test_bit(CRASHED_PRIMARY, &device->flags));	/* or only after Primary crash? */
}

/**
 * drbd_start_resync() - Start the resync process
 * @device:	DRBD device.
 * @side:	Either C_SYNC_SOURCE or C_SYNC_TARGET
 *
 * This function might bring you directly into one of the
 * C_PAUSED_SYNC_* states.
 */
void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
{
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	union drbd_state ns;
	int r;

	if (device->state.conn >= C_SYNC_SOURCE && device->state.conn < C_AHEAD) {
		drbd_err(device, "Resync already running!\n");
		return;
	}

	if (!connection) {
		drbd_err(device, "No connection to peer, aborting!\n");
		return;
	}

	if (!test_bit(B_RS_H_DONE, &device->flags)) {
		if (side == C_SYNC_TARGET) {
			/* Since application IO was locked out during C_WF_BITMAP_T and
			   C_WF_SYNC_UUID we are still unmodified. Before going to C_SYNC_TARGET
			   we check that we might make the data inconsistent. */
			r = drbd_khelper(device, "before-resync-target");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				drbd_info(device, "before-resync-target handler returned %d, "
					 "dropping connection.\n", r);
				conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return;
			}
		} else /* C_SYNC_SOURCE */ {
			r = drbd_khelper(device, "before-resync-source");
			r = (r >> 8) & 0xff;
			if (r > 0) {
				if (r == 3) {
					drbd_info(device, "before-resync-source handler returned %d, "
						 "ignoring. Old userland tools?", r);
				} else {
					drbd_info(device, "before-resync-source handler returned %d, "
						 "dropping connection.\n", r);
					conn_request_state(connection,
							   NS(conn, C_DISCONNECTING), CS_HARD);
					return;
				}
			}
		}
	}

	if (current == connection->worker.task) {
		/* The worker should not sleep waiting for state_mutex,
		   that can take long */
		if (!mutex_trylock(device->state_mutex)) {
			set_bit(B_RS_H_DONE, &device->flags);
			device->start_resync_timer.expires = jiffies + HZ/5;
			add_timer(&device->start_resync_timer);
			return;
		}
	} else {
		mutex_lock(device->state_mutex);
	}

	lock_all_resources();
	clear_bit(B_RS_H_DONE, &device->flags);
	/* Did some connection breakage or IO error race with us? */
	if (device->state.conn < C_CONNECTED
	|| !get_ldev_if_state(device, D_NEGOTIATING)) {
		unlock_all_resources();
		goto out;
	}

	ns = drbd_read_state(device);

	ns.aftr_isp = !_drbd_may_sync_now(device);

	ns.conn = side;

	if (side == C_SYNC_TARGET)
		ns.disk = D_INCONSISTENT;
	else /* side == C_SYNC_SOURCE */
		ns.pdsk = D_INCONSISTENT;

	r = _drbd_set_state(device, ns, CS_VERBOSE, NULL);
	ns = drbd_read_state(device);

	if (ns.conn < C_CONNECTED)
		r = SS_UNKNOWN_ERROR;

	if (r == SS_SUCCESS) {
		unsigned long tw = drbd_bm_total_weight(device);
		unsigned long now = jiffies;
		int i;

		device->rs_failed = 0;
		device->rs_paused = 0;
		device->rs_same_csum = 0;
		device->rs_last_sect_ev = 0;
		device->rs_total = tw;
		device->rs_start = now;
		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			device->rs_mark_left[i] = tw;
			device->rs_mark_time[i] = now;
		}
		drbd_pause_after(device);
		/* Forget potentially stale cached per resync extent bit-counts.
		 * Open coded drbd_rs_cancel_all(device), we already have IRQs
		 * disabled, and know the disk state is ok. */
		spin_lock(&device->al_lock);
		lc_reset(device->resync);
		device->resync_locked = 0;
		device->resync_wenr = LC_FREE;
		spin_unlock(&device->al_lock);
	}
	unlock_all_resources();

	if (r == SS_SUCCESS) {
		wake_up(&device->al_wait); /* for lc_reset() above */
		/* reset rs_last_bcast when a resync or verify is started,
		 * to deal with potential jiffies wrap. */
		device->rs_last_bcast = jiffies - HZ;

		drbd_info(device, "Began resync as %s (will sync %lu KB [%lu bits set]).\n",
		     drbd_conn_str(ns.conn),
		     (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
		     (unsigned long) device->rs_total);
		if (side == C_SYNC_TARGET) {
			device->bm_resync_fo = 0;
			device->use_csums = use_checksum_based_resync(connection, device);
		} else {
			device->use_csums = false;
		}

		/* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
		 * with w_send_oos, or the sync target will get confused as to
		 * how many bits to resync. We cannot do that always, because for an
		 * empty resync and protocol < 95, we need to do it here, as we call
		 * drbd_resync_finished from here in that case.
		 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
		 * and from after_state_ch otherwise. */
		if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
			drbd_gen_and_send_sync_uuid(peer_device);

		if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
			/* This still has a race (about when exactly the peers
			 * detect connection loss) that can lead to a full sync
			 * on next handshake. In 8.3.9 we fixed this with explicit
			 * resync-finished notifications, but the fix
			 * introduces a protocol change. Sleeping for some
			 * time longer than the ping interval + timeout on the
			 * SyncSource, to give the SyncTarget the chance to
			 * detect connection loss, then waiting for a ping
			 * response (implicit in drbd_resync_finished) reduces
			 * the race considerably, but does not solve it. */
			if (side == C_SYNC_SOURCE) {
				struct net_conf *nc;
				int timeo;

				rcu_read_lock();
				nc = rcu_dereference(connection->net_conf);
				timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
				rcu_read_unlock();
				schedule_timeout_interruptible(timeo);
			}
			drbd_resync_finished(device);
		}

		drbd_rs_controller_reset(device);
		/* ns.conn may already be != device->state.conn,
		 * we may have been paused in between, or become paused until
		 * the timer triggers.
		 * No matter, that is handled in resync_timer_fn() */
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&device->resync_timer, jiffies);

		drbd_md_sync(device);
	}
	put_ldev(device);
out:
	mutex_unlock(device->state_mutex);
}

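/* Lazily write out dirty bitmap pages and broadcast sync progress to
 * userspace; if the resync is done and we are still in a sync state, finish
 * it from here. */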
static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
{
	struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
	device->rs_last_bcast = jiffies;

	if (!get_ldev(device))
		return;

	drbd_bm_write_lazy(device, 0);
	if (resync_done && is_sync_state(device->state.conn))
		drbd_resync_finished(device);

	drbd_bcast_event(device, &sib);
	/* update timestamp, in case it took a while to write out stuff */
	device->rs_last_bcast = jiffies;
	put_ldev(device);
}

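/* Final cleanup of the local backing device: free the resync and activity log
 * LRU caches, release the backing device, and wake up anyone waiting for the
 * detach to complete. */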
static void drbd_ldev_destroy(struct drbd_device *device)
{
	lc_destroy(device->resync);
	device->resync = NULL;
	lc_destroy(device->act_log);
	device->act_log = NULL;

	__acquire(local);
	drbd_backing_dev_free(device, device->ldev);
	device->ldev = NULL;
	__release(local);

	clear_bit(GOING_DISKLESS, &device->flags);
	wake_up(&device->misc_wait);
}

static void go_diskless(struct drbd_device *device)
{
	D_ASSERT(device, device->state.disk == D_FAILED);
	/* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
	 * the protected members anymore, though, so once put_ldev reaches zero
	 * again, it will be safe to free them. */

	/* Try to write changed bitmap pages, read errors may have just
	 * set some bits outside the area covered by the activity log.
	 *
	 * If we have an IO error during the bitmap writeout,
	 * we will want a full sync next time, just in case.
	 * (Do we want a specific meta data flag for this?)
	 *
	 * If that does not make it to stable storage either,
	 * we cannot do anything about that anymore.
	 *
	 * We still need to check if both bitmap and ldev are present, we may
	 * end up here after a failed attach, before ldev was even assigned.
	 */
	if (device->bitmap && device->ldev) {
		/* An interrupted resync or similar is allowed to recount bits
		 * while we detach.
		 * Any modifications would not be expected anymore, though.
		 */
		if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
					"detach", BM_LOCKED_TEST_ALLOWED)) {
			if (test_bit(WAS_READ_ERROR, &device->flags)) {
				drbd_md_set_flag(device, MDF_FULL_SYNC);
				drbd_md_sync(device);
			}
		}
	}

	drbd_force_state(device, NS(disk, D_DISKLESS));
}

static int do_md_sync(struct drbd_device *device)
{
	drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
	drbd_md_sync(device);
	return 0;
}

/* only called from drbd_worker thread, no locking */
void __update_timing_details(
		struct drbd_thread_timing_details *tdp,
		unsigned int *cb_nr,
		void *cb,
		const char *fn, const unsigned int line)
{
	unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
	struct drbd_thread_timing_details *td = tdp + i;

	td->start_jif = jiffies;
	td->cb_addr = cb;
	td->caller_fn = fn;
	td->line = line;
	td->cb_nr = *cb_nr;

	i = (i+1) % DRBD_THREAD_DETAILS_HIST;
	td = tdp + i;
	memset(td, 0, sizeof(*td));

	++(*cb_nr);
}

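/* Run the handlers for all device work bits collected in @todo. */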
static void do_device_work(struct drbd_device *device, const unsigned long todo)
{
	if (test_bit(MD_SYNC, &todo))
		do_md_sync(device);
	if (test_bit(RS_DONE, &todo) ||
	    test_bit(RS_PROGRESS, &todo))
		update_on_disk_bitmap(device, test_bit(RS_DONE, &todo));
	if (test_bit(GO_DISKLESS, &todo))
		go_diskless(device);
	if (test_bit(DESTROY_DISK, &todo))
		drbd_ldev_destroy(device);
	if (test_bit(RS_START, &todo))
		do_start_resync(device);
}

#define DRBD_DEVICE_WORK_MASK	\
	((1UL << GO_DISKLESS)	\
	|(1UL << DESTROY_DISK)	\
	|(1UL << MD_SYNC)	\
	|(1UL << RS_START)	\
	|(1UL << RS_PROGRESS)	\
	|(1UL << RS_DONE)	\
	)

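/* Atomically fetch and clear the device work bits in *flags; all other flag
 * bits are left untouched. */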
static unsigned long get_work_bits(unsigned long *flags)
{
	unsigned long old, new;
	do {
		old = *flags;
		new = old & ~DRBD_DEVICE_WORK_MASK;
	} while (cmpxchg(flags, old, new) != old);
	return old & DRBD_DEVICE_WORK_MASK;
}

static void do_unqueued_work(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		unsigned long todo = get_work_bits(&device->flags);
		if (!todo)
			continue;

		kref_get(&device->kref);
		rcu_read_unlock();
		do_device_work(device, todo);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
{
	spin_lock_irq(&queue->q_lock);
	list_splice_tail_init(&queue->q, work_list);
	spin_unlock_irq(&queue->q_lock);
	return !list_empty(work_list);
}

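/* Fill *work_list from the sender work queue, sleeping until work arrives.
 * While idle, uncork the data socket, send the barrier for a still-open
 * epoch, and stop early on pending device work or a signal; re-cork the
 * socket according to the current configuration before returning. */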
static void wait_for_work(struct drbd_connection *connection, struct list_head *work_list)
{
	DEFINE_WAIT(wait);
	struct net_conf *nc;
	int uncork, cork;

	dequeue_work_batch(&connection->sender_work, work_list);
	if (!list_empty(work_list))
		return;

	/* Still nothing to do?
	 * Maybe we still need to close the current epoch,
	 * even if no new requests are queued yet.
	 *
	 * Also, poke TCP, just in case.
	 * Then wait for new work (or signal). */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	uncork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	if (uncork) {
		mutex_lock(&connection->data.mutex);
		if (connection->data.socket)
			tcp_sock_set_cork(connection->data.socket->sk, false);
		mutex_unlock(&connection->data.mutex);
	}

	for (;;) {
		int send_barrier;
		prepare_to_wait(&connection->sender_work.q_wait, &wait, TASK_INTERRUPTIBLE);
		spin_lock_irq(&connection->resource->req_lock);
		spin_lock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(&connection->sender_work.q))
			list_splice_tail_init(&connection->sender_work.q, work_list);
		spin_unlock(&connection->sender_work.q_lock);	/* FIXME get rid of this one? */
		if (!list_empty(work_list) || signal_pending(current)) {
			spin_unlock_irq(&connection->resource->req_lock);
			break;
		}

		/* We found nothing new to do, no to-be-communicated request,
		 * no other work item. We may still need to close the last
		 * epoch. Next incoming request epoch will be connection ->
		 * current transfer log epoch number. If that is different
		 * from the epoch of the last request we communicated, it is
		 * safe to send the epoch separating barrier now.
		 */
		send_barrier =
			atomic_read(&connection->current_tle_nr) !=
			connection->send.current_epoch_nr;
		spin_unlock_irq(&connection->resource->req_lock);

		if (send_barrier)
			maybe_send_barrier(connection,
					connection->send.current_epoch_nr + 1);

		if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
			break;

		/* drbd_send() may have called flush_signals() */
		if (get_t_state(&connection->worker) != RUNNING)
			break;

		schedule();
		/* we may be woken up for things other than new work, too,
		 * e.g. if the current epoch got closed.
		 * In which case we send the barrier above. */
	}
	finish_wait(&connection->sender_work.q_wait, &wait);

	/* someone may have changed the config while we have been waiting above. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	cork = nc ? nc->tcp_cork : 0;
	rcu_read_unlock();
	mutex_lock(&connection->data.mutex);
	if (connection->data.socket) {
		if (cork)
			tcp_sock_set_cork(connection->data.socket->sk, true);
		else if (!uncork)
			tcp_sock_set_cork(connection->data.socket->sk, false);
	}
	mutex_unlock(&connection->data.mutex);
}

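/* Main loop of the per-connection worker thread: wait for sender and device
 * work, run the queued work callbacks, and escalate a failing callback to
 * C_NETWORK_FAILURE while connected; on exit, drain any remaining work and
 * clean up all devices of this connection. */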
int drbd_worker(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct drbd_work *w = NULL;
	struct drbd_peer_device *peer_device;
	LIST_HEAD(work_list);
	int vnr;

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		if (list_empty(&work_list)) {
			update_worker_timing_details(connection, wait_for_work);
			wait_for_work(connection, &work_list);
		}

		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}

		if (signal_pending(current)) {
			flush_signals(current);
			if (get_t_state(thi) == RUNNING) {
				drbd_warn(connection, "Worker got an unexpected signal\n");
				continue;
			}
			break;
		}

		if (get_t_state(thi) != RUNNING)
			break;

		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
				continue;
			if (connection->cstate >= C_WF_REPORT_PARAMS)
				conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		}
	}

	do {
		if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
			update_worker_timing_details(connection, do_unqueued_work);
			do_unqueued_work(connection);
		}
		if (!list_empty(&work_list)) {
			w = list_first_entry(&work_list, struct drbd_work, list);
			list_del_init(&w->list);
			update_worker_timing_details(connection, w->cb);
			w->cb(w, 1);
		} else
			dequeue_work_batch(&connection->sender_work, &work_list);
	} while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		D_ASSERT(device, device->state.disk == D_DISKLESS && device->state.conn == C_STANDALONE);
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_device_cleanup(device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	return 0;
}