Btrfs, scrub: repair the common data on RAID5/6 if it is corrupted
[linux-block.git] / fs / btrfs / raid56.c
CommitLineData
53b381b3
DW
1/*
2 * Copyright (C) 2012 Fusion-io All rights reserved.
3 * Copyright (C) 2012 Intel Corp. All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public
7 * License v2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public
15 * License along with this program; if not, write to the
16 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 * Boston, MA 021110-1307, USA.
18 */
19#include <linux/sched.h>
20#include <linux/wait.h>
21#include <linux/bio.h>
22#include <linux/slab.h>
23#include <linux/buffer_head.h>
24#include <linux/blkdev.h>
25#include <linux/random.h>
26#include <linux/iocontext.h>
27#include <linux/capability.h>
28#include <linux/ratelimit.h>
29#include <linux/kthread.h>
30#include <linux/raid/pq.h>
31#include <linux/hash.h>
32#include <linux/list_sort.h>
33#include <linux/raid/xor.h>
d7011f5b 34#include <linux/vmalloc.h>
53b381b3 35#include <asm/div64.h>
53b381b3
DW
36#include "ctree.h"
37#include "extent_map.h"
38#include "disk-io.h"
39#include "transaction.h"
40#include "print-tree.h"
41#include "volumes.h"
42#include "raid56.h"
43#include "async-thread.h"
44#include "check-integrity.h"
45#include "rcu-string.h"
46
47/* set when additional merges to this rbio are not allowed */
48#define RBIO_RMW_LOCKED_BIT 1
49
4ae10b3a
CM
50/*
51 * set when this rbio is sitting in the hash, but it is just a cache
52 * of past RMW
53 */
54#define RBIO_CACHE_BIT 2
55
56/*
57 * set when it is safe to trust the stripe_pages for caching
58 */
59#define RBIO_CACHE_READY_BIT 3
60
af8e2d1d
MX
61/*
62 * bbio and raid_map is managed by the caller, so we shouldn't free
63 * them here. And besides that, all rbios with this flag should not
64 * be cached, because we need raid_map to check the rbios' stripe
65 * is the same or not, but it is very likely that the caller has
66 * free raid_map, so don't cache those rbios.
67 */
68#define RBIO_HOLD_BBIO_MAP_BIT 4
69
4ae10b3a
CM
70#define RBIO_CACHE_SIZE 1024
71
53b381b3
DW
/*
 * Tracks one full RAID5/6 stripe (all data stripes plus P/Q) that is
 * currently being read, written, cached or rebuilt.
 */
struct btrfs_raid_bio {
	struct btrfs_fs_info *fs_info;

	/* physical mapping of this stripe across the devices */
	struct btrfs_bio *bbio;

	/*
	 * logical block numbers for the start of each stripe
	 * The last one or two are p/q.  These are sorted,
	 * so raid_map[0] is the start of our full stripe
	 */
	u64 *raid_map;

	/* while we're doing rmw on a stripe
	 * we put it into a hash table so we can
	 * lock the stripe and merge more rbios
	 * into it.
	 */
	struct list_head hash_list;

	/*
	 * LRU list for the stripe cache
	 */
	struct list_head stripe_cache;

	/*
	 * for scheduling work in the helper threads
	 */
	struct btrfs_work work;

	/*
	 * bio list and bio_list_lock are used
	 * to add more bios into the stripe
	 * in hopes of avoiding the full rmw
	 */
	struct bio_list bio_list;
	spinlock_t bio_list_lock;

	/* also protected by the bio_list_lock, the
	 * plug list is used by the plugging code
	 * to collect partial bios while plugged.  The
	 * stripe locking code also uses it to hand off
	 * the stripe lock to the next pending IO
	 */
	struct list_head plug_list;

	/*
	 * flags that tell us if it is safe to
	 * merge with this bio
	 */
	unsigned long flags;

	/* size of each individual stripe on disk */
	int stripe_len;

	/* number of data stripes (no p/q) */
	int nr_data;

	/*
	 * set if we're doing a parity rebuild
	 * for a read from higher up, which is handled
	 * differently from a parity rebuild as part of
	 * rmw
	 */
	int read_rebuild;

	/* first bad stripe, -1 if none (see alloc_rbio) */
	int faila;

	/* second bad stripe (for raid6 use), -1 if none */
	int failb;

	/*
	 * number of pages needed to represent the full
	 * stripe
	 */
	int nr_pages;

	/*
	 * size of all the bios in the bio_list.  This
	 * helps us decide if the rbio maps to a full
	 * stripe or not
	 */
	int bio_list_bytes;

	/* reference count; freed when it drops to zero */
	atomic_t refs;

	/* number of per-device bios still in flight for this rbio */
	atomic_t stripes_pending;

	/* number of stripes that have failed so far */
	atomic_t error;

	/*
	 * these are two arrays of pointers.  We allocate the
	 * rbio big enough to hold them both and setup their
	 * locations when the rbio is allocated
	 */

	/* pointers to pages that we allocated for
	 * reading/writing stripes directly from the disk (including P/Q)
	 */
	struct page **stripe_pages;

	/*
	 * pointers to the pages in the bio_list.  Stored
	 * here for faster lookup
	 */
	struct page **bio_pages;
};
178
179static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
180static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
181static void rmw_work(struct btrfs_work *work);
182static void read_rebuild_work(struct btrfs_work *work);
183static void async_rmw_stripe(struct btrfs_raid_bio *rbio);
184static void async_read_rebuild(struct btrfs_raid_bio *rbio);
185static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
186static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
187static void __free_raid_bio(struct btrfs_raid_bio *rbio);
188static void index_rbio_pages(struct btrfs_raid_bio *rbio);
189static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
190
191/*
192 * the stripe hash table is used for locking, and to collect
193 * bios in hopes of making a full stripe
194 */
195int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
196{
197 struct btrfs_stripe_hash_table *table;
198 struct btrfs_stripe_hash_table *x;
199 struct btrfs_stripe_hash *cur;
200 struct btrfs_stripe_hash *h;
201 int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
202 int i;
83c8266a 203 int table_size;
53b381b3
DW
204
205 if (info->stripe_hash_table)
206 return 0;
207
83c8266a
DS
208 /*
209 * The table is large, starting with order 4 and can go as high as
210 * order 7 in case lock debugging is turned on.
211 *
212 * Try harder to allocate and fallback to vmalloc to lower the chance
213 * of a failing mount.
214 */
215 table_size = sizeof(*table) + sizeof(*h) * num_entries;
216 table = kzalloc(table_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
217 if (!table) {
218 table = vzalloc(table_size);
219 if (!table)
220 return -ENOMEM;
221 }
53b381b3 222
4ae10b3a
CM
223 spin_lock_init(&table->cache_lock);
224 INIT_LIST_HEAD(&table->stripe_cache);
225
53b381b3
DW
226 h = table->table;
227
228 for (i = 0; i < num_entries; i++) {
229 cur = h + i;
230 INIT_LIST_HEAD(&cur->hash_list);
231 spin_lock_init(&cur->lock);
232 init_waitqueue_head(&cur->wait);
233 }
234
235 x = cmpxchg(&info->stripe_hash_table, NULL, table);
83c8266a
DS
236 if (x) {
237 if (is_vmalloc_addr(x))
238 vfree(x);
239 else
240 kfree(x);
241 }
53b381b3
DW
242 return 0;
243}
244
4ae10b3a
CM
/*
 * caching an rbio means to copy anything from the
 * bio_pages array into the stripe_pages array.  We
 * use the page uptodate bit in the stripe cache array
 * to indicate if it has valid data
 *
 * once the caching is done, we set the cache ready
 * bit.
 */
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
	int i;
	char *s;
	char *d;
	int ret;

	/* make sure every stripe page exists before we copy into it */
	ret = alloc_rbio_pages(rbio);
	if (ret)
		/*
		 * allocation failed: leave RBIO_CACHE_READY_BIT unset so
		 * nobody trusts the (possibly partial) stripe_pages
		 */
		return;

	for (i = 0; i < rbio->nr_pages; i++) {
		/* only positions covered by the bio list carry new data */
		if (!rbio->bio_pages[i])
			continue;

		s = kmap(rbio->bio_pages[i]);
		d = kmap(rbio->stripe_pages[i]);

		memcpy(d, s, PAGE_CACHE_SIZE);

		kunmap(rbio->bio_pages[i]);
		kunmap(rbio->stripe_pages[i]);
		SetPageUptodate(rbio->stripe_pages[i]);
	}
	set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
}
280
53b381b3
DW
281/*
282 * we hash on the first logical address of the stripe
283 */
284static int rbio_bucket(struct btrfs_raid_bio *rbio)
285{
286 u64 num = rbio->raid_map[0];
287
288 /*
289 * we shift down quite a bit. We're using byte
290 * addressing, and most of the lower bits are zeros.
291 * This tends to upset hash_64, and it consistently
292 * returns just one or two different values.
293 *
294 * shifting off the lower bits fixes things.
295 */
296 return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
297}
298
4ae10b3a
CM
/*
 * stealing an rbio means taking all the uptodate pages from the stripe
 * array in the source rbio and putting them into the destination rbio
 */
static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
{
	int i;
	struct page *s;
	struct page *d;

	/* only a fully cached source has trustworthy pages */
	if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
		return;

	for (i = 0; i < dest->nr_pages; i++) {
		s = src->stripe_pages[i];
		if (!s || !PageUptodate(s)) {
			continue;
		}

		d = dest->stripe_pages[i];
		if (d)
			__free_page(d);

		/* move (not copy) the page; src gives up its reference */
		dest->stripe_pages[i] = s;
		src->stripe_pages[i] = NULL;
	}
}
326
53b381b3
DW
/*
 * merging means we take the bio_list from the victim and
 * splice it into the destination.  The victim should
 * be discarded afterwards.
 *
 * must be called with dest->rbio_list_lock held
 */
static void merge_rbio(struct btrfs_raid_bio *dest,
		       struct btrfs_raid_bio *victim)
{
	bio_list_merge(&dest->bio_list, &victim->bio_list);
	dest->bio_list_bytes += victim->bio_list_bytes;
	/* leave the victim with an empty but valid list for teardown */
	bio_list_init(&victim->bio_list);
}
341
/*
 * used to prune items that are in the cache.  The caller
 * must hold the hash table lock (table->cache_lock).
 */
static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash_table *table;
	struct btrfs_stripe_hash *h;
	int freeit = 0;

	/*
	 * check the bit again under the hash table lock.
	 */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;
	h = table->table + bucket;

	/* hold the lock for the bucket because we may be
	 * removing it from the hash table
	 */
	spin_lock(&h->lock);

	/*
	 * hold the lock for the bio list because we need
	 * to make sure the bio list is empty
	 */
	spin_lock(&rbio->bio_list_lock);

	if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
		list_del_init(&rbio->stripe_cache);
		table->cache_size -= 1;
		freeit = 1;

		/* if the bio list isn't empty, this rbio is
		 * still involved in an IO.  We take it out
		 * of the cache list, and drop the ref that
		 * was held for the list.
		 *
		 * If the bio_list was empty, we also remove
		 * the rbio from the hash_table, and drop
		 * the corresponding ref
		 */
		if (bio_list_empty(&rbio->bio_list)) {
			if (!list_empty(&rbio->hash_list)) {
				list_del_init(&rbio->hash_list);
				atomic_dec(&rbio->refs);
				BUG_ON(!list_empty(&rbio->plug_list));
			}
		}
	}

	spin_unlock(&rbio->bio_list_lock);
	spin_unlock(&h->lock);

	/* drop the cache-list ref outside of the bucket lock */
	if (freeit)
		__free_raid_bio(rbio);
}
402
/*
 * prune a given rbio from the cache; takes the cache lock and
 * delegates to __remove_rbio_from_cache()
 */
static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	/* unlocked fast path; rechecked under the lock by the helper */
	if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	__remove_rbio_from_cache(rbio);
	spin_unlock_irqrestore(&table->cache_lock, flags);
}
420
/*
 * remove everything in the cache
 */
static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;
	struct btrfs_raid_bio *rbio;

	table = info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	/* __remove_rbio_from_cache() unlinks the head, so this terminates */
	while (!list_empty(&table->stripe_cache)) {
		rbio = list_entry(table->stripe_cache.next,
				  struct btrfs_raid_bio,
				  stripe_cache);
		__remove_rbio_from_cache(rbio);
	}
	spin_unlock_irqrestore(&table->cache_lock, flags);
}
441
442/*
443 * remove all cached entries and free the hash table
444 * used by unmount
53b381b3
DW
445 */
446void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
447{
448 if (!info->stripe_hash_table)
449 return;
4ae10b3a 450 btrfs_clear_rbio_cache(info);
83c8266a
DS
451 if (is_vmalloc_addr(info->stripe_hash_table))
452 vfree(info->stripe_hash_table);
453 else
454 kfree(info->stripe_hash_table);
53b381b3
DW
455 info->stripe_hash_table = NULL;
456}
457
4ae10b3a
CM
/*
 * insert an rbio into the stripe cache.  It
 * must have already been prepared by calling
 * cache_rbio_pages
 *
 * If this rbio was already cached, it gets
 * moved to the front of the lru.
 *
 * If the size of the rbio cache is too big, we
 * prune an item.
 */
static void cache_rbio(struct btrfs_raid_bio *rbio)
{
	struct btrfs_stripe_hash_table *table;
	unsigned long flags;

	/* only pages prepared by cache_rbio_pages() may be cached */
	if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
		return;

	table = rbio->fs_info->stripe_hash_table;

	spin_lock_irqsave(&table->cache_lock, flags);
	spin_lock(&rbio->bio_list_lock);

	/* bump our ref if we were not in the list before */
	if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
		atomic_inc(&rbio->refs);

	if (!list_empty(&rbio->stripe_cache)){
		/* already cached: just refresh its LRU position */
		list_move(&rbio->stripe_cache, &table->stripe_cache);
	} else {
		list_add(&rbio->stripe_cache, &table->stripe_cache);
		table->cache_size += 1;
	}

	spin_unlock(&rbio->bio_list_lock);

	/* over budget: evict the LRU tail (unless that's us) */
	if (table->cache_size > RBIO_CACHE_SIZE) {
		struct btrfs_raid_bio *found;

		found = list_entry(table->stripe_cache.prev,
				  struct btrfs_raid_bio,
				  stripe_cache);

		if (found != rbio)
			__remove_rbio_from_cache(found);
	}

	spin_unlock_irqrestore(&table->cache_lock, flags);
	return;
}
509
53b381b3
DW
510/*
511 * helper function to run the xor_blocks api. It is only
512 * able to do MAX_XOR_BLOCKS at a time, so we need to
513 * loop through.
514 */
515static void run_xor(void **pages, int src_cnt, ssize_t len)
516{
517 int src_off = 0;
518 int xor_src_cnt = 0;
519 void *dest = pages[src_cnt];
520
521 while(src_cnt > 0) {
522 xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
523 xor_blocks(xor_src_cnt, len, dest, pages + src_off);
524
525 src_cnt -= xor_src_cnt;
526 src_off += xor_src_cnt;
527 }
528}
529
530/*
531 * returns true if the bio list inside this rbio
532 * covers an entire stripe (no rmw required).
533 * Must be called with the bio list lock held, or
534 * at a time when you know it is impossible to add
535 * new bios into the list
536 */
537static int __rbio_is_full(struct btrfs_raid_bio *rbio)
538{
539 unsigned long size = rbio->bio_list_bytes;
540 int ret = 1;
541
542 if (size != rbio->nr_data * rbio->stripe_len)
543 ret = 0;
544
545 BUG_ON(size > rbio->nr_data * rbio->stripe_len);
546 return ret;
547}
548
/*
 * locked wrapper around __rbio_is_full(): holds bio_list_lock so the
 * byte count can't change while we look at it
 */
static int rbio_is_full(struct btrfs_raid_bio *rbio)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&rbio->bio_list_lock, flags);
	ret = __rbio_is_full(rbio);
	spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
	return ret;
}
559
560/*
561 * returns 1 if it is safe to merge two rbios together.
562 * The merging is safe if the two rbios correspond to
563 * the same stripe and if they are both going in the same
564 * direction (read vs write), and if neither one is
565 * locked for final IO
566 *
567 * The caller is responsible for locking such that
568 * rmw_locked is safe to test
569 */
570static int rbio_can_merge(struct btrfs_raid_bio *last,
571 struct btrfs_raid_bio *cur)
572{
573 if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
574 test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
575 return 0;
576
4ae10b3a
CM
577 /*
578 * we can't merge with cached rbios, since the
579 * idea is that when we merge the destination
580 * rbio is going to run our IO for us. We can
581 * steal from cached rbio's though, other functions
582 * handle that.
583 */
584 if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
585 test_bit(RBIO_CACHE_BIT, &cur->flags))
586 return 0;
587
53b381b3
DW
588 if (last->raid_map[0] !=
589 cur->raid_map[0])
590 return 0;
591
592 /* reads can't merge with writes */
593 if (last->read_rebuild !=
594 cur->read_rebuild) {
595 return 0;
596 }
597
598 return 1;
599}
600
601/*
602 * helper to index into the pstripe
603 */
604static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index)
605{
606 index += (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
607 return rbio->stripe_pages[index];
608}
609
610/*
611 * helper to index into the qstripe, returns null
612 * if there is no qstripe
613 */
614static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index)
615{
616 if (rbio->nr_data + 1 == rbio->bbio->num_stripes)
617 return NULL;
618
619 index += ((rbio->nr_data + 1) * rbio->stripe_len) >>
620 PAGE_CACHE_SHIFT;
621 return rbio->stripe_pages[index];
622}
623
/*
 * The first stripe in the table for a logical address
 * has the lock.  rbios are added in one of three ways:
 *
 * 1) Nobody has the stripe locked yet.  The rbio is given
 * the lock and 0 is returned.  The caller must start the IO
 * themselves.
 *
 * 2) Someone has the stripe locked, but we're able to merge
 * with the lock owner.  The rbio is freed and the IO will
 * start automatically along with the existing rbio.  1 is returned.
 *
 * 3) Someone has the stripe locked, but we're not able to merge.
 * The rbio is added to the lock owner's plug list, or merged into
 * an rbio already on the plug list.  When the lock owner unlocks,
 * the next rbio on the list is run and the IO is started automatically.
 * 1 is returned
 *
 * If we return 0, the caller still owns the rbio and must continue with
 * IO submission.  If we return 1, the caller must assume the rbio has
 * already been freed.
 */
static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
{
	int bucket = rbio_bucket(rbio);
	struct btrfs_stripe_hash *h = rbio->fs_info->stripe_hash_table->table + bucket;
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *pending;
	unsigned long flags;
	DEFINE_WAIT(wait);
	struct btrfs_raid_bio *freeit = NULL;
	struct btrfs_raid_bio *cache_drop = NULL;
	int ret = 0;
	int walk = 0;

	spin_lock_irqsave(&h->lock, flags);
	list_for_each_entry(cur, &h->hash_list, hash_list) {
		walk++;
		if (cur->raid_map[0] == rbio->raid_map[0]) {
			spin_lock(&cur->bio_list_lock);

			/* can we steal this cached rbio's pages? */
			if (bio_list_empty(&cur->bio_list) &&
			    list_empty(&cur->plug_list) &&
			    test_bit(RBIO_CACHE_BIT, &cur->flags) &&
			    !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
				list_del_init(&cur->hash_list);
				atomic_dec(&cur->refs);

				steal_rbio(cur, rbio);
				/* defer the cache removal until the locks drop */
				cache_drop = cur;
				spin_unlock(&cur->bio_list_lock);

				goto lockit;
			}

			/* can we merge into the lock owner? */
			if (rbio_can_merge(cur, rbio)) {
				merge_rbio(cur, rbio);
				spin_unlock(&cur->bio_list_lock);
				freeit = rbio;
				ret = 1;
				goto out;
			}


			/*
			 * we couldn't merge with the running
			 * rbio, see if we can merge with the
			 * pending ones.  We don't have to
			 * check for rmw_locked because there
			 * is no way they are inside finish_rmw
			 * right now
			 */
			list_for_each_entry(pending, &cur->plug_list,
					    plug_list) {
				if (rbio_can_merge(pending, rbio)) {
					merge_rbio(pending, rbio);
					spin_unlock(&cur->bio_list_lock);
					freeit = rbio;
					ret = 1;
					goto out;
				}
			}

			/* no merging, put us on the tail of the plug list,
			 * our rbio will be started with the currently
			 * running rbio unlocks
			 */
			list_add_tail(&rbio->plug_list, &cur->plug_list);
			spin_unlock(&cur->bio_list_lock);
			ret = 1;
			goto out;
		}
	}
lockit:
	/* nobody owned the stripe: take the lock ourselves */
	atomic_inc(&rbio->refs);
	list_add(&rbio->hash_list, &h->hash_list);
out:
	spin_unlock_irqrestore(&h->lock, flags);
	if (cache_drop)
		remove_rbio_from_cache(cache_drop);
	if (freeit)
		__free_raid_bio(freeit);
	return ret;
}
730
/*
 * called as rmw or parity rebuild is completed.  If the plug list has more
 * rbios waiting for this stripe, the next one on the list will be started
 */
static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
{
	int bucket;
	struct btrfs_stripe_hash *h;
	unsigned long flags;
	int keep_cache = 0;

	bucket = rbio_bucket(rbio);
	h = rbio->fs_info->stripe_hash_table->table + bucket;

	/* only a lone rbio (nothing plugged behind it) may be cached */
	if (list_empty(&rbio->plug_list))
		cache_rbio(rbio);

	spin_lock_irqsave(&h->lock, flags);
	spin_lock(&rbio->bio_list_lock);

	if (!list_empty(&rbio->hash_list)) {
		/*
		 * if we're still cached and there is no other IO
		 * to perform, just leave this rbio here for others
		 * to steal from later
		 */
		if (list_empty(&rbio->plug_list) &&
		    test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
			keep_cache = 1;
			clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
			BUG_ON(!bio_list_empty(&rbio->bio_list));
			goto done;
		}

		list_del_init(&rbio->hash_list);
		atomic_dec(&rbio->refs);

		/*
		 * we use the plug list to hold all the rbios
		 * waiting for the chance to lock this stripe.
		 * hand the lock over to one of them.
		 */
		if (!list_empty(&rbio->plug_list)) {
			struct btrfs_raid_bio *next;
			struct list_head *head = rbio->plug_list.next;

			next = list_entry(head, struct btrfs_raid_bio,
					  plug_list);

			list_del_init(&rbio->plug_list);

			/* the next rbio inherits the stripe lock */
			list_add(&next->hash_list, &h->hash_list);
			atomic_inc(&next->refs);
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);

			if (next->read_rebuild)
				async_read_rebuild(next);
			else {
				/* rmw can reuse any pages we cached */
				steal_rbio(rbio, next);
				async_rmw_stripe(next);
			}

			goto done_nolock;
		} else if (waitqueue_active(&h->wait)) {
			spin_unlock(&rbio->bio_list_lock);
			spin_unlock_irqrestore(&h->lock, flags);
			wake_up(&h->wait);
			goto done_nolock;
		}
	}
done:
	spin_unlock(&rbio->bio_list_lock);
	spin_unlock_irqrestore(&h->lock, flags);

done_nolock:
	if (!keep_cache)
		remove_rbio_from_cache(rbio);
}
810
af8e2d1d
MX
811static inline void
812__free_bbio_and_raid_map(struct btrfs_bio *bbio, u64 *raid_map, int need)
813{
814 if (need) {
815 kfree(raid_map);
816 kfree(bbio);
817 }
818}
819
/*
 * free rbio->bbio and rbio->raid_map unless RBIO_HOLD_BBIO_MAP_BIT says
 * the caller still owns (and will free) them
 */
static inline void free_bbio_and_raid_map(struct btrfs_raid_bio *rbio)
{
	__free_bbio_and_raid_map(rbio->bbio, rbio->raid_map,
			!test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags));
}
825
53b381b3
DW
/*
 * drop one reference; on the last ref release all stripe pages, the
 * bbio/raid_map (if owned) and the rbio itself
 */
static void __free_raid_bio(struct btrfs_raid_bio *rbio)
{
	int i;

	WARN_ON(atomic_read(&rbio->refs) < 0);
	if (!atomic_dec_and_test(&rbio->refs))
		return;

	/* a dying rbio must not be cached, hashed, or carrying bios */
	WARN_ON(!list_empty(&rbio->stripe_cache));
	WARN_ON(!list_empty(&rbio->hash_list));
	WARN_ON(!bio_list_empty(&rbio->bio_list));

	for (i = 0; i < rbio->nr_pages; i++) {
		if (rbio->stripe_pages[i]) {
			__free_page(rbio->stripe_pages[i]);
			rbio->stripe_pages[i] = NULL;
		}
	}

	free_bbio_and_raid_map(rbio);

	kfree(rbio);
}
849
/* release the stripe lock (possibly handing it on) and drop our ref */
static void free_raid_bio(struct btrfs_raid_bio *rbio)
{
	unlock_stripe(rbio);
	__free_raid_bio(rbio);
}
855
/*
 * this frees the rbio and runs through all the bios in the
 * bio_list and calls end_io on them
 */
static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, int err, int uptodate)
{
	/* detach the list first: free_raid_bio() may free the rbio */
	struct bio *cur = bio_list_get(&rbio->bio_list);
	struct bio *next;
	free_raid_bio(rbio);

	while (cur) {
		next = cur->bi_next;
		cur->bi_next = NULL;
		if (uptodate)
			set_bit(BIO_UPTODATE, &cur->bi_flags);
		bio_endio(cur, err);
		cur = next;
	}
}
875
/*
 * end io function used by finish_rmw.  When we finally
 * get here, we've written a full stripe
 */
static void raid_write_end_io(struct bio *bio, int err)
{
	struct btrfs_raid_bio *rbio = bio->bi_private;

	if (err)
		fail_bio_stripe(rbio, bio);

	bio_put(bio);

	/* wait until every per-stripe bio has completed */
	if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

	err = 0;

	/* OK, we have written all the stripes we need to. */
	if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
		err = -EIO;

	rbio_orig_end_io(rbio, err, 0);
	return;
}
901
/*
 * the read/modify/write code wants to use the original bio for
 * any pages it included, and then use the rbio for everything
 * else.  This function decides if a given index (stripe number)
 * and page number in that stripe fall inside the original bio
 * or the rbio.
 *
 * if you set bio_list_only, you'll get a NULL back for any ranges
 * that are outside the bio_list
 *
 * This doesn't take any refs on anything, you get a bare page pointer
 * and the caller must bump refs as required.
 *
 * You must call index_rbio_pages once before you can trust
 * the answers from this function.
 */
static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
				 int index, int pagenr, int bio_list_only)
{
	int chunk_page;
	struct page *p = NULL;

	/* NOTE(review): uses PAGE_SHIFT while most of this file uses
	 * PAGE_CACHE_SHIFT -- same value in practice, but confirm intent */
	chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr;

	spin_lock_irq(&rbio->bio_list_lock);
	p = rbio->bio_pages[chunk_page];
	spin_unlock_irq(&rbio->bio_list_lock);

	if (p || bio_list_only)
		return p;

	/* fall back to the page we read/allocated ourselves */
	return rbio->stripe_pages[chunk_page];
}
935
936/*
937 * number of pages we need for the entire stripe across all the
938 * drives
939 */
940static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
941{
942 unsigned long nr = stripe_len * nr_stripes;
ed6078f7 943 return DIV_ROUND_UP(nr, PAGE_CACHE_SIZE);
53b381b3
DW
944}
945
/*
 * allocation and initial setup for the btrfs_raid_bio.  Note that
 * this does not allocate any pages for rbio->pages.
 *
 * Returns the new rbio or ERR_PTR(-ENOMEM).
 */
static struct btrfs_raid_bio *alloc_rbio(struct btrfs_root *root,
			  struct btrfs_bio *bbio, u64 *raid_map,
			  u64 stripe_len)
{
	struct btrfs_raid_bio *rbio;
	int nr_data = 0;
	int num_pages = rbio_nr_pages(stripe_len, bbio->num_stripes);
	void *p;

	/* one allocation carries the rbio plus both page-pointer arrays */
	rbio = kzalloc(sizeof(*rbio) + num_pages * sizeof(struct page *) * 2,
			GFP_NOFS);
	if (!rbio)
		return ERR_PTR(-ENOMEM);

	bio_list_init(&rbio->bio_list);
	INIT_LIST_HEAD(&rbio->plug_list);
	spin_lock_init(&rbio->bio_list_lock);
	INIT_LIST_HEAD(&rbio->stripe_cache);
	INIT_LIST_HEAD(&rbio->hash_list);
	rbio->bbio = bbio;
	rbio->raid_map = raid_map;
	rbio->fs_info = root->fs_info;
	rbio->stripe_len = stripe_len;
	rbio->nr_pages = num_pages;
	rbio->faila = -1;
	rbio->failb = -1;
	atomic_set(&rbio->refs, 1);
	atomic_set(&rbio->error, 0);
	atomic_set(&rbio->stripes_pending, 0);

	/*
	 * the stripe_pages and bio_pages array point to the extra
	 * memory we allocated past the end of the rbio
	 */
	p = rbio + 1;
	rbio->stripe_pages = p;
	rbio->bio_pages = p + sizeof(struct page *) * num_pages;

	/* RAID6 ends with a Q stripe; RAID5 has only the P stripe */
	if (raid_map[bbio->num_stripes - 1] == RAID6_Q_STRIPE)
		nr_data = bbio->num_stripes - 2;
	else
		nr_data = bbio->num_stripes - 1;

	rbio->nr_data = nr_data;
	return rbio;
}
996
997/* allocate pages for all the stripes in the bio, including parity */
998static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
999{
1000 int i;
1001 struct page *page;
1002
1003 for (i = 0; i < rbio->nr_pages; i++) {
1004 if (rbio->stripe_pages[i])
1005 continue;
1006 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1007 if (!page)
1008 return -ENOMEM;
1009 rbio->stripe_pages[i] = page;
1010 ClearPageUptodate(page);
1011 }
1012 return 0;
1013}
1014
1015/* allocate pages for just the p/q stripes */
1016static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
1017{
1018 int i;
1019 struct page *page;
1020
1021 i = (rbio->nr_data * rbio->stripe_len) >> PAGE_CACHE_SHIFT;
1022
1023 for (; i < rbio->nr_pages; i++) {
1024 if (rbio->stripe_pages[i])
1025 continue;
1026 page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
1027 if (!page)
1028 return -ENOMEM;
1029 rbio->stripe_pages[i] = page;
1030 }
1031 return 0;
1032}
1033
/*
 * add a single page from a specific stripe into our list of bios for IO
 * this will try to merge into existing bios if possible, and returns
 * zero if all went well.
 */
static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
		       struct bio_list *bio_list,
		       struct page *page,
		       int stripe_nr,
		       unsigned long page_index,
		       unsigned long bio_max_len)
{
	struct bio *last = bio_list->tail;
	u64 last_end = 0;
	int ret;
	struct bio *bio;
	struct btrfs_bio_stripe *stripe;
	u64 disk_start;

	stripe = &rbio->bbio->stripes[stripe_nr];
	disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT);

	/* if the device is missing, just fail this stripe */
	if (!stripe->dev->bdev)
		return fail_rbio_index(rbio, stripe_nr);

	/* see if we can add this page onto our existing bio */
	if (last) {
		last_end = (u64)last->bi_iter.bi_sector << 9;
		last_end += last->bi_iter.bi_size;

		/*
		 * we can't merge these if they are from different
		 * devices or if they are not contiguous
		 */
		if (last_end == disk_start && stripe->dev->bdev &&
		    test_bit(BIO_UPTODATE, &last->bi_flags) &&
		    last->bi_bdev == stripe->dev->bdev) {
			ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
			/* anything short of a full page means the bio is full */
			if (ret == PAGE_CACHE_SIZE)
				return 0;
		}
	}

	/* put a new bio on the list; size it for bio_max_len, minimum 1 page */
	bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
	if (!bio)
		return -ENOMEM;

	bio->bi_iter.bi_size = 0;
	bio->bi_bdev = stripe->dev->bdev;
	bio->bi_iter.bi_sector = disk_start >> 9;
	set_bit(BIO_UPTODATE, &bio->bi_flags);

	bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
	bio_list_add(bio_list, bio);
	return 0;
}
1092
/*
 * while we're doing the read/modify/write cycle, we could
 * have errors in reading pages off the disk.  This checks
 * for errors and if we're not able to read the page it'll
 * trigger parity reconstruction.  The rmw will be finished
 * after we've reconstructed the failed stripes
 */
static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
{
	if (rbio->faila >= 0 || rbio->failb >= 0) {
		/* faila must never be the last (Q) stripe here */
		BUG_ON(rbio->faila == rbio->bbio->num_stripes - 1);
		__raid56_parity_recover(rbio);
	} else {
		finish_rmw(rbio);
	}
}
1109
1110/*
1111 * these are just the pages from the rbio array, not from anything
1112 * the FS sent down to us
1113 */
1114static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, int page)
1115{
1116 int index;
1117 index = stripe * (rbio->stripe_len >> PAGE_CACHE_SHIFT);
1118 index += page;
1119 return rbio->stripe_pages[index];
1120}
1121
1122/*
1123 * helper function to walk our bio list and populate the bio_pages array with
1124 * the result. This seems expensive, but it is faster than constantly
1125 * searching through the bio list as we setup the IO in finish_rmw or stripe
1126 * reconstruction.
1127 *
1128 * This must be called before you trust the answers from page_in_rbio
1129 */
1130static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1131{
1132 struct bio *bio;
1133 u64 start;
1134 unsigned long stripe_offset;
1135 unsigned long page_index;
1136 struct page *p;
1137 int i;
1138
1139 spin_lock_irq(&rbio->bio_list_lock);
1140 bio_list_for_each(bio, &rbio->bio_list) {
4f024f37 1141 start = (u64)bio->bi_iter.bi_sector << 9;
53b381b3
DW
1142 stripe_offset = start - rbio->raid_map[0];
1143 page_index = stripe_offset >> PAGE_CACHE_SHIFT;
1144
1145 for (i = 0; i < bio->bi_vcnt; i++) {
1146 p = bio->bi_io_vec[i].bv_page;
1147 rbio->bio_pages[page_index + i] = p;
1148 }
1149 }
1150 spin_unlock_irq(&rbio->bio_list_lock);
1151}
1152
1153/*
1154 * this is called from one of two situations. We either
1155 * have a full stripe from the higher layers, or we've read all
1156 * the missing bits off disk.
1157 *
1158 * This will calculate the parity and then send down any
1159 * changed blocks.
1160 */
/*
 * Compute the new P (and, on raid6, Q) pages for a full stripe and
 * submit WRITE bios for every page the higher layers changed plus the
 * parity pages.  Called either with a full stripe from the upper
 * layers, or after raid56_rmw_stripe() has read the missing pieces.
 * On failure to build the bios the rbio is ended with -EIO.
 */
static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
{
	struct btrfs_bio *bbio = rbio->bbio;
	/* one kmapped page pointer per stripe for the parity math */
	void *pointers[bbio->num_stripes];
	int stripe_len = rbio->stripe_len;
	int nr_data = rbio->nr_data;
	int stripe;
	int pagenr;
	/* index of the P (xor) stripe; always present */
	int p_stripe = -1;
	/* index of the Q (raid6 syndrome) stripe; -1 on raid5 */
	int q_stripe = -1;
	struct bio_list bio_list;
	struct bio *bio;
	int pages_per_stripe = stripe_len >> PAGE_CACHE_SHIFT;
	int ret;

	bio_list_init(&bio_list);

	/* one stripe beyond the data is raid5 (P), two is raid6 (P+Q) */
	if (bbio->num_stripes - rbio->nr_data == 1) {
		p_stripe = bbio->num_stripes - 1;
	} else if (bbio->num_stripes - rbio->nr_data == 2) {
		p_stripe = bbio->num_stripes - 2;
		q_stripe = bbio->num_stripes - 1;
	} else {
		BUG();
	}

	/* at this point we either have a full stripe,
	 * or we've read the full stripe from the drive.
	 * recalculate the parity and write the new results.
	 *
	 * We're not allowed to add any new bios to the
	 * bio list here, anyone else that wants to
	 * change this stripe needs to do their own rmw.
	 */
	spin_lock_irq(&rbio->bio_list_lock);
	set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
	spin_unlock_irq(&rbio->bio_list_lock);

	atomic_set(&rbio->error, 0);

	/*
	 * now that we've set rmw_locked, run through the
	 * bio list one last time and map the page pointers
	 *
	 * We don't cache full rbios because we're assuming
	 * the higher layers are unlikely to use this area of
	 * the disk again soon. If they do use it again,
	 * hopefully they will send another full bio.
	 */
	index_rbio_pages(rbio);
	if (!rbio_is_full(rbio))
		cache_rbio_pages(rbio);
	else
		clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

	for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
		struct page *p;
		/* first collect one page from each data stripe */
		for (stripe = 0; stripe < nr_data; stripe++) {
			p = page_in_rbio(rbio, stripe, pagenr, 0);
			pointers[stripe] = kmap(p);
		}

		/* then add the parity stripe */
		p = rbio_pstripe_page(rbio, pagenr);
		SetPageUptodate(p);
		pointers[stripe++] = kmap(p);

		if (q_stripe != -1) {

			/*
			 * raid6, add the qstripe and call the
			 * library function to fill in our p/q
			 */
			p = rbio_qstripe_page(rbio, pagenr);
			SetPageUptodate(p);
			pointers[stripe++] = kmap(p);

			raid6_call.gen_syndrome(bbio->num_stripes, PAGE_SIZE,
						pointers);
		} else {
			/* raid5: P = xor of all data pages */
			memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
			run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
		}


		/*
		 * NOTE(review): the kunmap goes through page_in_rbio()
		 * for the parity stripes too — presumably it resolves to
		 * the same pages kmapped above; confirm against
		 * page_in_rbio()'s handling of stripe >= nr_data.
		 */
		for (stripe = 0; stripe < bbio->num_stripes; stripe++)
			kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
	}

	/*
	 * time to start writing.  Make bios for everything from the
	 * higher layers (the bio_list in our rbio) and our p/q.  Ignore
	 * everything else.
	 */
	for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
		for (pagenr = 0; pagenr < pages_per_stripe; pagenr++) {
			struct page *page;
			if (stripe < rbio->nr_data) {
				/* data: only pages the upper layers changed */
				page = page_in_rbio(rbio, stripe, pagenr, 1);
				if (!page)
					continue;
			} else {
				/* parity: always written */
				page = rbio_stripe_page(rbio, stripe, pagenr);
			}

			ret = rbio_add_io_page(rbio, &bio_list,
				       page, stripe, pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

	atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
	BUG_ON(atomic_read(&rbio->stripes_pending) == 0);

	while (1) {
		bio = bio_list_pop(&bio_list);
		if (!bio)
			break;

		bio->bi_private = rbio;
		bio->bi_end_io = raid_write_end_io;
		BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
		submit_bio(WRITE, bio);
	}
	return;

cleanup:
	rbio_orig_end_io(rbio, -EIO, 0);
}
1293
1294/*
1295 * helper to find the stripe number for a given bio. Used to figure out which
1296 * stripe has failed. This expects the bio to correspond to a physical disk,
1297 * so it looks up based on physical sector numbers.
1298 */
1299static int find_bio_stripe(struct btrfs_raid_bio *rbio,
1300 struct bio *bio)
1301{
4f024f37 1302 u64 physical = bio->bi_iter.bi_sector;
53b381b3
DW
1303 u64 stripe_start;
1304 int i;
1305 struct btrfs_bio_stripe *stripe;
1306
1307 physical <<= 9;
1308
1309 for (i = 0; i < rbio->bbio->num_stripes; i++) {
1310 stripe = &rbio->bbio->stripes[i];
1311 stripe_start = stripe->physical;
1312 if (physical >= stripe_start &&
1313 physical < stripe_start + rbio->stripe_len) {
1314 return i;
1315 }
1316 }
1317 return -1;
1318}
1319
1320/*
1321 * helper to find the stripe number for a given
1322 * bio (before mapping). Used to figure out which stripe has
1323 * failed. This looks up based on logical block numbers.
1324 */
1325static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio,
1326 struct bio *bio)
1327{
4f024f37 1328 u64 logical = bio->bi_iter.bi_sector;
53b381b3
DW
1329 u64 stripe_start;
1330 int i;
1331
1332 logical <<= 9;
1333
1334 for (i = 0; i < rbio->nr_data; i++) {
1335 stripe_start = rbio->raid_map[i];
1336 if (logical >= stripe_start &&
1337 logical < stripe_start + rbio->stripe_len) {
1338 return i;
1339 }
1340 }
1341 return -1;
1342}
1343
1344/*
1345 * returns -EIO if we had too many failures
1346 */
1347static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed)
1348{
1349 unsigned long flags;
1350 int ret = 0;
1351
1352 spin_lock_irqsave(&rbio->bio_list_lock, flags);
1353
1354 /* we already know this stripe is bad, move on */
1355 if (rbio->faila == failed || rbio->failb == failed)
1356 goto out;
1357
1358 if (rbio->faila == -1) {
1359 /* first failure on this rbio */
1360 rbio->faila = failed;
b89e1b01 1361 atomic_inc(&rbio->error);
53b381b3
DW
1362 } else if (rbio->failb == -1) {
1363 /* second failure on this rbio */
1364 rbio->failb = failed;
b89e1b01 1365 atomic_inc(&rbio->error);
53b381b3
DW
1366 } else {
1367 ret = -EIO;
1368 }
1369out:
1370 spin_unlock_irqrestore(&rbio->bio_list_lock, flags);
1371
1372 return ret;
1373}
1374
1375/*
1376 * helper to fail a stripe based on a physical disk
1377 * bio.
1378 */
1379static int fail_bio_stripe(struct btrfs_raid_bio *rbio,
1380 struct bio *bio)
1381{
1382 int failed = find_bio_stripe(rbio, bio);
1383
1384 if (failed < 0)
1385 return -EIO;
1386
1387 return fail_rbio_index(rbio, failed);
1388}
1389
1390/*
1391 * this sets each page in the bio uptodate. It should only be used on private
1392 * rbio pages, nothing that comes in from the higher layers
1393 */
1394static void set_bio_pages_uptodate(struct bio *bio)
1395{
1396 int i;
1397 struct page *p;
1398
1399 for (i = 0; i < bio->bi_vcnt; i++) {
1400 p = bio->bi_io_vec[i].bv_page;
1401 SetPageUptodate(p);
1402 }
1403}
1404
1405/*
1406 * end io for the read phase of the rmw cycle. All the bios here are physical
1407 * stripe bios we've read from the disk so we can recalculate the parity of the
1408 * stripe.
1409 *
1410 * This will usually kick off finish_rmw once all the bios are read in, but it
1411 * may trigger parity reconstruction if we had any errors along the way
1412 */
1413static void raid_rmw_end_io(struct bio *bio, int err)
1414{
1415 struct btrfs_raid_bio *rbio = bio->bi_private;
1416
1417 if (err)
1418 fail_bio_stripe(rbio, bio);
1419 else
1420 set_bio_pages_uptodate(bio);
1421
1422 bio_put(bio);
1423
b89e1b01 1424 if (!atomic_dec_and_test(&rbio->stripes_pending))
53b381b3
DW
1425 return;
1426
1427 err = 0;
b89e1b01 1428 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
53b381b3
DW
1429 goto cleanup;
1430
1431 /*
1432 * this will normally call finish_rmw to start our write
1433 * but if there are any failed stripes we'll reconstruct
1434 * from parity first
1435 */
1436 validate_rbio_for_rmw(rbio);
1437 return;
1438
1439cleanup:
1440
1441 rbio_orig_end_io(rbio, -EIO, 0);
1442}
1443
1444static void async_rmw_stripe(struct btrfs_raid_bio *rbio)
1445{
9e0af237
LB
1446 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1447 rmw_work, NULL, NULL);
53b381b3 1448
d05a33ac
QW
1449 btrfs_queue_work(rbio->fs_info->rmw_workers,
1450 &rbio->work);
53b381b3
DW
1451}
1452
1453static void async_read_rebuild(struct btrfs_raid_bio *rbio)
1454{
9e0af237
LB
1455 btrfs_init_work(&rbio->work, btrfs_rmw_helper,
1456 read_rebuild_work, NULL, NULL);
53b381b3 1457
d05a33ac
QW
1458 btrfs_queue_work(rbio->fs_info->rmw_workers,
1459 &rbio->work);
53b381b3
DW
1460}
1461
1462/*
1463 * the stripe must be locked by the caller. It will
1464 * unlock after all the writes are done
1465 */
/*
 * Read phase of the read/modify/write cycle.  Builds and submits READ
 * bios for every data page not already present in the bio list or
 * stripe cache; raid_rmw_end_io() continues with finish_rmw() (or
 * recovery) once the reads finish.  If nothing needs reading we
 * validate and finish immediately.  Caller must hold the stripe lock.
 * Returns 0 on success, -EIO after ending the rbio on failure.
 */
static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
	int pagenr;
	int stripe;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_pages(rbio);
	if (ret)
		goto cleanup;

	index_rbio_pages(rbio);

	atomic_set(&rbio->error, 0);
	/*
	 * build a list of bios to read all the missing parts of this
	 * stripe
	 */
	for (stripe = 0; stripe < rbio->nr_data; stripe++) {
		for (pagenr = 0; pagenr < nr_pages; pagenr++) {
			struct page *page;
			/*
			 * we want to find all the pages missing from
			 * the rbio and read them from the disk.  If
			 * page_in_rbio finds a page in the bio list
			 * we don't need to read it off the stripe.
			 */
			page = page_in_rbio(rbio, stripe, pagenr, 1);
			if (page)
				continue;

			page = rbio_stripe_page(rbio, stripe, pagenr);
			/*
			 * the bio cache may have handed us an uptodate
			 * page.  If so, be happy and use it
			 */
			if (PageUptodate(page))
				continue;

			ret = rbio_add_io_page(rbio, &bio_list, page,
				       stripe, pagenr, rbio->stripe_len);
			if (ret)
				goto cleanup;
		}
	}

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * this can happen if others have merged with
		 * us, it means there is nothing left to read.
		 * But if there are missing devices it may not be
		 * safe to do the full stripe write yet.
		 */
		goto finish;
	}

	/*
	 * the bbio may be freed once we submit the last bio.  Make sure
	 * not to touch it after that
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	while (1) {
		bio = bio_list_pop(&bio_list);
		if (!bio)
			break;

		bio->bi_private = rbio;
		bio->bi_end_io = raid_rmw_end_io;

		btrfs_bio_wq_end_io(rbio->fs_info, bio,
				    BTRFS_WQ_ENDIO_RAID56);

		BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
		submit_bio(READ, bio);
	}
	/* the actual write will happen once the reads are done */
	return 0;

cleanup:
	rbio_orig_end_io(rbio, -EIO, 0);
	return -EIO;

finish:
	validate_rbio_for_rmw(rbio);
	return 0;
}
1558
1559/*
1560 * if the upper layers pass in a full stripe, we thank them by only allocating
1561 * enough pages to hold the parity, and sending it all down quickly.
1562 */
/*
 * Fast path for a full-stripe write from the upper layers: allocate
 * only the parity pages (data comes from the bio list) and, if nobody
 * else holds the stripe lock, compute and submit immediately.
 * Frees the rbio and returns the error if allocation fails.
 */
static int full_stripe_write(struct btrfs_raid_bio *rbio)
{
	int ret;

	ret = alloc_rbio_parity_pages(rbio);
	if (ret) {
		__free_raid_bio(rbio);
		return ret;
	}

	if (lock_stripe_add(rbio) == 0)
		finish_rmw(rbio);
	return 0;
}
1578
1579/*
1580 * partial stripe writes get handed over to async helpers.
1581 * We're really hoping to merge a few more writes into this
1582 * rbio before calculating new parity
1583 */
/*
 * Hand a partial-stripe write to the async helpers, hoping a few more
 * writes merge into this rbio before the new parity is calculated.
 * Only kicks the work if we won the stripe lock.
 */
static int partial_stripe_write(struct btrfs_raid_bio *rbio)
{
	if (lock_stripe_add(rbio) == 0)
		async_rmw_stripe(rbio);
	return 0;
}
1593
1594/*
1595 * sometimes while we were reading from the drive to
1596 * recalculate parity, enough new bios come into create
1597 * a full stripe. So we do a check here to see if we can
1598 * go directly to finish_rmw
1599 */
/*
 * Dispatch a write rbio: full stripes skip the RMW read phase
 * entirely, everything else goes through the rmw machinery.
 */
static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
{
	if (rbio_is_full(rbio))
		return full_stripe_write(rbio);
	return partial_stripe_write(rbio);
}
1607
6ac0f488
CM
1608/*
1609 * We use plugging call backs to collect full stripes.
1610 * Any time we get a partial stripe write while plugged
1611 * we collect it into a list. When the unplug comes down,
1612 * we sort the list by logical block number and merge
1613 * everything we can into the same rbios
1614 */
struct btrfs_plug_cb {
	struct blk_plug_cb cb;		/* handle registered via blk_check_plugged() */
	struct btrfs_fs_info *info;	/* set lazily on first rbio added */
	struct list_head rbio_list;	/* partial-stripe rbios collected while plugged */
	struct btrfs_work work;		/* deferred unplug when called from schedule */
};
1621
1622/*
1623 * rbios on the plug list are sorted for easier merging.
1624 */
1625static int plug_cmp(void *priv, struct list_head *a, struct list_head *b)
1626{
1627 struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
1628 plug_list);
1629 struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1630 plug_list);
4f024f37
KO
1631 u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
1632 u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
6ac0f488
CM
1633
1634 if (a_sector < b_sector)
1635 return -1;
1636 if (a_sector > b_sector)
1637 return 1;
1638 return 0;
1639}
1640
1641static void run_plug(struct btrfs_plug_cb *plug)
1642{
1643 struct btrfs_raid_bio *cur;
1644 struct btrfs_raid_bio *last = NULL;
1645
1646 /*
1647 * sort our plug list then try to merge
1648 * everything we can in hopes of creating full
1649 * stripes.
1650 */
1651 list_sort(NULL, &plug->rbio_list, plug_cmp);
1652 while (!list_empty(&plug->rbio_list)) {
1653 cur = list_entry(plug->rbio_list.next,
1654 struct btrfs_raid_bio, plug_list);
1655 list_del_init(&cur->plug_list);
1656
1657 if (rbio_is_full(cur)) {
1658 /* we have a full stripe, send it down */
1659 full_stripe_write(cur);
1660 continue;
1661 }
1662 if (last) {
1663 if (rbio_can_merge(last, cur)) {
1664 merge_rbio(last, cur);
1665 __free_raid_bio(cur);
1666 continue;
1667
1668 }
1669 __raid56_parity_write(last);
1670 }
1671 last = cur;
1672 }
1673 if (last) {
1674 __raid56_parity_write(last);
1675 }
1676 kfree(plug);
1677}
1678
1679/*
1680 * if the unplug comes from schedule, we have to push the
1681 * work off to a helper thread
1682 */
1683static void unplug_work(struct btrfs_work *work)
1684{
1685 struct btrfs_plug_cb *plug;
1686 plug = container_of(work, struct btrfs_plug_cb, work);
1687 run_plug(plug);
1688}
1689
1690static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1691{
1692 struct btrfs_plug_cb *plug;
1693 plug = container_of(cb, struct btrfs_plug_cb, cb);
1694
1695 if (from_schedule) {
9e0af237
LB
1696 btrfs_init_work(&plug->work, btrfs_rmw_helper,
1697 unplug_work, NULL, NULL);
d05a33ac
QW
1698 btrfs_queue_work(plug->info->rmw_workers,
1699 &plug->work);
6ac0f488
CM
1700 return;
1701 }
1702 run_plug(plug);
1703}
1704
53b381b3
DW
1705/*
1706 * our main entry point for writes from the rest of the FS.
1707 */
/*
 * Main entry point for raid5/6 writes from the rest of the FS.
 * Wraps @bio in an rbio; full stripes are submitted immediately,
 * partial ones are parked on the current task's block plug (if any)
 * so later writes can merge into the same stripe.
 * Takes ownership of @bbio/@raid_map (freed on allocation failure).
 */
int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
			struct btrfs_bio *bbio, u64 *raid_map,
			u64 stripe_len)
{
	struct btrfs_raid_bio *rbio;
	struct btrfs_plug_cb *plug = NULL;
	struct blk_plug_cb *cb;

	rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
	if (IS_ERR(rbio)) {
		__free_bbio_and_raid_map(bbio, raid_map, 1);
		return PTR_ERR(rbio);
	}
	bio_list_add(&rbio->bio_list, bio);
	rbio->bio_list_bytes = bio->bi_iter.bi_size;

	/*
	 * don't plug on full rbios, just get them out the door
	 * as quickly as we can
	 */
	if (rbio_is_full(rbio))
		return full_stripe_write(rbio);

	cb = blk_check_plugged(btrfs_raid_unplug, root->fs_info,
			       sizeof(*plug));
	if (cb) {
		plug = container_of(cb, struct btrfs_plug_cb, cb);
		/* first use of this plug: initialize our private part */
		if (!plug->info) {
			plug->info = root->fs_info;
			INIT_LIST_HEAD(&plug->rbio_list);
		}
		list_add_tail(&rbio->plug_list, &plug->rbio_list);
	} else {
		/* no plug active, submit via the normal write path */
		return __raid56_parity_write(rbio);
	}
	return 0;
}
1745
1746/*
1747 * all parity reconstruction happens here. We've read in everything
1748 * we can find from the drives and this does the heavy lifting of
1749 * sorting the good from the bad.
1750 */
/*
 * Core reconstruction: rebuild the stripes recorded in faila/failb from
 * whatever good data and parity we managed to read.  Page-by-page it
 * kmaps one page per stripe, runs the raid5 xor or the raid6 library
 * recovery, then unmaps.  On success a read-rebuild ends the original
 * bio; an rmw rebuild continues into finish_rmw().
 */
static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
{
	int pagenr, stripe;
	void **pointers;
	int faila = -1, failb = -1;
	int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
	struct page *page;
	int err;
	int i;

	/* one kmapped page pointer per stripe for the recovery math */
	pointers = kzalloc(rbio->bbio->num_stripes * sizeof(void *),
			   GFP_NOFS);
	if (!pointers) {
		err = -ENOMEM;
		goto cleanup_io;
	}

	faila = rbio->faila;
	failb = rbio->failb;

	if (rbio->read_rebuild) {
		/* keep further merges out while we rebuild for a read */
		spin_lock_irq(&rbio->bio_list_lock);
		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
		spin_unlock_irq(&rbio->bio_list_lock);
	}

	index_rbio_pages(rbio);

	for (pagenr = 0; pagenr < nr_pages; pagenr++) {
		/* setup our array of pointers with pages
		 * from each stripe
		 */
		for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
			/*
			 * if we're rebuilding a read, we have to use
			 * pages from the bio list
			 */
			if (rbio->read_rebuild &&
			    (stripe == faila || stripe == failb)) {
				page = page_in_rbio(rbio, stripe, pagenr, 0);
			} else {
				page = rbio_stripe_page(rbio, stripe, pagenr);
			}
			pointers[stripe] = kmap(page);
		}

		/* all raid6 handling here */
		if (rbio->raid_map[rbio->bbio->num_stripes - 1] ==
		    RAID6_Q_STRIPE) {

			/*
			 * single failure, rebuild from parity raid5
			 * style
			 */
			if (failb < 0) {
				if (faila == rbio->nr_data) {
					/*
					 * Just the P stripe has failed, without
					 * a bad data or Q stripe.
					 * TODO, we should redo the xor here.
					 */
					err = -EIO;
					goto cleanup;
				}
				/*
				 * a single failure in raid6 is rebuilt
				 * in the pstripe code below
				 */
				goto pstripe;
			}

			/* make sure our ps and qs are in order */
			if (faila > failb) {
				int tmp = failb;
				failb = faila;
				faila = tmp;
			}

			/* if the q stripe is failed, do a pstripe reconstruction
			 * from the xors.
			 * If both the q stripe and the P stripe are failed, we're
			 * here due to a crc mismatch and we can't give them the
			 * data they want
			 */
			if (rbio->raid_map[failb] == RAID6_Q_STRIPE) {
				if (rbio->raid_map[faila] == RAID5_P_STRIPE) {
					err = -EIO;
					goto cleanup;
				}
				/*
				 * otherwise we have one bad data stripe and
				 * a good P stripe.  raid5!
				 */
				goto pstripe;
			}

			if (rbio->raid_map[failb] == RAID5_P_STRIPE) {
				/* one data stripe and P lost: data + Q recovery */
				raid6_datap_recov(rbio->bbio->num_stripes,
						  PAGE_SIZE, faila, pointers);
			} else {
				/* two data stripes lost: full raid6 recovery */
				raid6_2data_recov(rbio->bbio->num_stripes,
						  PAGE_SIZE, faila, failb,
						  pointers);
			}
		} else {
			void *p;

			/* rebuild from P stripe here (raid5 or raid6) */
			BUG_ON(failb != -1);
pstripe:
			/* Copy parity block into failed block to start with */
			memcpy(pointers[faila],
			       pointers[rbio->nr_data],
			       PAGE_CACHE_SIZE);

			/* rearrange the pointer array */
			p = pointers[faila];
			for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
				pointers[stripe] = pointers[stripe + 1];
			pointers[rbio->nr_data - 1] = p;

			/* xor in the rest */
			run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE);
		}
		/* if we're doing this rebuild as part of an rmw, go through
		 * and set all of our private rbio pages in the
		 * failed stripes as uptodate.  This way finish_rmw will
		 * know they can be trusted.  If this was a read reconstruction,
		 * other endio functions will fiddle the uptodate bits
		 */
		if (!rbio->read_rebuild) {
			for (i = 0; i < nr_pages; i++) {
				if (faila != -1) {
					page = rbio_stripe_page(rbio, faila, i);
					SetPageUptodate(page);
				}
				if (failb != -1) {
					page = rbio_stripe_page(rbio, failb, i);
					SetPageUptodate(page);
				}
			}
		}
		for (stripe = 0; stripe < rbio->bbio->num_stripes; stripe++) {
			/*
			 * if we're rebuilding a read, we have to use
			 * pages from the bio list
			 */
			if (rbio->read_rebuild &&
			    (stripe == faila || stripe == failb)) {
				page = page_in_rbio(rbio, stripe, pagenr, 0);
			} else {
				page = rbio_stripe_page(rbio, stripe, pagenr);
			}
			kunmap(page);
		}
	}

	err = 0;
cleanup:
	kfree(pointers);

cleanup_io:

	if (rbio->read_rebuild) {
		/* cache the rebuilt pages unless the caller holds the bbio */
		if (err == 0 &&
		    !test_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags))
			cache_rbio_pages(rbio);
		else
			clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

		rbio_orig_end_io(rbio, err, err == 0);
	} else if (err == 0) {
		/* rebuilt for rmw: clear the failures and finish the write */
		rbio->faila = -1;
		rbio->failb = -1;
		finish_rmw(rbio);
	} else {
		rbio_orig_end_io(rbio, err, 0);
	}
}
1930
1931/*
1932 * This is called only for stripes we've read from disk to
1933 * reconstruct the parity.
1934 */
1935static void raid_recover_end_io(struct bio *bio, int err)
1936{
1937 struct btrfs_raid_bio *rbio = bio->bi_private;
1938
1939 /*
1940 * we only read stripe pages off the disk, set them
1941 * up to date if there were no errors
1942 */
1943 if (err)
1944 fail_bio_stripe(rbio, bio);
1945 else
1946 set_bio_pages_uptodate(bio);
1947 bio_put(bio);
1948
b89e1b01 1949 if (!atomic_dec_and_test(&rbio->stripes_pending))
53b381b3
DW
1950 return;
1951
b89e1b01 1952 if (atomic_read(&rbio->error) > rbio->bbio->max_errors)
53b381b3
DW
1953 rbio_orig_end_io(rbio, -EIO, 0);
1954 else
1955 __raid_recover_end_io(rbio);
1956}
1957
1958/*
1959 * reads everything we need off the disk to reconstruct
1960 * the parity. endio handlers trigger final reconstruction
1961 * when the IO is done.
1962 *
1963 * This is used both for reads from the higher layers and for
1964 * parity construction required to finish a rmw cycle.
1965 */
/*
 * Submit the reads needed for reconstruction: everything except the
 * failed stripes, skipping pages the stripe cache already made
 * uptodate.  raid_recover_end_io() performs the rebuild when the reads
 * complete; if nothing needs reading we rebuild immediately, provided
 * the error count is within max_errors.  Returns 0 or -EIO.
 */
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct btrfs_bio *bbio = rbio->bbio;
	struct bio_list bio_list;
	int ret;
	int nr_pages = DIV_ROUND_UP(rbio->stripe_len, PAGE_CACHE_SIZE);
	int pagenr;
	int stripe;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_pages(rbio);
	if (ret)
		goto cleanup;

	atomic_set(&rbio->error, 0);

	/*
	 * read everything that hasn't failed.  Thanks to the
	 * stripe cache, it is possible that some or all of these
	 * pages are going to be uptodate.
	 */
	for (stripe = 0; stripe < bbio->num_stripes; stripe++) {
		if (rbio->faila == stripe || rbio->failb == stripe) {
			/* count the failure so max_errors is honored below */
			atomic_inc(&rbio->error);
			continue;
		}

		for (pagenr = 0; pagenr < nr_pages; pagenr++) {
			struct page *p;

			/*
			 * the rmw code may have already read this
			 * page in
			 */
			p = rbio_stripe_page(rbio, stripe, pagenr);
			if (PageUptodate(p))
				continue;

			ret = rbio_add_io_page(rbio, &bio_list,
				       rbio_stripe_page(rbio, stripe, pagenr),
				       stripe, pagenr, rbio->stripe_len);
			if (ret < 0)
				goto cleanup;
		}
	}

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * we might have no bios to read just because the pages
		 * were up to date, or we might have no bios to read because
		 * the devices were gone.
		 */
		if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) {
			__raid_recover_end_io(rbio);
			goto out;
		} else {
			goto cleanup;
		}
	}

	/*
	 * the bbio may be freed once we submit the last bio.  Make sure
	 * not to touch it after that
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	while (1) {
		bio = bio_list_pop(&bio_list);
		if (!bio)
			break;

		bio->bi_private = rbio;
		bio->bi_end_io = raid_recover_end_io;

		btrfs_bio_wq_end_io(rbio->fs_info, bio,
				    BTRFS_WQ_ENDIO_RAID56);

		BUG_ON(!test_bit(BIO_UPTODATE, &bio->bi_flags));
		submit_bio(READ, bio);
	}
out:
	return 0;

cleanup:
	/* rmw callers handle their own cleanup via validate/finish paths */
	if (rbio->read_rebuild)
		rbio_orig_end_io(rbio, -EIO, 0);
	return -EIO;
}
2057
2058/*
2059 * the main entry point for reads from the higher layers. This
2060 * is really only called when the normal read path had a failure,
2061 * so we assume the bio they send down corresponds to a failed part
2062 * of the drive.
2063 */
/*
 * Entry point for reads from the higher layers; only called after the
 * normal read path failed, so @bio identifies the broken data stripe.
 * @mirror_num == 3 additionally forces reconstruction from the Q stripe.
 * @hold_bbio keeps @bbio/@raid_map alive for the caller instead of
 * freeing them with the rbio.  Errors are reported through bio_endio,
 * not the return value.
 */
int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
			  struct btrfs_bio *bbio, u64 *raid_map,
			  u64 stripe_len, int mirror_num, int hold_bbio)
{
	struct btrfs_raid_bio *rbio;
	int ret;

	rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
	if (IS_ERR(rbio)) {
		__free_bbio_and_raid_map(bbio, raid_map, !hold_bbio);
		return PTR_ERR(rbio);
	}

	if (hold_bbio)
		set_bit(RBIO_HOLD_BBIO_MAP_BIT, &rbio->flags);
	rbio->read_rebuild = 1;
	bio_list_add(&rbio->bio_list, bio);
	rbio->bio_list_bytes = bio->bi_iter.bi_size;

	rbio->faila = find_logical_bio_stripe(rbio, bio);
	if (rbio->faila == -1) {
		/* the bio must land in some data stripe of this rbio */
		BUG();
		__free_bbio_and_raid_map(bbio, raid_map, !hold_bbio);
		kfree(rbio);
		return -EIO;
	}

	/*
	 * reconstruct from the q stripe if they are
	 * asking for mirror 3
	 */
	if (mirror_num == 3)
		rbio->failb = bbio->num_stripes - 2;

	ret = lock_stripe_add(rbio);

	/*
	 * __raid56_parity_recover will end the bio with
	 * any errors it hits.  We don't want to return
	 * its error value up the stack because our caller
	 * will end up calling bio_endio with any nonzero
	 * return
	 */
	if (ret == 0)
		__raid56_parity_recover(rbio);
	/*
	 * our rbio has been added to the list of
	 * rbios that will be handled after the
	 * currently lock owner is done
	 */
	return 0;

}
2117
2118static void rmw_work(struct btrfs_work *work)
2119{
2120 struct btrfs_raid_bio *rbio;
2121
2122 rbio = container_of(work, struct btrfs_raid_bio, work);
2123 raid56_rmw_stripe(rbio);
2124}
2125
2126static void read_rebuild_work(struct btrfs_work *work)
2127{
2128 struct btrfs_raid_bio *rbio;
2129
2130 rbio = container_of(work, struct btrfs_raid_bio, work);
2131 __raid56_parity_recover(rbio);
2132}