/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/random.h>
#include <asm/div64.h>

#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
593060d7
CM
31struct map_lookup {
32 u64 type;
33 int io_align;
34 int io_width;
35 int stripe_len;
36 int sector_size;
37 int num_stripes;
321aecc6 38 int sub_stripes;
cea9e445 39 struct btrfs_bio_stripe stripes[];
593060d7
CM
40};
41
42#define map_lookup_size(n) (sizeof(struct map_lookup) + \
cea9e445 43 (sizeof(struct btrfs_bio_stripe) * (n)))
593060d7 44
8a4b83cc
CM
45static DEFINE_MUTEX(uuid_mutex);
46static LIST_HEAD(fs_uuids);
47
a061fc8d
CM
48void btrfs_lock_volumes(void)
49{
50 mutex_lock(&uuid_mutex);
51}
52
53void btrfs_unlock_volumes(void)
54{
55 mutex_unlock(&uuid_mutex);
56}
57
8a4b83cc
CM
58int btrfs_cleanup_fs_uuids(void)
59{
60 struct btrfs_fs_devices *fs_devices;
61 struct list_head *uuid_cur;
62 struct list_head *devices_cur;
63 struct btrfs_device *dev;
64
65 list_for_each(uuid_cur, &fs_uuids) {
66 fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
67 list);
68 while(!list_empty(&fs_devices->devices)) {
69 devices_cur = fs_devices->devices.next;
70 dev = list_entry(devices_cur, struct btrfs_device,
71 dev_list);
8a4b83cc 72 if (dev->bdev) {
8a4b83cc 73 close_bdev_excl(dev->bdev);
a0af469b 74 fs_devices->open_devices--;
8a4b83cc
CM
75 }
76 list_del(&dev->dev_list);
dfe25020 77 kfree(dev->name);
8a4b83cc
CM
78 kfree(dev);
79 }
80 }
81 return 0;
82}
83
a443755f
CM
84static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
85 u8 *uuid)
8a4b83cc
CM
86{
87 struct btrfs_device *dev;
88 struct list_head *cur;
89
90 list_for_each(cur, head) {
91 dev = list_entry(cur, struct btrfs_device, dev_list);
a443755f 92 if (dev->devid == devid &&
8f18cf13 93 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
8a4b83cc 94 return dev;
a443755f 95 }
8a4b83cc
CM
96 }
97 return NULL;
98}
99
100static struct btrfs_fs_devices *find_fsid(u8 *fsid)
101{
102 struct list_head *cur;
103 struct btrfs_fs_devices *fs_devices;
104
105 list_for_each(cur, &fs_uuids) {
106 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
107 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
108 return fs_devices;
109 }
110 return NULL;
111}
112
113static int device_list_add(const char *path,
114 struct btrfs_super_block *disk_super,
115 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
116{
117 struct btrfs_device *device;
118 struct btrfs_fs_devices *fs_devices;
119 u64 found_transid = btrfs_super_generation(disk_super);
120
121 fs_devices = find_fsid(disk_super->fsid);
122 if (!fs_devices) {
515dc322 123 fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
8a4b83cc
CM
124 if (!fs_devices)
125 return -ENOMEM;
126 INIT_LIST_HEAD(&fs_devices->devices);
b3075717 127 INIT_LIST_HEAD(&fs_devices->alloc_list);
8a4b83cc
CM
128 list_add(&fs_devices->list, &fs_uuids);
129 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
130 fs_devices->latest_devid = devid;
131 fs_devices->latest_trans = found_transid;
8a4b83cc
CM
132 device = NULL;
133 } else {
a443755f
CM
134 device = __find_device(&fs_devices->devices, devid,
135 disk_super->dev_item.uuid);
8a4b83cc
CM
136 }
137 if (!device) {
138 device = kzalloc(sizeof(*device), GFP_NOFS);
139 if (!device) {
140 /* we can safely leave the fs_devices entry around */
141 return -ENOMEM;
142 }
143 device->devid = devid;
a443755f
CM
144 memcpy(device->uuid, disk_super->dev_item.uuid,
145 BTRFS_UUID_SIZE);
f2984462 146 device->barriers = 1;
b248a415 147 spin_lock_init(&device->io_lock);
8a4b83cc
CM
148 device->name = kstrdup(path, GFP_NOFS);
149 if (!device->name) {
150 kfree(device);
151 return -ENOMEM;
152 }
153 list_add(&device->dev_list, &fs_devices->devices);
b3075717 154 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
8a4b83cc
CM
155 fs_devices->num_devices++;
156 }
157
158 if (found_transid > fs_devices->latest_trans) {
159 fs_devices->latest_devid = devid;
160 fs_devices->latest_trans = found_transid;
161 }
8a4b83cc
CM
162 *fs_devices_ret = fs_devices;
163 return 0;
164}
165
dfe25020
CM
166int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
167{
168 struct list_head *head = &fs_devices->devices;
169 struct list_head *cur;
170 struct btrfs_device *device;
171
172 mutex_lock(&uuid_mutex);
173again:
174 list_for_each(cur, head) {
175 device = list_entry(cur, struct btrfs_device, dev_list);
176 if (!device->in_fs_metadata) {
a0af469b 177 if (device->bdev) {
dfe25020 178 close_bdev_excl(device->bdev);
a0af469b
CM
179 fs_devices->open_devices--;
180 }
dfe25020
CM
181 list_del(&device->dev_list);
182 list_del(&device->dev_alloc_list);
183 fs_devices->num_devices--;
184 kfree(device->name);
185 kfree(device);
186 goto again;
187 }
188 }
189 mutex_unlock(&uuid_mutex);
190 return 0;
191}
a0af469b 192
8a4b83cc
CM
193int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
194{
195 struct list_head *head = &fs_devices->devices;
196 struct list_head *cur;
197 struct btrfs_device *device;
198
199 mutex_lock(&uuid_mutex);
200 list_for_each(cur, head) {
201 device = list_entry(cur, struct btrfs_device, dev_list);
202 if (device->bdev) {
203 close_bdev_excl(device->bdev);
a0af469b 204 fs_devices->open_devices--;
8a4b83cc
CM
205 }
206 device->bdev = NULL;
dfe25020 207 device->in_fs_metadata = 0;
8a4b83cc 208 }
a0af469b 209 fs_devices->mounted = 0;
8a4b83cc
CM
210 mutex_unlock(&uuid_mutex);
211 return 0;
212}
213
214int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
215 int flags, void *holder)
216{
217 struct block_device *bdev;
218 struct list_head *head = &fs_devices->devices;
219 struct list_head *cur;
220 struct btrfs_device *device;
a0af469b
CM
221 struct block_device *latest_bdev = NULL;
222 struct buffer_head *bh;
223 struct btrfs_super_block *disk_super;
224 u64 latest_devid = 0;
225 u64 latest_transid = 0;
226 u64 transid;
227 u64 devid;
228 int ret = 0;
8a4b83cc
CM
229
230 mutex_lock(&uuid_mutex);
a0af469b
CM
231 if (fs_devices->mounted)
232 goto out;
233
8a4b83cc
CM
234 list_for_each(cur, head) {
235 device = list_entry(cur, struct btrfs_device, dev_list);
c1c4d91c
CM
236 if (device->bdev)
237 continue;
238
dfe25020
CM
239 if (!device->name)
240 continue;
241
8a4b83cc 242 bdev = open_bdev_excl(device->name, flags, holder);
e17cade2 243
8a4b83cc
CM
244 if (IS_ERR(bdev)) {
245 printk("open %s failed\n", device->name);
a0af469b 246 goto error;
8a4b83cc 247 }
a061fc8d 248 set_blocksize(bdev, 4096);
a0af469b
CM
249
250 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
251 if (!bh)
252 goto error_close;
253
254 disk_super = (struct btrfs_super_block *)bh->b_data;
255 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
256 sizeof(disk_super->magic)))
257 goto error_brelse;
258
259 devid = le64_to_cpu(disk_super->dev_item.devid);
260 if (devid != device->devid)
261 goto error_brelse;
262
263 transid = btrfs_super_generation(disk_super);
6af5ac3c 264 if (!latest_transid || transid > latest_transid) {
a0af469b
CM
265 latest_devid = devid;
266 latest_transid = transid;
267 latest_bdev = bdev;
268 }
269
8a4b83cc 270 device->bdev = bdev;
dfe25020 271 device->in_fs_metadata = 0;
a0af469b
CM
272 fs_devices->open_devices++;
273 continue;
a061fc8d 274
a0af469b
CM
275error_brelse:
276 brelse(bh);
277error_close:
278 close_bdev_excl(bdev);
279error:
280 continue;
8a4b83cc 281 }
a0af469b
CM
282 if (fs_devices->open_devices == 0) {
283 ret = -EIO;
284 goto out;
285 }
286 fs_devices->mounted = 1;
287 fs_devices->latest_bdev = latest_bdev;
288 fs_devices->latest_devid = latest_devid;
289 fs_devices->latest_trans = latest_transid;
290out:
8a4b83cc 291 mutex_unlock(&uuid_mutex);
8a4b83cc
CM
292 return ret;
293}
294
295int btrfs_scan_one_device(const char *path, int flags, void *holder,
296 struct btrfs_fs_devices **fs_devices_ret)
297{
298 struct btrfs_super_block *disk_super;
299 struct block_device *bdev;
300 struct buffer_head *bh;
301 int ret;
302 u64 devid;
f2984462 303 u64 transid;
8a4b83cc
CM
304
305 mutex_lock(&uuid_mutex);
306
8a4b83cc
CM
307 bdev = open_bdev_excl(path, flags, holder);
308
309 if (IS_ERR(bdev)) {
8a4b83cc
CM
310 ret = PTR_ERR(bdev);
311 goto error;
312 }
313
314 ret = set_blocksize(bdev, 4096);
315 if (ret)
316 goto error_close;
317 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
318 if (!bh) {
319 ret = -EIO;
320 goto error_close;
321 }
322 disk_super = (struct btrfs_super_block *)bh->b_data;
323 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
324 sizeof(disk_super->magic))) {
e58ca020 325 ret = -EINVAL;
8a4b83cc
CM
326 goto error_brelse;
327 }
328 devid = le64_to_cpu(disk_super->dev_item.devid);
f2984462 329 transid = btrfs_super_generation(disk_super);
7ae9c09d
CM
330 if (disk_super->label[0])
331 printk("device label %s ", disk_super->label);
332 else {
333 /* FIXME, make a readl uuid parser */
334 printk("device fsid %llx-%llx ",
335 *(unsigned long long *)disk_super->fsid,
336 *(unsigned long long *)(disk_super->fsid + 8));
337 }
338 printk("devid %Lu transid %Lu %s\n", devid, transid, path);
8a4b83cc
CM
339 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
340
341error_brelse:
342 brelse(bh);
343error_close:
344 close_bdev_excl(bdev);
8a4b83cc
CM
345error:
346 mutex_unlock(&uuid_mutex);
347 return ret;
348}
0b86a832
CM
349
350/*
351 * this uses a pretty simple search, the expectation is that it is
352 * called very infrequently and that a given device has a small number
353 * of extents
354 */
355static int find_free_dev_extent(struct btrfs_trans_handle *trans,
356 struct btrfs_device *device,
357 struct btrfs_path *path,
358 u64 num_bytes, u64 *start)
359{
360 struct btrfs_key key;
361 struct btrfs_root *root = device->dev_root;
362 struct btrfs_dev_extent *dev_extent = NULL;
363 u64 hole_size = 0;
364 u64 last_byte = 0;
365 u64 search_start = 0;
366 u64 search_end = device->total_bytes;
367 int ret;
368 int slot = 0;
369 int start_found;
370 struct extent_buffer *l;
371
372 start_found = 0;
373 path->reada = 2;
374
375 /* FIXME use last free of some kind */
376
8a4b83cc
CM
377 /* we don't want to overwrite the superblock on the drive,
378 * so we make sure to start at an offset of at least 1MB
379 */
380 search_start = max((u64)1024 * 1024, search_start);
8f18cf13
CM
381
382 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
383 search_start = max(root->fs_info->alloc_start, search_start);
384
0b86a832
CM
385 key.objectid = device->devid;
386 key.offset = search_start;
387 key.type = BTRFS_DEV_EXTENT_KEY;
388 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
389 if (ret < 0)
390 goto error;
391 ret = btrfs_previous_item(root, path, 0, key.type);
392 if (ret < 0)
393 goto error;
394 l = path->nodes[0];
395 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
396 while (1) {
397 l = path->nodes[0];
398 slot = path->slots[0];
399 if (slot >= btrfs_header_nritems(l)) {
400 ret = btrfs_next_leaf(root, path);
401 if (ret == 0)
402 continue;
403 if (ret < 0)
404 goto error;
405no_more_items:
406 if (!start_found) {
407 if (search_start >= search_end) {
408 ret = -ENOSPC;
409 goto error;
410 }
411 *start = search_start;
412 start_found = 1;
413 goto check_pending;
414 }
415 *start = last_byte > search_start ?
416 last_byte : search_start;
417 if (search_end <= *start) {
418 ret = -ENOSPC;
419 goto error;
420 }
421 goto check_pending;
422 }
423 btrfs_item_key_to_cpu(l, &key, slot);
424
425 if (key.objectid < device->devid)
426 goto next;
427
428 if (key.objectid > device->devid)
429 goto no_more_items;
430
431 if (key.offset >= search_start && key.offset > last_byte &&
432 start_found) {
433 if (last_byte < search_start)
434 last_byte = search_start;
435 hole_size = key.offset - last_byte;
436 if (key.offset > last_byte &&
437 hole_size >= num_bytes) {
438 *start = last_byte;
439 goto check_pending;
440 }
441 }
442 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
443 goto next;
444 }
445
446 start_found = 1;
447 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
448 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
449next:
450 path->slots[0]++;
451 cond_resched();
452 }
453check_pending:
454 /* we have to make sure we didn't find an extent that has already
455 * been allocated by the map tree or the original allocation
456 */
457 btrfs_release_path(root, path);
458 BUG_ON(*start < search_start);
459
6324fbf3 460 if (*start + num_bytes > search_end) {
0b86a832
CM
461 ret = -ENOSPC;
462 goto error;
463 }
464 /* check for pending inserts here */
465 return 0;
466
467error:
468 btrfs_release_path(root, path);
469 return ret;
470}
471
8f18cf13
CM
472int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
473 struct btrfs_device *device,
474 u64 start)
475{
476 int ret;
477 struct btrfs_path *path;
478 struct btrfs_root *root = device->dev_root;
479 struct btrfs_key key;
a061fc8d
CM
480 struct btrfs_key found_key;
481 struct extent_buffer *leaf = NULL;
482 struct btrfs_dev_extent *extent = NULL;
8f18cf13
CM
483
484 path = btrfs_alloc_path();
485 if (!path)
486 return -ENOMEM;
487
488 key.objectid = device->devid;
489 key.offset = start;
490 key.type = BTRFS_DEV_EXTENT_KEY;
491
492 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
a061fc8d
CM
493 if (ret > 0) {
494 ret = btrfs_previous_item(root, path, key.objectid,
495 BTRFS_DEV_EXTENT_KEY);
496 BUG_ON(ret);
497 leaf = path->nodes[0];
498 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
499 extent = btrfs_item_ptr(leaf, path->slots[0],
500 struct btrfs_dev_extent);
501 BUG_ON(found_key.offset > start || found_key.offset +
502 btrfs_dev_extent_length(leaf, extent) < start);
503 ret = 0;
504 } else if (ret == 0) {
505 leaf = path->nodes[0];
506 extent = btrfs_item_ptr(leaf, path->slots[0],
507 struct btrfs_dev_extent);
508 }
8f18cf13
CM
509 BUG_ON(ret);
510
dfe25020
CM
511 if (device->bytes_used > 0)
512 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
8f18cf13
CM
513 ret = btrfs_del_item(trans, root, path);
514 BUG_ON(ret);
515
516 btrfs_free_path(path);
517 return ret;
518}
519
0b86a832
CM
520int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
521 struct btrfs_device *device,
e17cade2
CM
522 u64 chunk_tree, u64 chunk_objectid,
523 u64 chunk_offset,
524 u64 num_bytes, u64 *start)
0b86a832
CM
525{
526 int ret;
527 struct btrfs_path *path;
528 struct btrfs_root *root = device->dev_root;
529 struct btrfs_dev_extent *extent;
530 struct extent_buffer *leaf;
531 struct btrfs_key key;
532
dfe25020 533 WARN_ON(!device->in_fs_metadata);
0b86a832
CM
534 path = btrfs_alloc_path();
535 if (!path)
536 return -ENOMEM;
537
538 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
6324fbf3 539 if (ret) {
0b86a832 540 goto err;
6324fbf3 541 }
0b86a832
CM
542
543 key.objectid = device->devid;
544 key.offset = *start;
545 key.type = BTRFS_DEV_EXTENT_KEY;
546 ret = btrfs_insert_empty_item(trans, root, path, &key,
547 sizeof(*extent));
548 BUG_ON(ret);
549
550 leaf = path->nodes[0];
551 extent = btrfs_item_ptr(leaf, path->slots[0],
552 struct btrfs_dev_extent);
e17cade2
CM
553 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
554 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
555 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
556
557 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
558 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
559 BTRFS_UUID_SIZE);
560
0b86a832
CM
561 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
562 btrfs_mark_buffer_dirty(leaf);
563err:
564 btrfs_free_path(path);
565 return ret;
566}
567
e17cade2 568static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
0b86a832
CM
569{
570 struct btrfs_path *path;
571 int ret;
572 struct btrfs_key key;
e17cade2 573 struct btrfs_chunk *chunk;
0b86a832
CM
574 struct btrfs_key found_key;
575
576 path = btrfs_alloc_path();
577 BUG_ON(!path);
578
e17cade2 579 key.objectid = objectid;
0b86a832
CM
580 key.offset = (u64)-1;
581 key.type = BTRFS_CHUNK_ITEM_KEY;
582
583 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
584 if (ret < 0)
585 goto error;
586
587 BUG_ON(ret == 0);
588
589 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
590 if (ret) {
e17cade2 591 *offset = 0;
0b86a832
CM
592 } else {
593 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
594 path->slots[0]);
e17cade2
CM
595 if (found_key.objectid != objectid)
596 *offset = 0;
597 else {
598 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
599 struct btrfs_chunk);
600 *offset = found_key.offset +
601 btrfs_chunk_length(path->nodes[0], chunk);
602 }
0b86a832
CM
603 }
604 ret = 0;
605error:
606 btrfs_free_path(path);
607 return ret;
608}
609
0b86a832
CM
610static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
611 u64 *objectid)
612{
613 int ret;
614 struct btrfs_key key;
615 struct btrfs_key found_key;
616
617 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
618 key.type = BTRFS_DEV_ITEM_KEY;
619 key.offset = (u64)-1;
620
621 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
622 if (ret < 0)
623 goto error;
624
625 BUG_ON(ret == 0);
626
627 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
628 BTRFS_DEV_ITEM_KEY);
629 if (ret) {
630 *objectid = 1;
631 } else {
632 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
633 path->slots[0]);
634 *objectid = found_key.offset + 1;
635 }
636 ret = 0;
637error:
638 btrfs_release_path(root, path);
639 return ret;
640}
641
642/*
643 * the device information is stored in the chunk root
644 * the btrfs_device struct should be fully filled in
645 */
646int btrfs_add_device(struct btrfs_trans_handle *trans,
647 struct btrfs_root *root,
648 struct btrfs_device *device)
649{
650 int ret;
651 struct btrfs_path *path;
652 struct btrfs_dev_item *dev_item;
653 struct extent_buffer *leaf;
654 struct btrfs_key key;
655 unsigned long ptr;
006a58a2 656 u64 free_devid = 0;
0b86a832
CM
657
658 root = root->fs_info->chunk_root;
659
660 path = btrfs_alloc_path();
661 if (!path)
662 return -ENOMEM;
663
664 ret = find_next_devid(root, path, &free_devid);
665 if (ret)
666 goto out;
667
668 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
669 key.type = BTRFS_DEV_ITEM_KEY;
670 key.offset = free_devid;
671
672 ret = btrfs_insert_empty_item(trans, root, path, &key,
0d81ba5d 673 sizeof(*dev_item));
0b86a832
CM
674 if (ret)
675 goto out;
676
677 leaf = path->nodes[0];
678 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
679
8a4b83cc 680 device->devid = free_devid;
0b86a832
CM
681 btrfs_set_device_id(leaf, dev_item, device->devid);
682 btrfs_set_device_type(leaf, dev_item, device->type);
683 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
684 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
685 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
0b86a832
CM
686 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
687 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
e17cade2
CM
688 btrfs_set_device_group(leaf, dev_item, 0);
689 btrfs_set_device_seek_speed(leaf, dev_item, 0);
690 btrfs_set_device_bandwidth(leaf, dev_item, 0);
0b86a832 691
0b86a832 692 ptr = (unsigned long)btrfs_device_uuid(dev_item);
e17cade2 693 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
0b86a832
CM
694 btrfs_mark_buffer_dirty(leaf);
695 ret = 0;
696
697out:
698 btrfs_free_path(path);
699 return ret;
700}
8f18cf13 701
a061fc8d
CM
702static int btrfs_rm_dev_item(struct btrfs_root *root,
703 struct btrfs_device *device)
704{
705 int ret;
706 struct btrfs_path *path;
707 struct block_device *bdev = device->bdev;
708 struct btrfs_device *next_dev;
709 struct btrfs_key key;
710 u64 total_bytes;
711 struct btrfs_fs_devices *fs_devices;
712 struct btrfs_trans_handle *trans;
713
714 root = root->fs_info->chunk_root;
715
716 path = btrfs_alloc_path();
717 if (!path)
718 return -ENOMEM;
719
720 trans = btrfs_start_transaction(root, 1);
721 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
722 key.type = BTRFS_DEV_ITEM_KEY;
723 key.offset = device->devid;
724
725 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
726 if (ret < 0)
727 goto out;
728
729 if (ret > 0) {
730 ret = -ENOENT;
731 goto out;
732 }
733
734 ret = btrfs_del_item(trans, root, path);
735 if (ret)
736 goto out;
737
738 /*
739 * at this point, the device is zero sized. We want to
740 * remove it from the devices list and zero out the old super
741 */
742 list_del_init(&device->dev_list);
743 list_del_init(&device->dev_alloc_list);
744 fs_devices = root->fs_info->fs_devices;
745
746 next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
747 dev_list);
a061fc8d
CM
748 if (bdev == root->fs_info->sb->s_bdev)
749 root->fs_info->sb->s_bdev = next_dev->bdev;
750 if (bdev == fs_devices->latest_bdev)
751 fs_devices->latest_bdev = next_dev->bdev;
752
a061fc8d
CM
753 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
754 btrfs_set_super_num_devices(&root->fs_info->super_copy,
755 total_bytes - 1);
756out:
757 btrfs_free_path(path);
758 btrfs_commit_transaction(trans, root);
759 return ret;
760}
761
762int btrfs_rm_device(struct btrfs_root *root, char *device_path)
763{
764 struct btrfs_device *device;
765 struct block_device *bdev;
dfe25020 766 struct buffer_head *bh = NULL;
a061fc8d
CM
767 struct btrfs_super_block *disk_super;
768 u64 all_avail;
769 u64 devid;
770 int ret = 0;
771
772 mutex_lock(&root->fs_info->fs_mutex);
773 mutex_lock(&uuid_mutex);
774
775 all_avail = root->fs_info->avail_data_alloc_bits |
776 root->fs_info->avail_system_alloc_bits |
777 root->fs_info->avail_metadata_alloc_bits;
778
779 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
dfe25020 780 btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
a061fc8d
CM
781 printk("btrfs: unable to go below four devices on raid10\n");
782 ret = -EINVAL;
783 goto out;
784 }
785
786 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
dfe25020 787 btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
a061fc8d
CM
788 printk("btrfs: unable to go below two devices on raid1\n");
789 ret = -EINVAL;
790 goto out;
791 }
792
dfe25020
CM
793 if (strcmp(device_path, "missing") == 0) {
794 struct list_head *cur;
795 struct list_head *devices;
796 struct btrfs_device *tmp;
a061fc8d 797
dfe25020
CM
798 device = NULL;
799 devices = &root->fs_info->fs_devices->devices;
800 list_for_each(cur, devices) {
801 tmp = list_entry(cur, struct btrfs_device, dev_list);
802 if (tmp->in_fs_metadata && !tmp->bdev) {
803 device = tmp;
804 break;
805 }
806 }
807 bdev = NULL;
808 bh = NULL;
809 disk_super = NULL;
810 if (!device) {
811 printk("btrfs: no missing devices found to remove\n");
812 goto out;
813 }
814
815 } else {
816 bdev = open_bdev_excl(device_path, 0,
817 root->fs_info->bdev_holder);
818 if (IS_ERR(bdev)) {
819 ret = PTR_ERR(bdev);
820 goto out;
821 }
a061fc8d 822
dfe25020
CM
823 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
824 if (!bh) {
825 ret = -EIO;
826 goto error_close;
827 }
828 disk_super = (struct btrfs_super_block *)bh->b_data;
829 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
830 sizeof(disk_super->magic))) {
831 ret = -ENOENT;
832 goto error_brelse;
833 }
834 if (memcmp(disk_super->fsid, root->fs_info->fsid,
835 BTRFS_FSID_SIZE)) {
836 ret = -ENOENT;
837 goto error_brelse;
838 }
839 devid = le64_to_cpu(disk_super->dev_item.devid);
840 device = btrfs_find_device(root, devid, NULL);
841 if (!device) {
842 ret = -ENOENT;
843 goto error_brelse;
844 }
845
846 }
a061fc8d 847 root->fs_info->fs_devices->num_devices--;
0ef3e66b 848 root->fs_info->fs_devices->open_devices--;
a061fc8d
CM
849
850 ret = btrfs_shrink_device(device, 0);
851 if (ret)
852 goto error_brelse;
853
854
855 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
856 if (ret)
857 goto error_brelse;
858
dfe25020
CM
859 if (bh) {
860 /* make sure this device isn't detected as part of
861 * the FS anymore
862 */
863 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
864 set_buffer_dirty(bh);
865 sync_dirty_buffer(bh);
a061fc8d 866
dfe25020
CM
867 brelse(bh);
868 }
a061fc8d 869
dfe25020
CM
870 if (device->bdev) {
871 /* one close for the device struct or super_block */
872 close_bdev_excl(device->bdev);
873 }
874 if (bdev) {
875 /* one close for us */
876 close_bdev_excl(bdev);
877 }
a061fc8d
CM
878 kfree(device->name);
879 kfree(device);
880 ret = 0;
881 goto out;
882
883error_brelse:
884 brelse(bh);
885error_close:
dfe25020
CM
886 if (bdev)
887 close_bdev_excl(bdev);
a061fc8d
CM
888out:
889 mutex_unlock(&uuid_mutex);
890 mutex_unlock(&root->fs_info->fs_mutex);
891 return ret;
892}
893
788f20eb
CM
894int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
895{
896 struct btrfs_trans_handle *trans;
897 struct btrfs_device *device;
898 struct block_device *bdev;
899 struct list_head *cur;
900 struct list_head *devices;
901 u64 total_bytes;
902 int ret = 0;
903
904
905 bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
906 if (!bdev) {
907 return -EIO;
908 }
909 mutex_lock(&root->fs_info->fs_mutex);
910 trans = btrfs_start_transaction(root, 1);
911 devices = &root->fs_info->fs_devices->devices;
912 list_for_each(cur, devices) {
913 device = list_entry(cur, struct btrfs_device, dev_list);
914 if (device->bdev == bdev) {
915 ret = -EEXIST;
916 goto out;
917 }
918 }
919
920 device = kzalloc(sizeof(*device), GFP_NOFS);
921 if (!device) {
922 /* we can safely leave the fs_devices entry around */
923 ret = -ENOMEM;
924 goto out_close_bdev;
925 }
926
927 device->barriers = 1;
928 generate_random_uuid(device->uuid);
929 spin_lock_init(&device->io_lock);
930 device->name = kstrdup(device_path, GFP_NOFS);
931 if (!device->name) {
932 kfree(device);
933 goto out_close_bdev;
934 }
935 device->io_width = root->sectorsize;
936 device->io_align = root->sectorsize;
937 device->sector_size = root->sectorsize;
938 device->total_bytes = i_size_read(bdev->bd_inode);
939 device->dev_root = root->fs_info->dev_root;
940 device->bdev = bdev;
dfe25020 941 device->in_fs_metadata = 1;
788f20eb
CM
942
943 ret = btrfs_add_device(trans, root, device);
944 if (ret)
945 goto out_close_bdev;
946
947 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
948 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
949 total_bytes + device->total_bytes);
950
951 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
952 btrfs_set_super_num_devices(&root->fs_info->super_copy,
953 total_bytes + 1);
954
955 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
956 list_add(&device->dev_alloc_list,
957 &root->fs_info->fs_devices->alloc_list);
958 root->fs_info->fs_devices->num_devices++;
a0af469b 959 root->fs_info->fs_devices->open_devices++;
788f20eb
CM
960out:
961 btrfs_end_transaction(trans, root);
962 mutex_unlock(&root->fs_info->fs_mutex);
963 return ret;
964
965out_close_bdev:
966 close_bdev_excl(bdev);
967 goto out;
968}
969
0b86a832
CM
970int btrfs_update_device(struct btrfs_trans_handle *trans,
971 struct btrfs_device *device)
972{
973 int ret;
974 struct btrfs_path *path;
975 struct btrfs_root *root;
976 struct btrfs_dev_item *dev_item;
977 struct extent_buffer *leaf;
978 struct btrfs_key key;
979
980 root = device->dev_root->fs_info->chunk_root;
981
982 path = btrfs_alloc_path();
983 if (!path)
984 return -ENOMEM;
985
986 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
987 key.type = BTRFS_DEV_ITEM_KEY;
988 key.offset = device->devid;
989
990 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
991 if (ret < 0)
992 goto out;
993
994 if (ret > 0) {
995 ret = -ENOENT;
996 goto out;
997 }
998
999 leaf = path->nodes[0];
1000 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1001
1002 btrfs_set_device_id(leaf, dev_item, device->devid);
1003 btrfs_set_device_type(leaf, dev_item, device->type);
1004 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1005 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1006 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
0b86a832
CM
1007 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1008 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1009 btrfs_mark_buffer_dirty(leaf);
1010
1011out:
1012 btrfs_free_path(path);
1013 return ret;
1014}
1015
8f18cf13
CM
1016int btrfs_grow_device(struct btrfs_trans_handle *trans,
1017 struct btrfs_device *device, u64 new_size)
1018{
1019 struct btrfs_super_block *super_copy =
1020 &device->dev_root->fs_info->super_copy;
1021 u64 old_total = btrfs_super_total_bytes(super_copy);
1022 u64 diff = new_size - device->total_bytes;
1023
1024 btrfs_set_super_total_bytes(super_copy, old_total + diff);
1025 return btrfs_update_device(trans, device);
1026}
1027
1028static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1029 struct btrfs_root *root,
1030 u64 chunk_tree, u64 chunk_objectid,
1031 u64 chunk_offset)
1032{
1033 int ret;
1034 struct btrfs_path *path;
1035 struct btrfs_key key;
1036
1037 root = root->fs_info->chunk_root;
1038 path = btrfs_alloc_path();
1039 if (!path)
1040 return -ENOMEM;
1041
1042 key.objectid = chunk_objectid;
1043 key.offset = chunk_offset;
1044 key.type = BTRFS_CHUNK_ITEM_KEY;
1045
1046 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1047 BUG_ON(ret);
1048
1049 ret = btrfs_del_item(trans, root, path);
1050 BUG_ON(ret);
1051
1052 btrfs_free_path(path);
1053 return 0;
1054}
1055
1056int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1057 chunk_offset)
1058{
1059 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1060 struct btrfs_disk_key *disk_key;
1061 struct btrfs_chunk *chunk;
1062 u8 *ptr;
1063 int ret = 0;
1064 u32 num_stripes;
1065 u32 array_size;
1066 u32 len = 0;
1067 u32 cur;
1068 struct btrfs_key key;
1069
1070 array_size = btrfs_super_sys_array_size(super_copy);
1071
1072 ptr = super_copy->sys_chunk_array;
1073 cur = 0;
1074
1075 while (cur < array_size) {
1076 disk_key = (struct btrfs_disk_key *)ptr;
1077 btrfs_disk_key_to_cpu(&key, disk_key);
1078
1079 len = sizeof(*disk_key);
1080
1081 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1082 chunk = (struct btrfs_chunk *)(ptr + len);
1083 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1084 len += btrfs_chunk_item_size(num_stripes);
1085 } else {
1086 ret = -EIO;
1087 break;
1088 }
1089 if (key.objectid == chunk_objectid &&
1090 key.offset == chunk_offset) {
1091 memmove(ptr, ptr + len, array_size - (cur + len));
1092 array_size -= len;
1093 btrfs_set_super_sys_array_size(super_copy, array_size);
1094 } else {
1095 ptr += len;
1096 cur += len;
1097 }
1098 }
1099 return ret;
1100}
1101
1102
/*
 * Move all live data out of the chunk at chunk_offset and then delete it.
 *
 * Step one relocates every extent still inside the chunk; step two, under
 * a transaction, frees the per-device extents, deletes the chunk tree
 * entry (and the superblock sys_chunk_array copy for SYSTEM chunks), and
 * finally drops the logical->physical extent mapping.
 *
 * Any failure here is treated as fatal via BUG_ON.
 */
int btrfs_relocate_chunk(struct btrfs_root *root,
			 u64 chunk_tree, u64 chunk_objectid,
			 u64 chunk_offset)
{
	struct extent_map_tree *em_tree;
	struct btrfs_root *extent_root;
	struct btrfs_trans_handle *trans;
	struct extent_map *em;
	struct map_lookup *map;
	int ret;
	int i;

	printk("btrfs relocating chunk %llu\n",
	       (unsigned long long)chunk_offset);
	root = root->fs_info->chunk_root;
	extent_root = root->fs_info->extent_root;
	em_tree = &root->fs_info->mapping_tree.map_tree;

	/* step one, relocate all the extents inside this chunk */
	ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
	BUG_ON(ret);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	/*
	 * step two, delete the device extents and the
	 * chunk tree entries
	 */
	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, chunk_offset, 1);
	spin_unlock(&em_tree->lock);

	/* NOTE(review): em is not NULL-checked before this dereference —
	 * presumably the mapping must exist for a chunk being relocated;
	 * confirm before relying on it */
	BUG_ON(em->start > chunk_offset ||
	       em->start + em->len < chunk_offset);
	map = (struct map_lookup *)em->bdev;

	/* release each stripe's device extent and persist the new
	 * per-device usage counters */
	for (i = 0; i < map->num_stripes; i++) {
		ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
					    map->stripes[i].physical);
		BUG_ON(ret);

		if (map->stripes[i].dev) {
			ret = btrfs_update_device(trans, map->stripes[i].dev);
			BUG_ON(ret);
		}
	}
	ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
			       chunk_offset);

	BUG_ON(ret);

	/* SYSTEM chunks are also duplicated in the superblock array */
	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
		BUG_ON(ret);
	}

	spin_lock(&em_tree->lock);
	remove_extent_mapping(em_tree, em);
	kfree(map);
	em->bdev = NULL;

	/* once for the tree */
	free_extent_map(em);
	spin_unlock(&em_tree->lock);

	/* once for us */
	free_extent_map(em);

	btrfs_end_transaction(trans, root);
	return 0;
}
1175
ec44a35c
CM
1176static u64 div_factor(u64 num, int factor)
1177{
1178 if (factor == 10)
1179 return num;
1180 num *= factor;
1181 do_div(num, 10);
1182 return num;
1183}
1184
1185
/*
 * Rebalance the filesystem's chunks across all devices.
 *
 * Step one shrinks and immediately re-grows each device, which forces a
 * little free space to the end of every device.  Step two walks the chunk
 * tree backwards from the highest offset and relocates each chunk in
 * turn; rewriting a chunk re-allocates it with the current allocation
 * policy, spreading it over the devices.  Chunk zero is never touched.
 *
 * Runs with fs_mutex held for the whole operation.
 */
int btrfs_balance(struct btrfs_root *dev_root)
{
	int ret;
	struct list_head *cur;
	struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
	struct btrfs_device *device;
	u64 old_size;
	u64 size_to_free;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_chunk *chunk;
	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key found_key;


	dev_root = dev_root->fs_info->dev_root;

	mutex_lock(&dev_root->fs_info->fs_mutex);
	/* step one make some room on all the devices */
	list_for_each(cur, devices) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		old_size = device->total_bytes;
		/* aim to free 10% of the device, capped at 1MB */
		size_to_free = div_factor(old_size, 1);
		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
		/* skip devices that already have that much free */
		if (device->total_bytes - device->bytes_used > size_to_free)
			continue;

		ret = btrfs_shrink_device(device, old_size - size_to_free);
		BUG_ON(ret);

		trans = btrfs_start_transaction(dev_root, 1);
		BUG_ON(!trans);

		/* grow straight back: the shrink pushed data off the tail */
		ret = btrfs_grow_device(trans, device, old_size);
		BUG_ON(ret);

		btrfs_end_transaction(trans, dev_root);
	}

	/* step two, relocate all the chunks */
	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	while(1) {
		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
		if (ret < 0)
			goto error;

		/*
		 * this shouldn't happen, it means the last relocate
		 * failed
		 */
		if (ret == 0)
			break;

		ret = btrfs_previous_item(chunk_root, path, 0,
					  BTRFS_CHUNK_ITEM_KEY);
		if (ret) {
			break;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		if (found_key.objectid != key.objectid)
			break;
		chunk = btrfs_item_ptr(path->nodes[0],
				       path->slots[0],
				       struct btrfs_chunk);
		key.offset = found_key.offset;
		/* chunk zero is special */
		if (key.offset == 0)
			break;

		ret = btrfs_relocate_chunk(chunk_root,
					   chunk_root->root_key.objectid,
					   found_key.objectid,
					   found_key.offset);
		BUG_ON(ret);
		btrfs_release_path(chunk_root, path);
	}
	ret = 0;
error:
	btrfs_free_path(path);
	mutex_unlock(&dev_root->fs_info->fs_mutex);
	return ret;
}
1276
8f18cf13
CM
/*
 * shrinking a device means finding all of the device extents past
 * the new size, and then following the back refs to the chunks.
 * The chunk relocation code actually frees the device extent.
 *
 * The new size is committed first (device item and superblock total),
 * then the device extent items are walked from the highest offset down;
 * any extent ending beyond new_size has its owning chunk relocated,
 * which releases the extent as a side effect.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent = NULL;
	struct btrfs_path *path;
	u64 length;
	u64 chunk_tree;
	u64 chunk_objectid;
	u64 chunk_offset;
	int ret;
	int slot;
	struct extent_buffer *l;
	struct btrfs_key key;
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	u64 old_total = btrfs_super_total_bytes(super_copy);
	u64 diff = device->total_bytes - new_size;


	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	trans = btrfs_start_transaction(root, 1);
	if (!trans) {
		ret = -ENOMEM;
		goto done;
	}

	path->reada = 2;

	/* persist the shrunken size before moving anything */
	device->total_bytes = new_size;
	ret = btrfs_update_device(trans, device);
	if (ret) {
		btrfs_end_transaction(trans, root);
		goto done;
	}
	WARN_ON(diff > old_total);
	btrfs_set_super_total_bytes(super_copy, old_total - diff);
	btrfs_end_transaction(trans, root);

	/* walk this device's extent items from the end of the device down */
	key.objectid = device->devid;
	key.offset = (u64)-1;
	key.type = BTRFS_DEV_EXTENT_KEY;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto done;

		ret = btrfs_previous_item(root, path, 0, key.type);
		if (ret < 0)
			goto done;
		if (ret) {
			/* no more dev extents at all: we're finished */
			ret = 0;
			goto done;
		}

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, path->slots[0]);

		if (key.objectid != device->devid)
			goto done;

		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		length = btrfs_dev_extent_length(l, dev_extent);

		/* everything from here down fits inside the new size */
		if (key.offset + length <= new_size)
			goto done;

		chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
		chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
		btrfs_release_path(root, path);

		/* relocating the chunk frees this device extent */
		ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
					   chunk_offset);
		if (ret)
			goto done;
	}

done:
	btrfs_free_path(path);
	return ret;
}
1368
0b86a832
CM
1369int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1370 struct btrfs_root *root,
1371 struct btrfs_key *key,
1372 struct btrfs_chunk *chunk, int item_size)
1373{
1374 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1375 struct btrfs_disk_key disk_key;
1376 u32 array_size;
1377 u8 *ptr;
1378
1379 array_size = btrfs_super_sys_array_size(super_copy);
1380 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1381 return -EFBIG;
1382
1383 ptr = super_copy->sys_chunk_array + array_size;
1384 btrfs_cpu_key_to_disk(&disk_key, key);
1385 memcpy(ptr, &disk_key, sizeof(disk_key));
1386 ptr += sizeof(disk_key);
1387 memcpy(ptr, chunk, item_size);
1388 item_size += sizeof(disk_key);
1389 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1390 return 0;
1391}
1392
9b3f68b9
CM
1393static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
1394 int sub_stripes)
1395{
1396 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1397 return calc_size;
1398 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1399 return calc_size * (num_stripes / sub_stripes);
1400 else
1401 return calc_size * num_stripes;
1402}
1403
1404
0b86a832
CM
/*
 * Allocate a new chunk: pick a set of devices with enough free space,
 * reserve a device extent on each, insert the chunk item into the chunk
 * tree and register the logical->physical mapping.
 *
 * On success *start receives the chunk's logical offset and *num_bytes
 * its logical size.  Returns 0 or a negative errno; mid-stream failures
 * after the device extents are reserved are fatal (BUG_ON).
 *
 * The target stripe count is profile dependent and is retried (the
 * "again" loop) with fewer stripes or a smaller calc_size when not
 * enough devices qualify.
 */
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 *start,
		      u64 *num_bytes, u64 type)
{
	u64 dev_offset;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct btrfs_path *path;
	struct btrfs_stripe *stripes;
	struct btrfs_device *device = NULL;
	struct btrfs_chunk *chunk;
	struct list_head private_devs;
	struct list_head *dev_list;
	struct list_head *cur;
	struct extent_map_tree *em_tree;
	struct map_lookup *map;
	struct extent_map *em;
	int min_stripe_size = 1 * 1024 * 1024;
	u64 physical;
	u64 calc_size = 1024 * 1024 * 1024;	/* per-stripe size, 1GB default */
	u64 max_chunk_size = calc_size;
	u64 min_free;
	u64 avail;
	u64 max_avail = 0;
	u64 percent_max;
	int num_stripes = 1;
	int min_stripes = 1;
	int sub_stripes = 0;
	int looped = 0;
	int ret;
	int index;
	int stripe_len = 64 * 1024;
	struct btrfs_key key;

	/* RAID1 and DUP are mutually exclusive; RAID1 wins */
	if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
	    (type & BTRFS_BLOCK_GROUP_DUP)) {
		WARN_ON(1);
		type &= ~BTRFS_BLOCK_GROUP_DUP;
	}
	dev_list = &extent_root->fs_info->fs_devices->alloc_list;
	if (list_empty(dev_list))
		return -ENOSPC;

	/* work out how many stripes the profile wants and its minimum */
	if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
		num_stripes = extent_root->fs_info->fs_devices->open_devices;
		min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
		num_stripes = 2;
		min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
		num_stripes = min_t(u64, 2,
			    extent_root->fs_info->fs_devices->open_devices);
		if (num_stripes < 2)
			return -ENOSPC;
		min_stripes = 2;
	}
	if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
		num_stripes = extent_root->fs_info->fs_devices->open_devices;
		if (num_stripes < 4)
			return -ENOSPC;
		/* RAID10 needs an even stripe count */
		num_stripes &= ~(u32)1;
		sub_stripes = 2;
		min_stripes = 4;
	}

	/* per-usage sizing policy */
	if (type & BTRFS_BLOCK_GROUP_DATA) {
		max_chunk_size = 10 * calc_size;
		min_stripe_size = 64 * 1024 * 1024;
	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
		max_chunk_size = 4 * calc_size;
		min_stripe_size = 32 * 1024 * 1024;
	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		calc_size = 8 * 1024 * 1024;
		max_chunk_size = calc_size * 2;
		min_stripe_size = 1 * 1024 * 1024;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* we don't want a chunk larger than 10% of the FS */
	percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
	max_chunk_size = min(percent_max, max_chunk_size);

again:
	/* clamp the per-stripe size so the whole chunk fits the cap,
	 * rounded down to a stripe_len multiple */
	if (calc_size * num_stripes > max_chunk_size) {
		calc_size = max_chunk_size;
		do_div(calc_size, num_stripes);
		do_div(calc_size, stripe_len);
		calc_size *= stripe_len;
	}
	/* we don't want tiny stripes */
	calc_size = max_t(u64, min_stripe_size, calc_size);

	do_div(calc_size, stripe_len);
	calc_size *= stripe_len;

	INIT_LIST_HEAD(&private_devs);
	cur = dev_list->next;
	index = 0;

	/* DUP places two stripes on the same device */
	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_free = calc_size * 2;
	else
		min_free = calc_size;

	/* we add 1MB because we never use the first 1MB of the device */
	min_free += 1024 * 1024;

	/* build a private list of devices we will allocate from */
	while(index < num_stripes) {
		device = list_entry(cur, struct btrfs_device, dev_alloc_list);

		if (device->total_bytes > device->bytes_used)
			avail = device->total_bytes - device->bytes_used;
		else
			avail = 0;
		cur = cur->next;

		if (device->in_fs_metadata && avail >= min_free) {
			u64 ignored_start = 0;
			ret = find_free_dev_extent(trans, device, path,
						   min_free,
						   &ignored_start);
			if (ret == 0) {
				list_move_tail(&device->dev_alloc_list,
					       &private_devs);
				index++;
				if (type & BTRFS_BLOCK_GROUP_DUP)
					index++;
			}
		} else if (device->in_fs_metadata && avail > max_avail)
			/* remember the best near-miss for the retry below */
			max_avail = avail;
		if (cur == dev_list)
			break;
	}
	if (index < num_stripes) {
		/* not enough devices: give them back and retry smaller */
		list_splice(&private_devs, dev_list);
		if (index >= min_stripes) {
			num_stripes = index;
			if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
				/* keep a multiple of sub_stripes */
				num_stripes /= sub_stripes;
				num_stripes *= sub_stripes;
			}
			looped = 1;
			goto again;
		}
		if (!looped && max_avail > 0) {
			looped = 1;
			calc_size = max_avail;
			goto again;
		}
		btrfs_free_path(path);
		return -ENOSPC;
	}
	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
			      &key.offset);
	if (ret) {
		btrfs_free_path(path);
		return ret;
	}

	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
	if (!chunk) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		kfree(chunk);
		btrfs_free_path(path);
		return -ENOMEM;
	}
	btrfs_free_path(path);
	path = NULL;

	stripes = &chunk->stripe;
	*num_bytes = chunk_bytes_by_type(type, calc_size,
					 num_stripes, sub_stripes);

	/* reserve a device extent per stripe and fill both the on-disk
	 * stripe array and the in-memory map */
	index = 0;
	while(index < num_stripes) {
		struct btrfs_stripe *stripe;
		BUG_ON(list_empty(&private_devs));
		cur = private_devs.next;
		device = list_entry(cur, struct btrfs_device, dev_alloc_list);

		/* loop over this device again if we're doing a dup group */
		if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
		    (index == num_stripes - 1))
			list_move_tail(&device->dev_alloc_list, dev_list);

		ret = btrfs_alloc_dev_extent(trans, device,
			     info->chunk_root->root_key.objectid,
			     BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
			     calc_size, &dev_offset);
		BUG_ON(ret);
		device->bytes_used += calc_size;
		ret = btrfs_update_device(trans, device);
		BUG_ON(ret);

		map->stripes[index].dev = device;
		map->stripes[index].physical = dev_offset;
		stripe = stripes + index;
		btrfs_set_stack_stripe_devid(stripe, device->devid);
		btrfs_set_stack_stripe_offset(stripe, dev_offset);
		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
		physical = dev_offset;
		index++;
	}
	BUG_ON(!list_empty(&private_devs));

	/* key was set above */
	btrfs_set_stack_chunk_length(chunk, *num_bytes);
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
	btrfs_set_stack_chunk_type(chunk, type);
	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
	btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
	map->sector_size = extent_root->sectorsize;
	map->stripe_len = stripe_len;
	map->io_align = stripe_len;
	map->io_width = stripe_len;
	map->type = type;
	map->num_stripes = num_stripes;
	map->sub_stripes = sub_stripes;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
				btrfs_chunk_item_size(num_stripes));
	BUG_ON(ret);
	*start = key.offset;;

	/* NOTE(review): if this allocation fails, chunk and map are
	 * leaked and the inserted chunk item is left behind — verify
	 * whether the error path should clean those up */
	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;
	/* the extent map piggybacks the map_lookup in its bdev slot */
	em->bdev = (struct block_device *)map;
	em->start = key.offset;
	em->len = *num_bytes;
	em->block_start = 0;

	if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
		ret = btrfs_add_system_chunk(trans, chunk_root, &key,
			     chunk, btrfs_chunk_item_size(num_stripes));
		BUG_ON(ret);
	}
	kfree(chunk);

	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
	spin_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	spin_unlock(&em_tree->lock);
	BUG_ON(ret);
	free_extent_map(em);
	return ret;
}
1669
/* Initialize the logical->physical mapping tree for a filesystem. */
void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
}
1674
1675void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
1676{
1677 struct extent_map *em;
1678
1679 while(1) {
1680 spin_lock(&tree->map_tree.lock);
1681 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
1682 if (em)
1683 remove_extent_mapping(&tree->map_tree, em);
1684 spin_unlock(&tree->map_tree.lock);
1685 if (!em)
1686 break;
1687 kfree(em->bdev);
1688 /* once for us */
1689 free_extent_map(em);
1690 /* once for the tree */
1691 free_extent_map(em);
1692 }
1693}
1694
f188591e
CM
1695int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
1696{
1697 struct extent_map *em;
1698 struct map_lookup *map;
1699 struct extent_map_tree *em_tree = &map_tree->map_tree;
1700 int ret;
1701
1702 spin_lock(&em_tree->lock);
1703 em = lookup_extent_mapping(em_tree, logical, len);
b248a415 1704 spin_unlock(&em_tree->lock);
f188591e
CM
1705 BUG_ON(!em);
1706
1707 BUG_ON(em->start > logical || em->start + em->len < logical);
1708 map = (struct map_lookup *)em->bdev;
1709 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
1710 ret = map->num_stripes;
321aecc6
CM
1711 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
1712 ret = map->sub_stripes;
f188591e
CM
1713 else
1714 ret = 1;
1715 free_extent_map(em);
f188591e
CM
1716 return ret;
1717}
1718
dfe25020
CM
1719static int find_live_mirror(struct map_lookup *map, int first, int num,
1720 int optimal)
1721{
1722 int i;
1723 if (map->stripes[optimal].dev->bdev)
1724 return optimal;
1725 for (i = first; i < first + num; i++) {
1726 if (map->stripes[i].dev->bdev)
1727 return i;
1728 }
1729 /* we couldn't find one that doesn't fail. Just return something
1730 * and the io error handling code will clean up eventually
1731 */
1732 return optimal;
1733}
1734
f2d8d74d
CM
/*
 * Core logical->physical translation.
 *
 * Maps [logical, logical + *length) to one or more device stripes.
 * *length is trimmed to what fits in a single stripe for striped
 * profiles.  When multi_ret is set, a btrfs_multi_bio describing every
 * target stripe is returned (all mirrors for writes, a single chosen
 * mirror for reads).  When unplug_page is set the function instead
 * kicks the backing devices' unplug functions for that page.
 *
 * mirror_num, when nonzero, pins reads to a specific mirror (1-based).
 * Returns 0 or -ENOMEM.
 */
static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
			     u64 logical, u64 *length,
			     struct btrfs_multi_bio **multi_ret,
			     int mirror_num, struct page *unplug_page)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	u64 offset;
	u64 stripe_offset;
	u64 stripe_nr;
	int stripes_allocated = 8;
	int stripes_required = 1;
	int stripe_index;
	int i;
	int num_stripes;
	int max_errors = 0;
	struct btrfs_multi_bio *multi = NULL;

	/* reads target a single stripe; start small */
	if (multi_ret && !(rw & (1 << BIO_RW))) {
		stripes_allocated = 1;
	}
again:
	if (multi_ret) {
		multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
				GFP_NOFS);
		if (!multi)
			return -ENOMEM;

		atomic_set(&multi->error, 0);
	}

	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, *length);
	spin_unlock(&em_tree->lock);

	/* unplug requests for unmapped ranges are simply ignored */
	if (!em && unplug_page)
		return 0;

	if (!em) {
		printk("unable to find logical %Lu len %Lu\n", logical, *length);
		BUG();
	}

	BUG_ON(em->start > logical || em->start + em->len < logical);
	map = (struct map_lookup *)em->bdev;
	offset = logical - em->start;

	if (mirror_num > map->num_stripes)
		mirror_num = 0;

	/* if our multi bio struct is too small, back off and try again */
	if (rw & (1 << BIO_RW)) {
		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
				 BTRFS_BLOCK_GROUP_DUP)) {
			stripes_required = map->num_stripes;
			max_errors = 1;
		} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
			stripes_required = map->sub_stripes;
			max_errors = 1;
		}
	}
	if (multi_ret && rw == WRITE &&
	    stripes_allocated < stripes_required) {
		stripes_allocated = map->num_stripes;
		free_extent_map(em);
		kfree(multi);
		goto again;
	}
	stripe_nr = offset;
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block
	 */
	do_div(stripe_nr, map->stripe_len);

	stripe_offset = stripe_nr * map->stripe_len;
	BUG_ON(offset < stripe_offset);

	/* stripe_offset is the offset of this block in its stripe*/
	stripe_offset = offset - stripe_offset;

	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_RAID10 |
			 BTRFS_BLOCK_GROUP_DUP)) {
		/* we limit the length of each bio to what fits in a stripe */
		*length = min_t(u64, em->len - offset,
			      map->stripe_len - stripe_offset);
	} else {
		*length = em->len - offset;
	}

	/* pure length query: nothing more to compute */
	if (!multi_ret && !unplug_page)
		goto out;

	/* choose how many stripes to touch and where to start */
	num_stripes = 1;
	stripe_index = 0;
	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
		/* writes (and unplugs) hit every mirror; reads pick one */
		if (unplug_page || (rw & (1 << BIO_RW)))
			num_stripes = map->num_stripes;
		else if (mirror_num)
			stripe_index = mirror_num - 1;
		else {
			stripe_index = find_live_mirror(map, 0,
					    map->num_stripes,
					    current->pid % map->num_stripes);
		}

	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		if (rw & (1 << BIO_RW))
			num_stripes = map->num_stripes;
		else if (mirror_num)
			stripe_index = mirror_num - 1;

	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
		int factor = map->num_stripes / map->sub_stripes;

		/* which mirror set, then the first stripe inside it */
		stripe_index = do_div(stripe_nr, factor);
		stripe_index *= map->sub_stripes;

		if (unplug_page || (rw & (1 << BIO_RW)))
			num_stripes = map->sub_stripes;
		else if (mirror_num)
			stripe_index += mirror_num - 1;
		else {
			stripe_index = find_live_mirror(map, stripe_index,
					      map->sub_stripes, stripe_index +
					      current->pid % map->sub_stripes);
		}
	} else {
		/*
		 * after this do_div call, stripe_nr is the number of stripes
		 * on this device we have to walk to find the data, and
		 * stripe_index is the number of our device in the stripe array
		 */
		stripe_index = do_div(stripe_nr, map->num_stripes);
	}
	BUG_ON(stripe_index >= map->num_stripes);

	for (i = 0; i < num_stripes; i++) {
		if (unplug_page) {
			struct btrfs_device *device;
			struct backing_dev_info *bdi;

			device = map->stripes[stripe_index].dev;
			if (device->bdev) {
				bdi = blk_get_backing_dev_info(device->bdev);
				if (bdi->unplug_io_fn) {
					bdi->unplug_io_fn(bdi, unplug_page);
				}
			}
		} else {
			multi->stripes[i].physical =
				map->stripes[stripe_index].physical +
				stripe_offset + stripe_nr * map->stripe_len;
			multi->stripes[i].dev = map->stripes[stripe_index].dev;
		}
		stripe_index++;
	}
	if (multi_ret) {
		*multi_ret = multi;
		multi->num_stripes = num_stripes;
		multi->max_errors = max_errors;
	}
out:
	free_extent_map(em);
	return 0;
}
1903
f2d8d74d
CM
/*
 * Public wrapper around __btrfs_map_block() for the normal mapping case
 * (no unplug page).  See __btrfs_map_block() for parameter semantics.
 */
int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
		    u64 logical, u64 *length,
		    struct btrfs_multi_bio **multi_ret, int mirror_num)
{
	return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
				 mirror_num, NULL);
}
1911
/*
 * Kick the unplug functions of every device backing one page's worth of
 * the logical address space (used by readahead/writeback unplugging).
 */
int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
		      u64 logical, struct page *page)
{
	u64 length = PAGE_CACHE_SIZE;
	return __btrfs_map_block(map_tree, READ, logical, &length,
				 NULL, 0, page);
}
1919
1920
8790d502
CM
/*
 * Completion handler for the cloned bios of a multi-stripe request.
 *
 * Each finishing clone decrements stripes_pending; the last one restores
 * the caller's private data and end_io on the surviving bio and
 * completes it.  Errors are only propagated when more stripes failed
 * than the profile tolerates (max_errors).  The #if blocks cover the
 * bio_endio() prototype change made in kernel 2.6.24.
 */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_multi_stripe(struct bio *bio, int err)
#else
static int end_bio_multi_stripe(struct bio *bio,
				unsigned int bytes_done, int err)
#endif
{
	struct btrfs_multi_bio *multi = bio->bi_private;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	/* old API calls back on partial completion; wait for the rest */
	if (bio->bi_size)
		return 1;
#endif
	if (err)
		atomic_inc(&multi->error);

	if (atomic_dec_and_test(&multi->stripes_pending)) {
		/* last stripe: hand the bio back to the original owner */
		bio->bi_private = multi->private;
		bio->bi_end_io = multi->end_io;
		/* only send an error to the higher layers if it is
		 * beyond the tolerance of the multi-bio
		 */
		if (atomic_read(&multi->error) > multi->max_errors) {
			err = -EIO;
		} else if (err) {
			/*
			 * this bio is actually up to date, we didn't
			 * go over the max number of errors
			 */
			set_bit(BIO_UPTODATE, &bio->bi_flags);
			err = 0;
		}
		kfree(multi);

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
		bio_endio(bio, bio->bi_size, err);
#else
		bio_endio(bio, err);
#endif
	} else {
		bio_put(bio);
	}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}
1967
f188591e
CM
/*
 * Map a bio to its physical stripes and submit it.
 *
 * The logical range is resolved with btrfs_map_block(); when it spans
 * multiple stripes (mirrored writes), the bio is cloned once per extra
 * stripe and all clones complete through end_bio_multi_stripe().  A
 * stripe whose device is missing is failed immediately with -EIO.
 *
 * mirror_num, when nonzero, pins reads to a specific mirror.
 */
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
		  int mirror_num)
{
	struct btrfs_mapping_tree *map_tree;
	struct btrfs_device *dev;
	struct bio *first_bio = bio;
	u64 logical = bio->bi_sector << 9;
	u64 length = 0;
	u64 map_length;
	struct btrfs_multi_bio *multi = NULL;
	int ret;
	int dev_nr = 0;
	int total_devs = 1;

	length = bio->bi_size;
	map_tree = &root->fs_info->mapping_tree;
	map_length = length;

	ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
			      mirror_num);
	BUG_ON(ret);

	total_devs = multi->num_stripes;
	/* callers must not submit bios that cross a stripe boundary */
	if (map_length < length) {
		printk("mapping failed logical %Lu bio len %Lu "
		       "len %Lu\n", logical, length, map_length);
		BUG();
	}
	multi->end_io = first_bio->bi_end_io;
	multi->private = first_bio->bi_private;
	atomic_set(&multi->stripes_pending, multi->num_stripes);

	while(dev_nr < total_devs) {
		if (total_devs > 1) {
			/* clone for every stripe but the last, which
			 * reuses the original bio */
			if (dev_nr < total_devs - 1) {
				bio = bio_clone(first_bio, GFP_NOFS);
				BUG_ON(!bio);
			} else {
				bio = first_bio;
			}
			bio->bi_private = multi;
			bio->bi_end_io = end_bio_multi_stripe;
		}
		bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
		dev = multi->stripes[dev_nr].dev;
		if (dev && dev->bdev) {
			bio->bi_bdev = dev->bdev;
			spin_lock(&dev->io_lock);
			dev->total_ios++;
			spin_unlock(&dev->io_lock);
			submit_bio(rw, bio);
		} else {
			/* missing device: fail this stripe right away */
			bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
			bio->bi_sector = logical >> 9;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
			bio_endio(bio, bio->bi_size, -EIO);
#else
			bio_endio(bio, -EIO);
#endif
		}
		dev_nr++;
	}
	/* single-stripe bios keep their own end_io, so multi is ours */
	if (total_devs == 1)
		kfree(multi);
	return 0;
}
2034
a443755f
CM
2035struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2036 u8 *uuid)
0b86a832 2037{
8a4b83cc 2038 struct list_head *head = &root->fs_info->fs_devices->devices;
0b86a832 2039
a443755f 2040 return __find_device(head, devid, uuid);
0b86a832
CM
2041}
2042
dfe25020
CM
2043static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2044 u64 devid, u8 *dev_uuid)
2045{
2046 struct btrfs_device *device;
2047 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2048
2049 device = kzalloc(sizeof(*device), GFP_NOFS);
2050 list_add(&device->dev_list,
2051 &fs_devices->devices);
2052 list_add(&device->dev_alloc_list,
2053 &fs_devices->alloc_list);
2054 device->barriers = 1;
2055 device->dev_root = root->fs_info->dev_root;
2056 device->devid = devid;
2057 fs_devices->num_devices++;
2058 spin_lock_init(&device->io_lock);
2059 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2060 return device;
2061}
2062
2063
0b86a832
CM
2064static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2065 struct extent_buffer *leaf,
2066 struct btrfs_chunk *chunk)
2067{
2068 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2069 struct map_lookup *map;
2070 struct extent_map *em;
2071 u64 logical;
2072 u64 length;
2073 u64 devid;
a443755f 2074 u8 uuid[BTRFS_UUID_SIZE];
593060d7 2075 int num_stripes;
0b86a832 2076 int ret;
593060d7 2077 int i;
0b86a832 2078
e17cade2
CM
2079 logical = key->offset;
2080 length = btrfs_chunk_length(leaf, chunk);
a061fc8d 2081
0b86a832
CM
2082 spin_lock(&map_tree->map_tree.lock);
2083 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
b248a415 2084 spin_unlock(&map_tree->map_tree.lock);
0b86a832
CM
2085
2086 /* already mapped? */
2087 if (em && em->start <= logical && em->start + em->len > logical) {
2088 free_extent_map(em);
0b86a832
CM
2089 return 0;
2090 } else if (em) {
2091 free_extent_map(em);
2092 }
0b86a832
CM
2093
2094 map = kzalloc(sizeof(*map), GFP_NOFS);
2095 if (!map)
2096 return -ENOMEM;
2097
2098 em = alloc_extent_map(GFP_NOFS);
2099 if (!em)
2100 return -ENOMEM;
593060d7
CM
2101 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2102 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
0b86a832
CM
2103 if (!map) {
2104 free_extent_map(em);
2105 return -ENOMEM;
2106 }
2107
2108 em->bdev = (struct block_device *)map;
2109 em->start = logical;
2110 em->len = length;
2111 em->block_start = 0;
2112
593060d7
CM
2113 map->num_stripes = num_stripes;
2114 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2115 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2116 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2117 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2118 map->type = btrfs_chunk_type(leaf, chunk);
321aecc6 2119 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
593060d7
CM
2120 for (i = 0; i < num_stripes; i++) {
2121 map->stripes[i].physical =
2122 btrfs_stripe_offset_nr(leaf, chunk, i);
2123 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
a443755f
CM
2124 read_extent_buffer(leaf, uuid, (unsigned long)
2125 btrfs_stripe_dev_uuid_nr(chunk, i),
2126 BTRFS_UUID_SIZE);
2127 map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
dfe25020
CM
2128
2129 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
593060d7
CM
2130 kfree(map);
2131 free_extent_map(em);
2132 return -EIO;
2133 }
dfe25020
CM
2134 if (!map->stripes[i].dev) {
2135 map->stripes[i].dev =
2136 add_missing_dev(root, devid, uuid);
2137 if (!map->stripes[i].dev) {
2138 kfree(map);
2139 free_extent_map(em);
2140 return -EIO;
2141 }
2142 }
2143 map->stripes[i].dev->in_fs_metadata = 1;
0b86a832
CM
2144 }
2145
2146 spin_lock(&map_tree->map_tree.lock);
2147 ret = add_extent_mapping(&map_tree->map_tree, em);
0b86a832 2148 spin_unlock(&map_tree->map_tree.lock);
b248a415 2149 BUG_ON(ret);
0b86a832
CM
2150 free_extent_map(em);
2151
2152 return 0;
2153}
2154
2155static int fill_device_from_item(struct extent_buffer *leaf,
2156 struct btrfs_dev_item *dev_item,
2157 struct btrfs_device *device)
2158{
2159 unsigned long ptr;
0b86a832
CM
2160
2161 device->devid = btrfs_device_id(leaf, dev_item);
2162 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2163 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2164 device->type = btrfs_device_type(leaf, dev_item);
2165 device->io_align = btrfs_device_io_align(leaf, dev_item);
2166 device->io_width = btrfs_device_io_width(leaf, dev_item);
2167 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
0b86a832
CM
2168
2169 ptr = (unsigned long)btrfs_device_uuid(dev_item);
e17cade2 2170 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
0b86a832 2171
0b86a832
CM
2172 return 0;
2173}
2174
0d81ba5d 2175static int read_one_dev(struct btrfs_root *root,
0b86a832
CM
2176 struct extent_buffer *leaf,
2177 struct btrfs_dev_item *dev_item)
2178{
2179 struct btrfs_device *device;
2180 u64 devid;
2181 int ret;
a443755f
CM
2182 u8 dev_uuid[BTRFS_UUID_SIZE];
2183
0b86a832 2184 devid = btrfs_device_id(leaf, dev_item);
a443755f
CM
2185 read_extent_buffer(leaf, dev_uuid,
2186 (unsigned long)btrfs_device_uuid(dev_item),
2187 BTRFS_UUID_SIZE);
2188 device = btrfs_find_device(root, devid, dev_uuid);
6324fbf3 2189 if (!device) {
dfe25020
CM
2190 printk("warning devid %Lu missing\n", devid);
2191 device = add_missing_dev(root, devid, dev_uuid);
6324fbf3
CM
2192 if (!device)
2193 return -ENOMEM;
6324fbf3 2194 }
0b86a832
CM
2195
2196 fill_device_from_item(leaf, dev_item, device);
2197 device->dev_root = root->fs_info->dev_root;
dfe25020 2198 device->in_fs_metadata = 1;
0b86a832
CM
2199 ret = 0;
2200#if 0
2201 ret = btrfs_open_device(device);
2202 if (ret) {
2203 kfree(device);
2204 }
2205#endif
2206 return ret;
2207}
2208
0d81ba5d
CM
2209int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2210{
2211 struct btrfs_dev_item *dev_item;
2212
2213 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2214 dev_item);
2215 return read_one_dev(root, buf, dev_item);
2216}
2217
0b86a832
CM
2218int btrfs_read_sys_array(struct btrfs_root *root)
2219{
2220 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
a061fc8d 2221 struct extent_buffer *sb;
0b86a832 2222 struct btrfs_disk_key *disk_key;
0b86a832 2223 struct btrfs_chunk *chunk;
84eed90f
CM
2224 u8 *ptr;
2225 unsigned long sb_ptr;
2226 int ret = 0;
0b86a832
CM
2227 u32 num_stripes;
2228 u32 array_size;
2229 u32 len = 0;
0b86a832 2230 u32 cur;
84eed90f 2231 struct btrfs_key key;
0b86a832 2232
a061fc8d
CM
2233 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
2234 BTRFS_SUPER_INFO_SIZE);
2235 if (!sb)
2236 return -ENOMEM;
2237 btrfs_set_buffer_uptodate(sb);
2238 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
0b86a832
CM
2239 array_size = btrfs_super_sys_array_size(super_copy);
2240
0b86a832
CM
2241 ptr = super_copy->sys_chunk_array;
2242 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
2243 cur = 0;
2244
2245 while (cur < array_size) {
2246 disk_key = (struct btrfs_disk_key *)ptr;
2247 btrfs_disk_key_to_cpu(&key, disk_key);
2248
a061fc8d 2249 len = sizeof(*disk_key); ptr += len;
0b86a832
CM
2250 sb_ptr += len;
2251 cur += len;
2252
0d81ba5d 2253 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
0b86a832 2254 chunk = (struct btrfs_chunk *)sb_ptr;
0d81ba5d 2255 ret = read_one_chunk(root, &key, sb, chunk);
84eed90f
CM
2256 if (ret)
2257 break;
0b86a832
CM
2258 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
2259 len = btrfs_chunk_item_size(num_stripes);
2260 } else {
84eed90f
CM
2261 ret = -EIO;
2262 break;
0b86a832
CM
2263 }
2264 ptr += len;
2265 sb_ptr += len;
2266 cur += len;
2267 }
a061fc8d 2268 free_extent_buffer(sb);
84eed90f 2269 return ret;
0b86a832
CM
2270}
2271
2272int btrfs_read_chunk_tree(struct btrfs_root *root)
2273{
2274 struct btrfs_path *path;
2275 struct extent_buffer *leaf;
2276 struct btrfs_key key;
2277 struct btrfs_key found_key;
2278 int ret;
2279 int slot;
2280
2281 root = root->fs_info->chunk_root;
2282
2283 path = btrfs_alloc_path();
2284 if (!path)
2285 return -ENOMEM;
2286
2287 /* first we search for all of the device items, and then we
2288 * read in all of the chunk items. This way we can create chunk
2289 * mappings that reference all of the devices that are afound
2290 */
2291 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2292 key.offset = 0;
2293 key.type = 0;
2294again:
2295 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2296 while(1) {
2297 leaf = path->nodes[0];
2298 slot = path->slots[0];
2299 if (slot >= btrfs_header_nritems(leaf)) {
2300 ret = btrfs_next_leaf(root, path);
2301 if (ret == 0)
2302 continue;
2303 if (ret < 0)
2304 goto error;
2305 break;
2306 }
2307 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2308 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2309 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
2310 break;
2311 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
2312 struct btrfs_dev_item *dev_item;
2313 dev_item = btrfs_item_ptr(leaf, slot,
2314 struct btrfs_dev_item);
0d81ba5d 2315 ret = read_one_dev(root, leaf, dev_item);
0b86a832
CM
2316 BUG_ON(ret);
2317 }
2318 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
2319 struct btrfs_chunk *chunk;
2320 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2321 ret = read_one_chunk(root, &found_key, leaf, chunk);
2322 }
2323 path->slots[0]++;
2324 }
2325 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2326 key.objectid = 0;
2327 btrfs_release_path(root, path);
2328 goto again;
2329 }
2330
2331 btrfs_free_path(path);
2332 ret = 0;
2333error:
2334 return ret;
2335}
2336