/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
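
/*
 * per-chunk bookkeeping hung off each cached extent_map through em->bdev:
 * the single backing device and the physical offset of the chunk on it
 */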
struct map_lookup {
	struct btrfs_device *dev;
	u64 physical;
};

/*
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 */
static int find_free_dev_extent(struct btrfs_trans_handle *trans,
				struct btrfs_device *device,
				struct btrfs_path *path,
				u64 num_bytes, u64 *start)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent = NULL;
	u64 hole_size = 0;
	u64 last_byte = 0;
	u64 search_start = 0;
	u64 search_end = device->total_bytes;
	int ret;
	int slot = 0;
	int start_found = 0;
	struct extent_buffer *l;

	path->reada = 2;

	/* FIXME use last free of some kind */

	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = btrfs_previous_item(root, path, 0, key.type);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
no_more_items:
			if (!start_found) {
				if (search_start >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				*start = search_start;
				start_found = 1;
				goto check_pending;
			}
			*start = last_byte > search_start ?
				last_byte : search_start;
			if (search_end <= *start) {
				ret = -ENOSPC;
				goto error;
			}
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			goto no_more_items;

		if (key.offset >= search_start && key.offset > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			hole_size = key.offset - last_byte;
			if (key.offset > last_byte &&
			    hole_size >= num_bytes) {
				*start = last_byte;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY)
			goto next;

		start_found = 1;
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(*start < search_start);

	if (*start + num_bytes > search_end) {
		ret = -ENOSPC;
		goto error;
	}
	/* check for pending inserts here */
	return 0;

error:
	btrfs_release_path(root, path);
	return ret;
}
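
/*
 * find a free range of at least num_bytes on the device, insert a dev
 * extent item covering it, and return its byte offset on the device
 * in *start
 */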
int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_device *device,
			   u64 owner, u64 num_bytes, u64 *start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
	if (ret)
		goto err;

	key.objectid = device->devid;
	key.offset = *start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	BUG_ON(ret);

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_owner(leaf, extent, owner);
	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
err:
	btrfs_free_path(path);
	return ret;
}
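
/*
 * chunks are keyed by their logical start address; find the address just
 * past the end of the last chunk item, or 0 if the chunk tree is empty
 */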
static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = (u64)-1;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
	if (ret) {
		*objectid = 0;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.objectid + found_key.offset;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
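
/*
 * pick the next unused device id: one past the highest devid stored in
 * the chunk tree, or 1 for the very first device
 */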
static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
			   u64 *objectid)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*objectid = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_release_path(root, path);
	return ret;
}

/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */
int btrfs_add_device(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;
	u64 free_devid;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_next_devid(root, path, &free_devid);
	if (ret)
		goto out;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = free_devid;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);

	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
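
/*
 * write the fields of an in-memory btrfs_device back to its dev item
 * in the chunk tree
 */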
int btrfs_update_device(struct btrfs_trans_handle *trans,
			struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	root = device->dev_root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	btrfs_mark_buffer_dirty(leaf);

out:
	btrfs_free_path(path);
	return ret;
}
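
/*
 * append a key/chunk item pair to the sys_chunk_array embedded in the
 * super block copy, so system chunks can be found at mount time before
 * any trees are readable
 */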
int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_key *key,
			   struct btrfs_chunk *chunk, int item_size)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct btrfs_disk_key disk_key;
	u32 array_size;
	u8 *ptr;

	array_size = btrfs_super_sys_array_size(super_copy);
	if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
		return -EFBIG;

	ptr = super_copy->sys_chunk_array + array_size;
	btrfs_cpu_key_to_disk(&disk_key, key);
	memcpy(ptr, &disk_key, sizeof(disk_key));
	ptr += sizeof(disk_key);
	memcpy(ptr, chunk, item_size);
	item_size += sizeof(disk_key);
	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
	return 0;
}
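
/*
 * allocate a new chunk: find devices with enough free space, reserve a
 * dev extent on each one, insert the chunk item into the chunk tree and
 * cache the new logical->physical mapping in the extent map tree
 */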
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 *start,
		      u64 *num_bytes, u64 type)
{
	u64 dev_offset;
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct btrfs_stripe *stripes;
	struct btrfs_device *device = NULL;
	struct btrfs_chunk *chunk;
	struct list_head private_devs;
	struct list_head *dev_list = &extent_root->fs_info->devices;
	struct list_head *cur;
	struct extent_map_tree *em_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 physical;
	u64 calc_size = 1024 * 1024 * 1024;
	u64 avail;
	u64 max_avail = 0;
	int num_stripes = 1;
	int looped = 0;
	int ret;
	int index;
	struct btrfs_key key;

	if (list_empty(dev_list))
		return -ENOSPC;
again:
	INIT_LIST_HEAD(&private_devs);
	cur = dev_list->next;
	index = 0;
	/* build a private list of devices we will allocate from */
	while(index < num_stripes) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		avail = device->total_bytes - device->bytes_used;
		cur = cur->next;
		if (avail > max_avail)
			max_avail = avail;
		if (avail >= calc_size) {
			list_move_tail(&device->dev_list, &private_devs);
			index++;
		}
		if (cur == dev_list)
			break;
	}
	if (index < num_stripes) {
		list_splice(&private_devs, dev_list);
		if (!looped && max_avail > 0) {
			looped = 1;
			calc_size = max_avail;
			goto again;
		}
		return -ENOSPC;
	}

	ret = find_next_chunk(chunk_root, &key.objectid);
	if (ret)
		return ret;

	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
	if (!chunk)
		return -ENOMEM;

	stripes = &chunk->stripe;

	*num_bytes = calc_size;
	index = 0;
	while(index < num_stripes) {
		BUG_ON(list_empty(&private_devs));
		cur = private_devs.next;
		device = list_entry(cur, struct btrfs_device, dev_list);
		list_move_tail(&device->dev_list, dev_list);

		ret = btrfs_alloc_dev_extent(trans, device,
					     key.objectid,
					     calc_size, &dev_offset);
		BUG_ON(ret);

		device->bytes_used += calc_size;
		ret = btrfs_update_device(trans, device);
		BUG_ON(ret);

		btrfs_set_stack_stripe_devid(stripes + index, device->devid);
		btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
		physical = dev_offset;
		index++;
	}
	BUG_ON(!list_empty(&private_devs));

	/* key.objectid was set above */
	key.offset = *num_bytes;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
	btrfs_set_stack_chunk_type(chunk, type);
	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
	btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
				btrfs_chunk_item_size(num_stripes));
	BUG_ON(ret);
	*start = key.objectid;

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;
	map = kmalloc(sizeof(*map), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	em->bdev = (struct block_device *)map;
	em->start = key.objectid;
	em->len = key.offset;
	em->block_start = 0;

	map->physical = physical;
	map->dev = device;
	kfree(chunk);

	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
	spin_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	BUG_ON(ret);
	spin_unlock(&em_tree->lock);
	free_extent_map(em);
	return ret;
}

void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
}
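
/*
 * drop every cached mapping at unmount time.  Each extent_map carries
 * one reference for the lookup and one held by the tree itself
 */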
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
{
	struct extent_map *em;

	while(1) {
		spin_lock(&tree->map_tree.lock);
		em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
		if (em)
			remove_extent_mapping(&tree->map_tree, em);
		spin_unlock(&tree->map_tree.lock);
		if (!em)
			break;
		kfree(em->bdev);
		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
}
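
/*
 * translate a logical byte offset into a physical offset on a single
 * device.  On return *length is trimmed to the end of the chunk
 */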
int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
		    u64 logical, u64 *phys, u64 *length,
		    struct btrfs_device **dev)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	u64 offset;

	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, *length);
	BUG_ON(!em);

	BUG_ON(em->start > logical || em->start + em->len < logical);
	map = (struct map_lookup *)em->bdev;
	offset = logical - em->start;
	*phys = map->physical + offset;
	*length = em->len - offset;
	*dev = map->dev;
	free_extent_map(em);
	spin_unlock(&em_tree->lock);
	return 0;
}
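
/*
 * remap a bio from logical filesystem offsets to the physical location
 * on the backing device and submit it.  The bio must not span chunks
 */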
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
{
	struct btrfs_mapping_tree *map_tree;
	struct btrfs_device *dev;
	/* cast before shifting: sector_t may be 32 bits */
	u64 logical = (u64)bio->bi_sector << 9;
	u64 physical;
	u64 length = 0;
	u64 map_length;
	struct bio_vec *bvec;
	int i;
	int ret;

	bio_for_each_segment(bvec, bio, i) {
		length += bvec->bv_len;
	}
	map_tree = &root->fs_info->mapping_tree;
	map_length = length;
	ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
	BUG_ON(map_length < length);
	bio->bi_sector = physical >> 9;
	bio->bi_bdev = dev->bdev;
	submit_bio(rw, bio);
	return 0;
}
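
/*
 * walk the per-fs device list looking for a matching devid, returning
 * NULL if it isn't present
 */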
struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
{
	struct btrfs_device *dev;
	struct list_head *cur = root->fs_info->devices.next;
	struct list_head *head = &root->fs_info->devices;

	while(cur != head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		if (dev->devid == devid)
			return dev;
		cur = cur->next;
	}
	return NULL;
}
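
/*
 * build the in-memory extent map for a single chunk item read from the
 * chunk tree or the sys array, and insert it into the mapping tree
 */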
static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
			  struct extent_buffer *leaf,
			  struct btrfs_chunk *chunk)
{
	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 logical;
	u64 length;
	u64 devid;
	int ret;

	logical = key->objectid;
	length = key->offset;
	spin_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);

	/* already mapped? */
	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);
		spin_unlock(&map_tree->map_tree.lock);
		return 0;
	} else if (em) {
		free_extent_map(em);
	}
	spin_unlock(&map_tree->map_tree.lock);

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;
	map = kzalloc(sizeof(*map), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	em->bdev = (struct block_device *)map;
	em->start = logical;
	em->len = length;
	em->block_start = 0;

	map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
	devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
	map->dev = btrfs_find_device(root, devid);
	if (!map->dev) {
		kfree(map);
		free_extent_map(em);
		return -EIO;
	}

	spin_lock(&map_tree->map_tree.lock);
	ret = add_extent_mapping(&map_tree->map_tree, em);
	BUG_ON(ret);
	spin_unlock(&map_tree->map_tree.lock);
	free_extent_map(em);

	return 0;
}
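
/*
 * copy the on-disk dev item fields into the in-memory btrfs_device
 */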
static int fill_device_from_item(struct extent_buffer *leaf,
				 struct btrfs_dev_item *dev_item,
				 struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);

	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);

	return 0;
}
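
/*
 * find or create the in-memory btrfs_device for one dev item and fill
 * it in from the leaf
 */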
static int read_one_dev(struct btrfs_root *root,
			struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_device *device;
	u64 devid;
	int ret;

	devid = btrfs_device_id(leaf, dev_item);
	device = btrfs_find_device(root, devid);
	if (!device) {
		device = kmalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			return -ENOMEM;
		list_add(&device->dev_list, &root->fs_info->devices);
	}

	fill_device_from_item(leaf, dev_item, device);
	device->dev_root = root->fs_info->dev_root;
	device->bdev = root->fs_info->sb->s_bdev;

	ret = btrfs_open_device(device);
	return ret;
}
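
/*
 * the super block embeds a copy of the dev item for the device it was
 * read from; use it to set up that device
 */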
int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
{
	struct btrfs_dev_item *dev_item;

	dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
						     dev_item);
	return read_one_dev(root, buf, dev_item);
}
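
/*
 * read the chunks packed into the super block's sys_chunk_array.  These
 * describe the system chunks, which must be mapped before the chunk
 * tree itself can be read
 */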
int btrfs_read_sys_array(struct btrfs_root *root)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct extent_buffer *sb = root->fs_info->sb_buffer;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u8 *ptr;
	unsigned long sb_ptr;
	u32 cur;
	int ret;

	array_size = btrfs_super_sys_array_size(super_copy);

	/*
	 * we do this loop twice, once for the device items and
	 * once for all of the chunks.  This way there are device
	 * structs filled in for every chunk
	 */
	ptr = super_copy->sys_chunk_array;
	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		len = sizeof(*disk_key);
		ptr += len;
		sb_ptr += len;
		cur += len;

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)sb_ptr;
			ret = read_one_chunk(root, &key, sb, chunk);
			BUG_ON(ret);
			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
			len = btrfs_chunk_item_size(num_stripes);
		} else {
			BUG();
		}
		ptr += len;
		sb_ptr += len;
		cur += len;
	}
	return 0;
}
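
/*
 * read every device item and chunk item out of the chunk tree and build
 * the in-memory device list and chunk mappings
 */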
int btrfs_read_chunk_tree(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first we search for all of the device items, and then we
	 * read in all of the chunk items.  This way we can create chunk
	 * mappings that reference all of the devices that are found
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
again:
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	while(1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
			if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
				break;
			if (found_key.type == BTRFS_DEV_ITEM_KEY) {
				struct btrfs_dev_item *dev_item;
				dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
				ret = read_one_dev(root, leaf, dev_item);
				BUG_ON(ret);
			}
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(root, &found_key, leaf, chunk);
			BUG_ON(ret);
		}
		path->slots[0]++;
	}
	if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
		key.objectid = 0;
		btrfs_release_path(root, path);
		goto again;
	}

	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}