2 * Copyright (C) 2007 Oracle. All rights reserved.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
18 #include <linux/sched.h>
19 #include <linux/bio.h>
21 #include "extent_map.h"
23 #include "transaction.h"
24 #include "print-tree.h"
28 struct btrfs_device *dev;
33 * this uses a pretty simple search, the expectation is that it is
34 * called very infrequently and that a given device has a small number
/*
 * find_free_dev_extent - search one device's extent records for a hole
 * at least @num_bytes long.
 *
 * @trans:     transaction handle, passed through to btrfs_search_slot
 * @device:    device to scan; its extents are keyed by device->devid
 * @path:      caller-supplied path, released before returning
 * @num_bytes: required size of the free area
 * @start:     out: byte offset on the device of the hole that was found
 *
 * Walks the BTRFS_DEV_EXTENT_KEY items for this devid in key order,
 * tracking in last_byte the end of the most recent extent seen; a gap
 * between last_byte and the next item's key.offset that is >= num_bytes
 * is a usable hole.  The scan is bounded by device->total_bytes.
 *
 * NOTE(review): this view of the file is missing lines (declarations of
 * ret/key/slot/last_byte/search_start, several branch bodies and the
 * return statements), so the exact error paths cannot be confirmed here.
 */
37 static int find_free_dev_extent(struct btrfs_trans_handle *trans,
38 struct btrfs_device *device,
39 struct btrfs_path *path,
40 u64 num_bytes, u64 *start)
43 struct btrfs_root *root = device->dev_root;
44 struct btrfs_dev_extent *dev_extent = NULL;
/* never allocate past the end of the device */
48 u64 search_end = device->total_bytes;
52 struct extent_buffer *l;
57 /* FIXME use last free of some kind */
/* position the search at (devid, DEV_EXTENT, search_start) */
59 key.objectid = device->devid;
60 key.offset = search_start;
61 key.type = BTRFS_DEV_EXTENT_KEY;
62 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
/* back up one item so we also consider the extent before search_start */
65 ret = btrfs_previous_item(root, path, 0, key.type);
69 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
72 slot = path->slots[0];
/* walked off the end of this leaf: advance to the next one */
73 if (slot >= btrfs_header_nritems(l)) {
74 ret = btrfs_next_leaf(root, path);
81 if (search_start >= search_end) {
85 *start = search_start;
/* the hole starts after the last extent seen, but never before search_start */
89 *start = last_byte > search_start ?
90 last_byte : search_start;
91 if (search_end <= *start) {
97 btrfs_item_key_to_cpu(l, &key, slot);
/* keys for a smaller devid: not this device yet, keep going */
99 if (key.objectid < device->devid)
/* keys for a larger devid: past this device's extents, stop */
102 if (key.objectid > device->devid)
/* a gap between last_byte and this extent's start may be our hole */
105 if (key.offset >= search_start && key.offset > last_byte &&
107 if (last_byte < search_start)
108 last_byte = search_start;
109 hole_size = key.offset - last_byte;
110 if (key.offset > last_byte &&
111 hole_size >= num_bytes) {
116 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
/* remember where this extent ends for the next gap computation */
121 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
122 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
128 /* we have to make sure we didn't find an extent that has already
129 * been allocated by the map tree or the original allocation
131 btrfs_release_path(root, path);
132 BUG_ON(*start < search_start);
/* found hole would run off the end of the device */
134 if (*start + num_bytes > search_end) {
138 /* check for pending inserts here */
142 btrfs_release_path(root, path);
/*
 * btrfs_alloc_dev_extent - allocate @num_bytes of raw space on @device.
 *
 * @trans:     running transaction
 * @device:    device to allocate from
 * @owner:     value stored in the new dev extent's owner field
 * @num_bytes: length of the extent to allocate
 * @start:     out: device byte offset chosen by find_free_dev_extent
 *
 * Finds a free hole on the device, then inserts a BTRFS_DEV_EXTENT_KEY
 * item at (devid, *start) recording the owner and length.
 *
 * NOTE(review): lines are missing from this view (ret declaration,
 * allocation-failure check, key.offset assignment — presumably *start —
 * error gotos and the final return), so those paths are unverified here.
 */
146 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
147 struct btrfs_device *device,
148 u64 owner, u64 num_bytes, u64 *start)
151 struct btrfs_path *path;
152 struct btrfs_root *root = device->dev_root;
153 struct btrfs_dev_extent *extent;
154 struct extent_buffer *leaf;
155 struct btrfs_key key;
157 path = btrfs_alloc_path();
/* pick a hole on the device; *start receives the offset */
161 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
166 key.objectid = device->devid;
168 key.type = BTRFS_DEV_EXTENT_KEY;
169 ret = btrfs_insert_empty_item(trans, root, path, &key,
/* fill in the freshly inserted dev extent item */
173 leaf = path->nodes[0];
174 extent = btrfs_item_ptr(leaf, path->slots[0],
175 struct btrfs_dev_extent);
176 btrfs_set_dev_extent_owner(leaf, extent, owner);
177 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
178 btrfs_mark_buffer_dirty(leaf);
180 btrfs_free_path(path);
/*
 * find_next_chunk - compute the logical offset for the next chunk.
 *
 * @root:     chunk root to search
 * @objectid: out: end of the highest existing chunk (objectid + offset),
 *            i.e. the first unused logical address
 *
 * Searches from the largest possible CHUNK_ITEM key and steps back to
 * the last real chunk item.
 *
 * NOTE(review): missing lines in this view include the ret declaration,
 * the handling when no chunk item exists, and the return — unverified.
 */
184 static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
186 struct btrfs_path *path;
188 struct btrfs_key key;
189 struct btrfs_key found_key;
191 path = btrfs_alloc_path();
/* start past every possible chunk key, then walk backwards */
194 key.objectid = (u64)-1;
195 key.offset = (u64)-1;
196 key.type = BTRFS_CHUNK_ITEM_KEY;
198 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
204 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
208 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* next chunk starts where the last one ends (start + length) */
210 *objectid = found_key.objectid + found_key.offset;
214 btrfs_free_path(path);
/*
 * find_next_devid - find the next unused device id.
 *
 * @root:     chunk root (device items live under BTRFS_DEV_ITEMS_OBJECTID)
 * @path:     caller-supplied path, released before returning
 * @objectid: out: highest existing devid + 1
 *
 * NOTE(review): missing lines in this view include the objectid
 * parameter declaration in the prototype, the ret declaration, the
 * no-device-items case and the return — unverified here.
 */
218 static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
222 struct btrfs_key key;
223 struct btrfs_key found_key;
/* search past the largest possible dev item, then step back to the last one */
225 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
226 key.type = BTRFS_DEV_ITEM_KEY;
227 key.offset = (u64)-1;
229 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
235 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
240 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
/* dev items are keyed by devid in key.offset */
242 *objectid = found_key.offset + 1;
246 btrfs_release_path(root, path);
251 * the device information is stored in the chunk root
252 * the btrfs_device struct should be fully filled in
/*
 * btrfs_add_device - insert a new device item into the chunk tree.
 *
 * @trans:  running transaction
 * @root:   any tree root; only used to reach fs_info->chunk_root
 * @device: fully filled-in device struct to persist on disk
 *
 * Allocates the next free devid, inserts a BTRFS_DEV_ITEM sized to hold
 * the item plus the device name, and copies every field (including name
 * and uuid) from the in-memory struct into the leaf.
 *
 * NOTE(review): missing lines in this view include the ret/free_devid/ptr
 * declarations, error gotos after the allocations/searches, and the
 * return.  It is also unclear from here whether device->devid is set to
 * free_devid before btrfs_set_device_id() — confirm against callers.
 */
254 int btrfs_add_device(struct btrfs_trans_handle *trans,
255 struct btrfs_root *root,
256 struct btrfs_device *device)
259 struct btrfs_path *path;
260 struct btrfs_dev_item *dev_item;
261 struct extent_buffer *leaf;
262 struct btrfs_key key;
/* device items live in the chunk tree, whatever root we were handed */
266 root = root->fs_info->chunk_root;
268 path = btrfs_alloc_path();
272 ret = find_next_devid(root, path, &free_devid);
276 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
277 key.type = BTRFS_DEV_ITEM_KEY;
278 key.offset = free_devid;
/* item is variable-sized: struct plus the device name bytes */
280 ret = btrfs_insert_empty_item(trans, root, path, &key,
281 sizeof(*dev_item) + device->name_len);
285 leaf = path->nodes[0];
286 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
/* mirror every in-memory field into the on-disk item */
288 btrfs_set_device_id(leaf, dev_item, device->devid);
289 btrfs_set_device_type(leaf, dev_item, device->type);
290 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
291 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
292 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
293 btrfs_set_device_rdev(leaf, dev_item, device->rdev);
294 btrfs_set_device_partition(leaf, dev_item, device->partition);
295 btrfs_set_device_name_len(leaf, dev_item, device->name_len);
296 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
297 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
/* name and uuid are raw byte runs after the fixed fields */
299 ptr = (unsigned long)btrfs_device_name(dev_item);
300 write_extent_buffer(leaf, device->name, ptr, device->name_len);
302 ptr = (unsigned long)btrfs_device_uuid(dev_item);
303 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
304 btrfs_mark_buffer_dirty(leaf);
308 btrfs_free_path(path);
/*
 * btrfs_update_device - rewrite an existing device item in the chunk tree
 * from the current in-memory state (e.g. after bytes_used changes).
 *
 * @trans:  running transaction
 * @device: device whose on-disk item is refreshed
 *
 * Looks the item up by (DEV_ITEMS_OBJECTID, DEV_ITEM, devid) with a
 * write-locked path (cow=1) and copies the fields back.  Unlike
 * btrfs_add_device() this does not touch the name/uuid payload.
 *
 * NOTE(review): missing lines in this view include the ret declaration,
 * the not-found/error handling after the search, and the return.
 */
311 int btrfs_update_device(struct btrfs_trans_handle *trans,
312 struct btrfs_device *device)
315 struct btrfs_path *path;
316 struct btrfs_root *root;
317 struct btrfs_dev_item *dev_item;
318 struct extent_buffer *leaf;
319 struct btrfs_key key;
/* device items live in the chunk tree */
321 root = device->dev_root->fs_info->chunk_root;
323 path = btrfs_alloc_path();
327 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
328 key.type = BTRFS_DEV_ITEM_KEY;
329 key.offset = device->devid;
/* last arg 1: COW the path, we are going to modify the leaf */
331 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
340 leaf = path->nodes[0];
341 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
343 btrfs_set_device_id(leaf, dev_item, device->devid);
344 btrfs_set_device_type(leaf, dev_item, device->type);
345 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
346 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
347 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
348 btrfs_set_device_rdev(leaf, dev_item, device->rdev);
349 btrfs_set_device_partition(leaf, dev_item, device->partition);
350 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
351 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
352 btrfs_mark_buffer_dirty(leaf);
355 btrfs_free_path(path);
/*
 * btrfs_add_system_chunk - append a (disk_key, chunk) pair to the
 * sys_chunk_array in the in-memory superblock copy.
 *
 * @trans:     running transaction (unused in the visible lines)
 * @root:      used to reach fs_info->super_copy
 * @key:       CPU-order key for the chunk; converted to disk order
 * @chunk:     chunk item payload to copy in
 * @item_size: size in bytes of @chunk
 *
 * System chunks must be readable before any tree can be read, so they
 * are duplicated in the superblock array.  Fails when the array would
 * overflow BTRFS_SYSTEM_CHUNK_ARRAY_SIZE.
 *
 * NOTE(review): the overflow branch body, ptr/array_size declarations
 * and the return value are missing from this view.
 */
359 int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
360 struct btrfs_root *root,
361 struct btrfs_key *key,
362 struct btrfs_chunk *chunk, int item_size)
364 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
365 struct btrfs_disk_key disk_key;
369 array_size = btrfs_super_sys_array_size(super_copy);
/* refuse to overflow the fixed-size array in the superblock */
370 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
373 ptr = super_copy->sys_chunk_array + array_size;
/* array entries are a little-endian disk key followed by the chunk */
374 btrfs_cpu_key_to_disk(&disk_key, key);
375 memcpy(ptr, &disk_key, sizeof(disk_key));
376 ptr += sizeof(disk_key);
377 memcpy(ptr, chunk, item_size);
/* account for the key bytes too when bumping the array size */
378 item_size += sizeof(disk_key);
379 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
/*
 * btrfs_alloc_chunk - carve a new chunk of logical address space and
 * back it with device extents.
 *
 * @trans:       running transaction
 * @extent_root: extent root; also used to reach fs_info (devices,
 *               chunk_root, mapping tree)
 * @start:       out: logical start of the new chunk
 * @num_bytes:   out: length of the new chunk
 * @type:        chunk type flags stored in the chunk item
 *
 * Builds a private list of devices with enough free space, allocates a
 * dev extent on each, fills in the stripe array, inserts the chunk item
 * into the chunk tree, and caches the logical->physical mapping in the
 * fs-wide extent map tree.
 *
 * NOTE(review): this view is heavily gapped — declarations (index,
 * num_stripes, looped, avail, max_avail, physical, dev_offset, ret),
 * the retry/`again` label implied by `looped`, several error paths and
 * the return are all missing, so the full control flow is unverified.
 */
383 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
384 struct btrfs_root *extent_root, u64 *start,
385 u64 *num_bytes, u64 type)
388 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
389 struct btrfs_stripe *stripes;
390 struct btrfs_device *device = NULL;
391 struct btrfs_chunk *chunk;
392 struct list_head private_devs;
393 struct list_head *dev_list = &extent_root->fs_info->devices;
394 struct list_head *cur;
395 struct extent_map_tree *em_tree;
396 struct map_lookup *map;
397 struct extent_map *em;
/* default chunk size: 1GB, shrunk below if no device has that much free */
399 u64 calc_size = 1024 * 1024 * 1024;
406 struct btrfs_key key;
408 if (list_empty(dev_list))
411 INIT_LIST_HEAD(&private_devs);
412 cur = dev_list->next;
414 /* build a private list of devices we will allocate from */
415 while(index < num_stripes) {
416 device = list_entry(cur, struct btrfs_device, dev_list);
417 avail = device->total_bytes - device->bytes_used;
/* remember the largest free run seen, for the retry below */
419 if (avail > max_avail)
421 if (avail >= calc_size) {
422 list_move_tail(&device->dev_list, &private_devs);
/* not enough devices had calc_size free */
428 if (index < num_stripes) {
429 list_splice(&private_devs, dev_list);
/* retry once with a smaller chunk sized to the biggest free run */
430 if (!looped && max_avail > 0) {
432 calc_size = max_avail;
438 ret = find_next_chunk(chunk_root, &key.objectid);
442 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
/* stripe array trails the fixed chunk fields */
446 stripes = &chunk->stripe;
448 *num_bytes = calc_size;
/* allocate one dev extent per stripe and record devid/offset */
450 while(index < num_stripes) {
451 BUG_ON(list_empty(&private_devs));
452 cur = private_devs.next;
453 device = list_entry(cur, struct btrfs_device, dev_list);
454 list_move_tail(&device->dev_list, dev_list);
456 ret = btrfs_alloc_dev_extent(trans, device,
458 calc_size, &dev_offset);
461 device->bytes_used += calc_size;
462 ret = btrfs_update_device(trans, device);
465 btrfs_set_stack_stripe_devid(stripes + index, device->devid);
466 btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
467 physical = dev_offset;
470 BUG_ON(!list_empty(&private_devs));
472 /* key.objectid was set above */
473 key.offset = *num_bytes;
474 key.type = BTRFS_CHUNK_ITEM_KEY;
475 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
476 btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024);
477 btrfs_set_stack_chunk_type(chunk, type);
478 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
479 btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize);
480 btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize);
481 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
483 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
484 btrfs_chunk_item_size(num_stripes));
486 *start = key.objectid;
/* cache the new logical->physical mapping; em->bdev smuggles the lookup */
488 em = alloc_extent_map(GFP_NOFS);
491 map = kmalloc(sizeof(*map), GFP_NOFS);
497 em->bdev = (struct block_device *)map;
498 em->start = key.objectid;
499 em->len = key.offset;
502 map->physical = physical;
512 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
513 spin_lock(&em_tree->lock);
514 ret = add_extent_mapping(em_tree, em);
516 spin_unlock(&em_tree->lock);
/*
 * btrfs_mapping_init - initialize the fs-wide chunk mapping tree; it is
 * just a wrapper around the generic extent map tree.
 */
521 void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
523 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
/*
 * btrfs_mapping_tree_free - drop every cached chunk mapping.
 *
 * Repeatedly looks up any mapping over [0, -1) and removes it under the
 * tree lock.  The missing lines presumably contain the surrounding loop
 * and the reference drops hinted at by the "once for the tree" comment.
 */
526 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
528 struct extent_map *em;
531 spin_lock(&tree->map_tree.lock);
/* (0, (u64)-1) matches any remaining mapping in the tree */
532 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
534 remove_extent_mapping(&tree->map_tree, em);
535 spin_unlock(&tree->map_tree.lock);
541 /* once for the tree */
/*
 * btrfs_map_block - translate a logical byte offset into a physical
 * device offset using the cached chunk mappings.
 *
 * @map_tree: fs-wide mapping tree populated by chunk tree reads
 * @logical:  logical byte address to translate
 * @phys:     out: physical byte address on the device
 * @length:   in: length wanted; out: clamped to the end of the chunk
 * @dev:      out parameter for the backing device (assignment not
 *            visible in this view — presumably from map->dev)
 *
 * The map_lookup struct is stashed in em->bdev by the code that built
 * the mapping.  BUG_ON fires if no mapping covers @logical.
 */
546 int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
547 u64 logical, u64 *phys, u64 *length,
548 struct btrfs_device **dev)
550 struct extent_map *em;
551 struct map_lookup *map;
552 struct extent_map_tree *em_tree = &map_tree->map_tree;
556 spin_lock(&em_tree->lock);
557 em = lookup_extent_mapping(em_tree, logical, *length);
/* the mapping must fully cover the requested logical address */
560 BUG_ON(em->start > logical || em->start + em->len < logical);
561 map = (struct map_lookup *)em->bdev;
562 offset = logical - em->start;
563 *phys = map->physical + offset;
/* never report more than what remains inside this chunk */
564 *length = em->len - offset;
567 spin_unlock(&em_tree->lock);
/*
 * btrfs_map_bio - remap a bio from logical to physical and point it at
 * the real backing device, then (presumably) submit it.
 *
 * @root: any root, used to reach the fs-wide mapping tree
 * @rw:   read/write direction (use not visible in this view)
 * @bio:  bio whose bi_sector is a logical address on entry
 *
 * The BUG_ON enforces that a single bio never spans a chunk boundary.
 * NOTE(review): declarations (length, physical, map_length, i, ret) and
 * the actual submission/return are missing from this view.
 */
571 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
573 struct btrfs_mapping_tree *map_tree;
574 struct btrfs_device *dev;
/* sectors are 512 bytes: shift to get a byte address */
575 u64 logical = bio->bi_sector << 9;
579 struct bio_vec *bvec;
/* total payload length of the bio */
583 bio_for_each_segment(bvec, bio, i) {
584 length += bvec->bv_len;
586 map_tree = &root->fs_info->mapping_tree;
588 ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
/* the whole bio must fit inside one chunk mapping */
589 BUG_ON(map_length < length);
590 bio->bi_sector = physical >> 9;
591 bio->bi_bdev = dev->bdev;
/*
 * btrfs_find_device - linear scan of fs_info->devices for a devid.
 *
 * Returns the matching btrfs_device; the loop construct and the
 * not-found return are in lines missing from this view.
 */
596 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
598 struct btrfs_device *dev;
599 struct list_head *cur = root->fs_info->devices.next;
600 struct list_head *head = &root->fs_info->devices;
603 dev = list_entry(cur, struct btrfs_device, dev_list);
604 if (dev->devid == devid)
/*
 * read_one_chunk - turn an on-disk chunk item into a cached
 * logical->physical mapping in the fs-wide map tree.
 *
 * @root:  any root, used to reach fs_info->mapping_tree
 * @key:   chunk key; objectid is the logical start, offset the length
 * @leaf:  extent buffer containing the chunk item
 * @chunk: pointer to the chunk item inside @leaf
 *
 * Skips chunks whose range is already mapped (sys array chunks are also
 * present in the chunk tree, so duplicates are expected).
 *
 * NOTE(review): `map` appears to be allocated twice (kzalloc then
 * kmalloc) — with the intervening lines missing it is unclear whether
 * the first allocation is freed or leaked; flag for review against the
 * full source.  Only stripe 0 is recorded in the visible lines.
 */
611 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
612 struct extent_buffer *leaf,
613 struct btrfs_chunk *chunk)
615 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
616 struct map_lookup *map;
617 struct extent_map *em;
623 logical = key->objectid;
624 length = key->offset;
625 spin_lock(&map_tree->map_tree.lock);
626 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
628 /* already mapped? */
629 if (em && em->start <= logical && em->start + em->len > logical) {
631 spin_unlock(&map_tree->map_tree.lock);
636 spin_unlock(&map_tree->map_tree.lock);
638 map = kzalloc(sizeof(*map), GFP_NOFS);
642 em = alloc_extent_map(GFP_NOFS);
645 map = kmalloc(sizeof(*map), GFP_NOFS);
/* stash the lookup struct in the (otherwise unused) bdev pointer */
651 em->bdev = (struct block_device *)map;
656 map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0);
657 devid = btrfs_stripe_devid_nr(leaf, chunk, 0);
658 map->dev = btrfs_find_device(root, devid);
665 spin_lock(&map_tree->map_tree.lock);
666 ret = add_extent_mapping(&map_tree->map_tree, em);
668 spin_unlock(&map_tree->map_tree.lock);
/*
 * fill_device_from_item - copy every field of an on-disk dev item into
 * an in-memory btrfs_device, including the uuid and a freshly
 * kmalloc'd, NUL-terminated copy of the device name.
 *
 * NOTE(review): the ptr/name declarations, the kmalloc failure check,
 * the assignment of name into the device struct and the return are in
 * lines missing from this view.
 */
674 static int fill_device_from_item(struct extent_buffer *leaf,
675 struct btrfs_dev_item *dev_item,
676 struct btrfs_device *device)
681 device->devid = btrfs_device_id(leaf, dev_item);
682 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
683 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
684 device->type = btrfs_device_type(leaf, dev_item);
685 device->io_align = btrfs_device_io_align(leaf, dev_item);
686 device->io_width = btrfs_device_io_width(leaf, dev_item);
687 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
688 device->rdev = btrfs_device_rdev(leaf, dev_item);
689 device->partition = btrfs_device_partition(leaf, dev_item);
690 device->name_len = btrfs_device_name_len(leaf, dev_item);
692 ptr = (unsigned long)btrfs_device_uuid(dev_item);
693 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
/* +1 for the terminating NUL appended below */
695 name = kmalloc(device->name_len + 1, GFP_NOFS);
699 ptr = (unsigned long)btrfs_device_name(dev_item);
700 read_extent_buffer(leaf, name, ptr, device->name_len);
701 name[device->name_len] = '\0';
/*
 * read_one_dev - materialize an in-memory btrfs_device from an on-disk
 * dev item, creating and list-linking the struct if this devid has not
 * been seen yet, then open the device.
 *
 * NOTE(review): the kmalloc failure check, the branch structure around
 * the "already known" case and the return are in lines missing from
 * this view.  device->bdev is taken from the mounted sb — this early
 * code appears to be single-device only.
 */
705 static int read_one_dev(struct btrfs_root *root, struct btrfs_key *key,
706 struct extent_buffer *leaf,
707 struct btrfs_dev_item *dev_item)
709 struct btrfs_device *device;
713 devid = btrfs_device_id(leaf, dev_item);
/* reuse the existing struct if this devid was already read */
714 device = btrfs_find_device(root, devid);
716 device = kmalloc(sizeof(*device), GFP_NOFS);
719 list_add(&device->dev_list, &root->fs_info->devices);
722 fill_device_from_item(leaf, dev_item, device);
723 device->dev_root = root->fs_info->dev_root;
724 device->bdev = root->fs_info->sb->s_bdev;
725 memcpy(&device->dev_key, key, sizeof(*key));
728 ret = btrfs_open_device(device);
/*
 * btrfs_read_sys_array - parse the superblock's sys_chunk_array so the
 * system chunks (and the devices backing them) are usable before any
 * tree can be read.
 *
 * Walks the packed (disk_key, item) pairs: DEV_ITEMs feed read_one_dev,
 * CHUNK_ITEMs feed read_one_chunk.  Two parallel cursors are kept:
 * `ptr` into the in-memory super copy (for the keys) and `sb_ptr` as an
 * offset into the superblock extent buffer (for the items, which the
 * read helpers take as extent-buffer pointers).
 *
 * NOTE(review): cursor declarations (cur, len), the per-iteration
 * cursor advancement, the unknown-key handling and the return are in
 * lines missing from this view.
 */
736 int btrfs_read_sys_array(struct btrfs_root *root)
738 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
739 struct extent_buffer *sb = root->fs_info->sb_buffer;
740 struct btrfs_disk_key *disk_key;
741 struct btrfs_dev_item *dev_item;
742 struct btrfs_chunk *chunk;
743 struct btrfs_key key;
748 unsigned long sb_ptr;
753 array_size = btrfs_super_sys_array_size(super_copy);
756 * we do this loop twice, once for the device items and
757 * once for all of the chunks. This way there are device
758 * structs filled in for every chunk
761 ptr = super_copy->sys_chunk_array;
762 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
765 while (cur < array_size) {
766 disk_key = (struct btrfs_disk_key *)ptr;
767 btrfs_disk_key_to_cpu(&key, disk_key);
/* every entry starts with a disk key */
769 len = sizeof(*disk_key);
774 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID &&
775 key.type == BTRFS_DEV_ITEM_KEY) {
776 dev_item = (struct btrfs_dev_item *)sb_ptr;
778 ret = read_one_dev(root, &key, sb, dev_item);
/* dev items are variable-sized: struct plus the name */
781 len = sizeof(*dev_item);
782 len += btrfs_device_name_len(sb, dev_item);
783 } else if (key.type == BTRFS_CHUNK_ITEM_KEY) {
785 chunk = (struct btrfs_chunk *)sb_ptr;
787 ret = read_one_chunk(root, &key, sb, chunk);
/* chunk items are sized by their stripe count */
790 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
791 len = btrfs_chunk_item_size(num_stripes);
/*
 * btrfs_read_chunk_tree - read every device item and chunk item from
 * the chunk tree at mount time, populating the device list and the
 * logical->physical mapping tree.
 *
 * Performs two passes keyed by key.objectid: first starting at
 * BTRFS_DEV_ITEMS_OBJECTID (device items), then — per the release/
 * retry at the bottom — a second pass that picks up the chunk items,
 * so device structs exist before any chunk references them.
 *
 * NOTE(review): the ret/slot declarations, the second pass's key setup,
 * leaf-exhaustion handling and the return are in lines missing from
 * this view.
 */
806 int btrfs_read_chunk_tree(struct btrfs_root *root)
808 struct btrfs_path *path;
809 struct extent_buffer *leaf;
810 struct btrfs_key key;
811 struct btrfs_key found_key;
/* chunk metadata lives in the chunk tree, whatever root we were given */
815 root = root->fs_info->chunk_root;
817 path = btrfs_alloc_path();
821 /* first we search for all of the device items, and then we
822 * read in all of the chunk items. This way we can create chunk
823 * mappings that reference all of the devices that are found
825 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
829 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
831 leaf = path->nodes[0];
832 slot = path->slots[0];
/* ran off the current leaf: step to the next one */
833 if (slot >= btrfs_header_nritems(leaf)) {
834 ret = btrfs_next_leaf(root, path);
841 btrfs_item_key_to_cpu(leaf, &found_key, slot);
/* pass 1: only device items under DEV_ITEMS_OBJECTID */
842 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
843 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
845 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
846 struct btrfs_dev_item *dev_item;
847 dev_item = btrfs_item_ptr(leaf, slot,
848 struct btrfs_dev_item);
849 ret = read_one_dev(root, &found_key, leaf,
853 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
854 struct btrfs_chunk *chunk;
855 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
856 ret = read_one_chunk(root, &found_key, leaf, chunk);
/* finished pass 1: restart the search for the chunk-item pass */
860 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
862 btrfs_release_path(root, path);
866 btrfs_free_path(path);