Btrfs: symlinks and hard links
[linux-2.6-block.git] / fs / btrfs / super.c
CommitLineData
2e635a27 1#include <linux/module.h>
e20d96d6 2#include <linux/buffer_head.h>
2e635a27
CM
3#include <linux/fs.h>
4#include <linux/pagemap.h>
5#include <linux/highmem.h>
6#include <linux/time.h>
7#include <linux/init.h>
8#include <linux/string.h>
9#include <linux/smp_lock.h>
10#include <linux/backing-dev.h>
dee26a9f 11#include <linux/mpage.h>
75dfe396
CM
12#include <linux/swap.h>
13#include <linux/writeback.h>
8fd17795 14#include <linux/statfs.h>
2e635a27 15#include "ctree.h"
e20d96d6 16#include "disk-io.h"
d5719762 17#include "transaction.h"
2c90e5d6 18#include "btrfs_inode.h"
c5739bba 19#include "ioctl.h"
2e635a27 20
c5739bba
CM
/*
 * Arguments handed through iget5_locked() to btrfs_find_actor() and
 * btrfs_init_locked_inode(): an in-memory inode is identified by its
 * object id together with the root it lives in.
 */
struct btrfs_iget_args {
	u64 ino;
	struct btrfs_root *root;
};

/* magic number stored in sb->s_magic by btrfs_fill_super() */
#define BTRFS_SUPER_MAGIC 0x9123682E

/*
 * Operation tables referenced by the functions below.  Their initializers
 * are not part of this section of the file.
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct super_operations btrfs_super_ops;
static struct file_operations btrfs_dir_file_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_file_operations;
e20d96d6 37
e20d96d6 38static void btrfs_read_locked_inode(struct inode *inode)
2e635a27 39{
5caf2a00 40 struct btrfs_path *path;
e20d96d6 41 struct btrfs_inode_item *inode_item;
d6e4a428
CM
42 struct btrfs_root *root = BTRFS_I(inode)->root;
43 struct btrfs_key location;
31f3c99b
CM
44 struct btrfs_block_group_cache *alloc_group;
45 u64 alloc_group_block;
e20d96d6 46 int ret;
f4b9aa8d 47
5caf2a00
CM
48 path = btrfs_alloc_path();
49 BUG_ON(!path);
50 btrfs_init_path(path);
f4b9aa8d
CM
51 mutex_lock(&root->fs_info->fs_mutex);
52
d6e4a428
CM
53 memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
54 ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
e20d96d6 55 if (ret) {
7cfcc17e 56 btrfs_free_path(path);
d6e4a428 57 goto make_bad;
e20d96d6 58 }
5caf2a00
CM
59 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
60 path->slots[0],
e20d96d6 61 struct btrfs_inode_item);
2e635a27 62
e20d96d6
CM
63 inode->i_mode = btrfs_inode_mode(inode_item);
64 inode->i_nlink = btrfs_inode_nlink(inode_item);
65 inode->i_uid = btrfs_inode_uid(inode_item);
66 inode->i_gid = btrfs_inode_gid(inode_item);
67 inode->i_size = btrfs_inode_size(inode_item);
68 inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
69 inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
70 inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
71 inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
72 inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
73 inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
74 inode->i_blocks = btrfs_inode_nblocks(inode_item);
75 inode->i_generation = btrfs_inode_generation(inode_item);
31f3c99b
CM
76 alloc_group_block = btrfs_inode_block_group(inode_item);
77 ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
78 (void **)&alloc_group,
79 alloc_group_block, 1);
80 BUG_ON(!ret);
81 BTRFS_I(inode)->block_group = alloc_group;
5caf2a00 82
5caf2a00
CM
83 btrfs_free_path(path);
84 inode_item = NULL;
85
f4b9aa8d 86 mutex_unlock(&root->fs_info->fs_mutex);
1b05da2e 87
e20d96d6
CM
88 switch (inode->i_mode & S_IFMT) {
89#if 0
90 default:
91 init_special_inode(inode, inode->i_mode,
92 btrfs_inode_rdev(inode_item));
93 break;
94#endif
95 case S_IFREG:
dee26a9f
CM
96 inode->i_mapping->a_ops = &btrfs_aops;
97 inode->i_fop = &btrfs_file_operations;
98 inode->i_op = &btrfs_file_inode_operations;
e20d96d6
CM
99 break;
100 case S_IFDIR:
e20d96d6 101 inode->i_fop = &btrfs_dir_file_operations;
d6e4a428
CM
102 if (root == root->fs_info->tree_root)
103 inode->i_op = &btrfs_dir_ro_inode_operations;
104 else
105 inode->i_op = &btrfs_dir_inode_operations;
e20d96d6
CM
106 break;
107 case S_IFLNK:
2b8d99a7
CM
108 inode->i_op = &btrfs_symlink_inode_operations;
109 inode->i_mapping->a_ops = &btrfs_symlink_aops;
e20d96d6 110 break;
2e635a27 111 }
e20d96d6 112 return;
d6e4a428
CM
113
114make_bad:
115 btrfs_release_path(root, path);
116 btrfs_free_path(path);
117 mutex_unlock(&root->fs_info->fs_mutex);
118 make_bad_inode(inode);
2e635a27
CM
119}
120
f68cad0f
CM
121static void fill_inode_item(struct btrfs_inode_item *item,
122 struct inode *inode)
123{
124 btrfs_set_inode_uid(item, inode->i_uid);
125 btrfs_set_inode_gid(item, inode->i_gid);
126 btrfs_set_inode_size(item, inode->i_size);
127 btrfs_set_inode_mode(item, inode->i_mode);
128 btrfs_set_inode_nlink(item, inode->i_nlink);
129 btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
130 btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
131 btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
132 btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
133 btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
134 btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
135 btrfs_set_inode_nblocks(item, inode->i_blocks);
136 btrfs_set_inode_generation(item, inode->i_generation);
31f3c99b
CM
137 btrfs_set_inode_block_group(item,
138 BTRFS_I(inode)->block_group->key.objectid);
f68cad0f
CM
139}
140
f68cad0f
CM
141static int btrfs_update_inode(struct btrfs_trans_handle *trans,
142 struct btrfs_root *root,
143 struct inode *inode)
144{
145 struct btrfs_inode_item *inode_item;
146 struct btrfs_path *path;
147 int ret;
148
149 path = btrfs_alloc_path();
150 BUG_ON(!path);
151 btrfs_init_path(path);
152 ret = btrfs_lookup_inode(trans, root, path,
153 &BTRFS_I(inode)->location, 1);
154 if (ret) {
155 if (ret > 0)
156 ret = -ENOENT;
157 goto failed;
158 }
159
160 inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
161 path->slots[0],
162 struct btrfs_inode_item);
163
164 fill_inode_item(inode_item, inode);
165 btrfs_mark_buffer_dirty(path->nodes[0]);
166 ret = 0;
167failed:
168 btrfs_release_path(root, path);
169 btrfs_free_path(path);
170 return ret;
171}
172
173
5f443fd2
CM
174static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
175 struct btrfs_root *root,
176 struct inode *dir,
177 struct dentry *dentry)
134e9731 178{
5caf2a00 179 struct btrfs_path *path;
134e9731
CM
180 const char *name = dentry->d_name.name;
181 int name_len = dentry->d_name.len;
7e38180e 182 int ret = 0;
134e9731
CM
183 u64 objectid;
184 struct btrfs_dir_item *di;
185
5caf2a00
CM
186 path = btrfs_alloc_path();
187 BUG_ON(!path);
188 btrfs_init_path(path);
7e38180e 189 di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
134e9731 190 name, name_len, -1);
7e38180e
CM
191 if (IS_ERR(di)) {
192 ret = PTR_ERR(di);
134e9731 193 goto err;
7e38180e
CM
194 }
195 if (!di) {
134e9731
CM
196 ret = -ENOENT;
197 goto err;
198 }
d6e4a428 199 objectid = btrfs_disk_key_objectid(&di->location);
7e38180e
CM
200 ret = btrfs_delete_one_dir_name(trans, root, path, di);
201 BUG_ON(ret);
202 btrfs_release_path(root, path);
134e9731 203
7e38180e
CM
204 di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
205 objectid, name, name_len, -1);
206 if (IS_ERR(di)) {
207 ret = PTR_ERR(di);
208 goto err;
209 }
210 if (!di) {
211 ret = -ENOENT;
212 goto err;
213 }
214 ret = btrfs_delete_one_dir_name(trans, root, path, di);
5f26f772
CM
215 BUG_ON(ret);
216
134e9731
CM
217 dentry->d_inode->i_ctime = dir->i_ctime;
218err:
5caf2a00 219 btrfs_free_path(path);
f68cad0f 220 if (!ret) {
5f26f772 221 dir->i_size -= name_len * 2;
f68cad0f
CM
222 btrfs_update_inode(trans, root, dir);
223 drop_nlink(dentry->d_inode);
224 btrfs_update_inode(trans, root, dentry->d_inode);
cd1bc465 225 dir->i_sb->s_dirt = 1;
d4dbff95 226 }
134e9731
CM
227 return ret;
228}
229
5f443fd2
CM
230static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
231{
232 struct btrfs_root *root;
233 struct btrfs_trans_handle *trans;
234 int ret;
235
d6e4a428 236 root = BTRFS_I(dir)->root;
5f443fd2
CM
237 mutex_lock(&root->fs_info->fs_mutex);
238 trans = btrfs_start_transaction(root, 1);
31f3c99b 239 btrfs_set_trans_block_group(trans, dir);
5f443fd2
CM
240 ret = btrfs_unlink_trans(trans, root, dir, dentry);
241 btrfs_end_transaction(trans, root);
242 mutex_unlock(&root->fs_info->fs_mutex);
35b7e476 243 btrfs_btree_balance_dirty(root);
5f443fd2
CM
244 return ret;
245}
246
247static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
248{
249 struct inode *inode = dentry->d_inode;
250 int err;
251 int ret;
d6e4a428 252 struct btrfs_root *root = BTRFS_I(dir)->root;
5caf2a00 253 struct btrfs_path *path;
5f443fd2
CM
254 struct btrfs_key key;
255 struct btrfs_trans_handle *trans;
5f26f772
CM
256 struct btrfs_key found_key;
257 int found_type;
5f443fd2 258 struct btrfs_leaf *leaf;
5f26f772 259 char *goodnames = "..";
5f443fd2 260
5caf2a00
CM
261 path = btrfs_alloc_path();
262 BUG_ON(!path);
263 btrfs_init_path(path);
5f443fd2
CM
264 mutex_lock(&root->fs_info->fs_mutex);
265 trans = btrfs_start_transaction(root, 1);
31f3c99b 266 btrfs_set_trans_block_group(trans, dir);
5f443fd2
CM
267 key.objectid = inode->i_ino;
268 key.offset = (u64)-1;
5f26f772
CM
269 key.flags = (u32)-1;
270 while(1) {
271 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
272 if (ret < 0) {
273 err = ret;
274 goto out;
275 }
276 BUG_ON(ret == 0);
277 if (path->slots[0] == 0) {
278 err = -ENOENT;
279 goto out;
280 }
281 path->slots[0]--;
282 leaf = btrfs_buffer_leaf(path->nodes[0]);
283 btrfs_disk_key_to_cpu(&found_key,
284 &leaf->items[path->slots[0]].key);
285 found_type = btrfs_key_type(&found_key);
286 if (found_key.objectid != inode->i_ino) {
287 err = -ENOENT;
288 goto out;
289 }
290 if ((found_type != BTRFS_DIR_ITEM_KEY &&
291 found_type != BTRFS_DIR_INDEX_KEY) ||
292 (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
293 !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
294 err = -ENOTEMPTY;
295 goto out;
296 }
297 ret = btrfs_del_item(trans, root, path);
298 BUG_ON(ret);
5f443fd2 299
5f26f772
CM
300 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
301 break;
302 btrfs_release_path(root, path);
5f443fd2 303 }
5f26f772 304 ret = 0;
5caf2a00 305 btrfs_release_path(root, path);
5f443fd2
CM
306
307 /* now the directory is empty */
308 err = btrfs_unlink_trans(trans, root, dir, dentry);
309 if (!err) {
310 inode->i_size = 0;
311 }
312out:
7cfcc17e
CM
313 btrfs_release_path(root, path);
314 btrfs_free_path(path);
5f443fd2
CM
315 mutex_unlock(&root->fs_info->fs_mutex);
316 ret = btrfs_end_transaction(trans, root);
35b7e476 317 btrfs_btree_balance_dirty(root);
5f443fd2
CM
318 if (ret && !err)
319 err = ret;
320 return err;
321}
322
134e9731
CM
323static int btrfs_free_inode(struct btrfs_trans_handle *trans,
324 struct btrfs_root *root,
325 struct inode *inode)
326{
5caf2a00 327 struct btrfs_path *path;
134e9731 328 int ret;
5caf2a00 329
134e9731 330 clear_inode(inode);
5caf2a00
CM
331
332 path = btrfs_alloc_path();
333 BUG_ON(!path);
334 btrfs_init_path(path);
d6e4a428
CM
335 ret = btrfs_lookup_inode(trans, root, path,
336 &BTRFS_I(inode)->location, -1);
134e9731 337 BUG_ON(ret);
5caf2a00 338 ret = btrfs_del_item(trans, root, path);
134e9731 339 BUG_ON(ret);
5caf2a00 340 btrfs_free_path(path);
134e9731
CM
341 return ret;
342}
343
de428b63
CM
344static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
345 u64 objectid)
346{
347 struct btrfs_node *node;
348 int i;
349 int nritems;
350 u64 item_objectid;
351 u64 blocknr;
352 int slot;
353 int ret;
354
355 if (!path->nodes[1])
356 return;
357 node = btrfs_buffer_node(path->nodes[1]);
358 slot = path->slots[1];
359 if (slot == 0)
360 return;
361 nritems = btrfs_header_nritems(&node->header);
362 for (i = slot - 1; i >= 0; i--) {
363 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
364 if (item_objectid != objectid)
365 break;
366 blocknr = btrfs_node_blockptr(node, i);
367 ret = readahead_tree_block(root, blocknr);
368 if (ret)
369 break;
370 }
371}
372
f4b9aa8d
CM
373static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
374 struct btrfs_root *root,
375 struct inode *inode)
376{
377 int ret;
5caf2a00 378 struct btrfs_path *path;
f4b9aa8d
CM
379 struct btrfs_key key;
380 struct btrfs_disk_key *found_key;
e06afa83 381 u32 found_type;
f4b9aa8d 382 struct btrfs_leaf *leaf;
f254e52c
CM
383 struct btrfs_file_extent_item *fi = NULL;
384 u64 extent_start = 0;
385 u64 extent_num_blocks = 0;
386 int found_extent;
f4b9aa8d 387
5caf2a00
CM
388 path = btrfs_alloc_path();
389 BUG_ON(!path);
f4b9aa8d
CM
390 /* FIXME, add redo link to tree so we don't leak on crash */
391 key.objectid = inode->i_ino;
392 key.offset = (u64)-1;
e06afa83 393 key.flags = (u32)-1;
f4b9aa8d 394 while(1) {
5caf2a00
CM
395 btrfs_init_path(path);
396 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
f4b9aa8d 397 if (ret < 0) {
f4b9aa8d
CM
398 goto error;
399 }
400 if (ret > 0) {
5caf2a00
CM
401 BUG_ON(path->slots[0] == 0);
402 path->slots[0]--;
f4b9aa8d 403 }
de428b63 404 reada_truncate(root, path, inode->i_ino);
5caf2a00
CM
405 leaf = btrfs_buffer_leaf(path->nodes[0]);
406 found_key = &leaf->items[path->slots[0]].key;
e06afa83 407 found_type = btrfs_disk_key_type(found_key);
f4b9aa8d
CM
408 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
409 break;
e06afa83
CM
410 if (found_type != BTRFS_CSUM_ITEM_KEY &&
411 found_type != BTRFS_DIR_ITEM_KEY &&
412 found_type != BTRFS_DIR_INDEX_KEY &&
413 found_type != BTRFS_EXTENT_DATA_KEY)
f4b9aa8d 414 break;
f4b9aa8d
CM
415 if (btrfs_disk_key_offset(found_key) < inode->i_size)
416 break;
236454df 417 found_extent = 0;
f254e52c 418 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
5caf2a00
CM
419 fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
420 path->slots[0],
f254e52c 421 struct btrfs_file_extent_item);
236454df
CM
422 if (btrfs_file_extent_type(fi) !=
423 BTRFS_FILE_EXTENT_INLINE) {
424 extent_start =
425 btrfs_file_extent_disk_blocknr(fi);
426 extent_num_blocks =
427 btrfs_file_extent_disk_num_blocks(fi);
428 /* FIXME blocksize != 4096 */
429 inode->i_blocks -=
430 btrfs_file_extent_num_blocks(fi) << 3;
431 found_extent = 1;
432 }
f254e52c 433 }
5caf2a00 434 ret = btrfs_del_item(trans, root, path);
f4b9aa8d 435 BUG_ON(ret);
5caf2a00 436 btrfs_release_path(root, path);
f254e52c
CM
437 if (found_extent) {
438 ret = btrfs_free_extent(trans, root, extent_start,
439 extent_num_blocks, 0);
440 BUG_ON(ret);
441 }
f4b9aa8d 442 }
f4b9aa8d
CM
443 ret = 0;
444error:
5caf2a00
CM
445 btrfs_release_path(root, path);
446 btrfs_free_path(path);
cd1bc465 447 inode->i_sb->s_dirt = 1;
f4b9aa8d
CM
448 return ret;
449}
450
134e9731
CM
451static void btrfs_delete_inode(struct inode *inode)
452{
453 struct btrfs_trans_handle *trans;
d6e4a428 454 struct btrfs_root *root = BTRFS_I(inode)->root;
f4b9aa8d
CM
455 int ret;
456
134e9731
CM
457 truncate_inode_pages(&inode->i_data, 0);
458 if (is_bad_inode(inode)) {
459 goto no_delete;
460 }
461 inode->i_size = 0;
134e9731
CM
462 mutex_lock(&root->fs_info->fs_mutex);
463 trans = btrfs_start_transaction(root, 1);
31f3c99b 464 btrfs_set_trans_block_group(trans, inode);
e06afa83
CM
465 ret = btrfs_truncate_in_trans(trans, root, inode);
466 BUG_ON(ret);
134e9731
CM
467 btrfs_free_inode(trans, root, inode);
468 btrfs_end_transaction(trans, root);
469 mutex_unlock(&root->fs_info->fs_mutex);
35b7e476 470 btrfs_btree_balance_dirty(root);
134e9731
CM
471 return;
472no_delete:
473 clear_inode(inode);
474}
475
e20d96d6 476static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
d6e4a428 477 struct btrfs_key *location)
e20d96d6
CM
478{
479 const char *name = dentry->d_name.name;
480 int namelen = dentry->d_name.len;
481 struct btrfs_dir_item *di;
5caf2a00 482 struct btrfs_path *path;
d6e4a428 483 struct btrfs_root *root = BTRFS_I(dir)->root;
e20d96d6
CM
484 int ret;
485
5caf2a00
CM
486 path = btrfs_alloc_path();
487 BUG_ON(!path);
488 btrfs_init_path(path);
7e38180e 489 di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
e20d96d6 490 namelen, 0);
7e38180e 491 if (!di || IS_ERR(di)) {
d6e4a428 492 location->objectid = 0;
2c90e5d6 493 ret = 0;
e20d96d6
CM
494 goto out;
495 }
d6e4a428 496 btrfs_disk_key_to_cpu(location, &di->location);
e20d96d6 497out:
5caf2a00
CM
498 btrfs_release_path(root, path);
499 btrfs_free_path(path);
e20d96d6
CM
500 return ret;
501}
502
35b7e476 503static int fixup_tree_root_location(struct btrfs_root *root,
d6e4a428
CM
504 struct btrfs_key *location,
505 struct btrfs_root **sub_root)
506{
507 struct btrfs_path *path;
508 struct btrfs_root_item *ri;
d6e4a428
CM
509
510 if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
511 return 0;
512 if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
513 return 0;
514
515 path = btrfs_alloc_path();
516 BUG_ON(!path);
517 mutex_lock(&root->fs_info->fs_mutex);
518
0f7d52f4
CM
519 *sub_root = btrfs_read_fs_root(root->fs_info, location);
520 if (IS_ERR(*sub_root))
521 return PTR_ERR(*sub_root);
522
523 ri = &(*sub_root)->root_item;
d6e4a428
CM
524 location->objectid = btrfs_root_dirid(ri);
525 location->flags = 0;
526 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
527 location->offset = 0;
0f7d52f4 528
d6e4a428
CM
529 btrfs_free_path(path);
530 mutex_unlock(&root->fs_info->fs_mutex);
0f7d52f4 531 return 0;
d6e4a428
CM
532}
533
35b7e476 534static int btrfs_init_locked_inode(struct inode *inode, void *p)
c5739bba
CM
535{
536 struct btrfs_iget_args *args = p;
537 inode->i_ino = args->ino;
538 BTRFS_I(inode)->root = args->root;
539 return 0;
540}
541
35b7e476 542static int btrfs_find_actor(struct inode *inode, void *opaque)
c5739bba
CM
543{
544 struct btrfs_iget_args *args = opaque;
545 return (args->ino == inode->i_ino &&
546 args->root == BTRFS_I(inode)->root);
547}
548
35b7e476
CM
549static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
550 struct btrfs_root *root)
c5739bba
CM
551{
552 struct inode *inode;
553 struct btrfs_iget_args args;
554 args.ino = objectid;
555 args.root = root;
556
557 inode = iget5_locked(s, objectid, btrfs_find_actor,
558 btrfs_init_locked_inode,
559 (void *)&args);
560 return inode;
561}
d6e4a428 562
e20d96d6
CM
563static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
564 struct nameidata *nd)
565{
566 struct inode * inode;
d6e4a428
CM
567 struct btrfs_inode *bi = BTRFS_I(dir);
568 struct btrfs_root *root = bi->root;
569 struct btrfs_root *sub_root = root;
570 struct btrfs_key location;
e20d96d6
CM
571 int ret;
572
573 if (dentry->d_name.len > BTRFS_NAME_LEN)
574 return ERR_PTR(-ENAMETOOLONG);
22b0ebda 575 mutex_lock(&root->fs_info->fs_mutex);
d6e4a428 576 ret = btrfs_inode_by_name(dir, dentry, &location);
22b0ebda 577 mutex_unlock(&root->fs_info->fs_mutex);
e20d96d6
CM
578 if (ret < 0)
579 return ERR_PTR(ret);
580 inode = NULL;
d6e4a428
CM
581 if (location.objectid) {
582 ret = fixup_tree_root_location(root, &location, &sub_root);
583 if (ret < 0)
584 return ERR_PTR(ret);
585 if (ret > 0)
586 return ERR_PTR(-ENOENT);
c5739bba
CM
587 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
588 sub_root);
e20d96d6
CM
589 if (!inode)
590 return ERR_PTR(-EACCES);
d6e4a428 591 if (inode->i_state & I_NEW) {
0f7d52f4 592 if (sub_root != root) {
c5739bba 593printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
0f7d52f4
CM
594 igrab(inode);
595 sub_root->inode = inode;
596 }
d6e4a428
CM
597 BTRFS_I(inode)->root = sub_root;
598 memcpy(&BTRFS_I(inode)->location, &location,
599 sizeof(location));
600 btrfs_read_locked_inode(inode);
601 unlock_new_inode(inode);
602 }
e20d96d6
CM
603 }
604 return d_splice_alias(inode, dentry);
605}
606
de428b63
CM
607static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
608 u64 objectid)
090d1875
CM
609{
610 struct btrfs_node *node;
611 int i;
de428b63 612 u32 nritems;
090d1875
CM
613 u64 item_objectid;
614 u64 blocknr;
615 int slot;
de428b63 616 int ret;
090d1875
CM
617
618 if (!path->nodes[1])
619 return;
620 node = btrfs_buffer_node(path->nodes[1]);
621 slot = path->slots[1];
090d1875 622 nritems = btrfs_header_nritems(&node->header);
de428b63 623 for (i = slot + 1; i < nritems; i++) {
090d1875
CM
624 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
625 if (item_objectid != objectid)
626 break;
627 blocknr = btrfs_node_blockptr(node, i);
de428b63
CM
628 ret = readahead_tree_block(root, blocknr);
629 if (ret)
630 break;
090d1875
CM
631 }
632}
633
e20d96d6
CM
634static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
635{
636 struct inode *inode = filp->f_path.dentry->d_inode;
d6e4a428 637 struct btrfs_root *root = BTRFS_I(inode)->root;
e20d96d6
CM
638 struct btrfs_item *item;
639 struct btrfs_dir_item *di;
640 struct btrfs_key key;
5caf2a00 641 struct btrfs_path *path;
e20d96d6
CM
642 int ret;
643 u32 nritems;
644 struct btrfs_leaf *leaf;
645 int slot;
646 int advance;
647 unsigned char d_type = DT_UNKNOWN;
7f5c1516 648 int over = 0;
7e38180e
CM
649 u32 di_cur;
650 u32 di_total;
651 u32 di_len;
652 int key_type = BTRFS_DIR_INDEX_KEY;
d6e4a428
CM
653
654 /* FIXME, use a real flag for deciding about the key type */
655 if (root->fs_info->tree_root == root)
656 key_type = BTRFS_DIR_ITEM_KEY;
22b0ebda 657 mutex_lock(&root->fs_info->fs_mutex);
e20d96d6 658 key.objectid = inode->i_ino;
e20d96d6 659 key.flags = 0;
d6e4a428 660 btrfs_set_key_type(&key, key_type);
e20d96d6 661 key.offset = filp->f_pos;
5caf2a00
CM
662 path = btrfs_alloc_path();
663 btrfs_init_path(path);
664 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1b05da2e 665 if (ret < 0)
e20d96d6 666 goto err;
7f5c1516 667 advance = 0;
de428b63 668 reada_leaves(root, path, inode->i_ino);
e20d96d6 669 while(1) {
5caf2a00 670 leaf = btrfs_buffer_leaf(path->nodes[0]);
e20d96d6 671 nritems = btrfs_header_nritems(&leaf->header);
5caf2a00 672 slot = path->slots[0];
dee26a9f
CM
673 if (advance || slot >= nritems) {
674 if (slot >= nritems -1) {
de428b63 675 reada_leaves(root, path, inode->i_ino);
5caf2a00 676 ret = btrfs_next_leaf(root, path);
e20d96d6
CM
677 if (ret)
678 break;
5caf2a00 679 leaf = btrfs_buffer_leaf(path->nodes[0]);
e20d96d6 680 nritems = btrfs_header_nritems(&leaf->header);
5caf2a00 681 slot = path->slots[0];
e20d96d6
CM
682 } else {
683 slot++;
5caf2a00 684 path->slots[0]++;
e20d96d6
CM
685 }
686 }
687 advance = 1;
688 item = leaf->items + slot;
e20d96d6
CM
689 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
690 break;
d6e4a428 691 if (btrfs_disk_key_type(&item->key) != key_type)
a429e513 692 break;
7f5c1516
CM
693 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
694 continue;
7fcde0e3 695 filp->f_pos = btrfs_disk_key_offset(&item->key);
dee26a9f 696 advance = 1;
e20d96d6 697 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
7e38180e
CM
698 di_cur = 0;
699 di_total = btrfs_item_size(leaf->items + slot);
700 while(di_cur < di_total) {
701 over = filldir(dirent, (const char *)(di + 1),
702 btrfs_dir_name_len(di),
703 btrfs_disk_key_offset(&item->key),
704 btrfs_disk_key_objectid(&di->location),
705 d_type);
706 if (over)
707 goto nopos;
708 di_len = btrfs_dir_name_len(di) + sizeof(*di);
709 di_cur += di_len;
710 di = (struct btrfs_dir_item *)((char *)di + di_len);
711 }
e20d96d6 712 }
7fcde0e3
CM
713 filp->f_pos++;
714nopos:
e20d96d6
CM
715 ret = 0;
716err:
5caf2a00
CM
717 btrfs_release_path(root, path);
718 btrfs_free_path(path);
22b0ebda 719 mutex_unlock(&root->fs_info->fs_mutex);
e20d96d6
CM
720 return ret;
721}
722
723static void btrfs_put_super (struct super_block * sb)
724{
725 struct btrfs_root *root = btrfs_sb(sb);
726 int ret;
727
728 ret = close_ctree(root);
729 if (ret) {
730 printk("close ctree returns %d\n", ret);
731 }
732 sb->s_fs_info = NULL;
733}
2e635a27
CM
734
735static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
736{
737 struct inode * inode;
e20d96d6
CM
738 struct dentry * root_dentry;
739 struct btrfs_super_block *disk_super;
0f7d52f4 740 struct btrfs_root *tree_root;
d6e4a428 741 struct btrfs_inode *bi;
2e635a27
CM
742
743 sb->s_maxbytes = MAX_LFS_FILESIZE;
2e635a27 744 sb->s_magic = BTRFS_SUPER_MAGIC;
e20d96d6 745 sb->s_op = &btrfs_super_ops;
2e635a27 746 sb->s_time_gran = 1;
e20d96d6 747
0f7d52f4 748 tree_root = open_ctree(sb);
d98237b3 749
0f7d52f4 750 if (!tree_root) {
e20d96d6
CM
751 printk("btrfs: open_ctree failed\n");
752 return -EIO;
753 }
0f7d52f4
CM
754 sb->s_fs_info = tree_root;
755 disk_super = tree_root->fs_info->disk_super;
e20d96d6
CM
756 printk("read in super total blocks %Lu root %Lu\n",
757 btrfs_super_total_blocks(disk_super),
758 btrfs_super_root_dir(disk_super));
759
c5739bba
CM
760 inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
761 tree_root);
d6e4a428
CM
762 bi = BTRFS_I(inode);
763 bi->location.objectid = inode->i_ino;
764 bi->location.offset = 0;
765 bi->location.flags = 0;
0f7d52f4 766 bi->root = tree_root;
d6e4a428
CM
767 btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
768
2e635a27
CM
769 if (!inode)
770 return -ENOMEM;
e20d96d6
CM
771 if (inode->i_state & I_NEW) {
772 btrfs_read_locked_inode(inode);
773 unlock_new_inode(inode);
774 }
2e635a27 775
e20d96d6
CM
776 root_dentry = d_alloc_root(inode);
777 if (!root_dentry) {
2e635a27
CM
778 iput(inode);
779 return -ENOMEM;
780 }
e20d96d6
CM
781 sb->s_root = root_dentry;
782
2e635a27
CM
783 return 0;
784}
785
4730a4bc
CM
786static int btrfs_write_inode(struct inode *inode, int wait)
787{
d6e4a428 788 struct btrfs_root *root = BTRFS_I(inode)->root;
4730a4bc 789 struct btrfs_trans_handle *trans;
b5133862
CM
790 int ret = 0;
791
792 if (wait) {
793 mutex_lock(&root->fs_info->fs_mutex);
794 trans = btrfs_start_transaction(root, 1);
31f3c99b 795 btrfs_set_trans_block_group(trans, inode);
b5133862
CM
796 ret = btrfs_commit_transaction(trans, root);
797 mutex_unlock(&root->fs_info->fs_mutex);
798 }
799 return ret;
800}
801
802static void btrfs_dirty_inode(struct inode *inode)
803{
804 struct btrfs_root *root = BTRFS_I(inode)->root;
805 struct btrfs_trans_handle *trans;
4730a4bc
CM
806
807 mutex_lock(&root->fs_info->fs_mutex);
808 trans = btrfs_start_transaction(root, 1);
31f3c99b 809 btrfs_set_trans_block_group(trans, inode);
b5133862
CM
810 btrfs_update_inode(trans, root, inode);
811 btrfs_end_transaction(trans, root);
4730a4bc 812 mutex_unlock(&root->fs_info->fs_mutex);
35b7e476 813 btrfs_btree_balance_dirty(root);
4730a4bc
CM
814}
815
d5719762 816static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
2619ba1f 817 struct btrfs_root *root,
31f3c99b
CM
818 u64 objectid,
819 struct btrfs_block_group_cache *group,
820 int mode)
d5719762
CM
821{
822 struct inode *inode;
823 struct btrfs_inode_item inode_item;
1b05da2e 824 struct btrfs_key *location;
d5719762 825 int ret;
de428b63 826 int owner;
d5719762 827
2619ba1f 828 inode = new_inode(root->fs_info->sb);
d5719762
CM
829 if (!inode)
830 return ERR_PTR(-ENOMEM);
831
2619ba1f 832 BTRFS_I(inode)->root = root;
de428b63
CM
833 if (mode & S_IFDIR)
834 owner = 0;
835 else
836 owner = 1;
837 group = btrfs_find_block_group(root, group, 0, 0, owner);
31f3c99b 838 BTRFS_I(inode)->block_group = group;
d5719762
CM
839
840 inode->i_uid = current->fsuid;
841 inode->i_gid = current->fsgid;
842 inode->i_mode = mode;
843 inode->i_ino = objectid;
844 inode->i_blocks = 0;
c5739bba 845 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
d5719762 846 fill_inode_item(&inode_item, inode);
1b05da2e
CM
847 location = &BTRFS_I(inode)->location;
848 location->objectid = objectid;
849 location->flags = 0;
850 location->offset = 0;
851 btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
d5719762
CM
852
853 ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
854 BUG_ON(ret);
855
856 insert_inode_hash(inode);
d5719762
CM
857 return inode;
858}
859
860static int btrfs_add_link(struct btrfs_trans_handle *trans,
861 struct dentry *dentry, struct inode *inode)
862{
863 int ret;
d6e4a428
CM
864 struct btrfs_key key;
865 struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
866 key.objectid = inode->i_ino;
867 key.flags = 0;
868 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
869 key.offset = 0;
870
871 ret = btrfs_insert_dir_item(trans, root,
d5719762
CM
872 dentry->d_name.name, dentry->d_name.len,
873 dentry->d_parent->d_inode->i_ino,
d6e4a428 874 &key, 0);
4730a4bc 875 if (ret == 0) {
5f26f772 876 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
d6e4a428 877 ret = btrfs_update_inode(trans, root,
4730a4bc
CM
878 dentry->d_parent->d_inode);
879 }
d5719762
CM
880 return ret;
881}
882
883static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
884 struct dentry *dentry, struct inode *inode)
885{
886 int err = btrfs_add_link(trans, dentry, inode);
887 if (!err) {
888 d_instantiate(dentry, inode);
889 return 0;
890 }
2c90e5d6
CM
891 if (err > 0)
892 err = -EEXIST;
d5719762
CM
893 return err;
894}
895
896static int btrfs_create(struct inode *dir, struct dentry *dentry,
897 int mode, struct nameidata *nd)
898{
899 struct btrfs_trans_handle *trans;
d6e4a428 900 struct btrfs_root *root = BTRFS_I(dir)->root;
d5719762
CM
901 struct inode *inode;
902 int err;
134e9731 903 int drop_inode = 0;
2619ba1f 904 u64 objectid;
d5719762 905
d561c025 906 mutex_lock(&root->fs_info->fs_mutex);
d5719762 907 trans = btrfs_start_transaction(root, 1);
31f3c99b 908 btrfs_set_trans_block_group(trans, dir);
2619ba1f
CM
909
910 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
911 if (err) {
912 err = -ENOSPC;
913 goto out_unlock;
914 }
915
31f3c99b
CM
916 inode = btrfs_new_inode(trans, root, objectid,
917 BTRFS_I(dir)->block_group, mode);
d5719762
CM
918 err = PTR_ERR(inode);
919 if (IS_ERR(inode))
d561c025 920 goto out_unlock;
31f3c99b
CM
921
922 btrfs_set_trans_block_group(trans, inode);
d5719762 923 err = btrfs_add_nondir(trans, dentry, inode);
134e9731
CM
924 if (err)
925 drop_inode = 1;
dee26a9f
CM
926 else {
927 inode->i_mapping->a_ops = &btrfs_aops;
928 inode->i_fop = &btrfs_file_operations;
929 inode->i_op = &btrfs_file_inode_operations;
930 }
d5719762 931 dir->i_sb->s_dirt = 1;
31f3c99b
CM
932 btrfs_update_inode_block_group(trans, inode);
933 btrfs_update_inode_block_group(trans, dir);
d561c025 934out_unlock:
22b0ebda 935 btrfs_end_transaction(trans, root);
d561c025 936 mutex_unlock(&root->fs_info->fs_mutex);
2c90e5d6 937
134e9731
CM
938 if (drop_inode) {
939 inode_dec_link_count(inode);
940 iput(inode);
941 }
35b7e476 942 btrfs_btree_balance_dirty(root);
d5719762
CM
943 return err;
944}
945
2b8d99a7
CM
946static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
947 struct dentry *dentry)
948{
949 struct btrfs_trans_handle *trans;
950 struct btrfs_root *root = BTRFS_I(dir)->root;
951 struct inode *inode = old_dentry->d_inode;
952 int err;
953 int drop_inode = 0;
954
955 if (inode->i_nlink == 0)
956 return -ENOENT;
957
958 inc_nlink(inode);
959 mutex_lock(&root->fs_info->fs_mutex);
960 trans = btrfs_start_transaction(root, 1);
961 btrfs_set_trans_block_group(trans, dir);
962 atomic_inc(&inode->i_count);
963 err = btrfs_add_nondir(trans, dentry, inode);
964 if (err)
965 drop_inode = 1;
966 dir->i_sb->s_dirt = 1;
967 btrfs_update_inode_block_group(trans, dir);
968 btrfs_update_inode(trans, root, inode);
969
970 btrfs_end_transaction(trans, root);
971 mutex_unlock(&root->fs_info->fs_mutex);
972
973 if (drop_inode) {
974 inode_dec_link_count(inode);
975 iput(inode);
976 }
977 btrfs_btree_balance_dirty(root);
978 return err;
979}
980
f7922033 981static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
2619ba1f
CM
982 struct btrfs_root *root,
983 u64 objectid, u64 dirid)
f7922033 984{
f7922033
CM
985 int ret;
986 char buf[2];
d6e4a428
CM
987 struct btrfs_key key;
988
f7922033
CM
989 buf[0] = '.';
990 buf[1] = '.';
991
2619ba1f 992 key.objectid = objectid;
d6e4a428
CM
993 key.offset = 0;
994 key.flags = 0;
995 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
996
2619ba1f 997 ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
d6e4a428 998 &key, 1);
f7922033
CM
999 if (ret)
1000 goto error;
2619ba1f
CM
1001 key.objectid = dirid;
1002 ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
d6e4a428 1003 &key, 1);
4730a4bc
CM
1004 if (ret)
1005 goto error;
f7922033
CM
1006error:
1007 return ret;
1008}
1009
1010static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1011{
1012 struct inode *inode;
1013 struct btrfs_trans_handle *trans;
d6e4a428 1014 struct btrfs_root *root = BTRFS_I(dir)->root;
f7922033
CM
1015 int err = 0;
1016 int drop_on_err = 0;
2619ba1f 1017 u64 objectid;
f7922033
CM
1018
1019 mutex_lock(&root->fs_info->fs_mutex);
1020 trans = btrfs_start_transaction(root, 1);
31f3c99b 1021 btrfs_set_trans_block_group(trans, dir);
f7922033
CM
1022 if (IS_ERR(trans)) {
1023 err = PTR_ERR(trans);
1024 goto out_unlock;
1025 }
2619ba1f
CM
1026
1027 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1028 if (err) {
1029 err = -ENOSPC;
1030 goto out_unlock;
1031 }
1032
31f3c99b
CM
1033 inode = btrfs_new_inode(trans, root, objectid,
1034 BTRFS_I(dir)->block_group, S_IFDIR | mode);
f7922033
CM
1035 if (IS_ERR(inode)) {
1036 err = PTR_ERR(inode);
1037 goto out_fail;
1038 }
1039 drop_on_err = 1;
1040 inode->i_op = &btrfs_dir_inode_operations;
1041 inode->i_fop = &btrfs_dir_file_operations;
31f3c99b 1042 btrfs_set_trans_block_group(trans, inode);
f7922033 1043
2619ba1f
CM
1044 err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1045 if (err)
1046 goto out_fail;
1047
1048 inode->i_size = 6;
1049 err = btrfs_update_inode(trans, root, inode);
f7922033
CM
1050 if (err)
1051 goto out_fail;
1052 err = btrfs_add_link(trans, dentry, inode);
1053 if (err)
1054 goto out_fail;
1055 d_instantiate(dentry, inode);
f7922033 1056 drop_on_err = 0;
cd1bc465 1057 dir->i_sb->s_dirt = 1;
31f3c99b
CM
1058 btrfs_update_inode_block_group(trans, inode);
1059 btrfs_update_inode_block_group(trans, dir);
f7922033
CM
1060
1061out_fail:
1062 btrfs_end_transaction(trans, root);
1063out_unlock:
1064 mutex_unlock(&root->fs_info->fs_mutex);
1065 if (drop_on_err)
1066 iput(inode);
35b7e476 1067 btrfs_btree_balance_dirty(root);
f7922033
CM
1068 return err;
1069}
1070
8fd17795
CM
1071static int btrfs_sync_file(struct file *file,
1072 struct dentry *dentry, int datasync)
1073{
1074 struct inode *inode = dentry->d_inode;
1075 struct btrfs_root *root = BTRFS_I(inode)->root;
1076 int ret;
1077 struct btrfs_trans_handle *trans;
1078
1079 mutex_lock(&root->fs_info->fs_mutex);
1080 trans = btrfs_start_transaction(root, 1);
1081 if (!trans) {
1082 ret = -ENOMEM;
1083 goto out;
1084 }
1085 ret = btrfs_commit_transaction(trans, root);
1086 mutex_unlock(&root->fs_info->fs_mutex);
1087out:
1088 return ret > 0 ? EIO : ret;
1089}
1090
d5719762
CM
1091static int btrfs_sync_fs(struct super_block *sb, int wait)
1092{
1093 struct btrfs_trans_handle *trans;
1094 struct btrfs_root *root;
1095 int ret;
d98237b3 1096 root = btrfs_sb(sb);
df2ce34c 1097
d5719762 1098 sb->s_dirt = 0;
d561c025 1099 if (!wait) {
7cfcc17e 1100 filemap_flush(root->fs_info->btree_inode->i_mapping);
d561c025
CM
1101 return 0;
1102 }
d561c025 1103 mutex_lock(&root->fs_info->fs_mutex);
d5719762
CM
1104 trans = btrfs_start_transaction(root, 1);
1105 ret = btrfs_commit_transaction(trans, root);
1106 sb->s_dirt = 0;
1107 BUG_ON(ret);
1108printk("btrfs sync_fs\n");
d561c025 1109 mutex_unlock(&root->fs_info->fs_mutex);
d5719762
CM
1110 return 0;
1111}
1112
75dfe396 1113static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
dee26a9f
CM
1114 struct buffer_head *result, int create)
1115{
1116 int ret;
1117 int err = 0;
1118 u64 blocknr;
1119 u64 extent_start = 0;
1120 u64 extent_end = 0;
1121 u64 objectid = inode->i_ino;
236454df 1122 u32 found_type;
5caf2a00 1123 struct btrfs_path *path;
d6e4a428 1124 struct btrfs_root *root = BTRFS_I(inode)->root;
dee26a9f
CM
1125 struct btrfs_file_extent_item *item;
1126 struct btrfs_leaf *leaf;
1127 struct btrfs_disk_key *found_key;
1128
5caf2a00
CM
1129 path = btrfs_alloc_path();
1130 BUG_ON(!path);
1131 btrfs_init_path(path);
6567e837 1132 if (create) {
6567e837
CM
1133 WARN_ON(1);
1134 }
dee26a9f 1135
236454df 1136 ret = btrfs_lookup_file_extent(NULL, root, path,
9773a788 1137 inode->i_ino,
236454df 1138 iblock << inode->i_blkbits, 0);
dee26a9f 1139 if (ret < 0) {
dee26a9f
CM
1140 err = ret;
1141 goto out;
1142 }
1143
1144 if (ret != 0) {
5caf2a00
CM
1145 if (path->slots[0] == 0) {
1146 btrfs_release_path(root, path);
236454df 1147 goto out;
dee26a9f 1148 }
5caf2a00 1149 path->slots[0]--;
dee26a9f
CM
1150 }
1151
5caf2a00 1152 item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
dee26a9f 1153 struct btrfs_file_extent_item);
5caf2a00 1154 leaf = btrfs_buffer_leaf(path->nodes[0]);
dee26a9f
CM
1155 blocknr = btrfs_file_extent_disk_blocknr(item);
1156 blocknr += btrfs_file_extent_offset(item);
1157
dee26a9f 1158 /* are we inside the extent that was found? */
5caf2a00 1159 found_key = &leaf->items[path->slots[0]].key;
236454df 1160 found_type = btrfs_disk_key_type(found_key);
dee26a9f 1161 if (btrfs_disk_key_objectid(found_key) != objectid ||
236454df 1162 found_type != BTRFS_EXTENT_DATA_KEY) {
dee26a9f
CM
1163 extent_end = 0;
1164 extent_start = 0;
dee26a9f
CM
1165 goto out;
1166 }
236454df
CM
1167 found_type = btrfs_file_extent_type(item);
1168 extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1169 if (found_type == BTRFS_FILE_EXTENT_REG) {
1170 extent_start = extent_start >> inode->i_blkbits;
1171 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1172 if (iblock >= extent_start && iblock < extent_end) {
1173 err = 0;
1174 btrfs_map_bh_to_logical(root, result, blocknr +
1175 iblock - extent_start);
1176 goto out;
1177 }
1178 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1179 char *ptr;
1180 char *map;
1181 u32 size;
1182 size = btrfs_file_extent_inline_len(leaf->items +
1183 path->slots[0]);
1184 extent_end = (extent_start + size) >> inode->i_blkbits;
1185 extent_start >>= inode->i_blkbits;
1186 if (iblock < extent_start || iblock > extent_end) {
1187 goto out;
1188 }
1189 ptr = btrfs_file_extent_inline_start(item);
1190 map = kmap(result->b_page);
1191 memcpy(map, ptr, size);
1192 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1193 flush_dcache_page(result->b_page);
1194 kunmap(result->b_page);
1195 set_buffer_uptodate(result);
1196 SetPageChecked(result->b_page);
1197 btrfs_map_bh_to_logical(root, result, 0);
dee26a9f 1198 }
dee26a9f 1199out:
5caf2a00 1200 btrfs_free_path(path);
75dfe396
CM
1201 return err;
1202}
1203
1204static int btrfs_get_block(struct inode *inode, sector_t iblock,
1205 struct buffer_head *result, int create)
1206{
1207 int err;
d6e4a428 1208 struct btrfs_root *root = BTRFS_I(inode)->root;
75dfe396 1209 mutex_lock(&root->fs_info->fs_mutex);
e8f05c45 1210 err = btrfs_get_block_lock(inode, iblock, result, create);
dee26a9f
CM
1211 mutex_unlock(&root->fs_info->fs_mutex);
1212 return err;
1213}
1214
1215static int btrfs_prepare_write(struct file *file, struct page *page,
1216 unsigned from, unsigned to)
1217{
6407bf6d 1218 return nobh_prepare_write(page, from, to, btrfs_get_block);
dee26a9f
CM
1219}
1220
d561c025
CM
/* write_super hook: a waiting btrfs_sync_fs(); its return value is ignored. */
static void btrfs_write_super(struct super_block *sb)
{
	const int wait = 1;

	btrfs_sync_fs(sb, wait);
}
1225
dee26a9f
CM
1226static int btrfs_readpage(struct file *file, struct page *page)
1227{
1228 return mpage_readpage(page, btrfs_get_block);
1229}
1230
236454df
CM
1231/*
1232 * While block_write_full_page is writing back the dirty buffers under
1233 * the page lock, whoever dirtied the buffers may decide to clean them
1234 * again at any time. We handle that by only looking at the buffer
1235 * state inside lock_buffer().
1236 *
1237 * If block_write_full_page() is called for regular writeback
1238 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1239 * locked buffer. This only can happen if someone has written the buffer
1240 * directly, with submit_bh(). At the address_space level PageWriteback
1241 * prevents this contention from occurring.
1242 */
1243static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1244 struct writeback_control *wbc)
1245{
1246 int err;
1247 sector_t block;
1248 sector_t last_block;
1249 struct buffer_head *bh, *head;
1250 const unsigned blocksize = 1 << inode->i_blkbits;
1251 int nr_underway = 0;
1252
1253 BUG_ON(!PageLocked(page));
1254
1255 last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1256
1257 if (!page_has_buffers(page)) {
1258 create_empty_buffers(page, blocksize,
1259 (1 << BH_Dirty)|(1 << BH_Uptodate));
1260 }
1261
1262 /*
1263 * Be very careful. We have no exclusion from __set_page_dirty_buffers
1264 * here, and the (potentially unmapped) buffers may become dirty at
1265 * any time. If a buffer becomes dirty here after we've inspected it
1266 * then we just miss that fact, and the page stays dirty.
1267 *
1268 * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1269 * handle that here by just cleaning them.
1270 */
1271
1272 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1273 head = page_buffers(page);
1274 bh = head;
1275
1276 /*
1277 * Get all the dirty buffers mapped to disk addresses and
1278 * handle any aliases from the underlying blockdev's mapping.
1279 */
1280 do {
1281 if (block > last_block) {
1282 /*
1283 * mapped buffers outside i_size will occur, because
1284 * this page can be outside i_size when there is a
1285 * truncate in progress.
1286 */
1287 /*
1288 * The buffer was zeroed by block_write_full_page()
1289 */
1290 clear_buffer_dirty(bh);
1291 set_buffer_uptodate(bh);
1292 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1293 WARN_ON(bh->b_size != blocksize);
1294 err = btrfs_get_block(inode, block, bh, 0);
35b7e476
CM
1295 if (err) {
1296printk("writepage going to recovery err %d\n", err);
236454df 1297 goto recover;
35b7e476 1298 }
236454df
CM
1299 if (buffer_new(bh)) {
1300 /* blockdev mappings never come here */
1301 clear_buffer_new(bh);
236454df
CM
1302 }
1303 }
1304 bh = bh->b_this_page;
1305 block++;
1306 } while (bh != head);
1307
1308 do {
1309 if (!buffer_mapped(bh))
1310 continue;
1311 /*
1312 * If it's a fully non-blocking write attempt and we cannot
1313 * lock the buffer then redirty the page. Note that this can
1314 * potentially cause a busy-wait loop from pdflush and kswapd
1315 * activity, but those code paths have their own higher-level
1316 * throttling.
1317 */
1318 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1319 lock_buffer(bh);
1320 } else if (test_set_buffer_locked(bh)) {
1321 redirty_page_for_writepage(wbc, page);
1322 continue;
1323 }
1324 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1325 mark_buffer_async_write(bh);
1326 } else {
1327 unlock_buffer(bh);
1328 }
1329 } while ((bh = bh->b_this_page) != head);
1330
1331 /*
1332 * The page and its buffers are protected by PageWriteback(), so we can
1333 * drop the bh refcounts early.
1334 */
1335 BUG_ON(PageWriteback(page));
1336 set_page_writeback(page);
1337
1338 do {
1339 struct buffer_head *next = bh->b_this_page;
1340 if (buffer_async_write(bh)) {
1341 submit_bh(WRITE, bh);
1342 nr_underway++;
1343 }
1344 bh = next;
1345 } while (bh != head);
1346 unlock_page(page);
1347
1348 err = 0;
1349done:
1350 if (nr_underway == 0) {
1351 /*
1352 * The page was marked dirty, but the buffers were
1353 * clean. Someone wrote them back by hand with
1354 * ll_rw_block/submit_bh. A rare case.
1355 */
1356 int uptodate = 1;
1357 do {
1358 if (!buffer_uptodate(bh)) {
1359 uptodate = 0;
1360 break;
1361 }
1362 bh = bh->b_this_page;
1363 } while (bh != head);
1364 if (uptodate)
1365 SetPageUptodate(page);
1366 end_page_writeback(page);
236454df
CM
1367 }
1368 return err;
1369
1370recover:
1371 /*
1372 * ENOSPC, or some other error. We may already have added some
1373 * blocks to the file, so we need to write these out to avoid
1374 * exposing stale data.
1375 * The page is currently locked and not marked for writeback
1376 */
1377 bh = head;
1378 /* Recovery: lock and submit the mapped buffers */
1379 do {
1380 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1381 lock_buffer(bh);
1382 mark_buffer_async_write(bh);
1383 } else {
1384 /*
1385 * The buffer may have been set dirty during
1386 * attachment to a dirty page.
1387 */
1388 clear_buffer_dirty(bh);
1389 }
1390 } while ((bh = bh->b_this_page) != head);
1391 SetPageError(page);
1392 BUG_ON(PageWriteback(page));
1393 set_page_writeback(page);
1394 do {
1395 struct buffer_head *next = bh->b_this_page;
1396 if (buffer_async_write(bh)) {
1397 clear_buffer_dirty(bh);
1398 submit_bh(WRITE, bh);
1399 nr_underway++;
1400 }
1401 bh = next;
1402 } while (bh != head);
1403 unlock_page(page);
1404 goto done;
1405}
1406
1407/*
1408 * The generic ->writepage function for buffer-backed address_spaces
1409 */
dee26a9f
CM
1410static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1411{
236454df
CM
1412 struct inode * const inode = page->mapping->host;
1413 loff_t i_size = i_size_read(inode);
1414 const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1415 unsigned offset;
1416 void *kaddr;
1417
1418 /* Is the page fully inside i_size? */
1419 if (page->index < end_index)
1420 return __btrfs_write_full_page(inode, page, wbc);
1421
1422 /* Is the page fully outside i_size? (truncate in progress) */
1423 offset = i_size & (PAGE_CACHE_SIZE-1);
1424 if (page->index >= end_index+1 || !offset) {
1425 /*
1426 * The page may have dirty, unmapped buffers. For example,
1427 * they may have been added in ext3_writepage(). Make them
1428 * freeable here, so the page does not leak.
1429 */
1430 block_invalidatepage(page, 0);
1431 unlock_page(page);
1432 return 0; /* don't care */
1433 }
1434
1435 /*
1436 * The page straddles i_size. It must be zeroed out on each and every
1437 * writepage invokation because it may be mmapped. "A file is mapped
1438 * in multiples of the page size. For a file that is not a multiple of
1439 * the page size, the remaining memory is zeroed when mapped, and
1440 * writes to that region are not written out to the file."
1441 */
1442 kaddr = kmap_atomic(page, KM_USER0);
1443 memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1444 flush_dcache_page(page);
1445 kunmap_atomic(kaddr, KM_USER0);
1446 return __btrfs_write_full_page(inode, page, wbc);
dee26a9f 1447}
d561c025 1448
f4b9aa8d
CM
1449static void btrfs_truncate(struct inode *inode)
1450{
d6e4a428 1451 struct btrfs_root *root = BTRFS_I(inode)->root;
f4b9aa8d
CM
1452 int ret;
1453 struct btrfs_trans_handle *trans;
1454
1455 if (!S_ISREG(inode->i_mode))
1456 return;
1457 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1458 return;
1459
e8f05c45 1460 nobh_truncate_page(inode->i_mapping, inode->i_size);
f4b9aa8d
CM
1461
1462 /* FIXME, add redo link to tree so we don't leak on crash */
1463 mutex_lock(&root->fs_info->fs_mutex);
1464 trans = btrfs_start_transaction(root, 1);
31f3c99b 1465 btrfs_set_trans_block_group(trans, inode);
f4b9aa8d
CM
1466 ret = btrfs_truncate_in_trans(trans, root, inode);
1467 BUG_ON(ret);
35b7e476 1468 btrfs_update_inode(trans, root, inode);
f4b9aa8d
CM
1469 ret = btrfs_end_transaction(trans, root);
1470 BUG_ON(ret);
1471 mutex_unlock(&root->fs_info->fs_mutex);
35b7e476 1472 btrfs_btree_balance_dirty(root);
f4b9aa8d
CM
1473}
1474
236454df
CM
1475/*
1476 * Make sure any changes to nobh_commit_write() are reflected in
1477 * nobh_truncate_page(), since it doesn't call commit_write().
1478 */
1479static int btrfs_commit_write(struct file *file, struct page *page,
1480 unsigned from, unsigned to)
1481{
1482 struct inode *inode = page->mapping->host;
1483 struct buffer_head *bh;
1484 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1485
1486 SetPageUptodate(page);
1487 bh = page_buffers(page);
1488 if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1489 set_page_dirty(page);
1490 }
1491 if (pos > inode->i_size) {
1492 i_size_write(inode, pos);
1493 mark_inode_dirty(inode);
1494 }
1495 return 0;
1496}
1497
75dfe396
CM
1498static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1499 struct page **prepared_pages,
1500 const char __user * buf)
1501{
1502 long page_fault = 0;
1503 int i;
1504 int offset = pos & (PAGE_CACHE_SIZE - 1);
1505
1506 for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1507 size_t count = min_t(size_t,
1508 PAGE_CACHE_SIZE - offset, write_bytes);
1509 struct page *page = prepared_pages[i];
1510 fault_in_pages_readable(buf, count);
1511
1512 /* Copy data from userspace to the current page */
1513 kmap(page);
1514 page_fault = __copy_from_user(page_address(page) + offset,
1515 buf, count);
1516 /* Flush processor's dcache for this page */
1517 flush_dcache_page(page);
1518 kunmap(page);
1519 buf += count;
1520 write_bytes -= count;
1521
1522 if (page_fault)
1523 break;
1524 }
1525 return page_fault ? -EFAULT : 0;
1526}
1527
1528static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1529{
1530 size_t i;
1531 for (i = 0; i < num_pages; i++) {
1532 if (!pages[i])
1533 break;
1534 unlock_page(pages[i]);
1535 mark_page_accessed(pages[i]);
1536 page_cache_release(pages[i]);
1537 }
1538}
1539static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1540 struct btrfs_root *root,
1541 struct file *file,
1542 struct page **pages,
1543 size_t num_pages,
1544 loff_t pos,
1545 size_t write_bytes)
1546{
1547 int i;
1548 int offset;
1549 int err = 0;
1550 int ret;
1551 int this_write;
f254e52c 1552 struct inode *inode = file->f_path.dentry->d_inode;
236454df
CM
1553 struct buffer_head *bh;
1554 struct btrfs_file_extent_item *ei;
75dfe396
CM
1555
1556 for (i = 0; i < num_pages; i++) {
1557 offset = pos & (PAGE_CACHE_SIZE -1);
1558 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
f254e52c
CM
1559 /* FIXME, one block at a time */
1560
1561 mutex_lock(&root->fs_info->fs_mutex);
1562 trans = btrfs_start_transaction(root, 1);
31f3c99b 1563 btrfs_set_trans_block_group(trans, inode);
236454df
CM
1564
1565 bh = page_buffers(pages[i]);
1566 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1567 struct btrfs_key key;
1568 struct btrfs_path *path;
1569 char *ptr;
1570 u32 datasize;
1571
1572 path = btrfs_alloc_path();
1573 BUG_ON(!path);
1574 key.objectid = inode->i_ino;
1575 key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1576 key.flags = 0;
1577 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1578 BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1579 datasize = offset +
1580 btrfs_file_extent_calc_inline_size(write_bytes);
1581 ret = btrfs_insert_empty_item(trans, root, path, &key,
1582 datasize);
1583 BUG_ON(ret);
1584 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1585 path->slots[0], struct btrfs_file_extent_item);
1586 btrfs_set_file_extent_generation(ei, trans->transid);
1587 btrfs_set_file_extent_type(ei,
1588 BTRFS_FILE_EXTENT_INLINE);
1589 ptr = btrfs_file_extent_inline_start(ei);
098f59c2
CM
1590 btrfs_memcpy(root, path->nodes[0]->b_data,
1591 ptr, bh->b_data, offset + write_bytes);
236454df
CM
1592 mark_buffer_dirty(path->nodes[0]);
1593 btrfs_free_path(path);
1594 } else {
1595 btrfs_csum_file_block(trans, root, inode->i_ino,
f254e52c
CM
1596 pages[i]->index << PAGE_CACHE_SHIFT,
1597 kmap(pages[i]), PAGE_CACHE_SIZE);
236454df
CM
1598 kunmap(pages[i]);
1599 }
f254e52c 1600 SetPageChecked(pages[i]);
e37c9e69 1601 // btrfs_update_inode_block_group(trans, inode);
f254e52c
CM
1602 ret = btrfs_end_transaction(trans, root);
1603 BUG_ON(ret);
1604 mutex_unlock(&root->fs_info->fs_mutex);
1605
236454df 1606 ret = btrfs_commit_write(file, pages[i], offset,
75dfe396
CM
1607 offset + this_write);
1608 pos += this_write;
1609 if (ret) {
1610 err = ret;
1611 goto failed;
1612 }
1613 WARN_ON(this_write > write_bytes);
1614 write_bytes -= this_write;
1615 }
1616failed:
1617 return err;
1618}
1619
b18c6685
CM
1620static int drop_extents(struct btrfs_trans_handle *trans,
1621 struct btrfs_root *root,
1622 struct inode *inode,
de428b63 1623 u64 start, u64 end, u64 *hint_block)
b18c6685
CM
1624{
1625 int ret;
1626 struct btrfs_key key;
1627 struct btrfs_leaf *leaf;
1628 int slot;
1629 struct btrfs_file_extent_item *extent;
236454df 1630 u64 extent_end = 0;
b18c6685
CM
1631 int keep;
1632 struct btrfs_file_extent_item old;
1633 struct btrfs_path *path;
1634 u64 search_start = start;
1635 int bookend;
236454df
CM
1636 int found_type;
1637 int found_extent;
1638 int found_inline;
1639
b18c6685
CM
1640 path = btrfs_alloc_path();
1641 if (!path)
1642 return -ENOMEM;
a429e513
CM
1643 while(1) {
1644 btrfs_release_path(root, path);
1645 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1646 search_start, -1);
1647 if (ret < 0)
70b2befd 1648 goto out;
a429e513
CM
1649 if (ret > 0) {
1650 if (path->slots[0] == 0) {
236454df 1651 ret = 0;
a429e513
CM
1652 goto out;
1653 }
1654 path->slots[0]--;
70b2befd 1655 }
b18c6685
CM
1656 keep = 0;
1657 bookend = 0;
236454df
CM
1658 found_extent = 0;
1659 found_inline = 0;
1660 extent = NULL;
b18c6685
CM
1661 leaf = btrfs_buffer_leaf(path->nodes[0]);
1662 slot = path->slots[0];
1663 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
b18c6685
CM
1664 if (key.offset >= end || key.objectid != inode->i_ino) {
1665 ret = 0;
1666 goto out;
1667 }
236454df
CM
1668 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1669 ret = 0;
1670 goto out;
1671 }
1672 extent = btrfs_item_ptr(leaf, slot,
1673 struct btrfs_file_extent_item);
1674 found_type = btrfs_file_extent_type(extent);
1675 if (found_type == BTRFS_FILE_EXTENT_REG) {
1676 extent_end = key.offset +
1677 (btrfs_file_extent_num_blocks(extent) <<
1678 inode->i_blkbits);
1679 found_extent = 1;
1680 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1681 found_inline = 1;
1682 extent_end = key.offset +
1683 btrfs_file_extent_inline_len(leaf->items + slot);
1684 }
1685
1686 if (!found_extent && !found_inline) {
1687 ret = 0;
a429e513 1688 goto out;
236454df
CM
1689 }
1690
1691 if (search_start >= extent_end) {
1692 ret = 0;
a429e513 1693 goto out;
236454df
CM
1694 }
1695
a429e513 1696 search_start = extent_end;
b18c6685
CM
1697
1698 if (end < extent_end && end >= key.offset) {
236454df
CM
1699 if (found_extent) {
1700 memcpy(&old, extent, sizeof(old));
1701 ret = btrfs_inc_extent_ref(trans, root,
1702 btrfs_file_extent_disk_blocknr(&old),
1703 btrfs_file_extent_disk_num_blocks(&old));
1704 BUG_ON(ret);
1705 }
1706 WARN_ON(found_inline);
b18c6685
CM
1707 bookend = 1;
1708 }
1709
1710 if (start > key.offset) {
1711 u64 new_num;
a429e513 1712 u64 old_num;
b18c6685
CM
1713 /* truncate existing extent */
1714 keep = 1;
1715 WARN_ON(start & (root->blocksize - 1));
236454df
CM
1716 if (found_extent) {
1717 new_num = (start - key.offset) >>
1718 inode->i_blkbits;
1719 old_num = btrfs_file_extent_num_blocks(extent);
de428b63
CM
1720 *hint_block =
1721 btrfs_file_extent_disk_blocknr(extent);
236454df
CM
1722 inode->i_blocks -= (old_num - new_num) << 3;
1723 btrfs_set_file_extent_num_blocks(extent,
1724 new_num);
1725 mark_buffer_dirty(path->nodes[0]);
1726 } else {
1727 WARN_ON(1);
236454df 1728 }
b18c6685
CM
1729 }
1730 if (!keep) {
236454df
CM
1731 u64 disk_blocknr = 0;
1732 u64 disk_num_blocks = 0;
1733 u64 extent_num_blocks = 0;
1734 if (found_extent) {
1735 disk_blocknr =
1736 btrfs_file_extent_disk_blocknr(extent);
1737 disk_num_blocks =
1738 btrfs_file_extent_disk_num_blocks(extent);
1739 extent_num_blocks =
1740 btrfs_file_extent_num_blocks(extent);
de428b63
CM
1741 *hint_block =
1742 btrfs_file_extent_disk_blocknr(extent);
236454df 1743 }
b18c6685
CM
1744 ret = btrfs_del_item(trans, root, path);
1745 BUG_ON(ret);
1746 btrfs_release_path(root, path);
098f59c2 1747 extent = NULL;
236454df 1748 if (found_extent) {
098f59c2 1749 inode->i_blocks -= extent_num_blocks << 3;
236454df
CM
1750 ret = btrfs_free_extent(trans, root,
1751 disk_blocknr,
1752 disk_num_blocks, 0);
1753 }
b18c6685
CM
1754
1755 BUG_ON(ret);
1756 if (!bookend && search_start >= end) {
1757 ret = 0;
1758 goto out;
1759 }
1760 if (!bookend)
a429e513 1761 continue;
b18c6685 1762 }
236454df 1763 if (bookend && found_extent) {
b18c6685
CM
1764 /* create bookend */
1765 struct btrfs_key ins;
b18c6685
CM
1766 ins.objectid = inode->i_ino;
1767 ins.offset = end;
1768 ins.flags = 0;
1769 btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1770
1771 btrfs_release_path(root, path);
b18c6685
CM
1772 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1773 sizeof(*extent));
1774 BUG_ON(ret);
1775 extent = btrfs_item_ptr(
1776 btrfs_buffer_leaf(path->nodes[0]),
1777 path->slots[0],
1778 struct btrfs_file_extent_item);
1779 btrfs_set_file_extent_disk_blocknr(extent,
1780 btrfs_file_extent_disk_blocknr(&old));
1781 btrfs_set_file_extent_disk_num_blocks(extent,
1782 btrfs_file_extent_disk_num_blocks(&old));
1783
1784 btrfs_set_file_extent_offset(extent,
1785 btrfs_file_extent_offset(&old) +
1786 ((end - key.offset) >> inode->i_blkbits));
1787 WARN_ON(btrfs_file_extent_num_blocks(&old) <
1788 (end - key.offset) >> inode->i_blkbits);
1789 btrfs_set_file_extent_num_blocks(extent,
1790 btrfs_file_extent_num_blocks(&old) -
1791 ((end - key.offset) >> inode->i_blkbits));
1792
236454df
CM
1793 btrfs_set_file_extent_type(extent,
1794 BTRFS_FILE_EXTENT_REG);
b18c6685
CM
1795 btrfs_set_file_extent_generation(extent,
1796 btrfs_file_extent_generation(&old));
b18c6685 1797 btrfs_mark_buffer_dirty(path->nodes[0]);
a429e513
CM
1798 inode->i_blocks +=
1799 btrfs_file_extent_num_blocks(extent) << 3;
b18c6685 1800 ret = 0;
70b2befd 1801 goto out;
b18c6685 1802 }
b18c6685 1803 }
b18c6685 1804out:
b18c6685
CM
1805 btrfs_free_path(path);
1806 return ret;
1807}
1808
1809static int prepare_pages(struct btrfs_root *root,
75dfe396
CM
1810 struct file *file,
1811 struct page **pages,
1812 size_t num_pages,
1813 loff_t pos,
2932f3ec
CM
1814 unsigned long first_index,
1815 unsigned long last_index,
6567e837
CM
1816 size_t write_bytes,
1817 u64 alloc_extent_start)
75dfe396
CM
1818{
1819 int i;
1820 unsigned long index = pos >> PAGE_CACHE_SHIFT;
1821 struct inode *inode = file->f_path.dentry->d_inode;
1822 int offset;
1823 int err = 0;
75dfe396 1824 int this_write;
6567e837
CM
1825 struct buffer_head *bh;
1826 struct buffer_head *head;
75dfe396
CM
1827 loff_t isize = i_size_read(inode);
1828
1829 memset(pages, 0, num_pages * sizeof(struct page *));
1830
1831 for (i = 0; i < num_pages; i++) {
1832 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1833 if (!pages[i]) {
1834 err = -ENOMEM;
1835 goto failed_release;
1836 }
35b7e476
CM
1837 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
1838 wait_on_page_writeback(pages[i]);
75dfe396
CM
1839 offset = pos & (PAGE_CACHE_SIZE -1);
1840 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
35b7e476
CM
1841 if (!page_has_buffers(pages[i])) {
1842 create_empty_buffers(pages[i],
1843 root->fs_info->sb->s_blocksize,
1844 (1 << BH_Uptodate));
1845 }
6567e837
CM
1846 head = page_buffers(pages[i]);
1847 bh = head;
1848 do {
1849 err = btrfs_map_bh_to_logical(root, bh,
1850 alloc_extent_start);
1851 BUG_ON(err);
1852 if (err)
1853 goto failed_truncate;
1854 bh = bh->b_this_page;
236454df
CM
1855 if (alloc_extent_start)
1856 alloc_extent_start++;
6567e837 1857 } while (bh != head);
75dfe396 1858 pos += this_write;
75dfe396
CM
1859 WARN_ON(this_write > write_bytes);
1860 write_bytes -= this_write;
1861 }
1862 return 0;
1863
1864failed_release:
1865 btrfs_drop_pages(pages, num_pages);
1866 return err;
1867
1868failed_truncate:
1869 btrfs_drop_pages(pages, num_pages);
1870 if (pos > isize)
1871 vmtruncate(inode, isize);
1872 return err;
1873}
1874
1875static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1876 size_t count, loff_t *ppos)
1877{
1878 loff_t pos;
1879 size_t num_written = 0;
1880 int err = 0;
1881 int ret = 0;
75dfe396 1882 struct inode *inode = file->f_path.dentry->d_inode;
d6e4a428 1883 struct btrfs_root *root = BTRFS_I(inode)->root;
b18c6685 1884 struct page *pages[8];
35b7e476 1885 struct page *pinned[2];
2932f3ec
CM
1886 unsigned long first_index;
1887 unsigned long last_index;
6567e837
CM
1888 u64 start_pos;
1889 u64 num_blocks;
1890 u64 alloc_extent_start;
de428b63 1891 u64 hint_block;
6567e837 1892 struct btrfs_trans_handle *trans;
b18c6685 1893 struct btrfs_key ins;
35b7e476
CM
1894 pinned[0] = NULL;
1895 pinned[1] = NULL;
75dfe396
CM
1896 if (file->f_flags & O_DIRECT)
1897 return -EINVAL;
1898 pos = *ppos;
75dfe396
CM
1899 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1900 current->backing_dev_info = inode->i_mapping->backing_dev_info;
1901 err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1902 if (err)
1903 goto out;
1904 if (count == 0)
1905 goto out;
1906 err = remove_suid(file->f_path.dentry);
1907 if (err)
1908 goto out;
1909 file_update_time(file);
a429e513
CM
1910
1911 start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1912 num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1913 inode->i_blkbits;
1914
75dfe396 1915 mutex_lock(&inode->i_mutex);
2932f3ec
CM
1916 first_index = pos >> PAGE_CACHE_SHIFT;
1917 last_index = (pos + count) >> PAGE_CACHE_SHIFT;
6567e837 1918
a429e513
CM
1919 if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1920 (pos & (PAGE_CACHE_SIZE - 1))) {
1921 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1922 if (!PageUptodate(pinned[0])) {
1923 ret = mpage_readpage(pinned[0], btrfs_get_block);
1924 BUG_ON(ret);
35b7e476 1925 wait_on_page_locked(pinned[0]);
a429e513
CM
1926 } else {
1927 unlock_page(pinned[0]);
1928 }
1929 }
1930 if (first_index != last_index &&
1931 (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
de428b63 1932 pos + count < inode->i_size &&
a429e513
CM
1933 (count & (PAGE_CACHE_SIZE - 1))) {
1934 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1935 if (!PageUptodate(pinned[1])) {
1936 ret = mpage_readpage(pinned[1], btrfs_get_block);
1937 BUG_ON(ret);
35b7e476 1938 wait_on_page_locked(pinned[1]);
a429e513
CM
1939 } else {
1940 unlock_page(pinned[1]);
1941 }
1942 }
1943
6567e837
CM
1944 mutex_lock(&root->fs_info->fs_mutex);
1945 trans = btrfs_start_transaction(root, 1);
1946 if (!trans) {
1947 err = -ENOMEM;
b18c6685 1948 mutex_unlock(&root->fs_info->fs_mutex);
6567e837
CM
1949 goto out_unlock;
1950 }
31f3c99b 1951 btrfs_set_trans_block_group(trans, inode);
a429e513
CM
1952 /* FIXME blocksize != 4096 */
1953 inode->i_blocks += num_blocks << 3;
de428b63 1954 hint_block = 0;
b18c6685 1955 if (start_pos < inode->i_size) {
a429e513 1956 /* FIXME blocksize != pagesize */
b18c6685
CM
1957 ret = drop_extents(trans, root, inode,
1958 start_pos,
1959 (pos + count + root->blocksize -1) &
de428b63 1960 ~((u64)root->blocksize - 1), &hint_block);
236454df 1961 BUG_ON(ret);
b18c6685 1962 }
236454df
CM
1963 if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1964 pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
4d775673 1965 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
de428b63
CM
1966 num_blocks, hint_block, (u64)-1,
1967 &ins, 1);
236454df
CM
1968 BUG_ON(ret);
1969 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
b18c6685 1970 start_pos, ins.objectid, ins.offset);
236454df
CM
1971 BUG_ON(ret);
1972 } else {
1973 ins.offset = 0;
1974 ins.objectid = 0;
1975 }
6567e837 1976 BUG_ON(ret);
b18c6685 1977 alloc_extent_start = ins.objectid;
e37c9e69 1978 // btrfs_update_inode_block_group(trans, inode);
b18c6685 1979 ret = btrfs_end_transaction(trans, root);
6567e837
CM
1980 mutex_unlock(&root->fs_info->fs_mutex);
1981
75dfe396
CM
1982 while(count > 0) {
1983 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1984 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1985 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1986 PAGE_CACHE_SHIFT;
b18c6685
CM
1987
1988 memset(pages, 0, sizeof(pages));
1989 ret = prepare_pages(root, file, pages, num_pages,
6567e837
CM
1990 pos, first_index, last_index,
1991 write_bytes, alloc_extent_start);
75dfe396 1992 BUG_ON(ret);
b18c6685 1993
6567e837 1994 /* FIXME blocks != pagesize */
236454df
CM
1995 if (alloc_extent_start)
1996 alloc_extent_start += num_pages;
75dfe396
CM
1997 ret = btrfs_copy_from_user(pos, num_pages,
1998 write_bytes, pages, buf);
1999 BUG_ON(ret);
2000
f254e52c 2001 ret = dirty_and_release_pages(NULL, root, file, pages,
70b2befd 2002 num_pages, pos, write_bytes);
75dfe396
CM
2003 BUG_ON(ret);
2004 btrfs_drop_pages(pages, num_pages);
2005
75dfe396
CM
2006 buf += write_bytes;
2007 count -= write_bytes;
2008 pos += write_bytes;
2009 num_written += write_bytes;
2010
2011 balance_dirty_pages_ratelimited(inode->i_mapping);
35b7e476 2012 btrfs_btree_balance_dirty(root);
75dfe396
CM
2013 cond_resched();
2014 }
6567e837 2015out_unlock:
75dfe396
CM
2016 mutex_unlock(&inode->i_mutex);
2017out:
a429e513
CM
2018 if (pinned[0])
2019 page_cache_release(pinned[0]);
2020 if (pinned[1])
2021 page_cache_release(pinned[1]);
75dfe396
CM
2022 *ppos = pos;
2023 current->backing_dev_info = NULL;
a429e513 2024 mark_inode_dirty(inode);
75dfe396
CM
2025 return num_written ? num_written : err;
2026}
2027
f254e52c
CM
2028static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2029 unsigned long offset, unsigned long size)
2030{
2031 char *kaddr;
2032 unsigned long left, count = desc->count;
d6e4a428 2033 struct inode *inode = page->mapping->host;
f254e52c
CM
2034
2035 if (size > count)
2036 size = count;
2037
2038 if (!PageChecked(page)) {
2039 /* FIXME, do it per block */
d6e4a428 2040 struct btrfs_root *root = BTRFS_I(inode)->root;
236454df 2041
f254e52c 2042 int ret = btrfs_csum_verify_file_block(root,
236454df
CM
2043 page->mapping->host->i_ino,
2044 page->index << PAGE_CACHE_SHIFT,
2045 kmap(page), PAGE_CACHE_SIZE);
f254e52c
CM
2046 if (ret) {
2047 printk("failed to verify ino %lu page %lu\n",
2048 page->mapping->host->i_ino,
2049 page->index);
2050 memset(page_address(page), 0, PAGE_CACHE_SIZE);
2051 }
2052 SetPageChecked(page);
2053 kunmap(page);
2054 }
2055 /*
2056 * Faults on the destination of a read are common, so do it before
2057 * taking the kmap.
2058 */
2059 if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2060 kaddr = kmap_atomic(page, KM_USER0);
2061 left = __copy_to_user_inatomic(desc->arg.buf,
2062 kaddr + offset, size);
2063 kunmap_atomic(kaddr, KM_USER0);
2064 if (left == 0)
2065 goto success;
2066 }
2067
2068 /* Do it the slow way */
2069 kaddr = kmap(page);
2070 left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2071 kunmap(page);
2072
2073 if (left) {
2074 size -= left;
2075 desc->error = -EFAULT;
2076 }
2077success:
2078 desc->count = count - size;
2079 desc->written += size;
2080 desc->arg.buf += size;
2081 return size;
2082}
2083
2084/**
2085 * btrfs_file_aio_read - filesystem read routine
2086 * @iocb: kernel I/O control block
2087 * @iov: io vector request
2088 * @nr_segs: number of segments in the iovec
2089 * @pos: current file position
2090 */
2091static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2092 unsigned long nr_segs, loff_t pos)
2093{
2094 struct file *filp = iocb->ki_filp;
2095 ssize_t retval;
2096 unsigned long seg;
2097 size_t count;
2098 loff_t *ppos = &iocb->ki_pos;
2099
2100 count = 0;
2101 for (seg = 0; seg < nr_segs; seg++) {
2102 const struct iovec *iv = &iov[seg];
2103
2104 /*
2105 * If any segment has a negative length, or the cumulative
2106 * length ever wraps negative then return -EINVAL.
2107 */
2108 count += iv->iov_len;
2109 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2110 return -EINVAL;
2111 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2112 continue;
2113 if (seg == 0)
2114 return -EFAULT;
2115 nr_segs = seg;
2116 count -= iv->iov_len; /* This segment is no good */
2117 break;
2118 }
2119 retval = 0;
2120 if (count) {
2121 for (seg = 0; seg < nr_segs; seg++) {
2122 read_descriptor_t desc;
2123
2124 desc.written = 0;
2125 desc.arg.buf = iov[seg].iov_base;
2126 desc.count = iov[seg].iov_len;
2127 if (desc.count == 0)
2128 continue;
2129 desc.error = 0;
2130 do_generic_file_read(filp, ppos, &desc,
2131 btrfs_read_actor);
2132 retval += desc.written;
2133 if (desc.error) {
2134 retval = retval ?: desc.error;
2135 break;
2136 }
2137 }
2138 }
2139 return retval;
2140}
2141
2619ba1f
CM
2142static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2143{
2144 struct btrfs_trans_handle *trans;
2145 struct btrfs_key key;
2146 struct btrfs_root_item root_item;
2147 struct btrfs_inode_item *inode_item;
2148 struct buffer_head *subvol;
2149 struct btrfs_leaf *leaf;
2150 struct btrfs_root *new_root;
2151 struct inode *inode;
31f3c99b 2152 struct inode *dir;
2619ba1f
CM
2153 int ret;
2154 u64 objectid;
2155 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2156
2157 mutex_lock(&root->fs_info->fs_mutex);
2158 trans = btrfs_start_transaction(root, 1);
2159 BUG_ON(!trans);
2160
31f3c99b 2161 subvol = btrfs_alloc_free_block(trans, root, 0);
5e82849e
CM
2162 if (subvol == NULL)
2163 return -ENOSPC;
2619ba1f
CM
2164 leaf = btrfs_buffer_leaf(subvol);
2165 btrfs_set_header_nritems(&leaf->header, 0);
2166 btrfs_set_header_level(&leaf->header, 0);
7eccb903 2167 btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2619ba1f 2168 btrfs_set_header_generation(&leaf->header, trans->transid);
4d775673 2169 btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2619ba1f
CM
2170 memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2171 sizeof(leaf->header.fsid));
4d775673 2172 mark_buffer_dirty(subvol);
2619ba1f
CM
2173
2174 inode_item = &root_item.inode;
2175 memset(inode_item, 0, sizeof(*inode_item));
2176 btrfs_set_inode_generation(inode_item, 1);
2177 btrfs_set_inode_size(inode_item, 3);
2178 btrfs_set_inode_nlink(inode_item, 1);
2179 btrfs_set_inode_nblocks(inode_item, 1);
2180 btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2181
7eccb903 2182 btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2619ba1f 2183 btrfs_set_root_refs(&root_item, 1);
5e82849e
CM
2184 brelse(subvol);
2185 subvol = NULL;
2619ba1f 2186
2619ba1f
CM
2187 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2188 0, &objectid);
2189 BUG_ON(ret);
2190
2191 btrfs_set_root_dirid(&root_item, new_dirid);
2192
2193 key.objectid = objectid;
2194 key.offset = 1;
2195 key.flags = 0;
2196 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2197 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2198 &root_item);
2199 BUG_ON(ret);
2200
2201 /*
2202 * insert the directory item
2203 */
2204 key.offset = (u64)-1;
31f3c99b 2205 dir = root->fs_info->sb->s_root->d_inode;
2619ba1f 2206 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
31f3c99b 2207 name, namelen, dir->i_ino, &key, 0);
2619ba1f
CM
2208 BUG_ON(ret);
2209
2210 ret = btrfs_commit_transaction(trans, root);
2211 BUG_ON(ret);
2212
2213 new_root = btrfs_read_fs_root(root->fs_info, &key);
2214 BUG_ON(!new_root);
2215
2216 trans = btrfs_start_transaction(new_root, 1);
2217 BUG_ON(!trans);
2218
31f3c99b
CM
2219 inode = btrfs_new_inode(trans, new_root, new_dirid,
2220 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2619ba1f
CM
2221 inode->i_op = &btrfs_dir_inode_operations;
2222 inode->i_fop = &btrfs_dir_file_operations;
2223
2224 ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2225 BUG_ON(ret);
2226
2227 inode->i_nlink = 1;
2228 inode->i_size = 6;
2229 ret = btrfs_update_inode(trans, new_root, inode);
2230 BUG_ON(ret);
2231
2232 ret = btrfs_commit_transaction(trans, new_root);
2233 BUG_ON(ret);
2234
2235 iput(inode);
2236
2237 mutex_unlock(&root->fs_info->fs_mutex);
35b7e476 2238 btrfs_btree_balance_dirty(root);
2619ba1f
CM
2239 return 0;
2240}
2241
c5739bba
CM
2242static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2243{
2244 struct btrfs_trans_handle *trans;
2245 struct btrfs_key key;
2246 struct btrfs_root_item new_root_item;
2247 int ret;
2248 u64 objectid;
2249
2619ba1f
CM
2250 if (!root->ref_cows)
2251 return -EINVAL;
2252
c5739bba
CM
2253 mutex_lock(&root->fs_info->fs_mutex);
2254 trans = btrfs_start_transaction(root, 1);
2255 BUG_ON(!trans);
2256
2257 ret = btrfs_update_inode(trans, root, root->inode);
2258 BUG_ON(ret);
2259
1b05da2e
CM
2260 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2261 0, &objectid);
c5739bba
CM
2262 BUG_ON(ret);
2263
c5739bba
CM
2264 memcpy(&new_root_item, &root->root_item,
2265 sizeof(new_root_item));
2266
c5739bba
CM
2267 key.objectid = objectid;
2268 key.offset = 1;
2269 key.flags = 0;
2270 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
7eccb903 2271 btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
c5739bba
CM
2272
2273 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2274 &new_root_item);
2275 BUG_ON(ret);
2276
c5739bba
CM
2277 /*
2278 * insert the directory item
2279 */
2280 key.offset = (u64)-1;
2281 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2282 name, namelen,
2283 root->fs_info->sb->s_root->d_inode->i_ino,
2284 &key, 0);
2285
2286 BUG_ON(ret);
2287
2288 ret = btrfs_inc_root_ref(trans, root);
2289 BUG_ON(ret);
2290
2291 ret = btrfs_commit_transaction(trans, root);
2292 BUG_ON(ret);
2293 mutex_unlock(&root->fs_info->fs_mutex);
35b7e476 2294 btrfs_btree_balance_dirty(root);
c5739bba
CM
2295 return 0;
2296}
2297
8352d8a4
CM
2298static int add_disk(struct btrfs_root *root, char *name, int namelen)
2299{
2300 struct block_device *bdev;
2301 struct btrfs_path *path;
2302 struct super_block *sb = root->fs_info->sb;
2303 struct btrfs_root *dev_root = root->fs_info->dev_root;
2304 struct btrfs_trans_handle *trans;
2305 struct btrfs_device_item *dev_item;
2306 struct btrfs_key key;
2307 u16 item_size;
2308 u64 num_blocks;
2309 u64 new_blocks;
b4100d64 2310 u64 device_id;
8352d8a4 2311 int ret;
b4100d64 2312
8352d8a4
CM
2313printk("adding disk %s\n", name);
2314 path = btrfs_alloc_path();
2315 if (!path)
2316 return -ENOMEM;
2317 num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2318 bdev = open_bdev_excl(name, O_RDWR, sb);
2319 if (IS_ERR(bdev)) {
2320 ret = PTR_ERR(bdev);
2321printk("open bdev excl failed ret %d\n", ret);
2322 goto out_nolock;
2323 }
2324 set_blocksize(bdev, sb->s_blocksize);
2325 new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2326 key.objectid = num_blocks;
2327 key.offset = new_blocks;
2328 key.flags = 0;
2329 btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2330
2331 mutex_lock(&dev_root->fs_info->fs_mutex);
2332 trans = btrfs_start_transaction(dev_root, 1);
2333 item_size = sizeof(*dev_item) + namelen;
2334printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2335 ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2336 if (ret) {
2337printk("insert failed %d\n", ret);
2338 close_bdev_excl(bdev);
2339 if (ret > 0)
2340 ret = -EEXIST;
2341 goto out;
2342 }
2343 dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2344 path->slots[0], struct btrfs_device_item);
2345 btrfs_set_device_pathlen(dev_item, namelen);
2346 memcpy(dev_item + 1, name, namelen);
b4100d64
CM
2347
2348 device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2349 btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2350 btrfs_set_device_id(dev_item, device_id);
8352d8a4
CM
2351 mark_buffer_dirty(path->nodes[0]);
2352
b4100d64
CM
2353 ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2354 new_blocks);
8352d8a4
CM
2355
2356 if (!ret) {
2357 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2358 num_blocks + new_blocks);
2359 i_size_write(root->fs_info->btree_inode,
2360 (num_blocks + new_blocks) <<
2361 root->fs_info->btree_inode->i_blkbits);
2362 }
2363
2364out:
2365 ret = btrfs_commit_transaction(trans, dev_root);
2366 BUG_ON(ret);
2367 mutex_unlock(&root->fs_info->fs_mutex);
2368out_nolock:
2369 btrfs_free_path(path);
35b7e476 2370 btrfs_btree_balance_dirty(root);
8352d8a4
CM
2371
2372 return ret;
2373}
2374
c5739bba
CM
2375static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2376 cmd, unsigned long arg)
2377{
2378 struct btrfs_root *root = BTRFS_I(inode)->root;
2379 struct btrfs_ioctl_vol_args vol_args;
8352d8a4 2380 int ret = 0;
7e38180e 2381 struct btrfs_dir_item *di;
c5739bba 2382 int namelen;
2619ba1f
CM
2383 struct btrfs_path *path;
2384 u64 root_dirid;
c5739bba 2385
c5739bba
CM
2386 switch (cmd) {
2387 case BTRFS_IOC_SNAP_CREATE:
2388 if (copy_from_user(&vol_args,
2389 (struct btrfs_ioctl_vol_args __user *)arg,
2390 sizeof(vol_args)))
2391 return -EFAULT;
2392 namelen = strlen(vol_args.name);
2393 if (namelen > BTRFS_VOL_NAME_MAX)
2394 return -EINVAL;
2619ba1f
CM
2395 path = btrfs_alloc_path();
2396 if (!path)
2397 return -ENOMEM;
2d13d8d0 2398 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2619ba1f 2399 mutex_lock(&root->fs_info->fs_mutex);
7e38180e 2400 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2619ba1f
CM
2401 path, root_dirid,
2402 vol_args.name, namelen, 0);
2403 mutex_unlock(&root->fs_info->fs_mutex);
2d13d8d0 2404 btrfs_free_path(path);
7e38180e 2405 if (di && !IS_ERR(di))
2619ba1f
CM
2406 return -EEXIST;
2407
2408 if (root == root->fs_info->tree_root)
2409 ret = create_subvol(root, vol_args.name, namelen);
2410 else
2411 ret = create_snapshot(root, vol_args.name, namelen);
c5739bba
CM
2412 WARN_ON(ret);
2413 break;
8352d8a4
CM
2414 case BTRFS_IOC_ADD_DISK:
2415 if (copy_from_user(&vol_args,
2416 (struct btrfs_ioctl_vol_args __user *)arg,
2417 sizeof(vol_args)))
2418 return -EFAULT;
2419 namelen = strlen(vol_args.name);
2420 if (namelen > BTRFS_VOL_NAME_MAX)
2421 return -EINVAL;
2422 vol_args.name[namelen] = '\0';
2423 ret = add_disk(root, vol_args.name, namelen);
2424 break;
c5739bba
CM
2425 default:
2426 return -ENOTTY;
2427 }
8352d8a4 2428 return ret;
c5739bba
CM
2429}
2430
2c90e5d6
CM
/* Slab cache for struct btrfs_inode; private to this file. */
static struct kmem_cache *btrfs_inode_cachep;

/*
 * Remaining slab caches set up by init_inodecache().  They have external
 * linkage, so other btrfs source files can reference them.
 */
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
2436
2437/*
2438 * Called inside transaction, so use GFP_NOFS
2439 */
2440static struct inode *btrfs_alloc_inode(struct super_block *sb)
2441{
2442 struct btrfs_inode *ei;
2443
2444 ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2445 if (!ei)
2446 return NULL;
2c90e5d6
CM
2447 return &ei->vfs_inode;
2448}
2449
2450static void btrfs_destroy_inode(struct inode *inode)
2451{
2c90e5d6 2452 WARN_ON(!list_empty(&inode->i_dentry));
2c90e5d6
CM
2453 WARN_ON(inode->i_data.nrpages);
2454
2c90e5d6
CM
2455 kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2456}
2457
2458static void init_once(void * foo, struct kmem_cache * cachep,
2459 unsigned long flags)
2460{
2461 struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2462
f9f3c6b6 2463 if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2c90e5d6
CM
2464 SLAB_CTOR_CONSTRUCTOR) {
2465 inode_init_once(&ei->vfs_inode);
2466 }
2467}
2468
2469static int init_inodecache(void)
2470{
2471 btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2472 sizeof(struct btrfs_inode),
2473 0, (SLAB_RECLAIM_ACCOUNT|
2474 SLAB_MEM_SPREAD),
2475 init_once, NULL);
2476 btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2477 sizeof(struct btrfs_trans_handle),
2478 0, (SLAB_RECLAIM_ACCOUNT|
2479 SLAB_MEM_SPREAD),
2480 NULL, NULL);
2481 btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2482 sizeof(struct btrfs_transaction),
2483 0, (SLAB_RECLAIM_ACCOUNT|
2484 SLAB_MEM_SPREAD),
2485 NULL, NULL);
2486 btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2487 sizeof(struct btrfs_transaction),
2488 0, (SLAB_RECLAIM_ACCOUNT|
2489 SLAB_MEM_SPREAD),
2490 NULL, NULL);
2491 btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2492 256,
2493 0, (SLAB_RECLAIM_ACCOUNT|
2494 SLAB_MEM_SPREAD |
2495 SLAB_DESTROY_BY_RCU),
2496 NULL, NULL);
2497 if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2498 btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2499 return -ENOMEM;
2500 return 0;
2501}
2502
2503static void destroy_inodecache(void)
2504{
2505 kmem_cache_destroy(btrfs_inode_cachep);
2506 kmem_cache_destroy(btrfs_trans_handle_cachep);
2507 kmem_cache_destroy(btrfs_transaction_cachep);
2508 kmem_cache_destroy(btrfs_bit_radix_cachep);
2509 kmem_cache_destroy(btrfs_path_cachep);
2510}
2511
2e635a27
CM
2512static int btrfs_get_sb(struct file_system_type *fs_type,
2513 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2514{
2515 return get_sb_bdev(fs_type, flags, dev_name, data,
2516 btrfs_fill_super, mnt);
2517}
2518
236454df
CM
2519static int btrfs_getattr(struct vfsmount *mnt,
2520 struct dentry *dentry, struct kstat *stat)
2521{
2522 struct inode *inode = dentry->d_inode;
2523 generic_fillattr(inode, stat);
2524 stat->blksize = 256 * 1024;
2525 return 0;
2526}
2527
8fd17795
CM
2528static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2529{
2530 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2531 struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2532
2533 buf->f_namelen = BTRFS_NAME_LEN;
2534 buf->f_blocks = btrfs_super_total_blocks(disk_super);
2535 buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2536 buf->f_bavail = buf->f_bfree;
2537 buf->f_bsize = dentry->d_sb->s_blocksize;
2538 buf->f_type = BTRFS_SUPER_MAGIC;
2539 return 0;
2540}
b5133862 2541
e06afa83
CM
2542static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2543 struct inode * new_dir,struct dentry *new_dentry)
2544{
2545 struct btrfs_trans_handle *trans;
2546 struct btrfs_root *root = BTRFS_I(old_dir)->root;
2547 struct inode *new_inode = new_dentry->d_inode;
2548 struct inode *old_inode = old_dentry->d_inode;
2549 struct timespec ctime = CURRENT_TIME;
2550 struct btrfs_path *path;
2551 struct btrfs_dir_item *di;
2552 int ret;
2553
2554 if (S_ISDIR(old_inode->i_mode) && new_inode &&
2555 new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2556 return -ENOTEMPTY;
2557 }
2558 mutex_lock(&root->fs_info->fs_mutex);
2559 trans = btrfs_start_transaction(root, 1);
2560 btrfs_set_trans_block_group(trans, new_dir);
2561 path = btrfs_alloc_path();
2562 if (!path) {
2563 ret = -ENOMEM;
2564 goto out_fail;
2565 }
2566
2567 old_dentry->d_inode->i_nlink++;
2568 old_dir->i_ctime = old_dir->i_mtime = ctime;
2569 new_dir->i_ctime = new_dir->i_mtime = ctime;
2570 old_inode->i_ctime = ctime;
2571 if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2572 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2573 u64 old_parent_oid;
2574 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2575 "..", 2, -1);
2576 if (IS_ERR(di)) {
2577 ret = PTR_ERR(di);
2578 goto out_fail;
2579 }
2580 if (!di) {
2581 ret = -ENOENT;
2582 goto out_fail;
2583 }
2584 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2585 ret = btrfs_del_item(trans, root, path);
2586 if (ret) {
2587 ret = -EIO;
2588 goto out_fail;
2589 }
2590 btrfs_release_path(root, path);
2591
2592 di = btrfs_lookup_dir_index_item(trans, root, path,
2593 old_inode->i_ino,
2594 old_parent_oid,
2595 "..", 2, -1);
2596 if (IS_ERR(di)) {
2597 ret = PTR_ERR(di);
2598 goto out_fail;
2599 }
2600 if (!di) {
2601 ret = -ENOENT;
2602 goto out_fail;
2603 }
2604 ret = btrfs_del_item(trans, root, path);
2605 if (ret) {
2606 ret = -EIO;
2607 goto out_fail;
2608 }
2609 btrfs_release_path(root, path);
2610
2611 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2612 old_inode->i_ino, location, 0);
2613 if (ret)
2614 goto out_fail;
2615 }
2616
2617
e06afa83
CM
2618 ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2619 if (ret)
2620 goto out_fail;
2621
2622 if (new_inode) {
2623 new_inode->i_ctime = CURRENT_TIME;
2b8d99a7
CM
2624 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2625 if (ret)
2626 goto out_fail;
e06afa83
CM
2627 if (S_ISDIR(new_inode->i_mode))
2628 clear_nlink(new_inode);
2629 else
2630 drop_nlink(new_inode);
2631 btrfs_update_inode(trans, root, new_inode);
2632 }
2b8d99a7
CM
2633 ret = btrfs_add_link(trans, new_dentry, old_inode);
2634 if (ret)
2635 goto out_fail;
2636
e06afa83
CM
2637out_fail:
2638 btrfs_free_path(path);
2639 btrfs_end_transaction(trans, root);
2640 mutex_unlock(&root->fs_info->fs_mutex);
2641 return ret;
2642}
2643
2b8d99a7
CM
2644static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2645 const char *symname)
2646{
2647 struct btrfs_trans_handle *trans;
2648 struct btrfs_root *root = BTRFS_I(dir)->root;
2649 struct btrfs_path *path;
2650 struct btrfs_key key;
2651 struct inode *inode;
2652 int err;
2653 int drop_inode = 0;
2654 u64 objectid;
2655 int name_len;
2656 int datasize;
2657 char *ptr;
2658 struct btrfs_file_extent_item *ei;
2659
2660 name_len = strlen(symname) + 1;
2661 if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2662 return -ENAMETOOLONG;
2663 mutex_lock(&root->fs_info->fs_mutex);
2664 trans = btrfs_start_transaction(root, 1);
2665 btrfs_set_trans_block_group(trans, dir);
2666
2667 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2668 if (err) {
2669 err = -ENOSPC;
2670 goto out_unlock;
2671 }
2672
2673 inode = btrfs_new_inode(trans, root, objectid,
2674 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2675 err = PTR_ERR(inode);
2676 if (IS_ERR(inode))
2677 goto out_unlock;
2678
2679 btrfs_set_trans_block_group(trans, inode);
2680 err = btrfs_add_nondir(trans, dentry, inode);
2681 if (err)
2682 drop_inode = 1;
2683 else {
2684 inode->i_mapping->a_ops = &btrfs_aops;
2685 inode->i_fop = &btrfs_file_operations;
2686 inode->i_op = &btrfs_file_inode_operations;
2687 }
2688 dir->i_sb->s_dirt = 1;
2689 btrfs_update_inode_block_group(trans, inode);
2690 btrfs_update_inode_block_group(trans, dir);
2691 if (drop_inode)
2692 goto out_unlock;
2693
2694 path = btrfs_alloc_path();
2695 BUG_ON(!path);
2696 key.objectid = inode->i_ino;
2697 key.offset = 0;
2698 key.flags = 0;
2699 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2700 datasize = btrfs_file_extent_calc_inline_size(name_len);
2701 err = btrfs_insert_empty_item(trans, root, path, &key,
2702 datasize);
2703 BUG_ON(err);
2704 ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2705 path->slots[0], struct btrfs_file_extent_item);
2706 btrfs_set_file_extent_generation(ei, trans->transid);
2707 btrfs_set_file_extent_type(ei,
2708 BTRFS_FILE_EXTENT_INLINE);
2709 ptr = btrfs_file_extent_inline_start(ei);
2710 btrfs_memcpy(root, path->nodes[0]->b_data,
2711 ptr, symname, name_len);
2712 mark_buffer_dirty(path->nodes[0]);
2713 btrfs_free_path(path);
2714 inode->i_op = &btrfs_symlink_inode_operations;
2715 inode->i_mapping->a_ops = &btrfs_symlink_aops;
2716 inode->i_size = name_len - 1;
2717 btrfs_update_inode(trans, root, inode);
2718 err = 0;
2719
2720out_unlock:
2721 btrfs_end_transaction(trans, root);
2722 mutex_unlock(&root->fs_info->fs_mutex);
2723
2724 if (drop_inode) {
2725 inode_dec_link_count(inode);
2726 iput(inode);
2727 }
2728 btrfs_btree_balance_dirty(root);
2729 return err;
2730}
2731
2e635a27
CM
2732static struct file_system_type btrfs_fs_type = {
2733 .owner = THIS_MODULE,
2734 .name = "btrfs",
2735 .get_sb = btrfs_get_sb,
2736 .kill_sb = kill_block_super,
2737 .fs_flags = FS_REQUIRES_DEV,
2738};
2739
e20d96d6 2740static struct super_operations btrfs_super_ops = {
134e9731 2741 .delete_inode = btrfs_delete_inode,
e20d96d6
CM
2742 .put_super = btrfs_put_super,
2743 .read_inode = btrfs_read_locked_inode,
d5719762
CM
2744 .write_super = btrfs_write_super,
2745 .sync_fs = btrfs_sync_fs,
4730a4bc 2746 .write_inode = btrfs_write_inode,
b5133862 2747 .dirty_inode = btrfs_dirty_inode,
2c90e5d6
CM
2748 .alloc_inode = btrfs_alloc_inode,
2749 .destroy_inode = btrfs_destroy_inode,
8fd17795 2750 .statfs = btrfs_statfs,
e20d96d6
CM
2751};
2752
2753static struct inode_operations btrfs_dir_inode_operations = {
2754 .lookup = btrfs_lookup,
d5719762 2755 .create = btrfs_create,
134e9731 2756 .unlink = btrfs_unlink,
2b8d99a7 2757 .link = btrfs_link,
f7922033 2758 .mkdir = btrfs_mkdir,
5f443fd2 2759 .rmdir = btrfs_rmdir,
e06afa83 2760 .rename = btrfs_rename,
2b8d99a7 2761 .symlink = btrfs_symlink,
e20d96d6
CM
2762};
2763
d6e4a428
CM
2764static struct inode_operations btrfs_dir_ro_inode_operations = {
2765 .lookup = btrfs_lookup,
2766};
2767
e20d96d6
CM
2768static struct file_operations btrfs_dir_file_operations = {
2769 .llseek = generic_file_llseek,
2770 .read = generic_read_dir,
2771 .readdir = btrfs_readdir,
c5739bba 2772 .ioctl = btrfs_ioctl,
e20d96d6
CM
2773};
2774
dee26a9f
CM
2775static struct address_space_operations btrfs_aops = {
2776 .readpage = btrfs_readpage,
dee26a9f
CM
2777 .writepage = btrfs_writepage,
2778 .sync_page = block_sync_page,
2779 .prepare_write = btrfs_prepare_write,
75dfe396 2780 .commit_write = btrfs_commit_write,
dee26a9f
CM
2781};
2782
2b8d99a7
CM
2783static struct address_space_operations btrfs_symlink_aops = {
2784 .readpage = btrfs_readpage,
2785 .writepage = btrfs_writepage,
2786};
2787
dee26a9f 2788static struct inode_operations btrfs_file_inode_operations = {
f4b9aa8d 2789 .truncate = btrfs_truncate,
236454df 2790 .getattr = btrfs_getattr,
dee26a9f
CM
2791};
2792
2793static struct file_operations btrfs_file_operations = {
2794 .llseek = generic_file_llseek,
2795 .read = do_sync_read,
e8f05c45
CM
2796 .aio_read = btrfs_file_aio_read,
2797 .write = btrfs_file_write,
dee26a9f
CM
2798 .mmap = generic_file_mmap,
2799 .open = generic_file_open,
c5739bba 2800 .ioctl = btrfs_ioctl,
8fd17795 2801 .fsync = btrfs_sync_file,
dee26a9f 2802};
e20d96d6 2803
2b8d99a7
CM
2804static struct inode_operations btrfs_symlink_inode_operations = {
2805 .readlink = generic_readlink,
2806 .follow_link = page_follow_link_light,
2807 .put_link = page_put_link,
2808};
2809
2e635a27
CM
2810static int __init init_btrfs_fs(void)
2811{
2c90e5d6 2812 int err;
2e635a27 2813 printk("btrfs loaded!\n");
2c90e5d6
CM
2814 err = init_inodecache();
2815 if (err)
2816 return err;
2e635a27 2817 return register_filesystem(&btrfs_fs_type);
d6e4a428
CM
2818 destroy_inodecache();
2819 return err;
2e635a27
CM
2820}
2821
2822static void __exit exit_btrfs_fs(void)
2823{
2c90e5d6 2824 destroy_inodecache();
2e635a27
CM
2825 unregister_filesystem(&btrfs_fs_type);
2826 printk("btrfs unloaded\n");
2827}
2828
2829module_init(init_btrfs_fs)
2830module_exit(exit_btrfs_fs)
2831
2832MODULE_LICENSE("GPL");