Btrfs: add compat ioctl
[linux-2.6-block.git] / fs / btrfs / transaction.c
1 #include <linux/module.h>
2 #include <linux/fs.h>
3 #include "ctree.h"
4 #include "disk-io.h"
5 #include "transaction.h"
6
/*
 * Number of btrfs_transaction structs currently allocated: incremented when
 * join_transaction() allocates one, decremented when put_transaction() frees
 * it.  Only used for sanity checking.
 */
static int total_trans = 0;

/* slab caches for transaction handles and transactions, defined elsewhere */
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_transaction_cachep;

/* workqueue on which the delayed transaction cleaner work is queued */
static struct workqueue_struct *trans_wq;

/*
 * radix tree tag used on fs_roots_radix to mark the roots that have been
 * touched by the running transaction
 */
#define BTRFS_ROOT_TRANS_TAG 0

/* marker stored in live transactions and handles, verified with WARN_ON */
#define TRANS_MAGIC 0xE1E10E
16 static void put_transaction(struct btrfs_transaction *transaction)
17 {
18         WARN_ON(transaction->use_count == 0);
19         transaction->use_count--;
20         WARN_ON(transaction->magic != TRANS_MAGIC);
21         if (transaction->use_count == 0) {
22                 WARN_ON(total_trans == 0);
23                 total_trans--;
24                 list_del_init(&transaction->list);
25                 memset(transaction, 0, sizeof(*transaction));
26                 kmem_cache_free(btrfs_transaction_cachep, transaction);
27         }
28 }
29
30 static int join_transaction(struct btrfs_root *root)
31 {
32         struct btrfs_transaction *cur_trans;
33         cur_trans = root->fs_info->running_transaction;
34         if (!cur_trans) {
35                 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
36                                              GFP_NOFS);
37                 total_trans++;
38                 BUG_ON(!cur_trans);
39                 root->fs_info->generation++;
40                 root->fs_info->running_transaction = cur_trans;
41                 cur_trans->num_writers = 0;
42                 cur_trans->transid = root->fs_info->generation;
43                 init_waitqueue_head(&cur_trans->writer_wait);
44                 init_waitqueue_head(&cur_trans->commit_wait);
45                 cur_trans->magic = TRANS_MAGIC;
46                 cur_trans->in_commit = 0;
47                 cur_trans->use_count = 1;
48                 cur_trans->commit_done = 0;
49                 cur_trans->start_time = get_seconds();
50                 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
51                 init_bit_radix(&cur_trans->dirty_pages);
52         }
53         cur_trans->num_writers++;
54         return 0;
55 }
56
57 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
58                                                    int num_blocks)
59 {
60         struct btrfs_trans_handle *h =
61                 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
62         int ret;
63         u64 running_trans_id;
64
65         mutex_lock(&root->fs_info->trans_mutex);
66         ret = join_transaction(root);
67         BUG_ON(ret);
68         running_trans_id = root->fs_info->running_transaction->transid;
69
70         if (root != root->fs_info->tree_root && root->last_trans <
71             running_trans_id) {
72                 radix_tree_tag_set(&root->fs_info->fs_roots_radix,
73                                    (unsigned long)root->root_key.objectid,
74                                    BTRFS_ROOT_TRANS_TAG);
75                 root->commit_root = root->node;
76                 get_bh(root->node);
77         }
78         root->last_trans = running_trans_id;
79         h->transid = running_trans_id;
80         h->transaction = root->fs_info->running_transaction;
81         h->blocks_reserved = num_blocks;
82         h->blocks_used = 0;
83         h->block_group = NULL;
84         root->fs_info->running_transaction->use_count++;
85         mutex_unlock(&root->fs_info->trans_mutex);
86         h->magic = h->magic2 = TRANS_MAGIC;
87         return h;
88 }
89
90 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
91                           struct btrfs_root *root)
92 {
93         struct btrfs_transaction *cur_trans;
94
95         WARN_ON(trans->magic != TRANS_MAGIC);
96         WARN_ON(trans->magic2 != TRANS_MAGIC);
97         mutex_lock(&root->fs_info->trans_mutex);
98         cur_trans = root->fs_info->running_transaction;
99         WARN_ON(cur_trans->num_writers < 1);
100         if (waitqueue_active(&cur_trans->writer_wait))
101                 wake_up(&cur_trans->writer_wait);
102         cur_trans->num_writers--;
103         put_transaction(cur_trans);
104         mutex_unlock(&root->fs_info->trans_mutex);
105         memset(trans, 0, sizeof(*trans));
106         kmem_cache_free(btrfs_trans_handle_cachep, trans);
107         return 0;
108 }
109
110
111 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
112                                      struct btrfs_root *root)
113 {
114         unsigned long gang[16];
115         int ret;
116         int i;
117         int err;
118         int werr = 0;
119         struct page *page;
120         struct radix_tree_root *dirty_pages;
121         struct inode *btree_inode = root->fs_info->btree_inode;
122
123         if (!trans || !trans->transaction) {
124                 return filemap_write_and_wait(btree_inode->i_mapping);
125         }
126         dirty_pages = &trans->transaction->dirty_pages;
127         while(1) {
128                 ret = find_first_radix_bit(dirty_pages, gang,
129                                            0, ARRAY_SIZE(gang));
130                 if (!ret)
131                         break;
132                 for (i = 0; i < ret; i++) {
133                         /* FIXME EIO */
134                         clear_radix_bit(dirty_pages, gang[i]);
135                         page = find_lock_page(btree_inode->i_mapping,
136                                               gang[i]);
137                         if (!page)
138                                 continue;
139                         err = write_one_page(page, 0);
140                         if (err)
141                                 werr = err;
142                         page_cache_release(page);
143                 }
144         }
145         err = filemap_fdatawait(btree_inode->i_mapping);
146         if (err)
147                 werr = err;
148         return werr;
149 }
150
151 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
152                             struct btrfs_root *root)
153 {
154         int ret;
155         u64 old_extent_block;
156         struct btrfs_fs_info *fs_info = root->fs_info;
157         struct btrfs_root *tree_root = fs_info->tree_root;
158         struct btrfs_root *extent_root = fs_info->extent_root;
159         struct btrfs_root *dev_root = fs_info->dev_root;
160
161         if (btrfs_super_device_root(fs_info->disk_super) !=
162             bh_blocknr(dev_root->node)) {
163                 btrfs_set_super_device_root(fs_info->disk_super,
164                                             bh_blocknr(dev_root->node));
165         }
166         btrfs_write_dirty_block_groups(trans, extent_root);
167         while(1) {
168                 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
169                 if (old_extent_block == bh_blocknr(extent_root->node))
170                         break;
171                 btrfs_set_root_blocknr(&extent_root->root_item,
172                                        bh_blocknr(extent_root->node));
173                 ret = btrfs_update_root(trans, tree_root,
174                                         &extent_root->root_key,
175                                         &extent_root->root_item);
176                 BUG_ON(ret);
177                 btrfs_write_dirty_block_groups(trans, extent_root);
178         }
179         return 0;
180 }
181
182 static int wait_for_commit(struct btrfs_root *root,
183                            struct btrfs_transaction *commit)
184 {
185         DEFINE_WAIT(wait);
186         while(!commit->commit_done) {
187                 prepare_to_wait(&commit->commit_wait, &wait,
188                                 TASK_UNINTERRUPTIBLE);
189                 if (commit->commit_done)
190                         break;
191                 mutex_unlock(&root->fs_info->trans_mutex);
192                 schedule();
193                 mutex_lock(&root->fs_info->trans_mutex);
194         }
195         finish_wait(&commit->commit_wait, &wait);
196         return 0;
197 }
198
199 struct dirty_root {
200         struct list_head list;
201         struct btrfs_key snap_key;
202         struct buffer_head *commit_root;
203         struct btrfs_root *root;
204 };
205
206 static int add_dirty_roots(struct btrfs_trans_handle *trans,
207                            struct radix_tree_root *radix,
208                            struct list_head *list)
209 {
210         struct dirty_root *dirty;
211         struct btrfs_root *gang[8];
212         struct btrfs_root *root;
213         int i;
214         int ret;
215         int err;
216         while(1) {
217                 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
218                                                  ARRAY_SIZE(gang),
219                                                  BTRFS_ROOT_TRANS_TAG);
220                 if (ret == 0)
221                         break;
222                 for (i = 0; i < ret; i++) {
223                         root = gang[i];
224                         radix_tree_tag_clear(radix,
225                                      (unsigned long)root->root_key.objectid,
226                                      BTRFS_ROOT_TRANS_TAG);
227                         if (root->commit_root == root->node) {
228                                 WARN_ON(bh_blocknr(root->node) !=
229                                         btrfs_root_blocknr(&root->root_item));
230                                 brelse(root->commit_root);
231                                 root->commit_root = NULL;
232                                 continue;
233                         }
234                         dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
235                         BUG_ON(!dirty);
236                         memcpy(&dirty->snap_key, &root->root_key,
237                                sizeof(root->root_key));
238                         dirty->commit_root = root->commit_root;
239                         root->commit_root = NULL;
240                         dirty->root = root;
241                         root->root_key.offset = root->fs_info->generation;
242                         btrfs_set_root_blocknr(&root->root_item,
243                                                bh_blocknr(root->node));
244                         err = btrfs_insert_root(trans, root->fs_info->tree_root,
245                                                 &root->root_key,
246                                                 &root->root_item);
247                         BUG_ON(err);
248                         list_add(&dirty->list, list);
249                 }
250         }
251         return 0;
252 }
253
254 static int drop_dirty_roots(struct btrfs_root *tree_root,
255                             struct list_head *list)
256 {
257         struct dirty_root *dirty;
258         struct btrfs_trans_handle *trans;
259         int ret;
260
261         while(!list_empty(list)) {
262                 dirty = list_entry(list->next, struct dirty_root, list);
263                 list_del_init(&dirty->list);
264                 trans = btrfs_start_transaction(tree_root, 1);
265                 ret = btrfs_drop_snapshot(trans, dirty->root,
266                                           dirty->commit_root);
267                 BUG_ON(ret);
268
269                 ret = btrfs_del_root(trans, tree_root, &dirty->snap_key);
270                 BUG_ON(ret);
271                 ret = btrfs_end_transaction(trans, tree_root);
272                 BUG_ON(ret);
273                 kfree(dirty);
274         }
275         return 0;
276 }
277
278 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
279                              struct btrfs_root *root)
280 {
281         int ret = 0;
282         struct btrfs_transaction *cur_trans;
283         struct btrfs_transaction *prev_trans = NULL;
284         struct list_head dirty_fs_roots;
285         DEFINE_WAIT(wait);
286
287         INIT_LIST_HEAD(&dirty_fs_roots);
288
289         mutex_lock(&root->fs_info->trans_mutex);
290         if (trans->transaction->in_commit) {
291                 cur_trans = trans->transaction;
292                 trans->transaction->use_count++;
293                 btrfs_end_transaction(trans, root);
294                 ret = wait_for_commit(root, cur_trans);
295                 BUG_ON(ret);
296                 put_transaction(cur_trans);
297                 mutex_unlock(&root->fs_info->trans_mutex);
298                 return 0;
299         }
300         cur_trans = trans->transaction;
301         trans->transaction->in_commit = 1;
302         while (trans->transaction->num_writers > 1) {
303                 WARN_ON(cur_trans != trans->transaction);
304                 prepare_to_wait(&trans->transaction->writer_wait, &wait,
305                                 TASK_UNINTERRUPTIBLE);
306                 if (trans->transaction->num_writers <= 1)
307                         break;
308                 mutex_unlock(&root->fs_info->trans_mutex);
309                 schedule();
310                 mutex_lock(&root->fs_info->trans_mutex);
311                 finish_wait(&trans->transaction->writer_wait, &wait);
312         }
313         finish_wait(&trans->transaction->writer_wait, &wait);
314         WARN_ON(cur_trans != trans->transaction);
315         add_dirty_roots(trans, &root->fs_info->fs_roots_radix, &dirty_fs_roots);
316         ret = btrfs_commit_tree_roots(trans, root);
317         BUG_ON(ret);
318         cur_trans = root->fs_info->running_transaction;
319         root->fs_info->running_transaction = NULL;
320         if (cur_trans->list.prev != &root->fs_info->trans_list) {
321                 prev_trans = list_entry(cur_trans->list.prev,
322                                         struct btrfs_transaction, list);
323                 if (prev_trans->commit_done)
324                         prev_trans = NULL;
325                 else
326                         prev_trans->use_count++;
327         }
328         mutex_unlock(&root->fs_info->trans_mutex);
329         mutex_unlock(&root->fs_info->fs_mutex);
330         ret = btrfs_write_and_wait_transaction(trans, root);
331         if (prev_trans) {
332                 mutex_lock(&root->fs_info->trans_mutex);
333                 wait_for_commit(root, prev_trans);
334                 put_transaction(prev_trans);
335                 mutex_unlock(&root->fs_info->trans_mutex);
336         }
337         btrfs_set_super_generation(root->fs_info->disk_super,
338                                    cur_trans->transid);
339         BUG_ON(ret);
340         write_ctree_super(trans, root);
341
342         mutex_lock(&root->fs_info->fs_mutex);
343         btrfs_finish_extent_commit(trans, root);
344         mutex_lock(&root->fs_info->trans_mutex);
345         cur_trans->commit_done = 1;
346         wake_up(&cur_trans->commit_wait);
347         put_transaction(cur_trans);
348         put_transaction(cur_trans);
349         mutex_unlock(&root->fs_info->trans_mutex);
350         kmem_cache_free(btrfs_trans_handle_cachep, trans);
351
352         drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
353         return ret;
354 }
355
356 void btrfs_transaction_cleaner(struct work_struct *work)
357 {
358         struct btrfs_fs_info *fs_info = container_of(work,
359                                                      struct btrfs_fs_info,
360                                                      trans_work.work);
361
362         struct btrfs_root *root = fs_info->tree_root;
363         struct btrfs_transaction *cur;
364         struct btrfs_trans_handle *trans;
365         unsigned long now;
366         unsigned long delay = HZ * 30;
367         int ret;
368
369 printk("btrfs transaction cleaner\n");
370         mutex_lock(&root->fs_info->fs_mutex);
371         mutex_lock(&root->fs_info->trans_mutex);
372         cur = root->fs_info->running_transaction;
373         if (!cur) {
374                 mutex_unlock(&root->fs_info->trans_mutex);
375                 goto out;
376         }
377         now = get_seconds();
378         if (now < cur->start_time || now - cur->start_time < 30) {
379                 mutex_unlock(&root->fs_info->trans_mutex);
380                 delay = HZ * 5;
381                 goto out;
382         }
383         mutex_unlock(&root->fs_info->trans_mutex);
384 printk("forcing commit\n");
385         trans = btrfs_start_transaction(root, 1);
386         ret = btrfs_commit_transaction(trans, root);
387 out:
388         mutex_unlock(&root->fs_info->fs_mutex);
389         btrfs_transaction_queue_work(root, delay);
390 }
391
392 void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
393 {
394         queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
395 }
396
397 void btrfs_transaction_flush_work(struct btrfs_root *root)
398 {
399         cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work);
400         flush_workqueue(trans_wq);
401 }
402
403 void __init btrfs_init_transaction_sys(void)
404 {
405         trans_wq = create_workqueue("btrfs");
406 }
407
408 void __exit btrfs_exit_transaction_sys(void)
409 {
410         destroy_workqueue(trans_wq);
411 }
412