Btrfs: Fix mtime and ctime updates on parent dirs
[linux-2.6-block.git] / fs / btrfs / transaction.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include <linux/fs.h>
21 #include <linux/sched.h>
22 #include "ctree.h"
23 #include "disk-io.h"
24 #include "transaction.h"
25
/*
 * Number of btrfs_transaction structs currently allocated: incremented in
 * join_transaction() when a new one is created and decremented in
 * put_transaction() when the last reference is dropped.
 */
26 static int total_trans = 0;
/* slab caches for handles and transactions; defined outside this file */
27 extern struct kmem_cache *btrfs_trans_handle_cachep;
28 extern struct kmem_cache *btrfs_transaction_cachep;
29
/*
 * Workqueue that runs the delayed trans_work item; created in
 * btrfs_init_transaction_sys() and destroyed in btrfs_exit_transaction_sys().
 */
30 static struct workqueue_struct *trans_wq;
31
/*
 * Radix tree tag set on fs_roots_radix entries by btrfs_start_transaction()
 * and looked up / cleared again by add_dirty_roots() at commit time.
 */
32 #define BTRFS_ROOT_TRANS_TAG 0
33
34 static void put_transaction(struct btrfs_transaction *transaction)
35 {
36         WARN_ON(transaction->use_count == 0);
37         transaction->use_count--;
38         if (transaction->use_count == 0) {
39                 WARN_ON(total_trans == 0);
40                 total_trans--;
41                 list_del_init(&transaction->list);
42                 memset(transaction, 0, sizeof(*transaction));
43                 kmem_cache_free(btrfs_transaction_cachep, transaction);
44         }
45 }
46
47 static int join_transaction(struct btrfs_root *root)
48 {
49         struct btrfs_transaction *cur_trans;
50         cur_trans = root->fs_info->running_transaction;
51         if (!cur_trans) {
52                 cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
53                                              GFP_NOFS);
54                 total_trans++;
55                 BUG_ON(!cur_trans);
56                 root->fs_info->generation++;
57                 root->fs_info->running_transaction = cur_trans;
58                 cur_trans->num_writers = 0;
59                 cur_trans->transid = root->fs_info->generation;
60                 init_waitqueue_head(&cur_trans->writer_wait);
61                 init_waitqueue_head(&cur_trans->commit_wait);
62                 cur_trans->in_commit = 0;
63                 cur_trans->use_count = 1;
64                 cur_trans->commit_done = 0;
65                 cur_trans->start_time = get_seconds();
66                 list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
67                 init_bit_radix(&cur_trans->dirty_pages);
68         }
69         cur_trans->num_writers++;
70         return 0;
71 }
72
73 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
74                                                    int num_blocks)
75 {
76         struct btrfs_trans_handle *h =
77                 kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
78         int ret;
79         u64 running_trans_id;
80
81         mutex_lock(&root->fs_info->trans_mutex);
82         ret = join_transaction(root);
83         BUG_ON(ret);
84         running_trans_id = root->fs_info->running_transaction->transid;
85
86         if (root != root->fs_info->tree_root && root->last_trans <
87             running_trans_id) {
88                 if (root->root_item.refs != 0) {
89                         radix_tree_tag_set(&root->fs_info->fs_roots_radix,
90                                            (unsigned long)root->root_key.objectid,
91                                            BTRFS_ROOT_TRANS_TAG);
92                         root->commit_root = root->node;
93                         get_bh(root->node);
94                 } else {
95                         WARN_ON(1);
96                 }
97         }
98         root->last_trans = running_trans_id;
99         h->transid = running_trans_id;
100         h->transaction = root->fs_info->running_transaction;
101         h->blocks_reserved = num_blocks;
102         h->blocks_used = 0;
103         h->block_group = NULL;
104         root->fs_info->running_transaction->use_count++;
105         mutex_unlock(&root->fs_info->trans_mutex);
106         return h;
107 }
108
109 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
110                           struct btrfs_root *root)
111 {
112         struct btrfs_transaction *cur_trans;
113
114         mutex_lock(&root->fs_info->trans_mutex);
115         cur_trans = root->fs_info->running_transaction;
116         WARN_ON(cur_trans->num_writers < 1);
117         if (waitqueue_active(&cur_trans->writer_wait))
118                 wake_up(&cur_trans->writer_wait);
119         cur_trans->num_writers--;
120         put_transaction(cur_trans);
121         mutex_unlock(&root->fs_info->trans_mutex);
122         memset(trans, 0, sizeof(*trans));
123         kmem_cache_free(btrfs_trans_handle_cachep, trans);
124         return 0;
125 }
126
127
128 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
129                                      struct btrfs_root *root)
130 {
131         unsigned long gang[16];
132         int ret;
133         int i;
134         int err;
135         int werr = 0;
136         struct page *page;
137         struct radix_tree_root *dirty_pages;
138         struct inode *btree_inode = root->fs_info->btree_inode;
139
140         if (!trans || !trans->transaction) {
141                 return filemap_write_and_wait(btree_inode->i_mapping);
142         }
143         dirty_pages = &trans->transaction->dirty_pages;
144         while(1) {
145                 ret = find_first_radix_bit(dirty_pages, gang,
146                                            0, ARRAY_SIZE(gang));
147                 if (!ret)
148                         break;
149                 for (i = 0; i < ret; i++) {
150                         /* FIXME EIO */
151                         clear_radix_bit(dirty_pages, gang[i]);
152                         page = find_lock_page(btree_inode->i_mapping,
153                                               gang[i]);
154                         if (!page)
155                                 continue;
156                         err = write_one_page(page, 0);
157                         if (err)
158                                 werr = err;
159                         page_cache_release(page);
160                 }
161         }
162         err = filemap_fdatawait(btree_inode->i_mapping);
163         if (err)
164                 werr = err;
165         return werr;
166 }
167
168 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
169                             struct btrfs_root *root)
170 {
171         int ret;
172         u64 old_extent_block;
173         struct btrfs_fs_info *fs_info = root->fs_info;
174         struct btrfs_root *tree_root = fs_info->tree_root;
175         struct btrfs_root *extent_root = fs_info->extent_root;
176
177         btrfs_write_dirty_block_groups(trans, extent_root);
178         while(1) {
179                 old_extent_block = btrfs_root_blocknr(&extent_root->root_item);
180                 if (old_extent_block == bh_blocknr(extent_root->node))
181                         break;
182                 btrfs_set_root_blocknr(&extent_root->root_item,
183                                        bh_blocknr(extent_root->node));
184                 ret = btrfs_update_root(trans, tree_root,
185                                         &extent_root->root_key,
186                                         &extent_root->root_item);
187                 BUG_ON(ret);
188                 btrfs_write_dirty_block_groups(trans, extent_root);
189         }
190         return 0;
191 }
192
193 static int wait_for_commit(struct btrfs_root *root,
194                            struct btrfs_transaction *commit)
195 {
196         DEFINE_WAIT(wait);
197         while(!commit->commit_done) {
198                 prepare_to_wait(&commit->commit_wait, &wait,
199                                 TASK_UNINTERRUPTIBLE);
200                 if (commit->commit_done)
201                         break;
202                 mutex_unlock(&root->fs_info->trans_mutex);
203                 schedule();
204                 mutex_lock(&root->fs_info->trans_mutex);
205         }
206         finish_wait(&commit->commit_wait, &wait);
207         return 0;
208 }
209
/*
 * List entry describing an old version of a root that has to be dropped.
 * Entries are created by btrfs_add_dead_root() and add_dirty_roots() and
 * consumed by drop_dirty_roots().
 */
210 struct dirty_root {
        /* linkage on the caller supplied list */
211         struct list_head list;
        /* copy of the root's key taken when the entry was created */
212         struct btrfs_key snap_key;
        /* buffer handed to btrfs_drop_snapshot() */
213         struct buffer_head *commit_root;
        /* root this entry belongs to */
214         struct btrfs_root *root;
        /* non-zero: drop_dirty_roots() kfree()s ->root when it is done */
215         int free_on_drop;
216 };
217
218 int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list)
219 {
220         struct dirty_root *dirty;
221
222         dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
223         if (!dirty)
224                 return -ENOMEM;
225         memcpy(&dirty->snap_key, &root->root_key, sizeof(root->root_key));
226         dirty->commit_root = root->node;
227         dirty->root = root;
228         dirty->free_on_drop = 1;
229         list_add(&dirty->list, dead_list);
230         return 0;
231 }
232
233 static int add_dirty_roots(struct btrfs_trans_handle *trans,
234                            struct radix_tree_root *radix,
235                            struct list_head *list)
236 {
237         struct dirty_root *dirty;
238         struct btrfs_root *gang[8];
239         struct btrfs_root *root;
240         struct btrfs_root_item tmp_item;
241         int i;
242         int ret;
243         int err = 0;
244         u32 refs;
245
246         while(1) {
247                 ret = radix_tree_gang_lookup_tag(radix, (void **)gang, 0,
248                                                  ARRAY_SIZE(gang),
249                                                  BTRFS_ROOT_TRANS_TAG);
250                 if (ret == 0)
251                         break;
252                 for (i = 0; i < ret; i++) {
253                         root = gang[i];
254                         radix_tree_tag_clear(radix,
255                                      (unsigned long)root->root_key.objectid,
256                                      BTRFS_ROOT_TRANS_TAG);
257                         if (root->commit_root == root->node) {
258                                 WARN_ON(bh_blocknr(root->node) !=
259                                         btrfs_root_blocknr(&root->root_item));
260                                 brelse(root->commit_root);
261                                 root->commit_root = NULL;
262                                 continue;
263                         }
264                         dirty = kmalloc(sizeof(*dirty), GFP_NOFS);
265                         BUG_ON(!dirty);
266                         memcpy(&dirty->snap_key, &root->root_key,
267                                sizeof(root->root_key));
268                         dirty->commit_root = root->commit_root;
269                         root->commit_root = NULL;
270                         dirty->root = root;
271                         dirty->free_on_drop = 0;
272                         memcpy(&tmp_item, &root->root_item, sizeof(tmp_item));
273
274                         root->root_key.offset = root->fs_info->generation;
275                         btrfs_set_root_blocknr(&root->root_item,
276                                                bh_blocknr(root->node));
277                         err = btrfs_insert_root(trans, root->fs_info->tree_root,
278                                                 &root->root_key,
279                                                 &root->root_item);
280                         if (err)
281                                 break;
282
283                         refs = btrfs_root_refs(&tmp_item);
284                         btrfs_set_root_refs(&tmp_item, refs - 1);
285                         err = btrfs_update_root(trans, root->fs_info->tree_root,
286                                                 &dirty->snap_key,
287                                                 &tmp_item);
288
289                         BUG_ON(err);
290                         if (refs == 1)
291                                 list_add(&dirty->list, list);
292                         else
293                                 kfree(dirty);
294                 }
295         }
296         return err;
297 }
298
299 static int drop_dirty_roots(struct btrfs_root *tree_root,
300                             struct list_head *list)
301 {
302         struct dirty_root *dirty;
303         struct btrfs_trans_handle *trans;
304         int ret = 0;
305         while(!list_empty(list)) {
306                 mutex_lock(&tree_root->fs_info->fs_mutex);
307                 dirty = list_entry(list->next, struct dirty_root, list);
308                 list_del_init(&dirty->list);
309
310                 trans = btrfs_start_transaction(tree_root, 1);
311                 ret = btrfs_drop_snapshot(trans, dirty->root,
312                                           dirty->commit_root);
313                 BUG_ON(ret);
314                 ret = btrfs_del_root(trans, tree_root, &dirty->snap_key);
315                 if (ret)
316                         break;
317                 ret = btrfs_end_transaction(trans, tree_root);
318                 BUG_ON(ret);
319
320                 if (dirty->free_on_drop)
321                         kfree(dirty->root);
322                 kfree(dirty);
323                 mutex_unlock(&tree_root->fs_info->fs_mutex);
324                 btrfs_btree_balance_dirty(tree_root);
325         }
326         return ret;
327 }
328
329 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
330                              struct btrfs_root *root)
331 {
332         int ret = 0;
333         struct btrfs_transaction *cur_trans;
334         struct btrfs_transaction *prev_trans = NULL;
335         struct list_head dirty_fs_roots;
336         DEFINE_WAIT(wait);
337
338         INIT_LIST_HEAD(&dirty_fs_roots);
339
340         mutex_lock(&root->fs_info->trans_mutex);
341         if (trans->transaction->in_commit) {
342                 cur_trans = trans->transaction;
343                 trans->transaction->use_count++;
344                 btrfs_end_transaction(trans, root);
345                 ret = wait_for_commit(root, cur_trans);
346                 BUG_ON(ret);
347                 put_transaction(cur_trans);
348                 mutex_unlock(&root->fs_info->trans_mutex);
349                 return 0;
350         }
351         cur_trans = trans->transaction;
352         trans->transaction->in_commit = 1;
353         while (trans->transaction->num_writers > 1) {
354                 WARN_ON(cur_trans != trans->transaction);
355                 prepare_to_wait(&trans->transaction->writer_wait, &wait,
356                                 TASK_UNINTERRUPTIBLE);
357                 if (trans->transaction->num_writers <= 1)
358                         break;
359                 mutex_unlock(&root->fs_info->trans_mutex);
360                 schedule();
361                 mutex_lock(&root->fs_info->trans_mutex);
362                 finish_wait(&trans->transaction->writer_wait, &wait);
363         }
364         finish_wait(&trans->transaction->writer_wait, &wait);
365         WARN_ON(cur_trans != trans->transaction);
366         ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
367                               &dirty_fs_roots);
368         BUG_ON(ret);
369
370         ret = btrfs_commit_tree_roots(trans, root);
371         BUG_ON(ret);
372
373         cur_trans = root->fs_info->running_transaction;
374         root->fs_info->running_transaction = NULL;
375         if (cur_trans->list.prev != &root->fs_info->trans_list) {
376                 prev_trans = list_entry(cur_trans->list.prev,
377                                         struct btrfs_transaction, list);
378                 if (prev_trans->commit_done)
379                         prev_trans = NULL;
380                 else
381                         prev_trans->use_count++;
382         }
383         mutex_unlock(&root->fs_info->trans_mutex);
384         mutex_unlock(&root->fs_info->fs_mutex);
385         ret = btrfs_write_and_wait_transaction(trans, root);
386         if (prev_trans) {
387                 mutex_lock(&root->fs_info->trans_mutex);
388                 wait_for_commit(root, prev_trans);
389                 put_transaction(prev_trans);
390                 mutex_unlock(&root->fs_info->trans_mutex);
391         }
392         btrfs_set_super_generation(root->fs_info->disk_super,
393                                    cur_trans->transid);
394         BUG_ON(ret);
395         write_ctree_super(trans, root);
396
397         mutex_lock(&root->fs_info->fs_mutex);
398         btrfs_finish_extent_commit(trans, root);
399         mutex_lock(&root->fs_info->trans_mutex);
400         cur_trans->commit_done = 1;
401         wake_up(&cur_trans->commit_wait);
402         put_transaction(cur_trans);
403         put_transaction(cur_trans);
404         if (root->fs_info->closing)
405                 list_splice_init(&root->fs_info->dead_roots, &dirty_fs_roots);
406         else
407                 list_splice_init(&dirty_fs_roots, &root->fs_info->dead_roots);
408         mutex_unlock(&root->fs_info->trans_mutex);
409         kmem_cache_free(btrfs_trans_handle_cachep, trans);
410
411         if (root->fs_info->closing) {
412                 mutex_unlock(&root->fs_info->fs_mutex);
413                 drop_dirty_roots(root->fs_info->tree_root, &dirty_fs_roots);
414                 mutex_lock(&root->fs_info->fs_mutex);
415         }
416         return ret;
417 }
418
419 void btrfs_transaction_cleaner(struct work_struct *work)
420 {
421         struct btrfs_fs_info *fs_info = container_of(work,
422                                                      struct btrfs_fs_info,
423                                                      trans_work.work);
424
425         struct btrfs_root *root = fs_info->tree_root;
426         struct btrfs_transaction *cur;
427         struct btrfs_trans_handle *trans;
428         struct list_head dirty_roots;
429         unsigned long now;
430         unsigned long delay = HZ * 30;
431         int ret;
432
433         INIT_LIST_HEAD(&dirty_roots);
434         mutex_lock(&root->fs_info->fs_mutex);
435         mutex_lock(&root->fs_info->trans_mutex);
436         cur = root->fs_info->running_transaction;
437         if (!cur) {
438                 mutex_unlock(&root->fs_info->trans_mutex);
439                 goto out;
440         }
441         now = get_seconds();
442         if (now < cur->start_time || now - cur->start_time < 30) {
443                 mutex_unlock(&root->fs_info->trans_mutex);
444                 delay = HZ * 5;
445                 goto out;
446         }
447         mutex_unlock(&root->fs_info->trans_mutex);
448         trans = btrfs_start_transaction(root, 1);
449         ret = btrfs_commit_transaction(trans, root);
450 out:
451         mutex_unlock(&root->fs_info->fs_mutex);
452
453         mutex_lock(&root->fs_info->trans_mutex);
454         list_splice_init(&root->fs_info->dead_roots, &dirty_roots);
455         mutex_unlock(&root->fs_info->trans_mutex);
456
457         if (!list_empty(&dirty_roots)) {
458                 drop_dirty_roots(root, &dirty_roots);
459         }
460         btrfs_transaction_queue_work(root, delay);
461 }
462
463 void btrfs_transaction_queue_work(struct btrfs_root *root, int delay)
464 {
465         queue_delayed_work(trans_wq, &root->fs_info->trans_work, delay);
466 }
467
468 void btrfs_transaction_flush_work(struct btrfs_root *root)
469 {
470         cancel_rearming_delayed_workqueue(trans_wq, &root->fs_info->trans_work);
471         flush_workqueue(trans_wq);
472 }
473
/*
 * Create the "btrfs" workqueue used for the delayed transaction work.
 * NOTE(review): the result of create_workqueue() is not checked here; the
 * void return type leaves no way to report a failure to the caller.
 */
474 void __init btrfs_init_transaction_sys(void)
475 {
476         trans_wq = create_workqueue("btrfs");
477 }
478
/*
 * Destroy the workqueue created by btrfs_init_transaction_sys().
 */
479 void __exit btrfs_exit_transaction_sys(void)
480 {
481         destroy_workqueue(trans_wq);
482 }
483