GFS2: Fix use-after-free bug on umount
[linux-2.6-block.git] / fs / gfs2 / ops_super.c
CommitLineData
b3b94faa
DT
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */
9
10#include <linux/sched.h>
11#include <linux/slab.h>
12#include <linux/spinlock.h>
13#include <linux/completion.h>
14#include <linux/buffer_head.h>
b3b94faa
DT
15#include <linux/statfs.h>
16#include <linux/seq_file.h>
17#include <linux/mount.h>
18#include <linux/kthread.h>
19#include <linux/delay.h>
5c676f6d 20#include <linux/gfs2_ondisk.h>
feaa7bba 21#include <linux/crc32.h>
7d308590 22#include <linux/lm_interface.h>
719ee344 23#include <linux/time.h>
b3b94faa
DT
24
25#include "gfs2.h"
5c676f6d 26#include "incore.h"
b3b94faa
DT
27#include "glock.h"
28#include "inode.h"
b3b94faa
DT
29#include "log.h"
30#include "mount.h"
b3b94faa
DT
31#include "quota.h"
32#include "recovery.h"
33#include "rgrp.h"
34#include "super.h"
35#include "sys.h"
5c676f6d 36#include "util.h"
feaa7bba
SW
37#include "trans.h"
38#include "dir.h"
39#include "eattr.h"
40#include "bmap.h"
719ee344 41#include "meta_io.h"
b3b94faa
DT
42
43/**
44 * gfs2_write_inode - Make sure the inode is stable on the disk
45 * @inode: The inode
46 * @sync: synchronous write flag
47 *
48 * Returns: errno
49 */
50
51static int gfs2_write_inode(struct inode *inode, int sync)
52{
feaa7bba 53 struct gfs2_inode *ip = GFS2_I(inode);
719ee344
SW
54 struct gfs2_sbd *sdp = GFS2_SB(inode);
55 struct gfs2_holder gh;
56 struct buffer_head *bh;
57 struct timespec atime;
58 struct gfs2_dinode *di;
59 int ret = 0;
60
61 /* Check this is a "normal" inode, etc */
62 if (!test_bit(GIF_USER, &ip->i_flags) ||
63 (current->flags & PF_MEMALLOC))
64 return 0;
65 ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
66 if (ret)
67 goto do_flush;
68 ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
69 if (ret)
70 goto do_unlock;
71 ret = gfs2_meta_inode_buffer(ip, &bh);
72 if (ret == 0) {
73 di = (struct gfs2_dinode *)bh->b_data;
74 atime.tv_sec = be64_to_cpu(di->di_atime);
75 atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
76 if (timespec_compare(&inode->i_atime, &atime) > 0) {
77 gfs2_trans_add_bh(ip->i_gl, bh, 1);
78 gfs2_dinode_out(ip, bh->b_data);
79 }
80 brelse(bh);
feaa7bba 81 }
719ee344
SW
82 gfs2_trans_end(sdp);
83do_unlock:
84 gfs2_glock_dq_uninit(&gh);
85do_flush:
86 if (sync != 0)
87 gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
88 return ret;
b3b94faa
DT
89}
90
9b8df98f
SW
91/**
92 * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one
93 * @sdp: the filesystem
94 *
95 * Returns: errno
96 */
97
3af165ac 98int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
9b8df98f
SW
99{
100 struct gfs2_holder t_gh;
101 int error;
102
103 gfs2_quota_sync(sdp);
104 gfs2_statfs_sync(sdp);
105
106 error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
107 &t_gh);
108 if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
109 return error;
110
111 gfs2_meta_syncfs(sdp);
112 gfs2_log_shutdown(sdp);
113
114 clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
115
116 if (t_gh.gh_gl)
117 gfs2_glock_dq_uninit(&t_gh);
118
119 gfs2_quota_cleanup(sdp);
120
121 return error;
122}
123
b3b94faa 124/**
4a221953
SW
125 * gfs2_write_super
126 * @sb: the superblock
b3b94faa 127 *
b3b94faa
DT
128 */
129
130static void gfs2_write_super(struct super_block *sb)
131{
4a221953
SW
132 sb->s_dirt = 0;
133}
134
135/**
136 * gfs2_sync_fs - sync the filesystem
137 * @sb: the superblock
138 *
139 * Flushes the log to disk.
140 */
9b8df98f 141
4a221953
SW
142static int gfs2_sync_fs(struct super_block *sb, int wait)
143{
144 sb->s_dirt = 0;
9171f5a9 145 if (wait && sb->s_fs_info)
b004157a 146 gfs2_log_flush(sb->s_fs_info, NULL);
4a221953 147 return 0;
b3b94faa
DT
148}
149
150/**
151 * gfs2_write_super_lockfs - prevent further writes to the filesystem
152 * @sb: the VFS structure for the filesystem
153 *
154 */
155
156static void gfs2_write_super_lockfs(struct super_block *sb)
157{
5c676f6d 158 struct gfs2_sbd *sdp = sb->s_fs_info;
b3b94faa
DT
159 int error;
160
c3780511
DT
161 if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
162 return;
163
b3b94faa
DT
164 for (;;) {
165 error = gfs2_freeze_fs(sdp);
166 if (!error)
167 break;
168
169 switch (error) {
170 case -EBUSY:
171 fs_err(sdp, "waiting for recovery before freeze\n");
172 break;
173
174 default:
175 fs_err(sdp, "error freezing FS: %d\n", error);
176 break;
177 }
178
179 fs_err(sdp, "retrying...\n");
180 msleep(1000);
181 }
182}
183
184/**
185 * gfs2_unlockfs - reallow writes to the filesystem
186 * @sb: the VFS structure for the filesystem
187 *
188 */
189
190static void gfs2_unlockfs(struct super_block *sb)
191{
2bdbc5d7 192 gfs2_unfreeze_fs(sb->s_fs_info);
b3b94faa
DT
193}
194
2bfb6449
SW
195/**
196 * statfs_fill - fill in the sg for a given RG
197 * @rgd: the RG
198 * @sc: the sc structure
199 *
200 * Returns: 0 on success, -ESTALE if the LVB is invalid
201 */
202
203static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
204 struct gfs2_statfs_change_host *sc)
205{
206 gfs2_rgrp_verify(rgd);
207 sc->sc_total += rgd->rd_data;
208 sc->sc_free += rgd->rd_free;
209 sc->sc_dinodes += rgd->rd_dinodes;
210 return 0;
211}
212
213/**
214 * gfs2_statfs_slow - Stat a filesystem using asynchronous locking
215 * @sdp: the filesystem
216 * @sc: the sc info that will be returned
217 *
218 * Any error (other than a signal) will cause this routine to fall back
219 * to the synchronous version.
220 *
221 * FIXME: This really shouldn't busy wait like this.
222 *
223 * Returns: errno
224 */
225
226static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
227{
228 struct gfs2_holder ri_gh;
229 struct gfs2_rgrpd *rgd_next;
230 struct gfs2_holder *gha, *gh;
231 unsigned int slots = 64;
232 unsigned int x;
233 int done;
234 int error = 0, err;
235
236 memset(sc, 0, sizeof(struct gfs2_statfs_change_host));
237 gha = kcalloc(slots, sizeof(struct gfs2_holder), GFP_KERNEL);
238 if (!gha)
239 return -ENOMEM;
240
241 error = gfs2_rindex_hold(sdp, &ri_gh);
242 if (error)
243 goto out;
244
245 rgd_next = gfs2_rgrpd_get_first(sdp);
246
247 for (;;) {
248 done = 1;
249
250 for (x = 0; x < slots; x++) {
251 gh = gha + x;
252
253 if (gh->gh_gl && gfs2_glock_poll(gh)) {
254 err = gfs2_glock_wait(gh);
255 if (err) {
256 gfs2_holder_uninit(gh);
257 error = err;
258 } else {
259 if (!error)
260 error = statfs_slow_fill(
261 gh->gh_gl->gl_object, sc);
262 gfs2_glock_dq_uninit(gh);
263 }
264 }
265
266 if (gh->gh_gl)
267 done = 0;
268 else if (rgd_next && !error) {
269 error = gfs2_glock_nq_init(rgd_next->rd_gl,
270 LM_ST_SHARED,
271 GL_ASYNC,
272 gh);
273 rgd_next = gfs2_rgrpd_get_next(rgd_next);
274 done = 0;
275 }
276
277 if (signal_pending(current))
278 error = -ERESTARTSYS;
279 }
280
281 if (done)
282 break;
283
284 yield();
285 }
286
287 gfs2_glock_dq_uninit(&ri_gh);
288
289out:
290 kfree(gha);
291 return error;
292}
293
294/**
295 * gfs2_statfs_i - Do a statfs
296 * @sdp: the filesystem
297 * @sg: the sg structure
298 *
299 * Returns: errno
300 */
301
302static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
303{
304 struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master;
305 struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
306
307 spin_lock(&sdp->sd_statfs_spin);
308
309 *sc = *m_sc;
310 sc->sc_total += l_sc->sc_total;
311 sc->sc_free += l_sc->sc_free;
312 sc->sc_dinodes += l_sc->sc_dinodes;
313
314 spin_unlock(&sdp->sd_statfs_spin);
315
316 if (sc->sc_free < 0)
317 sc->sc_free = 0;
318 if (sc->sc_free > sc->sc_total)
319 sc->sc_free = sc->sc_total;
320 if (sc->sc_dinodes < 0)
321 sc->sc_dinodes = 0;
322
323 return 0;
324}
325
b3b94faa
DT
326/**
327 * gfs2_statfs - Gather and return stats about the filesystem
328 * @sb: The superblock
329 * @statfsbuf: The buffer
330 *
331 * Returns: 0 on success or error code
332 */
333
0c0834a3 334static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
b3b94faa 335{
0c0834a3 336 struct super_block *sb = dentry->d_inode->i_sb;
5c676f6d 337 struct gfs2_sbd *sdp = sb->s_fs_info;
bd209cc0 338 struct gfs2_statfs_change_host sc;
b3b94faa
DT
339 int error;
340
b3b94faa
DT
341 if (gfs2_tune_get(sdp, gt_statfs_slow))
342 error = gfs2_statfs_slow(sdp, &sc);
343 else
344 error = gfs2_statfs_i(sdp, &sc);
345
346 if (error)
347 return error;
348
b3b94faa
DT
349 buf->f_type = GFS2_MAGIC;
350 buf->f_bsize = sdp->sd_sb.sb_bsize;
351 buf->f_blocks = sc.sc_total;
352 buf->f_bfree = sc.sc_free;
353 buf->f_bavail = sc.sc_free;
354 buf->f_files = sc.sc_dinodes + sc.sc_free;
355 buf->f_ffree = sc.sc_free;
356 buf->f_namelen = GFS2_FNAMESIZE;
357
358 return 0;
359}
360
361/**
362 * gfs2_remount_fs - called when the FS is remounted
363 * @sb: the filesystem
364 * @flags: the remount flags
365 * @data: extra data passed in (not used right now)
366 *
367 * Returns: errno
368 */
369
370static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data)
371{
5c676f6d 372 struct gfs2_sbd *sdp = sb->s_fs_info;
b3b94faa
DT
373 int error;
374
b3b94faa
DT
375 error = gfs2_mount_args(sdp, data, 1);
376 if (error)
377 return error;
378
379 if (sdp->sd_args.ar_spectator)
380 *flags |= MS_RDONLY;
381 else {
382 if (*flags & MS_RDONLY) {
383 if (!(sb->s_flags & MS_RDONLY))
384 error = gfs2_make_fs_ro(sdp);
385 } else if (!(*flags & MS_RDONLY) &&
386 (sb->s_flags & MS_RDONLY)) {
387 error = gfs2_make_fs_rw(sdp);
388 }
389 }
390
b3b94faa
DT
391 return error;
392}
393
3b8249f6
SW
394/**
395 * gfs2_drop_inode - Drop an inode (test for remote unlink)
396 * @inode: The inode to drop
397 *
398 * If we've received a callback on an iopen lock then its because a
399 * remote node tried to deallocate the inode but failed due to this node
400 * still having the inode open. Here we mark the link count zero
401 * since we know that it must have reached zero if the GLF_DEMOTE flag
402 * is set on the iopen glock. If we didn't do a disk read since the
403 * remote node removed the final link then we might otherwise miss
404 * this event. This check ensures that this node will deallocate the
405 * inode's blocks, or alternatively pass the baton on to another
406 * node for later deallocation.
407 */
9b8df98f 408
3b8249f6
SW
409static void gfs2_drop_inode(struct inode *inode)
410{
091806ed
BP
411 struct gfs2_inode *ip = GFS2_I(inode);
412
413 if (test_bit(GIF_USER, &ip->i_flags) && inode->i_nlink) {
3b8249f6
SW
414 struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
415 if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags))
416 clear_nlink(inode);
417 }
418 generic_drop_inode(inode);
419}
420
b3b94faa
DT
421/**
422 * gfs2_clear_inode - Deallocate an inode when VFS is done with it
423 * @inode: The VFS inode
424 *
425 */
426
427static void gfs2_clear_inode(struct inode *inode)
428{
091806ed
BP
429 struct gfs2_inode *ip = GFS2_I(inode);
430
feaa7bba
SW
431 /* This tells us its a "real" inode and not one which only
432 * serves to contain an address space (see rgrp.c, meta_io.c)
433 * which therefore doesn't have its own glocks.
434 */
091806ed 435 if (test_bit(GIF_USER, &ip->i_flags)) {
feaa7bba 436 ip->i_gl->gl_object = NULL;
feaa7bba
SW
437 gfs2_glock_put(ip->i_gl);
438 ip->i_gl = NULL;
d93cfa98
AD
439 if (ip->i_iopen_gh.gh_gl) {
440 ip->i_iopen_gh.gh_gl->gl_object = NULL;
feaa7bba 441 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
d93cfa98 442 }
b3b94faa
DT
443 }
444}
445
9b8df98f
SW
446static int is_ancestor(const struct dentry *d1, const struct dentry *d2)
447{
448 do {
449 if (d1 == d2)
450 return 1;
451 d1 = d1->d_parent;
452 } while (!IS_ROOT(d1));
453 return 0;
454}
455
b3b94faa
DT
456/**
457 * gfs2_show_options - Show mount options for /proc/mounts
458 * @s: seq_file structure
459 * @mnt: vfsmount
460 *
461 * Returns: 0 on success or error code
462 */
463
464static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
465{
5c676f6d 466 struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info;
b3b94faa
DT
467 struct gfs2_args *args = &sdp->sd_args;
468
9b8df98f
SW
469 if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
470 seq_printf(s, ",meta");
b3b94faa
DT
471 if (args->ar_lockproto[0])
472 seq_printf(s, ",lockproto=%s", args->ar_lockproto);
473 if (args->ar_locktable[0])
474 seq_printf(s, ",locktable=%s", args->ar_locktable);
475 if (args->ar_hostdata[0])
476 seq_printf(s, ",hostdata=%s", args->ar_hostdata);
477 if (args->ar_spectator)
478 seq_printf(s, ",spectator");
479 if (args->ar_ignore_local_fs)
480 seq_printf(s, ",ignore_local_fs");
481 if (args->ar_localflocks)
482 seq_printf(s, ",localflocks");
483 if (args->ar_localcaching)
484 seq_printf(s, ",localcaching");
485 if (args->ar_debug)
486 seq_printf(s, ",debug");
487 if (args->ar_upgrade)
488 seq_printf(s, ",upgrade");
b3b94faa
DT
489 if (args->ar_posix_acl)
490 seq_printf(s, ",acl");
491 if (args->ar_quota != GFS2_QUOTA_DEFAULT) {
492 char *state;
493 switch (args->ar_quota) {
494 case GFS2_QUOTA_OFF:
495 state = "off";
496 break;
497 case GFS2_QUOTA_ACCOUNT:
498 state = "account";
499 break;
500 case GFS2_QUOTA_ON:
501 state = "on";
502 break;
503 default:
504 state = "unknown";
505 break;
506 }
507 seq_printf(s, ",quota=%s", state);
508 }
509 if (args->ar_suiddir)
510 seq_printf(s, ",suiddir");
511 if (args->ar_data != GFS2_DATA_DEFAULT) {
512 char *state;
513 switch (args->ar_data) {
514 case GFS2_DATA_WRITEBACK:
515 state = "writeback";
516 break;
517 case GFS2_DATA_ORDERED:
518 state = "ordered";
519 break;
520 default:
521 state = "unknown";
522 break;
523 }
524 seq_printf(s, ",data=%s", state);
525 }
526
527 return 0;
528}
529
907b9bce 530/*
feaa7bba
SW
531 * We have to (at the moment) hold the inodes main lock to cover
532 * the gap between unlocking the shared lock on the iopen lock and
533 * taking the exclusive lock. I'd rather do a shared -> exclusive
534 * conversion on the iopen lock, but we can change that later. This
535 * is safe, just less efficient.
536 */
9b8df98f 537
feaa7bba
SW
538static void gfs2_delete_inode(struct inode *inode)
539{
540 struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
541 struct gfs2_inode *ip = GFS2_I(inode);
542 struct gfs2_holder gh;
543 int error;
544
091806ed 545 if (!test_bit(GIF_USER, &ip->i_flags))
feaa7bba
SW
546 goto out;
547
d93cfa98 548 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
feaa7bba
SW
549 if (unlikely(error)) {
550 gfs2_glock_dq_uninit(&ip->i_iopen_gh);
551 goto out;
552 }
553
d93cfa98 554 gfs2_glock_dq_wait(&ip->i_iopen_gh);
feaa7bba
SW
555 gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
556 error = gfs2_glock_nq(&ip->i_iopen_gh);
557 if (error)
1bb7322f 558 goto out_truncate;
feaa7bba 559
b60623c2 560 if (S_ISDIR(inode->i_mode) &&
383f01fb 561 (ip->i_diskflags & GFS2_DIF_EXHASH)) {
feaa7bba
SW
562 error = gfs2_dir_exhash_dealloc(ip);
563 if (error)
564 goto out_unlock;
565 }
566
3767ac21 567 if (ip->i_eattr) {
feaa7bba
SW
568 error = gfs2_ea_dealloc(ip);
569 if (error)
570 goto out_unlock;
571 }
572
573 if (!gfs2_is_stuffed(ip)) {
574 error = gfs2_file_dealloc(ip);
575 if (error)
576 goto out_unlock;
577 }
578
579 error = gfs2_dinode_dealloc(ip);
16615be1
SW
580 if (error)
581 goto out_unlock;
582
1bb7322f 583out_truncate:
16615be1
SW
584 error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
585 if (error)
586 goto out_unlock;
587 /* Needs to be done before glock release & also in a transaction */
49686f71 588 truncate_inode_pages(&inode->i_data, 0);
16615be1 589 gfs2_trans_end(sdp);
feaa7bba
SW
590
591out_unlock:
1bb7322f
SW
592 if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags))
593 gfs2_glock_dq(&ip->i_iopen_gh);
feaa7bba
SW
594 gfs2_holder_uninit(&ip->i_iopen_gh);
595 gfs2_glock_dq_uninit(&gh);
3b8249f6 596 if (error && error != GLR_TRYFAILED)
feaa7bba
SW
597 fs_warn(sdp, "gfs2_delete_inode: %d\n", error);
598out:
599 truncate_inode_pages(&inode->i_data, 0);
600 clear_inode(inode);
601}
602
320dd101
SW
603static struct inode *gfs2_alloc_inode(struct super_block *sb)
604{
320dd101
SW
605 struct gfs2_inode *ip;
606
607 ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
608 if (ip) {
609 ip->i_flags = 0;
610 ip->i_gl = NULL;
320dd101
SW
611 }
612 return &ip->i_inode;
613}
614
615static void gfs2_destroy_inode(struct inode *inode)
616{
617 kmem_cache_free(gfs2_inode_cachep, inode);
618}
619
ee9b6d61 620const struct super_operations gfs2_super_ops = {
4a221953
SW
621 .alloc_inode = gfs2_alloc_inode,
622 .destroy_inode = gfs2_destroy_inode,
623 .write_inode = gfs2_write_inode,
624 .delete_inode = gfs2_delete_inode,
3af165ac 625 .put_super = gfs2_gl_hash_clear,
4a221953
SW
626 .write_super = gfs2_write_super,
627 .sync_fs = gfs2_sync_fs,
628 .write_super_lockfs = gfs2_write_super_lockfs,
629 .unlockfs = gfs2_unlockfs,
630 .statfs = gfs2_statfs,
631 .remount_fs = gfs2_remount_fs,
632 .clear_inode = gfs2_clear_inode,
3b8249f6 633 .drop_inode = gfs2_drop_inode,
4a221953 634 .show_options = gfs2_show_options,
b3b94faa
DT
635};
636