Btrfs: Seed device support

[linux-2.6-block.git] / fs / btrfs / super.c
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c

index 7153dfaa34047b1537fc1054252ffdbb9902bcab..92393cc60d086a9888088e83f868b180a7c1bac4 100644 (file)
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -45,6 +45,9 @@
  #include "print-tree.h"
  #include "xattr.h"
  #include "volumes.h"
+#include "version.h"
+#include "export.h"
+#include "compression.h"
  
  #define BTRFS_SUPER_MAGIC 0x9123683E
  
@@ -65,20 +68,26 @@ static void btrfs_put_super (struct super_block * sb)
  }
  
  enum {
-       Opt_subvol, Opt_nodatasum, Opt_nodatacow, Opt_max_extent,
-       Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, Opt_err,
+       Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,     /* ids returned by match_token() for the table below */
+       Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
+       Opt_ssd, Opt_thread_pool, Opt_noacl,  Opt_compress, Opt_err,
  };
  
  static match_table_t tokens = {
+       {Opt_degraded, "degraded"},     /* sets the DEGRADED mount option */
         {Opt_subvol, "subvol=%s"},
+       {Opt_device, "device=%s"},      /* consumed by btrfs_parse_early_options() */
         {Opt_nodatasum, "nodatasum"},
         {Opt_nodatacow, "nodatacow"},
         {Opt_nobarrier, "nobarrier"},
         {Opt_max_extent, "max_extent=%s"},
         {Opt_max_inline, "max_inline=%s"},
         {Opt_alloc_start, "alloc_start=%s"},
+       {Opt_thread_pool, "thread_pool=%d"},    /* integer, read with match_int() */
+       {Opt_compress, "compress"},     /* sets the COMPRESS mount option */
         {Opt_ssd, "ssd"},
-       {Opt_err, NULL}
+       {Opt_noacl, "noacl"},   /* clears MS_POSIXACL on the super block */
+       {Opt_err, NULL},
  };
  
  u64 btrfs_parse_size(char *str)
@@ -106,16 +115,19 @@ u64 btrfs_parse_size(char *str)
         return res;
  }
  
-static int parse_options (char * options,
-                         struct btrfs_root *root,
-                         char **subvol_name)
+/*
+ * Regular mount options parser.  Everything that is needed only when
+ * reading in a new superblock is parsed here.
+ */
+int btrfs_parse_options(struct btrfs_root *root, char *options)
  {
-       char * p;
-       struct btrfs_fs_info *info = NULL;
+       struct btrfs_fs_info *info = root->fs_info;
         substring_t args[MAX_OPT_ARGS];
+       char *p, *num;
+       int intarg;
  
         if (!options)
-               return 1;
+               return 0;
  
         /*
          * strsep changes the string, duplicate it because parse_options
@@ -125,96 +137,163 @@ static int parse_options (char * options,
         if (!options)
                 return -ENOMEM;
  
-       if (root)
-               info = root->fs_info;
  
-       while ((p = strsep (&options, ",")) != NULL) {
+       while ((p = strsep(&options, ",")) != NULL) {
                 int token;
                 if (!*p)
                         continue;
  
                 token = match_token(p, tokens, args);
                 switch (token) {
+               case Opt_degraded:
+                       printk(KERN_INFO "btrfs: allowing degraded mounts\n");
+                       btrfs_set_opt(info->mount_opt, DEGRADED);
+                       break;
                 case Opt_subvol:
-                       if (subvol_name) {
-                               *subvol_name = match_strdup(&args[0]);
-                       }
+               case Opt_device:
+                       /*
+                        * These are parsed by btrfs_parse_early_options
+                        * and can be happily ignored here.
+                        */
                         break;
                 case Opt_nodatasum:
-                       if (info) {
-                               printk("btrfs: setting nodatacsum\n");
-                               btrfs_set_opt(info->mount_opt, NODATASUM);
-                       }
+                       printk(KERN_INFO "btrfs: setting nodatacsum\n");
+                       btrfs_set_opt(info->mount_opt, NODATASUM);
                         break;
                 case Opt_nodatacow:
-                       if (info) {
-                               printk("btrfs: setting nodatacow\n");
-                               btrfs_set_opt(info->mount_opt, NODATACOW);
-                               btrfs_set_opt(info->mount_opt, NODATASUM);
-                       }
+                       printk(KERN_INFO "btrfs: setting nodatacow\n");
+                       btrfs_set_opt(info->mount_opt, NODATACOW);
+                       btrfs_set_opt(info->mount_opt, NODATASUM);
+                       break;
+               case Opt_compress:
+                       printk(KERN_INFO "btrfs: use compression\n");
+                       btrfs_set_opt(info->mount_opt, COMPRESS);
                         break;
                 case Opt_ssd:
-                       if (info) {
-                               printk("btrfs: use ssd allocation scheme\n");
-                               btrfs_set_opt(info->mount_opt, SSD);
-                       }
+                       printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
+                       btrfs_set_opt(info->mount_opt, SSD);
                         break;
                 case Opt_nobarrier:
-                       if (info) {
-                               printk("btrfs: turning off barriers\n");
-                               btrfs_set_opt(info->mount_opt, NOBARRIER);
+                       printk(KERN_INFO "btrfs: turning off barriers\n");
+                       btrfs_set_opt(info->mount_opt, NOBARRIER);
+                       break;
+               case Opt_thread_pool:
+                       intarg = 0;
+                       match_int(&args[0], &intarg);
+                       if (intarg) {
+                               info->thread_pool_size = intarg;
+                               printk(KERN_INFO "btrfs: thread pool %d\n",
+                                      info->thread_pool_size);
                         }
                         break;
                 case Opt_max_extent:
-                       if (info) {
-                               char *num = match_strdup(&args[0]);
-                               if (num) {
-                                       info->max_extent =
-                                               btrfs_parse_size(num);
-                                       kfree(num);
-
-                                       info->max_extent = max_t(u64,
-                                                        info->max_extent,
-                                                        root->sectorsize);
-                                       printk("btrfs: max_extent at %Lu\n",
-                                              info->max_extent);
-                               }
+                       num = match_strdup(&args[0]);
+                       if (num) {
+                               info->max_extent = btrfs_parse_size(num);
+                               kfree(num);
+
+                               info->max_extent = max_t(u64,
+                                       info->max_extent, root->sectorsize);
+                               printk(KERN_INFO "btrfs: max_extent at %llu\n",
+                                      info->max_extent);
                         }
                         break;
                 case Opt_max_inline:
-                       if (info) {
-                               char *num = match_strdup(&args[0]);
-                               if (num) {
-                                       info->max_inline =
-                                               btrfs_parse_size(num);
-                                       kfree(num);
+                       num = match_strdup(&args[0]);
+                       if (num) {
+                               info->max_inline = btrfs_parse_size(num);
+                               kfree(num);
  
+                               if (info->max_inline) {
                                         info->max_inline = max_t(u64,
-                                                        info->max_inline,
-                                                        root->sectorsize);
-                                       printk("btrfs: max_inline at %Lu\n",
-                                              info->max_inline);
+                                               info->max_inline,
+                                               root->sectorsize);
                                 }
+                               printk(KERN_INFO "btrfs: max_inline at %llu\n",
+                                       info->max_inline);
                         }
                         break;
                 case Opt_alloc_start:
-                       if (info) {
-                               char *num = match_strdup(&args[0]);
-                               if (num) {
-                                       info->alloc_start =
-                                               btrfs_parse_size(num);
-                                       kfree(num);
-                                       printk("btrfs: allocations start at "
-                                              "%Lu\n", info->alloc_start);
-                               }
+                       num = match_strdup(&args[0]);
+                       if (num) {
+                               info->alloc_start = btrfs_parse_size(num);
+                               kfree(num);
+                               printk(KERN_INFO
+                                       "btrfs: allocations start at %llu\n",
+                                       info->alloc_start);
                         }
                         break;
+               case Opt_noacl:
+                       root->fs_info->sb->s_flags &= ~MS_POSIXACL;
+                       break;
                 default:
                         break;
                 }
         }
         kfree(options);
-       return 1;
+       return 0;
+}
+
+/*
+ * Parse mount options that are required early in the mount process.
+ *
+ * Only "subvol=" and "device=" are handled here; every other option is
+ * ignored and left to btrfs_parse_options(), which runs much later in the
+ * mount process and only when we need to allocate a new super block.
+ *
+ * @options:     comma separated option string, may be NULL
+ * @flags:       handed through to btrfs_scan_one_device()
+ * @holder:      handed through to btrfs_scan_one_device()
+ * @subvol_name: must point to NULL or to a kmalloc'ed string on entry.  On
+ *               success it points to a kmalloc'ed subvolume name ("default"
+ *               when none was given) that the caller has to kfree().  On
+ *               failure it is reset to NULL and there is nothing to free.
+ * @fs_devices:  handed through to btrfs_scan_one_device()
+ *
+ * Returns 0 on success, -ENOMEM when a string could not be duplicated, or
+ * the error reported by btrfs_scan_one_device().
+ */
+static int btrfs_parse_early_options(const char *options, int flags,
+               void *holder, char **subvol_name,
+               struct btrfs_fs_devices **fs_devices)
+{
+       substring_t args[MAX_OPT_ARGS];
+       char *opts = NULL, *cursor, *p, *str;
+       int error = 0;
+
+       if (!options)
+               goto out;
+
+       /*
+        * strsep() modifies the string and advances the pointer it is
+        * given (leaving it NULL once the last token was returned), so
+        * walk a private copy through 'cursor' and keep 'opts' pointing
+        * at the start of the allocation for the final kfree().
+        */
+       opts = kstrdup(options, GFP_KERNEL);
+       if (!opts) {
+               error = -ENOMEM;
+               goto out;
+       }
+       cursor = opts;
+
+       while ((p = strsep(&cursor, ",")) != NULL) {
+               int token;
+               if (!*p)
+                       continue;
+
+               token = match_token(p, tokens, args);
+               switch (token) {
+               case Opt_subvol:
+                       str = match_strdup(&args[0]);
+                       if (!str) {
+                               error = -ENOMEM;
+                               goto out;
+                       }
+                       /* the last subvol= wins; drop any earlier name */
+                       kfree(*subvol_name);
+                       *subvol_name = str;
+                       break;
+               case Opt_device:
+                       str = match_strdup(&args[0]);
+                       if (!str) {
+                               error = -ENOMEM;
+                               goto out;
+                       }
+                       /*
+                        * NOTE(review): assumes btrfs_scan_one_device() does
+                        * not keep the path pointer - btrfs_get_sb() hands it
+                        * a const string it does not own - so our copy is
+                        * freed right after the scan.  Confirm in volumes.c.
+                        */
+                       error = btrfs_scan_one_device(str, flags, holder,
+                                                     fs_devices);
+                       kfree(str);
+                       if (error)
+                               goto out;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+ out:
+       kfree(opts);
+       /*
+        * If no subvolume name is specified we use the default one.  Allocate
+        * a copy of the string "default" here so that code later in the
+        * mount path doesn't care if it's the default volume or another one.
+        */
+       if (!error && !*subvol_name) {
+               *subvol_name = kstrdup("default", GFP_KERNEL);
+               if (!*subvol_name)
+                       error = -ENOMEM;
+       }
+       /* btrfs_get_sb() does not free the name when we fail, so do it here */
+       if (error) {
+               kfree(*subvol_name);
+               *subvol_name = NULL;
+       }
+       return error;
  }
  
  static int btrfs_fill_super(struct super_block * sb,
@@ -231,10 +310,12 @@ static int btrfs_fill_super(struct super_block * sb,
         sb->s_maxbytes = MAX_LFS_FILESIZE;
         sb->s_magic = BTRFS_SUPER_MAGIC;
         sb->s_op = &btrfs_super_ops;
+       sb->s_export_op = &btrfs_export_ops;
         sb->s_xattr = btrfs_xattr_handlers;
         sb->s_time_gran = 1;
+       sb->s_flags |= MS_POSIXACL;
  
-       tree_root = open_ctree(sb, fs_devices);
+       tree_root = open_ctree(sb, fs_devices, (char *)data);
  
         if (IS_ERR(tree_root)) {
                 printk("btrfs: open_ctree failed\n");
@@ -267,20 +348,14 @@ static int btrfs_fill_super(struct super_block * sb,
                 goto fail_close;
         }
  
-       parse_options((char *)data, tree_root, NULL);
-
         /* this does the super kobj at the same time */
         err = btrfs_sysfs_add_super(tree_root->fs_info);
         if (err)
                 goto fail_close;
  
         sb->s_root = root_dentry;
-       btrfs_transaction_queue_work(tree_root, HZ * 30);
  
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
         save_mount_options(sb, data);
-#endif
-
         return 0;
  
  fail_close:
@@ -288,25 +363,29 @@ fail_close:
         return err;
  }
  
-static int btrfs_sync_fs(struct super_block *sb, int wait)
+int btrfs_sync_fs(struct super_block *sb, int wait)    /* ->sync_fs hook in btrfs_super_ops; 'static' is dropped by this patch */
  {
         struct btrfs_trans_handle *trans;
         struct btrfs_root *root;
         int ret;
         root = btrfs_sb(sb);
  
+       if (sb->s_flags & MS_RDONLY)    /* read-only super block: do nothing and report success */
+               return 0;
+
         sb->s_dirt = 0;
         if (!wait) {
                 filemap_flush(root->fs_info->btree_inode->i_mapping);
                 return 0;
         }
+
+       btrfs_start_delalloc_inodes(root);      /* only reached when wait != 0; runs before the commit below */
+       btrfs_wait_ordered_extents(root, 0);
+
         btrfs_clean_old_snapshots(root);
-       mutex_lock(&root->fs_info->fs_mutex);
-       btrfs_defrag_dirty_roots(root->fs_info);
         trans = btrfs_start_transaction(root, 1);
         ret = btrfs_commit_transaction(trans, root);
         sb->s_dirt = 0;
-       mutex_unlock(&root->fs_info->fs_mutex);
         return ret;
  }
  
@@ -315,53 +394,50 @@ static void btrfs_write_super(struct super_block *sb)
         sb->s_dirt = 0;
  }
  
-/*
- * This is almost a copy of get_sb_bdev in fs/super.c.
- * We need the local copy to allow direct mounting of
- * subvolumes, but this could be easily integrated back
- * into the generic version.  --hch
- */
-
-/* start copy & paste */
-static int set_bdev_super(struct super_block *s, void *data)
+static int btrfs_test_super(struct super_block *s, void *data) /* test callback handed to sget() by btrfs_get_sb() */
  {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
-       return 0;
-}
+       struct btrfs_fs_devices *test_fs_devices = data;        /* the fs_devices btrfs_get_sb() passes to sget() */
+       struct btrfs_root *root = btrfs_sb(s);
  
-static int test_bdev_super(struct super_block *s, void *data)
-{
-       return (void *)s->s_bdev == data;
+       return root->fs_info->fs_devices == test_fs_devices;    /* non-zero only for a super block using the same fs_devices */
  }
  
-int btrfs_get_sb_bdev(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data,
-       struct vfsmount *mnt, const char *subvol)
+/*
+ * Find a superblock for the given device / mount point.
+ *
+ * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
+ *       for multiple device setup.  Make sure to keep it in sync.
+ */
+static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
+               const char *dev_name, void *data, struct vfsmount *mnt)
  {
+       char *subvol_name = NULL;
         struct block_device *bdev = NULL;
         struct super_block *s;
         struct dentry *root;
         struct btrfs_fs_devices *fs_devices = NULL;
         int error = 0;
  
+       error = btrfs_parse_early_options(data, flags, fs_type,
+                                         &subvol_name, &fs_devices);
+       if (error)
+               goto error;
+
         error = btrfs_scan_one_device(dev_name, flags, fs_type, &fs_devices);
         if (error)
-               return error;
+               goto error_free_subvol_name;
  
         error = btrfs_open_devices(fs_devices, flags, fs_type);
         if (error)
-               return error;
+               goto error_free_subvol_name;
  
-       bdev = fs_devices->lowest_bdev;
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       down(&bdev->bd_mount_sem);
-       s = sget(fs_type, test_bdev_super, set_bdev_super, bdev);
-       up(&bdev->bd_mount_sem);
+       if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
+               error = -EACCES;
+               goto error_close_devices;
+       }
+
+       bdev = fs_devices->latest_bdev;
+       s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
         if (IS_ERR(s))
                 goto error_s;
  
@@ -370,16 +446,15 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type,
                         up_write(&s->s_umount);
                         deactivate_super(s);
                         error = -EBUSY;
-                       goto error_bdev;
+                       goto error_close_devices;
                 }
  
-               close_bdev_excl(bdev);
+               btrfs_close_devices(fs_devices);
         } else {
                 char b[BDEVNAME_SIZE];
  
                 s->s_flags = flags;
                 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
-               sb_set_blocksize(s, block_size(bdev));
                 error = btrfs_fill_super(s, fs_devices, data,
                                          flags & MS_SILENT ? 1 : 0);
                 if (error) {
@@ -392,8 +467,12 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type,
                 s->s_flags |= MS_ACTIVE;
         }
  
-       if (subvol) {
-               root = lookup_one_len(subvol, s->s_root, strlen(subvol));
+       if (!strcmp(subvol_name, "."))
+               root = dget(s->s_root);
+       else {
+               mutex_lock(&s->s_root->d_inode->i_mutex);
+               root = lookup_one_len(subvol_name, s->s_root, strlen(subvol_name));
+               mutex_unlock(&s->s_root->d_inode->i_mutex);
                 if (IS_ERR(root)) {
                         up_write(&s->s_umount);
                         deactivate_super(s);
@@ -407,35 +486,54 @@ int btrfs_get_sb_bdev(struct file_system_type *fs_type,
                         error = -ENXIO;
                         goto error;
                 }
-       } else {
-               root = dget(s->s_root);
         }
  
         mnt->mnt_sb = s;
         mnt->mnt_root = root;
+
+       kfree(subvol_name);
         return 0;
  
  error_s:
         error = PTR_ERR(s);
-error_bdev:
+error_close_devices:
         btrfs_close_devices(fs_devices);
+error_free_subvol_name:
+       kfree(subvol_name);
  error:
         return error;
  }
-/* end copy & paste */
  
-static int btrfs_get_sb(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static int btrfs_remount(struct super_block *sb, int *flags, char *data)       /* ->remount_fs: handles ro <-> rw switches only; 'data' is never read */
  {
+       struct btrfs_root *root = btrfs_sb(sb);
         int ret;
-       char *subvol_name = NULL;
  
-       parse_options((char *)data, NULL, &subvol_name);
-       ret = btrfs_get_sb_bdev(fs_type, flags, dev_name, data, mnt,
-                       subvol_name ? subvol_name : "default");
-       if (subvol_name)
-               kfree(subvol_name);
-       return ret;
+       if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) /* read-only state unchanged: nothing to do */
+               return 0;
+
+       if (*flags & MS_RDONLY) {
+               sb->s_flags |= MS_RDONLY;       /* rw -> ro: flag is set before the commit below */
+
+               ret =  btrfs_commit_super(root);
+               WARN_ON(ret);   /* failure is only warned about; 0 is still returned */
+       } else {
+               if (root->fs_info->fs_devices->rw_devices == 0) /* ro -> rw refused when rw_devices is zero */
+                       return -EACCES;
+
+               if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)      /* refused while the super copy has a non-zero log root */
+                       return -EINVAL;
+
+               ret = btrfs_cleanup_reloc_trees(root);
+               WARN_ON(ret);   /* failure is only warned about */
+
+               ret = btrfs_cleanup_fs_roots(root->fs_info);
+               WARN_ON(ret);   /* failure is only warned about */
+
+               sb->s_flags &= ~MS_RDONLY;      /* read-only flag is cleared last */
+       }
+
+       return 0;
  }
  
  static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -443,6 +541,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
         struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
         int bits = dentry->d_sb->s_blocksize_bits;
+       __be32 *fsid = (__be32 *)root->fs_info->fsid;
  
         buf->f_namelen = BTRFS_NAME_LEN;
         buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
@@ -451,6 +550,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
         buf->f_bavail = buf->f_bfree;
         buf->f_bsize = dentry->d_sb->s_blocksize;
         buf->f_type = BTRFS_SUPER_MAGIC;
+       /* We treat it as constant endianness (it doesn't matter _which_)
+          because we want the fsid to come out the same whether mounted 
+          on a big-endian or little-endian host */
+       buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]);
+       buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]);
+       /* Mask in the root object ID too, to disambiguate subvols */
+       buf->f_fsid.val[0] ^= BTRFS_I(dentry->d_inode)->root->objectid >> 32;
+       buf->f_fsid.val[1] ^= BTRFS_I(dentry->d_inode)->root->objectid;
+
         return 0;
  }
  
@@ -458,16 +566,19 @@ static struct file_system_type btrfs_fs_type = {
         .owner          = THIS_MODULE,
         .name           = "btrfs",
         .get_sb         = btrfs_get_sb,
-       .kill_sb        = kill_block_super,
+       .kill_sb        = kill_anon_super,
         .fs_flags       = FS_REQUIRES_DEV,
  };
  
+/*
+ * used by btrfsctl to scan devices when no FS is mounted
+ */
  static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
                                 unsigned long arg)
  {
         struct btrfs_ioctl_vol_args *vol;
         struct btrfs_fs_devices *fs_devices;
-       int ret;
+       int ret = 0;
         int len;
  
         vol = kmalloc(sizeof(*vol), GFP_KERNEL);
@@ -484,37 +595,35 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
         }
  out:
         kfree(vol);
-       return 0;
+       return ret;
  }
  
  static void btrfs_write_super_lockfs(struct super_block *sb)
  {
         struct btrfs_root *root = btrfs_sb(sb);
-       btrfs_transaction_flush_work(root);
+       mutex_lock(&root->fs_info->transaction_kthread_mutex);  /* both mutexes are still held on return; btrfs_unlockfs() releases them */
+       mutex_lock(&root->fs_info->cleaner_mutex);      /* taken second, released first in btrfs_unlockfs() */
  }
  
  static void btrfs_unlockfs(struct super_block *sb)
  {
         struct btrfs_root *root = btrfs_sb(sb);
-       btrfs_transaction_queue_work(root, HZ * 30);
+       mutex_unlock(&root->fs_info->cleaner_mutex);    /* releases the two mutexes taken in btrfs_write_super_lockfs(), in reverse order */
+       mutex_unlock(&root->fs_info->transaction_kthread_mutex);
  }
  
  static struct super_operations btrfs_super_ops = {
         .delete_inode   = btrfs_delete_inode,
-       .put_inode      = btrfs_put_inode,
         .put_super      = btrfs_put_super,
         .write_super    = btrfs_write_super,
         .sync_fs        = btrfs_sync_fs,
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
-       .read_inode     = btrfs_read_locked_inode,
-#else
         .show_options   = generic_show_options,
-#endif
         .write_inode    = btrfs_write_inode,
         .dirty_inode    = btrfs_dirty_inode,
         .alloc_inode    = btrfs_alloc_inode,
         .destroy_inode  = btrfs_destroy_inode,
         .statfs         = btrfs_statfs,
+       .remount_fs     = btrfs_remount,        /* ro <-> rw switching, see btrfs_remount() */
         .write_super_lockfs = btrfs_write_super_lockfs,
         .unlockfs       = btrfs_unlockfs,
  };
@@ -550,10 +659,9 @@ static int __init init_btrfs_fs(void)
         if (err)
                 return err;
  
-       btrfs_init_transaction_sys();
         err = btrfs_init_cachep();
         if (err)
-               goto free_transaction_sys;
+               goto free_sysfs;
  
         err = extent_io_init();
         if (err)
@@ -566,9 +674,12 @@ static int __init init_btrfs_fs(void)
         err = btrfs_interface_init();
         if (err)
                 goto free_extent_map;
+
         err = register_filesystem(&btrfs_fs_type);
         if (err)
                 goto unregister_ioctl;
+
+       printk(KERN_INFO "%s loaded\n", BTRFS_BUILD_VERSION);
         return 0;
  
  unregister_ioctl:
@@ -579,15 +690,13 @@ free_extent_io:
         extent_io_exit();
  free_cachep:
         btrfs_destroy_cachep();
-free_transaction_sys:
-       btrfs_exit_transaction_sys();
+free_sysfs:
         btrfs_exit_sysfs();
         return err;
  }
  
  static void __exit exit_btrfs_fs(void)
  {
-       btrfs_exit_transaction_sys();
         btrfs_destroy_cachep();
         extent_map_exit();
         extent_io_exit();
@@ -595,6 +704,7 @@ static void __exit exit_btrfs_fs(void)
         unregister_filesystem(&btrfs_fs_type);
         btrfs_exit_sysfs();
         btrfs_cleanup_fs_uuids();
+       btrfs_zlib_exit();
  }
  
  module_init(init_btrfs_fs)