ceph: map snapid to anonymous bdev ID
[linux-block.git] / fs / ceph / snap.c
index 041c27ea8de155a0002bdb5af25eb2fc5f8e6efa..89aa37fa0f84c55fe3324e50b554f6fcef5b5be5 100644 (file)
@@ -3,12 +3,13 @@
 
 #include <linux/sort.h>
 #include <linux/slab.h>
-
 #include "super.h"
 #include "mds_client.h"
-
 #include <linux/ceph/decode.h>
 
+/* unused map expires after 5 minutes */
+#define CEPH_SNAPID_MAP_TIMEOUT        (5 * 60 * HZ)
+
 /*
  * Snapshots in ceph are driven in large part by cooperation from the
  * client.  In contrast to local file systems or file servers that
@@ -124,6 +125,8 @@ static struct ceph_snap_realm *ceph_create_snap_realm(
        INIT_LIST_HEAD(&realm->inodes_with_caps);
        spin_lock_init(&realm->inodes_with_caps_lock);
        __insert_snap_realm(&mdsc->snap_realms, realm);
+       mdsc->num_snap_realms++;
+
        dout("create_snap_realm %llx %p\n", realm->ino, realm);
        return realm;
 }
@@ -175,6 +178,7 @@ static void __destroy_snap_realm(struct ceph_mds_client *mdsc,
        dout("__destroy_snap_realm %p %llx\n", realm, realm->ino);
 
        rb_erase(&realm->node, &mdsc->snap_realms);
+       mdsc->num_snap_realms--;
 
        if (realm->parent) {
                list_del_init(&realm->child_item);
@@ -616,7 +620,8 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
             capsnap->size);
 
        spin_lock(&mdsc->snap_flush_lock);
-       list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+       if (list_empty(&ci->i_snap_flush_item))
+               list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
        spin_unlock(&mdsc->snap_flush_lock);
        return 1;  /* caller may want to ceph_flush_snaps */
 }
@@ -985,3 +990,154 @@ out:
                up_write(&mdsc->snap_rwsem);
        return;
 }
+
+/*
+ * Look up the snapid map entry for @snap in mdsc->snapid_map_tree,
+ * creating and inserting a new one if no entry is found.
+ *
+ * The returned entry has had its reference count raised by one (a new
+ * entry starts with a count of 1).  An existing entry whose count goes
+ * from 0 to 1 is unlinked from whatever lru list it was on.
+ *
+ * Returns NULL if kmalloc() or get_anon_bdev() fails.
+ */
+struct ceph_snapid_map* ceph_get_snapid_map(struct ceph_mds_client *mdsc,
+                                           u64 snap)
+{
+       struct ceph_snapid_map *sm, *exist;
+       struct rb_node **p, *parent;
+       int ret;
+
+       /* first pass: plain lookup; larger snap ids are kept to the left */
+       exist = NULL;
+       spin_lock(&mdsc->snapid_map_lock);
+       p = &mdsc->snapid_map_tree.rb_node;
+       while (*p) {
+               exist = rb_entry(*p, struct ceph_snapid_map, node);
+               if (snap > exist->snap) {
+                       p = &(*p)->rb_left;
+               } else if (snap < exist->snap) {
+                       p = &(*p)->rb_right;
+               } else {
+                       /* count was zero before this get: take it off the lru */
+                       if (atomic_inc_return(&exist->ref) == 1)
+                               list_del_init(&exist->lru);
+                       break;
+               }
+               /* no match at this node; leave exist NULL if the walk ends */
+               exist = NULL;
+       }
+       spin_unlock(&mdsc->snapid_map_lock);
+       if (exist) {
+               dout("found snapid map %llx -> %x\n", exist->snap, exist->dev);
+               return exist;
+       }
+
+       /* not found: build a new entry with the lock dropped */
+       sm = kmalloc(sizeof(*sm), GFP_NOFS);
+       if (!sm)
+               return NULL;
+
+       ret = get_anon_bdev(&sm->dev);
+       if (ret < 0) {
+               kfree(sm);
+               return NULL;
+       }
+
+       INIT_LIST_HEAD(&sm->lru);
+       atomic_set(&sm->ref, 1);
+       sm->snap = snap;
+
+       /*
+        * second pass: the lock was released above, so search again and
+        * remember the link position (parent, p) for a possible insert
+        */
+       exist = NULL;
+       parent = NULL;
+       p = &mdsc->snapid_map_tree.rb_node;
+       spin_lock(&mdsc->snapid_map_lock);
+       while (*p) {
+               parent = *p;
+               exist = rb_entry(*p, struct ceph_snapid_map, node);
+               if (snap > exist->snap)
+                       p = &(*p)->rb_left;
+               else if (snap < exist->snap)
+                       p = &(*p)->rb_right;
+               else
+                       break;
+               exist = NULL;
+       }
+       if (exist) {
+               /* an entry for this snap is in the tree now: use that one */
+               if (atomic_inc_return(&exist->ref) == 1)
+                       list_del_init(&exist->lru);
+       } else {
+               rb_link_node(&sm->node, parent, p);
+               rb_insert_color(&sm->node, &mdsc->snapid_map_tree);
+       }
+       spin_unlock(&mdsc->snapid_map_lock);
+       if (exist) {
+               /* the freshly built entry is not needed; release it */
+               free_anon_bdev(sm->dev);
+               kfree(sm);
+               dout("found snapid map %llx -> %x\n", exist->snap, exist->dev);
+               return exist;
+       }
+
+       dout("create snapid map %llx -> %x\n", sm->snap, sm->dev);
+       return sm;
+}
+
+/*
+ * Drop one reference on @sm; a NULL @sm is ignored.
+ *
+ * When the count reaches zero the entry is either parked at the tail of
+ * mdsc->snapid_map_lru with a fresh last_used stamp, or, if its rb node
+ * has already been cleared, freed.
+ */
+void ceph_put_snapid_map(struct ceph_mds_client* mdsc,
+                        struct ceph_snapid_map *sm)
+{
+       if (!sm)
+               return;
+
+       /* returns with snapid_map_lock held only when the count hit zero */
+       if (!atomic_dec_and_lock(&sm->ref, &mdsc->snapid_map_lock))
+               return;
+
+       if (RB_EMPTY_NODE(&sm->node)) {
+               /* already cleaned up by ceph_cleanup_snapid_map() */
+               spin_unlock(&mdsc->snapid_map_lock);
+               kfree(sm);
+               return;
+       }
+
+       /* still in the tree: keep it around on the lru */
+       sm->last_used = jiffies;
+       list_add_tail(&sm->lru, &mdsc->snapid_map_lru);
+       spin_unlock(&mdsc->snapid_map_lock);
+}
+
+/*
+ * Release lru entries whose last_used stamp is at least
+ * CEPH_SNAPID_MAP_TIMEOUT old.
+ *
+ * Expired entries are unhooked from the tree and the lru under
+ * snapid_map_lock, then freed after the lock has been dropped.
+ */
+void ceph_trim_snapid_map(struct ceph_mds_client *mdsc)
+{
+       struct ceph_snapid_map *victim;
+       unsigned long now;
+       LIST_HEAD(to_free);
+
+       spin_lock(&mdsc->snapid_map_lock);
+       now = jiffies;
+
+       for (;;) {
+               if (list_empty(&mdsc->snapid_map_lru))
+                       break;
+
+               victim = list_first_entry(&mdsc->snapid_map_lru,
+                                         struct ceph_snapid_map, lru);
+               /* stop at the first entry that has not expired yet */
+               if (time_after(victim->last_used + CEPH_SNAPID_MAP_TIMEOUT,
+                              now))
+                       break;
+
+               rb_erase(&victim->node, &mdsc->snapid_map_tree);
+               list_move(&victim->lru, &to_free);
+       }
+       spin_unlock(&mdsc->snapid_map_lock);
+
+       /* the collected entries are private now; free them unlocked */
+       while (!list_empty(&to_free)) {
+               victim = list_first_entry(&to_free,
+                                         struct ceph_snapid_map, lru);
+               dout("trim snapid map %llx -> %x\n", victim->snap, victim->dev);
+               list_del(&victim->lru);
+               free_anon_bdev(victim->dev);
+               kfree(victim);
+       }
+}
+
+/*
+ * Tear down every entry in mdsc->snapid_map_tree.
+ *
+ * Each entry is erased from the tree (its rb node is cleared so that
+ * ceph_put_snapid_map() can tell it is gone) and moved to a private
+ * list under snapid_map_lock.  With the lock dropped, the anonymous
+ * bdev id of each entry is released and the entry itself is freed.
+ *
+ * An entry that still has references is reported and not freed here:
+ * ceph_put_snapid_map() frees an entry with a cleared rb node when its
+ * last reference is dropped.
+ */
+void ceph_cleanup_snapid_map(struct ceph_mds_client *mdsc)
+{
+       struct ceph_snapid_map *sm;
+       struct rb_node *p;
+       LIST_HEAD(to_free);
+
+       spin_lock(&mdsc->snapid_map_lock);
+       while ((p = rb_first(&mdsc->snapid_map_tree))) {
+               sm = rb_entry(p, struct ceph_snapid_map, node);
+               rb_erase(p, &mdsc->snapid_map_tree);
+               RB_CLEAR_NODE(p);
+               list_move(&sm->lru, &to_free);
+       }
+       spin_unlock(&mdsc->snapid_map_lock);
+
+       while (!list_empty(&to_free)) {
+               sm = list_first_entry(&to_free, struct ceph_snapid_map, lru);
+               list_del(&sm->lru);
+               free_anon_bdev(sm->dev);
+               if (WARN_ON_ONCE(atomic_read(&sm->ref))) {
+                       pr_err("snapid map %llx -> %x still in use\n",
+                              sm->snap, sm->dev);
+                       /* left for the final ceph_put_snapid_map() to free */
+                       continue;
+               }
+               /* unreferenced and unlinked: nothing else will free it */
+               kfree(sm);
+       }
+}