X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=fs%2Finode.c;h=5e5bafe70cebced27bf61b3940c9de61d2b6b13a;hb=4c51acbc66f754e536e1c9e3331656b69bce86d0;hp=3368abd64bb542b8a95d5f3d4149588dddb23bb3;hpb=56b0dacfa2b8416815a2f2a5f4f51e46be4cf14c;p=linux-block.git diff --git a/fs/inode.c b/fs/inode.c index 3368abd64bb5..5e5bafe70ceb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; * allowing for low-overhead inode sync() operations. */ -LIST_HEAD(inode_in_use); -LIST_HEAD(inode_unused); +static LIST_HEAD(inode_unused); static struct hlist_head *inode_hashtable __read_mostly; /* @@ -103,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem); */ struct inodes_stat_t inodes_stat; +static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; +static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; + static struct kmem_cache *inode_cachep __read_mostly; +static inline int get_nr_inodes(void) +{ + return percpu_counter_sum_positive(&nr_inodes); +} + +static inline int get_nr_inodes_unused(void) +{ + return percpu_counter_sum_positive(&nr_inodes_unused); +} + +int get_nr_dirty_inodes(void) +{ + int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); + return nr_dirty > 0 ? nr_dirty : 0; + +} + +/* + * Handle nr_inode sysctl + */ +#ifdef CONFIG_SYSCTL +int proc_nr_inodes(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + inodes_stat.nr_inodes = get_nr_inodes(); + inodes_stat.nr_unused = get_nr_inodes_unused(); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#endif + static void wake_up_inode(struct inode *inode) { /* @@ -192,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif + percpu_counter_inc(&nr_inodes); + return 0; out: return -ENOMEM; @@ -232,6 +266,7 @@ void __destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif + percpu_counter_dec(&nr_inodes); } EXPORT_SYMBOL(__destroy_inode); @@ -255,6 +290,7 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); + INIT_LIST_HEAD(&inode->i_list); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -281,13 +317,76 @@ static void init_once(void *foo) */ void __iget(struct inode *inode) { - if (atomic_inc_return(&inode->i_count) != 1) - return; + atomic_inc(&inode->i_count); +} - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_in_use); - inodes_stat.nr_unused--; +static void inode_lru_list_add(struct inode *inode) +{ + if (list_empty(&inode->i_list)) { + list_add(&inode->i_list, &inode_unused); + percpu_counter_inc(&nr_inodes_unused); + } +} + +static void inode_lru_list_del(struct inode *inode) +{ + if (!list_empty(&inode->i_list)) { + list_del_init(&inode->i_list); + percpu_counter_dec(&nr_inodes_unused); + } +} + +static unsigned long hash(struct super_block *sb, unsigned long hashval) +{ + unsigned long tmp; + + tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / + L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); + return tmp & I_HASHMASK; +} + +/** + * __insert_inode_hash - hash an inode + * @inode: unhashed inode + * @hashval: unsigned long value used to locate this object in the + * inode_hashtable. + * + * Add an inode to the inode hash for this superblock. + */ +void __insert_inode_hash(struct inode *inode, unsigned long hashval) +{ + struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + spin_lock(&inode_lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL(__insert_inode_hash); + +/** + * __remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock. + */ +static void __remove_inode_hash(struct inode *inode) +{ + hlist_del_init(&inode->i_hash); +} + +/** + * remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock. + */ +void remove_inode_hash(struct inode *inode) +{ + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); } +EXPORT_SYMBOL(remove_inode_hash); void end_writeback(struct inode *inode) { @@ -327,28 +426,22 @@ static void evict(struct inode *inode) */ static void dispose_list(struct list_head *head) { - int nr_disposed = 0; - while (!list_empty(head)) { struct inode *inode; inode = list_first_entry(head, struct inode, i_list); - list_del(&inode->i_list); + list_del_init(&inode->i_list); evict(inode); spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); + __remove_inode_hash(inode); list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); wake_up_inode(inode); destroy_inode(inode); - nr_disposed++; } - spin_lock(&inode_lock); - inodes_stat.nr_inodes -= nr_disposed; - spin_unlock(&inode_lock); } /* @@ -357,7 +450,7 @@ static void dispose_list(struct list_head *head) static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; - int busy = 0, count = 0; + int busy = 0; next = head->next; for (;;) { @@ -383,13 +476,12 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - count++; + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); continue; } busy = 1; } - /* only unused inodes may be cached with i_count zero */ - inodes_stat.nr_unused -= count; return busy; } @@ -417,11 +509,10 @@ int invalidate_inodes(struct super_block *sb) return busy; } -EXPORT_SYMBOL(invalidate_inodes); static int can_unuse(struct inode *inode) { - if (inode->i_state) + if (inode->i_state & ~I_REFERENCED) return 0; if (inode_has_buffers(inode)) return 0; @@ -433,22 +524,24 @@ static int can_unuse(struct inode *inode) } /* - * Scan `goal' inodes on the unused list for freeable ones. They are moved to - * a temporary list and then are freed outside inode_lock by dispose_list(). + * Scan `goal' inodes on the unused list for freeable ones. They are moved to a + * temporary list and then are freed outside inode_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their - * pagecache removed. We expect the final iput() on that inode to add it to - * the front of the inode_unused list. So look for it there and if the - * inode is still freeable, proceed. The right inode is found 99.9% of the - * time in testing on a 4-way. + * pagecache removed. If the inode has metadata buffers attached to + * mapping->private_list then try to remove them. * - * If the inode has metadata buffers attached to mapping->private_list then - * try to remove them. + * If the inode has the I_REFERENCED flag set, then it means that it has been + * used recently - the flag is set in iput_final(). When we encounter such an + * inode, clear the flag and move it to the back of the LRU so it gets another + * pass through the LRU before it gets reclaimed. This is necessary because of + * the fact we are doing lazy LRU updates to minimise lock contention so the + * LRU does not have strict ordering. Hence we don't want to reclaim inodes + * with this flag set because they are the inodes that are out of order. */ static void prune_icache(int nr_to_scan) { LIST_HEAD(freeable); - int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; @@ -462,8 +555,21 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_unused.prev, struct inode, i_list); - if (inode->i_state || atomic_read(&inode->i_count)) { + /* + * Referenced or dirty inodes are still in use. Give them + * another pass through the LRU as we canot reclaim them now. + */ + if (atomic_read(&inode->i_count) || + (inode->i_state & ~I_REFERENCED)) { + list_del_init(&inode->i_list); + percpu_counter_dec(&nr_inodes_unused); + continue; + } + + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { list_move(&inode->i_list, &inode_unused); + inode->i_state &= ~I_REFERENCED; continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { @@ -484,9 +590,8 @@ static void prune_icache(int nr_to_scan) list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - nr_pruned++; + percpu_counter_dec(&nr_inodes_unused); } - inodes_stat.nr_unused -= nr_pruned; if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else @@ -518,7 +623,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) return -1; prune_icache(nr); } - return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; } static struct shrinker icache_shrinker = { @@ -581,22 +686,10 @@ repeat: return node ? inode : NULL; } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; -} - static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - inodes_stat.nr_inodes++; - list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); if (head) hlist_add_head(&inode->i_hash, head); @@ -663,7 +756,7 @@ EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { #ifdef CONFIG_DEBUG_LOCK_ALLOC - if (inode->i_mode & S_IFDIR) { + if (S_ISDIR(inode->i_mode)) { struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ @@ -1095,7 +1188,7 @@ int insert_inode_locked(struct inode *inode) __iget(old); spin_unlock(&inode_lock); wait_on_inode(old); - if (unlikely(!hlist_unhashed(&old->i_hash))) { + if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } @@ -1134,7 +1227,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, __iget(old); spin_unlock(&inode_lock); wait_on_inode(old); - if (unlikely(!hlist_unhashed(&old->i_hash))) { + if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } @@ -1143,36 +1236,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } EXPORT_SYMBOL(insert_inode_locked4); -/** - * __insert_inode_hash - hash an inode - * @inode: unhashed inode - * @hashval: unsigned long value used to locate this object in the - * inode_hashtable. - * - * Add an inode to the inode hash for this superblock. - */ -void __insert_inode_hash(struct inode *inode, unsigned long hashval) -{ - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); - spin_lock(&inode_lock); - hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL(__insert_inode_hash); - -/** - * remove_inode_hash - remove an inode from the hash - * @inode: inode to unhash - * - * Remove an inode from the superblock. - */ -void remove_inode_hash(struct inode *inode) -{ - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL(remove_inode_hash); int generic_delete_inode(struct inode *inode) { @@ -1187,7 +1250,7 @@ EXPORT_SYMBOL(generic_delete_inode); */ int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || hlist_unhashed(&inode->i_hash); + return !inode->i_nlink || inode_unhashed(inode); } EXPORT_SYMBOL_GPL(generic_drop_inode); @@ -1213,10 +1276,11 @@ static void iput_final(struct inode *inode) drop = generic_drop_inode(inode); if (!drop) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; if (sb->s_flags & MS_ACTIVE) { + inode->i_state |= I_REFERENCED; + if (!(inode->i_state & (I_DIRTY|I_SYNC))) { + inode_lru_list_add(inode); + } spin_unlock(&inode_lock); return; } @@ -1227,19 +1291,22 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; - hlist_del_init(&inode->i_hash); + __remove_inode_hash(inode); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; + + /* + * After we delete the inode from the LRU here, we avoid moving dirty + * inodes back onto the LRU now because I_FREEING is set and hence + * writeback_single_inode() won't move the inode around. + */ + inode_lru_list_del(inode); + + list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); evict(inode); - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); + remove_inode_hash(inode); wake_up_inode(inode); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); destroy_inode(inode); @@ -1503,6 +1570,8 @@ void __init inode_init(void) SLAB_MEM_SPREAD), init_once); register_shrinker(&icache_shrinker); + percpu_counter_init(&nr_inodes, 0); + percpu_counter_init(&nr_inodes_unused, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist)