sparc: switch to using asm-generic for seccomp.h
[linux-2.6-block.git] / fs / mbcache.c
CommitLineData
1da177e4
LT
1/*
2 * linux/fs/mbcache.c
3 * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
4 */
5
6/*
7 * Filesystem Meta Information Block Cache (mbcache)
8 *
9 * The mbcache caches blocks of block devices that need to be located
10 * by their device/block number, as well as by other criteria (such
11 * as the block's contents).
12 *
13 * There can only be one cache entry in a cache per device and block number.
14 * Additional indexes need not be unique in this sense. The number of
15 * additional indexes (=other criteria) can be hardwired at compile time
16 * or specified at cache create time.
17 *
18 * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
19 * in the cache. A valid entry is in the main hash tables of the cache,
20 * and may also be in the lru list. An invalid entry is not in any hashes
21 * or lists.
22 *
23 * A valid cache entry is only in the lru list if no handles refer to it.
24 * Invalid cache entries will be freed when the last handle to the cache
25 * entry is released. Entries that cannot be freed immediately are put
26 * back on the lru list.
27 */
28
1f3e55fe
M
29/*
30 * Lock descriptions and usage:
31 *
32 * Each hash chain of both the block and index hash tables now contains
33 * a built-in lock used to serialize accesses to the hash chain.
34 *
35 * Accesses to global data structures mb_cache_list and mb_cache_lru_list
36 * are serialized via the global spinlock mb_cache_spinlock.
37 *
38 * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
39 * accesses to its local data, such as e_used and e_queued.
40 *
41 * Lock ordering:
42 *
43 * Each block hash chain's lock has the highest lock order, followed by an
44 * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
45 * lock), and mb_cach_spinlock, with the lowest order. While holding
46 * either a block or index hash chain lock, a thread can acquire an
47 * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
48 *
49 * Synchronization:
50 *
51 * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
52 * index hash chian, it needs to lock the corresponding hash chain. For each
53 * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to
54 * prevent either any simultaneous release or free on the entry and also
55 * to serialize accesses to either the e_used or e_queued member of the entry.
56 *
57 * To avoid having a dangling reference to an already freed
58 * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
59 * block hash chain and also no longer being referenced, both e_used,
60 * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is
61 * first removed from a block hash chain.
62 */
63
1da177e4
LT
64#include <linux/kernel.h>
65#include <linux/module.h>
66
67#include <linux/hash.h>
68#include <linux/fs.h>
69#include <linux/mm.h>
70#include <linux/slab.h>
71#include <linux/sched.h>
3e037e52 72#include <linux/list_bl.h>
1da177e4 73#include <linux/mbcache.h>
3e037e52 74#include <linux/init.h>
1f3e55fe 75#include <linux/blockgroup_lock.h>
ec7756ae 76#include <linux/log2.h>
1da177e4
LT
77
78#ifdef MB_CACHE_DEBUG
79# define mb_debug(f...) do { \
80 printk(KERN_DEBUG f); \
81 printk("\n"); \
82 } while (0)
83#define mb_assert(c) do { if (!(c)) \
84 printk(KERN_ERR "assertion " #c " failed\n"); \
85 } while(0)
86#else
87# define mb_debug(f...) do { } while(0)
88# define mb_assert(c) do { } while(0)
89#endif
90#define mb_error(f...) do { \
91 printk(KERN_ERR f); \
92 printk("\n"); \
93 } while(0)
94
95#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
96
ec7756ae 97#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
1f3e55fe
M
98#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
99 (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
100
75c96f85 101static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
1f3e55fe 102static struct blockgroup_lock *mb_cache_bg_lock;
9c191f70 103static struct kmem_cache *mb_cache_kmem_cache;
1f3e55fe 104
1da177e4
LT
105MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
106MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
107MODULE_LICENSE("GPL");
108
109EXPORT_SYMBOL(mb_cache_create);
110EXPORT_SYMBOL(mb_cache_shrink);
111EXPORT_SYMBOL(mb_cache_destroy);
112EXPORT_SYMBOL(mb_cache_entry_alloc);
113EXPORT_SYMBOL(mb_cache_entry_insert);
114EXPORT_SYMBOL(mb_cache_entry_release);
115EXPORT_SYMBOL(mb_cache_entry_free);
116EXPORT_SYMBOL(mb_cache_entry_get);
117#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
118EXPORT_SYMBOL(mb_cache_entry_find_first);
119EXPORT_SYMBOL(mb_cache_entry_find_next);
120#endif
121
1da177e4
LT
122/*
123 * Global data: list of all mbcache's, lru list, and a spinlock for
124 * accessing cache data structures on SMP machines. The lru list is
125 * global across all mbcaches.
126 */
127
128static LIST_HEAD(mb_cache_list);
129static LIST_HEAD(mb_cache_lru_list);
130static DEFINE_SPINLOCK(mb_cache_spinlock);
1da177e4 131
1f3e55fe
M
132static inline void
133__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
134{
135 spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
136 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
137}
138
139static inline void
140__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
141{
142 spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
143 MB_CACHE_ENTRY_LOCK_INDEX(ce)));
144}
145
1da177e4 146static inline int
3e037e52 147__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
1da177e4 148{
3e037e52 149 return !hlist_bl_unhashed(&ce->e_block_list);
1da177e4
LT
150}
151
152
3e037e52
M
153static inline void
154__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
1da177e4 155{
3e037e52
M
156 if (__mb_cache_entry_is_block_hashed(ce))
157 hlist_bl_del_init(&ce->e_block_list);
158}
159
160static inline int
161__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
162{
163 return !hlist_bl_unhashed(&ce->e_index.o_list);
1da177e4
LT
164}
165
3e037e52
M
166static inline void
167__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
168{
169 if (__mb_cache_entry_is_index_hashed(ce))
170 hlist_bl_del_init(&ce->e_index.o_list);
171}
172
1f3e55fe
M
173/*
174 * __mb_cache_entry_unhash_unlock()
175 *
176 * This function is called to unhash both the block and index hash
177 * chain.
178 * It assumes both the block and index hash chain is locked upon entry.
179 * It also unlock both hash chains both exit
180 */
3e037e52 181static inline void
1f3e55fe 182__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
3e037e52
M
183{
184 __mb_cache_entry_unhash_index(ce);
1f3e55fe 185 hlist_bl_unlock(ce->e_index_hash_p);
3e037e52 186 __mb_cache_entry_unhash_block(ce);
1f3e55fe 187 hlist_bl_unlock(ce->e_block_hash_p);
3e037e52 188}
1da177e4 189
858119e1 190static void
27496a8c 191__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
1da177e4
LT
192{
193 struct mb_cache *cache = ce->e_cache;
194
1f3e55fe 195 mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
2aec7c52
AG
196 kmem_cache_free(cache->c_entry_cache, ce);
197 atomic_dec(&cache->c_entry_count);
1da177e4
LT
198}
199
858119e1 200static void
1f3e55fe 201__mb_cache_entry_release(struct mb_cache_entry *ce)
1da177e4 202{
1f3e55fe
M
203 /* First lock the entry to serialize access to its local data. */
204 __spin_lock_mb_cache_entry(ce);
1da177e4
LT
205 /* Wake up all processes queuing for this cache entry. */
206 if (ce->e_queued)
207 wake_up_all(&mb_cache_queue);
208 if (ce->e_used >= MB_CACHE_WRITER)
209 ce->e_used -= MB_CACHE_WRITER;
1f3e55fe
M
210 /*
211 * Make sure that all cache entries on lru_list have
212 * both e_used and e_qued of 0s.
213 */
1da177e4 214 ce->e_used--;
1f3e55fe
M
215 if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
216 if (!__mb_cache_entry_is_block_hashed(ce)) {
217 __spin_unlock_mb_cache_entry(ce);
1da177e4 218 goto forget;
1f3e55fe
M
219 }
220 /*
221 * Need access to lru list, first drop entry lock,
222 * then reacquire the lock in the proper order.
223 */
224 spin_lock(&mb_cache_spinlock);
225 if (list_empty(&ce->e_lru_list))
226 list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
227 spin_unlock(&mb_cache_spinlock);
1da177e4 228 }
1f3e55fe 229 __spin_unlock_mb_cache_entry(ce);
1da177e4
LT
230 return;
231forget:
1f3e55fe 232 mb_assert(list_empty(&ce->e_lru_list));
1da177e4
LT
233 __mb_cache_entry_forget(ce, GFP_KERNEL);
234}
235
1da177e4 236/*
1ab6c499 237 * mb_cache_shrink_scan() memory pressure callback
1da177e4
LT
238 *
239 * This function is called by the kernel memory management when memory
240 * gets low.
241 *
7f8275d0 242 * @shrink: (ignored)
1495f230 243 * @sc: shrink_control passed from reclaim
1da177e4 244 *
1ab6c499 245 * Returns the number of objects freed.
1da177e4 246 */
1ab6c499
DC
247static unsigned long
248mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
1da177e4
LT
249{
250 LIST_HEAD(free_list);
e566d48c 251 struct mb_cache_entry *entry, *tmp;
1495f230
YH
252 int nr_to_scan = sc->nr_to_scan;
253 gfp_t gfp_mask = sc->gfp_mask;
1ab6c499 254 unsigned long freed = 0;
1da177e4 255
1da177e4 256 mb_debug("trying to free %d entries", nr_to_scan);
e566d48c 257 spin_lock(&mb_cache_spinlock);
1f3e55fe 258 while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
1da177e4
LT
259 struct mb_cache_entry *ce =
260 list_entry(mb_cache_lru_list.next,
1f3e55fe
M
261 struct mb_cache_entry, e_lru_list);
262 list_del_init(&ce->e_lru_list);
263 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
264 continue;
265 spin_unlock(&mb_cache_spinlock);
266 /* Prevent any find or get operation on the entry */
267 hlist_bl_lock(ce->e_block_hash_p);
268 hlist_bl_lock(ce->e_index_hash_p);
269 /* Ignore if it is touched by a find/get */
270 if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
271 !list_empty(&ce->e_lru_list)) {
272 hlist_bl_unlock(ce->e_index_hash_p);
273 hlist_bl_unlock(ce->e_block_hash_p);
274 spin_lock(&mb_cache_spinlock);
275 continue;
276 }
277 __mb_cache_entry_unhash_unlock(ce);
278 list_add_tail(&ce->e_lru_list, &free_list);
279 spin_lock(&mb_cache_spinlock);
1ab6c499
DC
280 }
281 spin_unlock(&mb_cache_spinlock);
1f3e55fe 282
1ab6c499
DC
283 list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
284 __mb_cache_entry_forget(entry, gfp_mask);
1f3e55fe 285 freed++;
1da177e4 286 }
1ab6c499
DC
287 return freed;
288}
289
290static unsigned long
291mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
292{
293 struct mb_cache *cache;
294 unsigned long count = 0;
295
296 spin_lock(&mb_cache_spinlock);
e566d48c
AG
297 list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
298 mb_debug("cache %s (%d)", cache->c_name,
299 atomic_read(&cache->c_entry_count));
300 count += atomic_read(&cache->c_entry_count);
301 }
1da177e4 302 spin_unlock(&mb_cache_spinlock);
1ab6c499 303
55f841ce 304 return vfs_pressure_ratio(count);
1da177e4
LT
305}
306
1ab6c499
DC
307static struct shrinker mb_cache_shrinker = {
308 .count_objects = mb_cache_shrink_count,
309 .scan_objects = mb_cache_shrink_scan,
310 .seeks = DEFAULT_SEEKS,
311};
1da177e4
LT
312
313/*
314 * mb_cache_create() create a new cache
315 *
316 * All entries in one cache are equal size. Cache entries may be from
317 * multiple devices. If this is the first mbcache created, registers
318 * the cache with kernel memory management. Returns NULL if no more
319 * memory was available.
320 *
321 * @name: name of the cache (informal)
1da177e4
LT
322 * @bucket_bits: log2(number of hash buckets)
323 */
324struct mb_cache *
2aec7c52 325mb_cache_create(const char *name, int bucket_bits)
1da177e4 326{
2aec7c52 327 int n, bucket_count = 1 << bucket_bits;
1da177e4
LT
328 struct mb_cache *cache = NULL;
329
1f3e55fe
M
330 if (!mb_cache_bg_lock) {
331 mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
332 GFP_KERNEL);
333 if (!mb_cache_bg_lock)
334 return NULL;
335 bgl_lock_init(mb_cache_bg_lock);
336 }
337
2aec7c52 338 cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
1da177e4 339 if (!cache)
2aec7c52 340 return NULL;
1da177e4 341 cache->c_name = name;
1da177e4
LT
342 atomic_set(&cache->c_entry_count, 0);
343 cache->c_bucket_bits = bucket_bits;
3e037e52
M
344 cache->c_block_hash = kmalloc(bucket_count *
345 sizeof(struct hlist_bl_head), GFP_KERNEL);
1da177e4
LT
346 if (!cache->c_block_hash)
347 goto fail;
348 for (n=0; n<bucket_count; n++)
3e037e52
M
349 INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
350 cache->c_index_hash = kmalloc(bucket_count *
351 sizeof(struct hlist_bl_head), GFP_KERNEL);
2aec7c52
AG
352 if (!cache->c_index_hash)
353 goto fail;
354 for (n=0; n<bucket_count; n++)
3e037e52 355 INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
9c191f70
M
356 if (!mb_cache_kmem_cache) {
357 mb_cache_kmem_cache = kmem_cache_create(name,
358 sizeof(struct mb_cache_entry), 0,
359 SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
360 if (!mb_cache_kmem_cache)
361 goto fail2;
362 }
363 cache->c_entry_cache = mb_cache_kmem_cache;
1da177e4 364
3a48ee8a
AG
365 /*
366 * Set an upper limit on the number of cache entries so that the hash
367 * chains won't grow too long.
368 */
369 cache->c_max_entries = bucket_count << 4;
370
1da177e4
LT
371 spin_lock(&mb_cache_spinlock);
372 list_add(&cache->c_cache_list, &mb_cache_list);
373 spin_unlock(&mb_cache_spinlock);
374 return cache;
375
2aec7c52
AG
376fail2:
377 kfree(cache->c_index_hash);
378
1da177e4 379fail:
2aec7c52
AG
380 kfree(cache->c_block_hash);
381 kfree(cache);
1da177e4
LT
382 return NULL;
383}
384
385
386/*
387 * mb_cache_shrink()
388 *
7f927fcc 389 * Removes all cache entries of a device from the cache. All cache entries
1da177e4
LT
390 * currently in use cannot be freed, and thus remain in the cache. All others
391 * are freed.
392 *
1da177e4
LT
393 * @bdev: which device's cache entries to shrink
394 */
395void
8c52ab42 396mb_cache_shrink(struct block_device *bdev)
1da177e4
LT
397{
398 LIST_HEAD(free_list);
1f3e55fe
M
399 struct list_head *l;
400 struct mb_cache_entry *ce, *tmp;
1da177e4 401
1f3e55fe 402 l = &mb_cache_lru_list;
1da177e4 403 spin_lock(&mb_cache_spinlock);
1f3e55fe
M
404 while (!list_is_last(l, &mb_cache_lru_list)) {
405 l = l->next;
406 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
1da177e4 407 if (ce->e_bdev == bdev) {
1f3e55fe
M
408 list_del_init(&ce->e_lru_list);
409 if (ce->e_used || ce->e_queued ||
410 atomic_read(&ce->e_refcnt))
411 continue;
412 spin_unlock(&mb_cache_spinlock);
413 /*
414 * Prevent any find or get operation on the entry.
415 */
416 hlist_bl_lock(ce->e_block_hash_p);
417 hlist_bl_lock(ce->e_index_hash_p);
418 /* Ignore if it is touched by a find/get */
419 if (ce->e_used || ce->e_queued ||
420 atomic_read(&ce->e_refcnt) ||
421 !list_empty(&ce->e_lru_list)) {
422 hlist_bl_unlock(ce->e_index_hash_p);
423 hlist_bl_unlock(ce->e_block_hash_p);
424 l = &mb_cache_lru_list;
425 spin_lock(&mb_cache_spinlock);
426 continue;
427 }
428 __mb_cache_entry_unhash_unlock(ce);
429 mb_assert(!(ce->e_used || ce->e_queued ||
430 atomic_read(&ce->e_refcnt)));
431 list_add_tail(&ce->e_lru_list, &free_list);
432 l = &mb_cache_lru_list;
433 spin_lock(&mb_cache_spinlock);
1da177e4
LT
434 }
435 }
436 spin_unlock(&mb_cache_spinlock);
1f3e55fe
M
437
438 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
439 __mb_cache_entry_forget(ce, GFP_KERNEL);
1da177e4
LT
440 }
441}
442
443
444/*
445 * mb_cache_destroy()
446 *
447 * Shrinks the cache to its minimum possible size (hopefully 0 entries),
448 * and then destroys it. If this was the last mbcache, un-registers the
449 * mbcache from kernel memory management.
450 */
451void
452mb_cache_destroy(struct mb_cache *cache)
453{
454 LIST_HEAD(free_list);
1f3e55fe 455 struct mb_cache_entry *ce, *tmp;
1da177e4
LT
456
457 spin_lock(&mb_cache_spinlock);
1f3e55fe
M
458 list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
459 if (ce->e_cache == cache)
1da177e4 460 list_move_tail(&ce->e_lru_list, &free_list);
1da177e4
LT
461 }
462 list_del(&cache->c_cache_list);
463 spin_unlock(&mb_cache_spinlock);
464
1f3e55fe
M
465 list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
466 list_del_init(&ce->e_lru_list);
467 /*
468 * Prevent any find or get operation on the entry.
469 */
470 hlist_bl_lock(ce->e_block_hash_p);
471 hlist_bl_lock(ce->e_index_hash_p);
472 mb_assert(!(ce->e_used || ce->e_queued ||
473 atomic_read(&ce->e_refcnt)));
474 __mb_cache_entry_unhash_unlock(ce);
475 __mb_cache_entry_forget(ce, GFP_KERNEL);
1da177e4
LT
476 }
477
478 if (atomic_read(&cache->c_entry_count) > 0) {
479 mb_error("cache %s: %d orphaned entries",
480 cache->c_name,
481 atomic_read(&cache->c_entry_count));
482 }
483
9c191f70
M
484 if (list_empty(&mb_cache_list)) {
485 kmem_cache_destroy(mb_cache_kmem_cache);
486 mb_cache_kmem_cache = NULL;
487 }
2aec7c52 488 kfree(cache->c_index_hash);
1da177e4
LT
489 kfree(cache->c_block_hash);
490 kfree(cache);
491}
492
1da177e4
LT
493/*
494 * mb_cache_entry_alloc()
495 *
496 * Allocates a new cache entry. The new entry will not be valid initially,
497 * and thus cannot be looked up yet. It should be filled with data, and
498 * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
499 * if no more memory was available.
500 */
501struct mb_cache_entry *
335e92e8 502mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
1da177e4 503{
1f3e55fe 504 struct mb_cache_entry *ce;
3a48ee8a
AG
505
506 if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
1f3e55fe
M
507 struct list_head *l;
508
509 l = &mb_cache_lru_list;
3a48ee8a 510 spin_lock(&mb_cache_spinlock);
1f3e55fe
M
511 while (!list_is_last(l, &mb_cache_lru_list)) {
512 l = l->next;
513 ce = list_entry(l, struct mb_cache_entry, e_lru_list);
514 if (ce->e_cache == cache) {
515 list_del_init(&ce->e_lru_list);
516 if (ce->e_used || ce->e_queued ||
517 atomic_read(&ce->e_refcnt))
518 continue;
519 spin_unlock(&mb_cache_spinlock);
520 /*
521 * Prevent any find or get operation on the
522 * entry.
523 */
524 hlist_bl_lock(ce->e_block_hash_p);
525 hlist_bl_lock(ce->e_index_hash_p);
526 /* Ignore if it is touched by a find/get */
527 if (ce->e_used || ce->e_queued ||
528 atomic_read(&ce->e_refcnt) ||
529 !list_empty(&ce->e_lru_list)) {
530 hlist_bl_unlock(ce->e_index_hash_p);
531 hlist_bl_unlock(ce->e_block_hash_p);
532 l = &mb_cache_lru_list;
533 spin_lock(&mb_cache_spinlock);
534 continue;
535 }
536 mb_assert(list_empty(&ce->e_lru_list));
537 mb_assert(!(ce->e_used || ce->e_queued ||
538 atomic_read(&ce->e_refcnt)));
539 __mb_cache_entry_unhash_unlock(ce);
540 goto found;
541 }
3a48ee8a
AG
542 }
543 spin_unlock(&mb_cache_spinlock);
544 }
1f3e55fe
M
545
546 ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
547 if (!ce)
548 return NULL;
549 atomic_inc(&cache->c_entry_count);
550 INIT_LIST_HEAD(&ce->e_lru_list);
551 INIT_HLIST_BL_NODE(&ce->e_block_list);
552 INIT_HLIST_BL_NODE(&ce->e_index.o_list);
553 ce->e_cache = cache;
554 ce->e_queued = 0;
555 atomic_set(&ce->e_refcnt, 0);
556found:
3e037e52
M
557 ce->e_block_hash_p = &cache->c_block_hash[0];
558 ce->e_index_hash_p = &cache->c_index_hash[0];
3a48ee8a 559 ce->e_used = 1 + MB_CACHE_WRITER;
1da177e4
LT
560 return ce;
561}
562
563
564/*
565 * mb_cache_entry_insert()
566 *
567 * Inserts an entry that was allocated using mb_cache_entry_alloc() into
568 * the cache. After this, the cache entry can be looked up, but is not yet
569 * in the lru list as the caller still holds a handle to it. Returns 0 on
570 * success, or -EBUSY if a cache entry for that device + inode exists
571 * already (this may happen after a failed lookup, but when another process
572 * has inserted the same cache entry in the meantime).
573 *
574 * @bdev: device the cache entry belongs to
575 * @block: block number
2aec7c52 576 * @key: lookup key
1da177e4
LT
577 */
578int
579mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
2aec7c52 580 sector_t block, unsigned int key)
1da177e4
LT
581{
582 struct mb_cache *cache = ce->e_cache;
583 unsigned int bucket;
3e037e52 584 struct hlist_bl_node *l;
3e037e52
M
585 struct hlist_bl_head *block_hash_p;
586 struct hlist_bl_head *index_hash_p;
587 struct mb_cache_entry *lce;
1da177e4 588
3e037e52 589 mb_assert(ce);
1da177e4
LT
590 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
591 cache->c_bucket_bits);
3e037e52 592 block_hash_p = &cache->c_block_hash[bucket];
1f3e55fe 593 hlist_bl_lock(block_hash_p);
3e037e52 594 hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
1f3e55fe
M
595 if (lce->e_bdev == bdev && lce->e_block == block) {
596 hlist_bl_unlock(block_hash_p);
597 return -EBUSY;
598 }
1da177e4 599 }
3e037e52 600 mb_assert(!__mb_cache_entry_is_block_hashed(ce));
1f3e55fe
M
601 __mb_cache_entry_unhash_block(ce);
602 __mb_cache_entry_unhash_index(ce);
1da177e4
LT
603 ce->e_bdev = bdev;
604 ce->e_block = block;
3e037e52 605 ce->e_block_hash_p = block_hash_p;
2aec7c52 606 ce->e_index.o_key = key;
1f3e55fe
M
607 hlist_bl_add_head(&ce->e_block_list, block_hash_p);
608 hlist_bl_unlock(block_hash_p);
2aec7c52 609 bucket = hash_long(key, cache->c_bucket_bits);
3e037e52 610 index_hash_p = &cache->c_index_hash[bucket];
1f3e55fe 611 hlist_bl_lock(index_hash_p);
3e037e52
M
612 ce->e_index_hash_p = index_hash_p;
613 hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
1f3e55fe
M
614 hlist_bl_unlock(index_hash_p);
615 return 0;
1da177e4
LT
616}
617
618
619/*
620 * mb_cache_entry_release()
621 *
622 * Release a handle to a cache entry. When the last handle to a cache entry
623 * is released it is either freed (if it is invalid) or otherwise inserted
624 * in to the lru list.
625 */
626void
627mb_cache_entry_release(struct mb_cache_entry *ce)
628{
1f3e55fe 629 __mb_cache_entry_release(ce);
1da177e4
LT
630}
631
632
633/*
634 * mb_cache_entry_free()
635 *
1da177e4
LT
636 */
637void
638mb_cache_entry_free(struct mb_cache_entry *ce)
639{
1f3e55fe 640 mb_assert(ce);
1da177e4 641 mb_assert(list_empty(&ce->e_lru_list));
1f3e55fe
M
642 hlist_bl_lock(ce->e_index_hash_p);
643 __mb_cache_entry_unhash_index(ce);
644 hlist_bl_unlock(ce->e_index_hash_p);
645 hlist_bl_lock(ce->e_block_hash_p);
646 __mb_cache_entry_unhash_block(ce);
647 hlist_bl_unlock(ce->e_block_hash_p);
648 __mb_cache_entry_release(ce);
1da177e4
LT
649}
650
651
652/*
653 * mb_cache_entry_get()
654 *
655 * Get a cache entry by device / block number. (There can only be one entry
656 * in the cache per device and block.) Returns NULL if no such cache entry
657 * exists. The returned cache entry is locked for exclusive access ("single
658 * writer").
659 */
660struct mb_cache_entry *
661mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
662 sector_t block)
663{
664 unsigned int bucket;
3e037e52 665 struct hlist_bl_node *l;
1da177e4 666 struct mb_cache_entry *ce;
3e037e52 667 struct hlist_bl_head *block_hash_p;
1da177e4
LT
668
669 bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
670 cache->c_bucket_bits);
3e037e52 671 block_hash_p = &cache->c_block_hash[bucket];
1f3e55fe
M
672 /* First serialize access to the block corresponding hash chain. */
673 hlist_bl_lock(block_hash_p);
3e037e52
M
674 hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
675 mb_assert(ce->e_block_hash_p == block_hash_p);
1da177e4 676 if (ce->e_bdev == bdev && ce->e_block == block) {
1f3e55fe
M
677 /*
678 * Prevent a free from removing the entry.
679 */
680 atomic_inc(&ce->e_refcnt);
681 hlist_bl_unlock(block_hash_p);
682 __spin_lock_mb_cache_entry(ce);
683 atomic_dec(&ce->e_refcnt);
684 if (ce->e_used > 0) {
685 DEFINE_WAIT(wait);
686 while (ce->e_used > 0) {
687 ce->e_queued++;
688 prepare_to_wait(&mb_cache_queue, &wait,
689 TASK_UNINTERRUPTIBLE);
690 __spin_unlock_mb_cache_entry(ce);
691 schedule();
692 __spin_lock_mb_cache_entry(ce);
693 ce->e_queued--;
694 }
695 finish_wait(&mb_cache_queue, &wait);
696 }
697 ce->e_used += 1 + MB_CACHE_WRITER;
698 __spin_unlock_mb_cache_entry(ce);
1da177e4 699
1f3e55fe
M
700 if (!list_empty(&ce->e_lru_list)) {
701 spin_lock(&mb_cache_spinlock);
1da177e4 702 list_del_init(&ce->e_lru_list);
1da177e4 703 spin_unlock(&mb_cache_spinlock);
1da177e4 704 }
3e037e52 705 if (!__mb_cache_entry_is_block_hashed(ce)) {
1f3e55fe 706 __mb_cache_entry_release(ce);
1da177e4
LT
707 return NULL;
708 }
1f3e55fe 709 return ce;
1da177e4
LT
710 }
711 }
1f3e55fe
M
712 hlist_bl_unlock(block_hash_p);
713 return NULL;
1da177e4
LT
714}
715
716#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
717
718static struct mb_cache_entry *
3e037e52 719__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
2aec7c52 720 struct block_device *bdev, unsigned int key)
1da177e4 721{
1f3e55fe
M
722
723 /* The index hash chain is alredy acquire by caller. */
3e037e52 724 while (l != NULL) {
1da177e4 725 struct mb_cache_entry *ce =
3e037e52
M
726 hlist_bl_entry(l, struct mb_cache_entry,
727 e_index.o_list);
728 mb_assert(ce->e_index_hash_p == head);
2aec7c52 729 if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
1f3e55fe
M
730 /*
731 * Prevent a free from removing the entry.
732 */
733 atomic_inc(&ce->e_refcnt);
734 hlist_bl_unlock(head);
735 __spin_lock_mb_cache_entry(ce);
736 atomic_dec(&ce->e_refcnt);
737 ce->e_used++;
1da177e4
LT
738 /* Incrementing before holding the lock gives readers
739 priority over writers. */
1f3e55fe
M
740 if (ce->e_used >= MB_CACHE_WRITER) {
741 DEFINE_WAIT(wait);
742
743 while (ce->e_used >= MB_CACHE_WRITER) {
744 ce->e_queued++;
745 prepare_to_wait(&mb_cache_queue, &wait,
746 TASK_UNINTERRUPTIBLE);
747 __spin_unlock_mb_cache_entry(ce);
748 schedule();
749 __spin_lock_mb_cache_entry(ce);
750 ce->e_queued--;
751 }
752 finish_wait(&mb_cache_queue, &wait);
753 }
754 __spin_unlock_mb_cache_entry(ce);
755 if (!list_empty(&ce->e_lru_list)) {
1da177e4 756 spin_lock(&mb_cache_spinlock);
1f3e55fe
M
757 list_del_init(&ce->e_lru_list);
758 spin_unlock(&mb_cache_spinlock);
1da177e4 759 }
3e037e52 760 if (!__mb_cache_entry_is_block_hashed(ce)) {
1f3e55fe 761 __mb_cache_entry_release(ce);
1da177e4
LT
762 return ERR_PTR(-EAGAIN);
763 }
764 return ce;
765 }
766 l = l->next;
767 }
1f3e55fe 768 hlist_bl_unlock(head);
1da177e4
LT
769 return NULL;
770}
771
772
773/*
774 * mb_cache_entry_find_first()
775 *
776 * Find the first cache entry on a given device with a certain key in
25985edc 777 * an additional index. Additional matches can be found with
1da177e4
LT
778 * mb_cache_entry_find_next(). Returns NULL if no match was found. The
779 * returned cache entry is locked for shared access ("multiple readers").
780 *
781 * @cache: the cache to search
1da177e4
LT
782 * @bdev: the device the cache entry should belong to
783 * @key: the key in the index
784 */
785struct mb_cache_entry *
2aec7c52
AG
786mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
787 unsigned int key)
1da177e4
LT
788{
789 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
3e037e52
M
790 struct hlist_bl_node *l;
791 struct mb_cache_entry *ce = NULL;
792 struct hlist_bl_head *index_hash_p;
1da177e4 793
3e037e52 794 index_hash_p = &cache->c_index_hash[bucket];
1f3e55fe 795 hlist_bl_lock(index_hash_p);
3e037e52
M
796 if (!hlist_bl_empty(index_hash_p)) {
797 l = hlist_bl_first(index_hash_p);
798 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
1f3e55fe
M
799 } else
800 hlist_bl_unlock(index_hash_p);
1da177e4
LT
801 return ce;
802}
803
804
805/*
806 * mb_cache_entry_find_next()
807 *
808 * Find the next cache entry on a given device with a certain key in an
809 * additional index. Returns NULL if no match could be found. The previous
810 * entry is atomatically released, so that mb_cache_entry_find_next() can
811 * be called like this:
812 *
813 * entry = mb_cache_entry_find_first();
814 * while (entry) {
815 * ...
816 * entry = mb_cache_entry_find_next(entry, ...);
817 * }
818 *
819 * @prev: The previous match
1da177e4
LT
820 * @bdev: the device the cache entry should belong to
821 * @key: the key in the index
822 */
823struct mb_cache_entry *
2aec7c52 824mb_cache_entry_find_next(struct mb_cache_entry *prev,
1da177e4
LT
825 struct block_device *bdev, unsigned int key)
826{
827 struct mb_cache *cache = prev->e_cache;
828 unsigned int bucket = hash_long(key, cache->c_bucket_bits);
3e037e52 829 struct hlist_bl_node *l;
1da177e4 830 struct mb_cache_entry *ce;
3e037e52 831 struct hlist_bl_head *index_hash_p;
1da177e4 832
3e037e52
M
833 index_hash_p = &cache->c_index_hash[bucket];
834 mb_assert(prev->e_index_hash_p == index_hash_p);
1f3e55fe 835 hlist_bl_lock(index_hash_p);
3e037e52 836 mb_assert(!hlist_bl_empty(index_hash_p));
2aec7c52 837 l = prev->e_index.o_list.next;
3e037e52 838 ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
1f3e55fe 839 __mb_cache_entry_release(prev);
1da177e4
LT
840 return ce;
841}
842
843#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
844
845static int __init init_mbcache(void)
846{
8e1f936b 847 register_shrinker(&mb_cache_shrinker);
1da177e4
LT
848 return 0;
849}
850
851static void __exit exit_mbcache(void)
852{
8e1f936b 853 unregister_shrinker(&mb_cache_shrinker);
1da177e4
LT
854}
855
856module_init(init_mbcache)
857module_exit(exit_mbcache)
858