[PATCH] Two small fixes for md version-1 superblocks.
[linux-2.6-block.git] / drivers / md / bitmap.c
CommitLineData
32a7627c
N
1/*
2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003
3 *
4 * bitmap_create - sets up the bitmap structure
5 * bitmap_destroy - destroys the bitmap structure
6 *
7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
8 * - added disk storage for bitmap
9 * - changes to allow various bitmap chunk sizes
10 * - added bitmap daemon (to asynchronously clear bitmap bits from disk)
11 */
12
13/*
14 * Still to do:
15 *
16 * flush after percent set rather than just time based. (maybe both).
17 * wait if count gets too high, wake when it drops to half.
18 * allow bitmap to be mirrored with superblock (before or after...)
19 * allow hot-add to re-instate a current device.
20 * allow hot-add of bitmap after quiescing device
21 */
22
23#include <linux/module.h>
24#include <linux/version.h>
25#include <linux/errno.h>
26#include <linux/slab.h>
27#include <linux/init.h>
28#include <linux/config.h>
29#include <linux/timer.h>
30#include <linux/sched.h>
31#include <linux/list.h>
32#include <linux/file.h>
33#include <linux/mount.h>
34#include <linux/buffer_head.h>
35#include <linux/raid/md.h>
36#include <linux/raid/bitmap.h>
37
38/* debug macros */
39
40#define DEBUG 0
41
42#if DEBUG
43/* these are for debugging purposes only! */
44
45/* define one and only one of these */
46#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */
47#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/
48#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */
49#define INJECT_FAULTS_4 0 /* undef */
50#define INJECT_FAULTS_5 0 /* undef */
51#define INJECT_FAULTS_6 0
52
53/* if these are defined, the driver will fail! debug only */
54#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */
55#define INJECT_FATAL_FAULT_2 0 /* undef */
56#define INJECT_FATAL_FAULT_3 0 /* undef */
57#endif
58
59//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */
60#define DPRINTK(x...) do { } while(0)
61
62#ifndef PRINTK
63# if DEBUG > 0
64# define PRINTK(x...) printk(KERN_DEBUG x)
65# else
66# define PRINTK(x...)
67# endif
68#endif
69
70static inline char * bmname(struct bitmap *bitmap)
71{
72 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX";
73}
74
75
76/*
77 * test if the bitmap is active
78 */
79int bitmap_active(struct bitmap *bitmap)
80{
81 unsigned long flags;
82 int res = 0;
83
84 if (!bitmap)
85 return res;
86 spin_lock_irqsave(&bitmap->lock, flags);
87 res = bitmap->flags & BITMAP_ACTIVE;
88 spin_unlock_irqrestore(&bitmap->lock, flags);
89 return res;
90}
91
92#define WRITE_POOL_SIZE 256
93/* mempool for queueing pending writes on the bitmap file */
94static void *write_pool_alloc(unsigned int gfp_flags, void *data)
95{
96 return kmalloc(sizeof(struct page_list), gfp_flags);
97}
98
/* mempool release callback: kfree() the element; 'data' is not used. */
static void write_pool_free(void *ptr, void *data)
{
	kfree(ptr);
	return;
}
103
104/*
105 * just a placeholder - calls kmalloc for bitmap pages
106 */
107static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
108{
109 unsigned char *page;
110
111#if INJECT_FAULTS_1
112 page = NULL;
113#else
114 page = kmalloc(PAGE_SIZE, GFP_NOIO);
115#endif
116 if (!page)
117 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
118 else
a654b9d8 119 PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
32a7627c
N
120 bmname(bitmap), page);
121 return page;
122}
123
/*
 * Release a buffer obtained from bitmap_alloc_page().  For now this is
 * nothing more than a (debug-logged) kfree().
 */
static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page)
{
	PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page);

	kfree(page);
}
132
133/*
134 * check a page and, if necessary, allocate it (or hijack it if the alloc fails)
135 *
136 * 1) check to see if this page is allocated, if it's not then try to alloc
137 * 2) if the alloc fails, set the page's hijacked flag so we'll use the
138 * page pointer directly as a counter
139 *
140 * if we find our page, we increment the page's refcount so that it stays
141 * allocated while we're using it
142 */
143static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create)
144{
145 unsigned char *mappage;
146
147 if (page >= bitmap->pages) {
148 printk(KERN_ALERT
149 "%s: invalid bitmap page request: %lu (> %lu)\n",
150 bmname(bitmap), page, bitmap->pages-1);
151 return -EINVAL;
152 }
153
154
155 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
156 return 0;
157
158 if (bitmap->bp[page].map) /* page is already allocated, just return */
159 return 0;
160
161 if (!create)
162 return -ENOENT;
163
164 spin_unlock_irq(&bitmap->lock);
165
166 /* this page has not been allocated yet */
167
168 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) {
169 PRINTK("%s: bitmap map page allocation failed, hijacking\n",
170 bmname(bitmap));
171 /* failed - set the hijacked flag so that we can use the
172 * pointer as a counter */
173 spin_lock_irq(&bitmap->lock);
174 if (!bitmap->bp[page].map)
175 bitmap->bp[page].hijacked = 1;
176 goto out;
177 }
178
179 /* got a page */
180
181 spin_lock_irq(&bitmap->lock);
182
183 /* recheck the page */
184
185 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) {
186 /* somebody beat us to getting the page */
187 bitmap_free_page(bitmap, mappage);
188 return 0;
189 }
190
191 /* no page was in place and we have one, so install it */
192
193 memset(mappage, 0, PAGE_SIZE);
194 bitmap->bp[page].map = mappage;
195 bitmap->missing_pages--;
196out:
197 return 0;
198}
199
200
201/* if page is completely empty, put it back on the free list, or dealloc it */
202/* if page was hijacked, unmark the flag so it might get alloced next time */
203/* Note: lock should be held when calling this */
204static inline void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
205{
206 char *ptr;
207
208 if (bitmap->bp[page].count) /* page is still busy */
209 return;
210
211 /* page is no longer in use, it can be released */
212
213 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */
214 bitmap->bp[page].hijacked = 0;
215 bitmap->bp[page].map = NULL;
216 return;
217 }
218
219 /* normal case, free the page */
220
221#if 0
222/* actually ... let's not. We will probably need the page again exactly when
223 * memory is tight and we are flusing to disk
224 */
225 return;
226#else
227 ptr = bitmap->bp[page].map;
228 bitmap->bp[page].map = NULL;
229 bitmap->missing_pages++;
230 bitmap_free_page(bitmap, ptr);
231 return;
232#endif
233}
234
235
236/*
237 * bitmap file handling - read and write the bitmap file and its superblock
238 */
239
240/* copy the pathname of a file to a buffer */
241char *file_path(struct file *file, char *buf, int count)
242{
243 struct dentry *d;
244 struct vfsmount *v;
245
246 if (!buf)
247 return NULL;
248
249 d = file->f_dentry;
250 v = file->f_vfsmnt;
251
252 buf = d_path(d, v, buf, count);
253
254 return IS_ERR(buf) ? NULL : buf;
255}
256
257/*
258 * basic page I/O operations
259 */
260
a654b9d8
N
261/* IO operations when bitmap is stored near all superblocks */
262static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
263{
264 /* choose a good rdev and read the page from there */
265
266 mdk_rdev_t *rdev;
267 struct list_head *tmp;
268 struct page *page = alloc_page(GFP_KERNEL);
269 sector_t target;
270
271 if (!page)
272 return ERR_PTR(-ENOMEM);
273 do {
274 ITERATE_RDEV(mddev, rdev, tmp)
275 if (rdev->in_sync && !rdev->faulty)
276 goto found;
277 return ERR_PTR(-EIO);
278
279 found:
280 target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
281
282 } while (!sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ));
283
284 page->index = index;
285 return page;
286}
287
288static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
289{
290 mdk_rdev_t *rdev;
291 struct list_head *tmp;
292
293 ITERATE_RDEV(mddev, rdev, tmp)
294 if (rdev->in_sync && !rdev->faulty)
295 md_super_write(mddev, rdev,
296 (rdev->sb_offset<<1) + offset
297 + page->index * (PAGE_SIZE/512),
298 PAGE_SIZE,
299 page);
300
301 if (wait)
302 wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
303 return 0;
304}
305
32a7627c 306/*
a654b9d8 307 * write out a page to a file
32a7627c 308 */
77ad4bc7 309static int write_page(struct bitmap *bitmap, struct page *page, int wait)
32a7627c
N
310{
311 int ret = -ENOMEM;
312
a654b9d8
N
313 if (bitmap->file == NULL)
314 return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);
315
32a7627c
N
316 lock_page(page);
317
32a7627c
N
318 ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
319 if (!ret)
320 ret = page->mapping->a_ops->commit_write(NULL, page, 0,
321 PAGE_SIZE);
322 if (ret) {
32a7627c
N
323 unlock_page(page);
324 return ret;
325 }
326
327 set_page_dirty(page); /* force it to be written out */
77ad4bc7
N
328
329 if (!wait) {
330 /* add to list to be waited for by daemon */
331 struct page_list *item = mempool_alloc(bitmap->write_pool, GFP_NOIO);
332 item->page = page;
333 page_cache_get(page);
334 spin_lock(&bitmap->write_lock);
335 list_add(&item->list, &bitmap->complete_pages);
336 spin_unlock(&bitmap->write_lock);
337 md_wakeup_thread(bitmap->writeback_daemon);
338 }
32a7627c
N
339 return write_one_page(page, wait);
340}
341
342/* read a page from a file, pinning it into cache, and return bytes_read */
343static struct page *read_page(struct file *file, unsigned long index,
344 unsigned long *bytes_read)
345{
346 struct inode *inode = file->f_mapping->host;
347 struct page *page = NULL;
348 loff_t isize = i_size_read(inode);
349 unsigned long end_index = isize >> PAGE_CACHE_SHIFT;
350
351 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_CACHE_SIZE,
352 (unsigned long long)index << PAGE_CACHE_SHIFT);
353
354 page = read_cache_page(inode->i_mapping, index,
355 (filler_t *)inode->i_mapping->a_ops->readpage, file);
356 if (IS_ERR(page))
357 goto out;
358 wait_on_page_locked(page);
359 if (!PageUptodate(page) || PageError(page)) {
360 page_cache_release(page);
361 page = ERR_PTR(-EIO);
362 goto out;
363 }
364
365 if (index > end_index) /* we have read beyond EOF */
366 *bytes_read = 0;
367 else if (index == end_index) /* possible short read */
368 *bytes_read = isize & ~PAGE_CACHE_MASK;
369 else
370 *bytes_read = PAGE_CACHE_SIZE; /* got a full page */
371out:
372 if (IS_ERR(page))
373 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n",
374 (int)PAGE_CACHE_SIZE,
375 (unsigned long long)index << PAGE_CACHE_SHIFT,
376 PTR_ERR(page));
377 return page;
378}
379
380/*
381 * bitmap file superblock operations
382 */
383
384/* update the event counter and sync the superblock to disk */
385int bitmap_update_sb(struct bitmap *bitmap)
386{
387 bitmap_super_t *sb;
388 unsigned long flags;
389
390 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */
391 return 0;
392 spin_lock_irqsave(&bitmap->lock, flags);
393 if (!bitmap->sb_page) { /* no superblock */
394 spin_unlock_irqrestore(&bitmap->lock, flags);
395 return 0;
396 }
32a7627c
N
397 spin_unlock_irqrestore(&bitmap->lock, flags);
398 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
399 sb->events = cpu_to_le64(bitmap->mddev->events);
400 if (!bitmap->mddev->degraded)
401 sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
402 kunmap(bitmap->sb_page);
77ad4bc7 403 return write_page(bitmap, bitmap->sb_page, 0);
32a7627c
N
404}
405
406/* print out the bitmap file superblock */
407void bitmap_print_sb(struct bitmap *bitmap)
408{
409 bitmap_super_t *sb;
410
411 if (!bitmap || !bitmap->sb_page)
412 return;
413 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
414 printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
a2cff26a
N
415 printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
416 printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
417 printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n",
32a7627c
N
418 *(__u32 *)(sb->uuid+0),
419 *(__u32 *)(sb->uuid+4),
420 *(__u32 *)(sb->uuid+8),
421 *(__u32 *)(sb->uuid+12));
a2cff26a 422 printk(KERN_DEBUG " events: %llu\n",
32a7627c 423 (unsigned long long) le64_to_cpu(sb->events));
a2cff26a 424 printk(KERN_DEBUG "events cleared: %llu\n",
32a7627c 425 (unsigned long long) le64_to_cpu(sb->events_cleared));
a2cff26a
N
426 printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state));
427 printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize));
428 printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
429 printk(KERN_DEBUG " sync size: %llu KB\n",
430 (unsigned long long)le64_to_cpu(sb->sync_size)/2);
32a7627c
N
431 kunmap(bitmap->sb_page);
432}
433
434/* read the superblock from the bitmap file and initialize some bitmap fields */
435static int bitmap_read_sb(struct bitmap *bitmap)
436{
437 char *reason = NULL;
438 bitmap_super_t *sb;
439 unsigned long chunksize, daemon_sleep;
440 unsigned long bytes_read;
441 unsigned long long events;
442 int err = -EINVAL;
443
444 /* page 0 is the superblock, read it... */
a654b9d8
N
445 if (bitmap->file)
446 bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
447 else {
448 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
449 bytes_read = PAGE_SIZE;
450 }
32a7627c
N
451 if (IS_ERR(bitmap->sb_page)) {
452 err = PTR_ERR(bitmap->sb_page);
453 bitmap->sb_page = NULL;
454 return err;
455 }
456
457 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
458
459 if (bytes_read < sizeof(*sb)) { /* short read */
460 printk(KERN_INFO "%s: bitmap file superblock truncated\n",
461 bmname(bitmap));
462 err = -ENOSPC;
463 goto out;
464 }
465
466 chunksize = le32_to_cpu(sb->chunksize);
467 daemon_sleep = le32_to_cpu(sb->daemon_sleep);
468
469 /* verify that the bitmap-specific fields are valid */
470 if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
471 reason = "bad magic";
472 else if (sb->version != cpu_to_le32(BITMAP_MAJOR))
473 reason = "unrecognized superblock version";
474 else if (chunksize < 512 || chunksize > (1024 * 1024 * 4))
475 reason = "bitmap chunksize out of range (512B - 4MB)";
476 else if ((1 << ffz(~chunksize)) != chunksize)
477 reason = "bitmap chunksize not a power of 2";
478 else if (daemon_sleep < 1 || daemon_sleep > 15)
479 reason = "daemon sleep period out of range";
480 if (reason) {
481 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
482 bmname(bitmap), reason);
483 goto out;
484 }
485
486 /* keep the array size field of the bitmap superblock up to date */
487 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);
488
489 if (!bitmap->mddev->persistent)
490 goto success;
491
492 /*
493 * if we have a persistent array superblock, compare the
494 * bitmap's UUID and event counter to the mddev's
495 */
496 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
497 printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n",
498 bmname(bitmap));
499 goto out;
500 }
501 events = le64_to_cpu(sb->events);
502 if (events < bitmap->mddev->events) {
503 printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) "
504 "-- forcing full recovery\n", bmname(bitmap), events,
505 (unsigned long long) bitmap->mddev->events);
506 sb->state |= BITMAP_STALE;
507 }
508success:
509 /* assign fields using values from superblock */
510 bitmap->chunksize = chunksize;
511 bitmap->daemon_sleep = daemon_sleep;
512 bitmap->flags |= sb->state;
513 bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
514 err = 0;
515out:
516 kunmap(bitmap->sb_page);
517 if (err)
518 bitmap_print_sb(bitmap);
519 return err;
520}
521
/* operation selector for bitmap_mask_state() */
522enum bitmap_mask_op {
523 MASK_SET, /* OR the given bits into the superblock state */
524 MASK_UNSET /* clear the given bits from the superblock state */
525};
526
527/* record the state of the bitmap in the superblock */
528static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
529 enum bitmap_mask_op op)
530{
531 bitmap_super_t *sb;
532 unsigned long flags;
533
534 spin_lock_irqsave(&bitmap->lock, flags);
535 if (!bitmap || !bitmap->sb_page) { /* can't set the state */
536 spin_unlock_irqrestore(&bitmap->lock, flags);
537 return;
538 }
539 page_cache_get(bitmap->sb_page);
540 spin_unlock_irqrestore(&bitmap->lock, flags);
541 sb = (bitmap_super_t *)kmap(bitmap->sb_page);
542 switch (op) {
543 case MASK_SET: sb->state |= bits;
544 break;
545 case MASK_UNSET: sb->state &= ~bits;
546 break;
547 default: BUG();
548 }
549 kunmap(bitmap->sb_page);
550 page_cache_release(bitmap->sb_page);
551}
552
553/*
554 * general bitmap file operations
555 */
556
557/* calculate the index of the page that contains this bit */
558static inline unsigned long file_page_index(unsigned long chunk)
559{
560 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT;
561}
562
563/* calculate the (bit) offset of this bit within a page */
564static inline unsigned long file_page_offset(unsigned long chunk)
565{
566 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1);
567}
568
569/*
570 * return a pointer to the page in the filemap that contains the given bit
571 *
572 * this lookup is complicated by the fact that the bitmap sb might be exactly
573 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page
574 * 0 or page 1
575 */
576static inline struct page *filemap_get_page(struct bitmap *bitmap,
577 unsigned long chunk)
578{
579 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)];
580}
581
582
583static void bitmap_file_unmap(struct bitmap *bitmap)
584{
585 struct page **map, *sb_page;
586 unsigned long *attr;
587 int pages;
588 unsigned long flags;
589
590 spin_lock_irqsave(&bitmap->lock, flags);
591 map = bitmap->filemap;
592 bitmap->filemap = NULL;
593 attr = bitmap->filemap_attr;
594 bitmap->filemap_attr = NULL;
595 pages = bitmap->file_pages;
596 bitmap->file_pages = 0;
597 sb_page = bitmap->sb_page;
598 bitmap->sb_page = NULL;
599 spin_unlock_irqrestore(&bitmap->lock, flags);
600
601 while (pages--)
602 if (map[pages]->index != 0) /* 0 is sb_page, release it below */
603 page_cache_release(map[pages]);
604 kfree(map);
605 kfree(attr);
606
607 if (sb_page)
608 page_cache_release(sb_page);
609}
610
611static void bitmap_stop_daemons(struct bitmap *bitmap);
612
613/* dequeue the next item in a page list -- don't call from irq context */
77ad4bc7 614static struct page_list *dequeue_page(struct bitmap *bitmap)
32a7627c
N
615{
616 struct page_list *item = NULL;
77ad4bc7 617 struct list_head *head = &bitmap->complete_pages;
32a7627c
N
618
619 spin_lock(&bitmap->write_lock);
620 if (list_empty(head))
621 goto out;
622 item = list_entry(head->prev, struct page_list, list);
623 list_del(head->prev);
624out:
625 spin_unlock(&bitmap->write_lock);
626 return item;
627}
628
629static void drain_write_queues(struct bitmap *bitmap)
630{
32a7627c 631 struct page_list *item;
32a7627c 632
77ad4bc7
N
633 while ((item = dequeue_page(bitmap))) {
634 /* don't bother to wait */
635 page_cache_release(item->page);
636 mempool_free(item, bitmap->write_pool);
32a7627c
N
637 }
638
32a7627c 639 wake_up(&bitmap->write_wait);
32a7627c
N
640}
641
642static void bitmap_file_put(struct bitmap *bitmap)
643{
644 struct file *file;
645 struct inode *inode;
646 unsigned long flags;
647
648 spin_lock_irqsave(&bitmap->lock, flags);
649 file = bitmap->file;
650 bitmap->file = NULL;
651 spin_unlock_irqrestore(&bitmap->lock, flags);
652
653 bitmap_stop_daemons(bitmap);
654
655 drain_write_queues(bitmap);
656
657 bitmap_file_unmap(bitmap);
658
659 if (file) {
660 inode = file->f_mapping->host;
661 spin_lock(&inode->i_lock);
662 atomic_set(&inode->i_writecount, 1); /* allow writes again */
663 spin_unlock(&inode->i_lock);
664 fput(file);
665 }
666}
667
668
669/*
670 * bitmap_file_kick - if an error occurs while manipulating the bitmap file
671 * then it is no longer reliable, so we stop using it and we mark the file
672 * as failed in the superblock
673 */
674static void bitmap_file_kick(struct bitmap *bitmap)
675{
676 char *path, *ptr = NULL;
677
678 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
679 bitmap_update_sb(bitmap);
680
a654b9d8
N
681 if (bitmap->file) {
682 path = kmalloc(PAGE_SIZE, GFP_KERNEL);
683 if (path)
684 ptr = file_path(bitmap->file, path, PAGE_SIZE);
32a7627c 685
a654b9d8
N
686 printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n",
687 bmname(bitmap), ptr ? ptr : "");
32a7627c 688
a654b9d8
N
689 kfree(path);
690 }
32a7627c
N
691
692 bitmap_file_put(bitmap);
693
694 return;
695}
696
/*
 * Per-page attribute bits.  They are stored in bitmap->filemap_attr[],
 * indexed by page->index, and combined with bitwise OR (see
 * set_page_attr / clear_page_attr / get_page_attr).
 */
697enum bitmap_page_attr {
698 BITMAP_PAGE_DIRTY = 1, // there are set bits that need to be synced
699 BITMAP_PAGE_CLEAN = 2, // there are bits that might need to be cleared
700 BITMAP_PAGE_NEEDWRITE=4, // there are cleared bits that need to be synced
701};
702
703static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
704 enum bitmap_page_attr attr)
705{
706 bitmap->filemap_attr[page->index] |= attr;
707}
708
709static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
710 enum bitmap_page_attr attr)
711{
712 bitmap->filemap_attr[page->index] &= ~attr;
713}
714
715static inline unsigned long get_page_attr(struct bitmap *bitmap, struct page *page)
716{
717 return bitmap->filemap_attr[page->index];
718}
719
720/*
721 * bitmap_file_set_bit -- called before performing a write to the md device
722 * to set (and eventually sync) a particular bit in the bitmap file
723 *
724 * we set the bit immediately, then we record the page number so that
725 * when an unplug occurs, we can flush the dirty pages out to disk
726 */
727static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
728{
729 unsigned long bit;
730 struct page *page;
731 void *kaddr;
732 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);
733
a654b9d8 734 if (!bitmap->filemap) {
32a7627c
N
735 return;
736 }
737
738 page = filemap_get_page(bitmap, chunk);
739 bit = file_page_offset(chunk);
740
741
742 /* make sure the page stays cached until it gets written out */
743 if (! (get_page_attr(bitmap, page) & BITMAP_PAGE_DIRTY))
744 page_cache_get(page);
745
746 /* set the bit */
747 kaddr = kmap_atomic(page, KM_USER0);
748 set_bit(bit, kaddr);
749 kunmap_atomic(kaddr, KM_USER0);
750 PRINTK("set file bit %lu page %lu\n", bit, page->index);
751
752 /* record page number so it gets flushed to disk when unplug occurs */
753 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
754
755}
756
757/* this gets called when the md device is ready to unplug its underlying
758 * (slave) device queues -- before we let any writes go down, we need to
759 * sync the dirty pages of the bitmap file to disk */
760int bitmap_unplug(struct bitmap *bitmap)
761{
762 unsigned long i, attr, flags;
763 struct page *page;
764 int wait = 0;
765
766 if (!bitmap)
767 return 0;
768
769 /* look at each page to see if there are any set bits that need to be
770 * flushed out to disk */
771 for (i = 0; i < bitmap->file_pages; i++) {
772 spin_lock_irqsave(&bitmap->lock, flags);
a654b9d8 773 if (!bitmap->filemap) {
32a7627c
N
774 spin_unlock_irqrestore(&bitmap->lock, flags);
775 return 0;
776 }
777 page = bitmap->filemap[i];
778 attr = get_page_attr(bitmap, page);
779 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
780 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
781 if ((attr & BITMAP_PAGE_DIRTY))
782 wait = 1;
783 spin_unlock_irqrestore(&bitmap->lock, flags);
784
785 if (attr & (BITMAP_PAGE_DIRTY | BITMAP_PAGE_NEEDWRITE))
77ad4bc7 786 if (write_page(bitmap, page, 0))
bfb39fba 787 return 1;
32a7627c
N
788 }
789 if (wait) { /* if any writes were performed, we need to wait on them */
a654b9d8
N
790 if (bitmap->file) {
791 spin_lock_irq(&bitmap->write_lock);
792 wait_event_lock_irq(bitmap->write_wait,
793 list_empty(&bitmap->complete_pages), bitmap->write_lock,
794 wake_up_process(bitmap->writeback_daemon->tsk));
795 spin_unlock_irq(&bitmap->write_lock);
796 } else
797 wait_event(bitmap->mddev->sb_wait,
798 atomic_read(&bitmap->mddev->pending_writes)==0);
32a7627c
N
799 }
800 return 0;
801}
802
803static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
cdbb4cc2 804 unsigned long sectors, int in_sync);
32a7627c
N
805/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
806 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
807 * memory mapping of the bitmap file
808 * Special cases:
809 * if there's no bitmap file, or if the bitmap file had been
810 * previously kicked from the array, we mark all the bits as
811 * 1's in order to cause a full resync.
812 */
/*
 * Summary of what the code below does:
 *  - requires either a bitmap file or a non-zero bitmap->offset (BUG_ON)
 *  - allocates the filemap and filemap_attr arrays, sized for the number
 *    of pages needed to hold the superblock plus one bit per chunk
 *  - walks every chunk, reading the page holding its bit when the page
 *    index changes (page 0 reuses the already-read superblock page)
 *  - when BITMAP_STALE is set in bitmap->flags, fills each page after the
 *    superblock with 0xff and writes it out synchronously
 *  - for every bit found set, calls bitmap_set_memory_bits(); if any bit
 *    was set, MD_RECOVERY_NEEDED is flagged and the md thread is woken
 *
 * Returns 0 on success, -ENOSPC when the bitmap file is too short,
 * -ENOMEM when an array cannot be allocated, or the error from reading or
 * writing a page.
 */
cdbb4cc2 813static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
32a7627c
N
814{
815 unsigned long i, chunks, index, oldindex, bit;
816 struct page *page = NULL, *oldpage = NULL;
817 unsigned long num_pages, bit_cnt = 0;
818 struct file *file;
819 unsigned long bytes, offset, dummy;
820 int outofdate;
821 int ret = -ENOSPC;
822
823 chunks = bitmap->chunks;
824 file = bitmap->file;
825
a654b9d8 826 BUG_ON(!file && !bitmap->offset);
32a7627c
N
827
828#if INJECT_FAULTS_3
829 outofdate = 1;
830#else
831 outofdate = bitmap->flags & BITMAP_STALE;
832#endif
833 if (outofdate)
834 printk(KERN_INFO "%s: bitmap file is out of date, doing full "
835 "recovery\n", bmname(bitmap));
836
/* one bit per chunk, rounded up to whole bytes */
837 bytes = (chunks + 7) / 8;
bc7f77de 838
/* pages needed for the superblock plus the bits, rounded up */
cdbb4cc2 839 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;
bc7f77de 840
/* ret is still -ENOSPC here: a too-short file is reported as such */
a654b9d8 841 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
32a7627c
N
842 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
843 bmname(bitmap),
844 (unsigned long) i_size_read(file->f_mapping->host),
845 bytes + sizeof(bitmap_super_t));
846 goto out;
847 }
bc7f77de
N
848
849 ret = -ENOMEM;
850
32a7627c 851 bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
bc7f77de 852 if (!bitmap->filemap)
32a7627c 853 goto out;
32a7627c
N
854
855 bitmap->filemap_attr = kmalloc(sizeof(long) * num_pages, GFP_KERNEL);
bc7f77de 856 if (!bitmap->filemap_attr)
32a7627c 857 goto out;
32a7627c
N
858
859 memset(bitmap->filemap_attr, 0, sizeof(long) * num_pages);
860
/* sentinel that matches no real page index, so the first chunk loads a page */
861 oldindex = ~0L;
862
863 for (i = 0; i < chunks; i++) {
864 index = file_page_index(i);
865 bit = file_page_offset(i);
866 if (index != oldindex) { /* this is a new page, read it in */
867 /* unmap the old page, we're done with it */
868 if (oldpage != NULL)
869 kunmap(oldpage);
870 if (index == 0) {
871 /*
872 * if we're here then the superblock page
873 * contains some bits (PAGE_SIZE != sizeof sb)
874 * we've already read it in, so just use it
875 */
876 page = bitmap->sb_page;
877 offset = sizeof(bitmap_super_t);
a654b9d8 878 } else if (file) {
32a7627c 879 page = read_page(file, index, &dummy);
a654b9d8
N
880 offset = 0;
881 } else {
882 page = read_sb_page(bitmap->mddev, bitmap->offset, index);
32a7627c
N
883 offset = 0;
884 }
a654b9d8
N
885 if (IS_ERR(page)) { /* read error */
886 ret = PTR_ERR(page);
887 goto out;
888 }
889
32a7627c
N
890 oldindex = index;
891 oldpage = page;
892 kmap(page);
893
894 if (outofdate) {
895 /*
896 * if bitmap is out of date, dirty the
897 * whole page and write it out
898 */
899 memset(page_address(page) + offset, 0xff,
900 PAGE_SIZE - offset);
77ad4bc7 901 ret = write_page(bitmap, page, 1);
32a7627c
N
902 if (ret) {
903 kunmap(page);
904 /* release, page not in filemap yet */
905 page_cache_release(page);
906 goto out;
907 }
908 }
909
/* the page is now owned by the filemap; file_pages counts loaded pages */
910 bitmap->filemap[bitmap->file_pages++] = page;
911 }
912 if (test_bit(bit, page_address(page))) {
913 /* if the disk bit is set, set the memory bit */
914 bitmap_set_memory_bits(bitmap,
cdbb4cc2 915 i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync);
32a7627c
N
916 bit_cnt++;
917 }
32a7627c
N
918 }
919
920 /* everything went OK */
921 ret = 0;
922 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET);
923
924 if (page) /* unmap the last page */
925 kunmap(page);
926
927 if (bit_cnt) { /* Kick recovery if any bits were set */
928 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
929 md_wakeup_thread(bitmap->mddev->thread);
930 }
931
/* reached on success and on every failure path; ret carries the status */
932out:
933 printk(KERN_INFO "%s: bitmap initialized from disk: "
934 "read %lu/%lu pages, set %lu bits, status: %d\n",
935 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt, ret);
936
937 return ret;
938}
939
a654b9d8
N
940void bitmap_write_all(struct bitmap *bitmap)
941{
942 /* We don't actually write all bitmap blocks here,
943 * just flag them as needing to be written
944 */
945
946 unsigned long chunks = bitmap->chunks;
947 unsigned long bytes = (chunks+7)/8 + sizeof(bitmap_super_t);
948 unsigned long num_pages = (bytes + PAGE_SIZE-1) / PAGE_SIZE;
949 while (num_pages--)
950 bitmap->filemap_attr[num_pages] |= BITMAP_PAGE_NEEDWRITE;
951}
952
32a7627c
N
953
954static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
955{
956 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
957 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
958 bitmap->bp[page].count += inc;
959/*
960 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n",
961 (unsigned long long)offset, inc, bitmap->bp[page].count);
962*/
963 bitmap_checkfree(bitmap, page);
964}
965static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
966 sector_t offset, int *blocks,
967 int create);
968
969/*
970 * bitmap daemon -- periodically wakes up to clean bits and flush pages
971 * out to disk
972 */
973
/*
 * bitmap_daemon_work - one pass of the bitmap cleaning daemon
 *
 * Returns 0 immediately for a NULL bitmap, or when fewer than
 * daemon_sleep seconds have elapsed since daemon_lastrun.
 *
 * Otherwise every chunk is visited:
 *  - pages not flagged BITMAP_PAGE_CLEAN are skipped; if such a page is
 *    flagged NEEDWRITE it is written out first
 *  - on moving to a new page, the previous page is either written (if it
 *    was already flagged NEEDWRITE) or flagged NEEDWRITE for a later pass
 *  - a counter equal to 2 is lowered to 1 and the page is flagged CLEAN
 *    again; a counter equal to 1 is zeroed, the counter page's use count
 *    is decremented and the on-disk bit is cleared in the cached page
 *
 * The loop stops early if the filemap disappears.  A failed write inside
 * the loop kicks the bitmap file; the status of the last write_page()
 * call is returned.
 */
974int bitmap_daemon_work(struct bitmap *bitmap)
975{
aa3163f8 976 unsigned long j;
32a7627c
N
977 unsigned long flags;
978 struct page *page = NULL, *lastpage = NULL;
979 int err = 0;
980 int blocks;
981 int attr;
982
983 if (bitmap == NULL)
984 return 0;
/* not yet time for another pass */
985 if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
986 return 0;
987 bitmap->daemon_lastrun = jiffies;
988
989 for (j = 0; j < bitmap->chunks; j++) {
990 bitmap_counter_t *bmc;
991 spin_lock_irqsave(&bitmap->lock, flags);
a654b9d8 992 if (!bitmap->filemap) {
32a7627c
N
993 /* error or shutdown */
994 spin_unlock_irqrestore(&bitmap->lock, flags);
995 break;
996 }
997
998 page = filemap_get_page(bitmap, j);
32a7627c
N
999
1000 if (page != lastpage) {
aa3163f8
N
1001 /* skip this page unless it's marked as needing cleaning */
1002 if (!((attr=get_page_attr(bitmap, page)) & BITMAP_PAGE_CLEAN)) {
/* take a reference before dropping the lock so the page can be written */
1003 if (attr & BITMAP_PAGE_NEEDWRITE) {
1004 page_cache_get(page);
1005 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
1006 }
1007 spin_unlock_irqrestore(&bitmap->lock, flags);
1008 if (attr & BITMAP_PAGE_NEEDWRITE) {
1009 if (write_page(bitmap, page, 0))
1010 bitmap_file_kick(bitmap);
1011 page_cache_release(page);
1012 }
1013 continue;
1014 }
1015
32a7627c
N
1016 /* grab the new page, sync and release the old */
1017 page_cache_get(page);
1018 if (lastpage != NULL) {
/* NEEDWRITE already set: write now; otherwise set it for a later pass */
1019 if (get_page_attr(bitmap, lastpage) & BITMAP_PAGE_NEEDWRITE) {
1020 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1021 spin_unlock_irqrestore(&bitmap->lock, flags);
77ad4bc7 1022 err = write_page(bitmap, lastpage, 0);
32a7627c
N
1023 } else {
1024 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1025 spin_unlock_irqrestore(&bitmap->lock, flags);
1026 }
1027 kunmap(lastpage);
1028 page_cache_release(lastpage);
1029 if (err)
1030 bitmap_file_kick(bitmap);
1031 } else
1032 spin_unlock_irqrestore(&bitmap->lock, flags);
1033 lastpage = page;
1034 kmap(page);
1035/*
1036 printk("bitmap clean at page %lu\n", j);
1037*/
/* re-take the lock; it stays held until the end of this iteration */
1038 spin_lock_irqsave(&bitmap->lock, flags);
1039 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1040 }
1041 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
1042 &blocks, 0);
1043 if (bmc) {
1044/*
1045 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
1046*/
1047 if (*bmc == 2) {
1048 *bmc=1; /* maybe clear the bit next time */
1049 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1050 } else if (*bmc == 1) {
1051 /* we can clear the bit */
1052 *bmc = 0;
1053 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap),
1054 -1);
1055
1056 /* clear the bit */
aa3163f8 1057 clear_bit(file_page_offset(j), page_address(page));
32a7627c
N
1058 }
1059 }
1060 spin_unlock_irqrestore(&bitmap->lock, flags);
1061 }
1062
1063 /* now sync the final page */
1064 if (lastpage != NULL) {
1065 kunmap(lastpage);
1066 spin_lock_irqsave(&bitmap->lock, flags);
1067 if (get_page_attr(bitmap, lastpage) &BITMAP_PAGE_NEEDWRITE) {
1068 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1069 spin_unlock_irqrestore(&bitmap->lock, flags);
77ad4bc7 1070 err = write_page(bitmap, lastpage, 0);
32a7627c
N
1071 } else {
1072 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
1073 spin_unlock_irqrestore(&bitmap->lock, flags);
1074 }
1075
1076 page_cache_release(lastpage);
1077 }
1078
1079 return err;
1080}
1081
1082static void daemon_exit(struct bitmap *bitmap, mdk_thread_t **daemon)
1083{
1084 mdk_thread_t *dmn;
1085 unsigned long flags;
1086
1087 /* if no one is waiting on us, we'll free the md thread struct
1088 * and exit, otherwise we let the waiter clean things up */
1089 spin_lock_irqsave(&bitmap->lock, flags);
1090 if ((dmn = *daemon)) { /* no one is waiting, cleanup and exit */
1091 *daemon = NULL;
1092 spin_unlock_irqrestore(&bitmap->lock, flags);
1093 kfree(dmn);
1094 complete_and_exit(NULL, 0); /* do_exit not exported */
1095 }
1096 spin_unlock_irqrestore(&bitmap->lock, flags);
1097}
1098
1099static void bitmap_writeback_daemon(mddev_t *mddev)
1100{
1101 struct bitmap *bitmap = mddev->bitmap;
1102 struct page *page;
1103 struct page_list *item;
1104 int err = 0;
1105
77ad4bc7
N
1106 if (signal_pending(current)) {
1107 printk(KERN_INFO
1108 "%s: bitmap writeback daemon got signal, exiting...\n",
1109 bmname(bitmap));
1110 err = -EINTR;
1111 goto out;
1112 }
32a7627c 1113
77ad4bc7
N
1114 PRINTK("%s: bitmap writeback daemon woke up...\n", bmname(bitmap));
1115 /* wait on bitmap page writebacks */
1116 while ((item = dequeue_page(bitmap))) {
1117 page = item->page;
1118 mempool_free(item, bitmap->write_pool);
1119 PRINTK("wait on page writeback: %p\n", page);
1120 wait_on_page_writeback(page);
1121 PRINTK("finished page writeback: %p\n", page);
1122
1123 err = PageError(page);
1124 page_cache_release(page);
1125 if (err) {
1126 printk(KERN_WARNING "%s: bitmap file writeback "
1127 "failed (page %lu): %d\n",
1128 bmname(bitmap), page->index, err);
1129 bitmap_file_kick(bitmap);
1130 goto out;
32a7627c
N
1131 }
1132 }
77ad4bc7
N
1133 out:
1134 wake_up(&bitmap->write_wait);
32a7627c
N
1135 if (err) {
1136 printk(KERN_INFO "%s: bitmap writeback daemon exiting (%d)\n",
77ad4bc7 1137 bmname(bitmap), err);
32a7627c
N
1138 daemon_exit(bitmap, &bitmap->writeback_daemon);
1139 }
32a7627c
N
1140}
1141
1142static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,
1143 void (*func)(mddev_t *), char *name)
1144{
1145 mdk_thread_t *daemon;
1146 unsigned long flags;
1147 char namebuf[32];
1148
1149 spin_lock_irqsave(&bitmap->lock, flags);
1150 *ptr = NULL;
a654b9d8 1151
32a7627c
N
1152 if (!bitmap->file) /* no need for daemon if there's no backing file */
1153 goto out_unlock;
1154
1155 spin_unlock_irqrestore(&bitmap->lock, flags);
1156
1157#if INJECT_FATAL_FAULT_2
1158 daemon = NULL;
1159#else
1160 sprintf(namebuf, "%%s_%s", name);
1161 daemon = md_register_thread(func, bitmap->mddev, namebuf);
1162#endif
1163 if (!daemon) {
1164 printk(KERN_ERR "%s: failed to start bitmap daemon\n",
1165 bmname(bitmap));
1166 return -ECHILD;
1167 }
1168
1169 spin_lock_irqsave(&bitmap->lock, flags);
1170 *ptr = daemon;
1171
1172 md_wakeup_thread(daemon); /* start it running */
1173
1174 PRINTK("%s: %s daemon (pid %d) started...\n",
d80a138c 1175 bmname(bitmap), name, daemon->tsk->pid);
32a7627c
N
1176out_unlock:
1177 spin_unlock_irqrestore(&bitmap->lock, flags);
1178 return 0;
1179}
1180
1181static int bitmap_start_daemons(struct bitmap *bitmap)
1182{
1183 int err = bitmap_start_daemon(bitmap, &bitmap->writeback_daemon,
1184 bitmap_writeback_daemon, "bitmap_wb");
1185 return err;
1186}
1187
1188static void bitmap_stop_daemon(struct bitmap *bitmap, mdk_thread_t **ptr)
1189{
1190 mdk_thread_t *daemon;
1191 unsigned long flags;
1192
1193 spin_lock_irqsave(&bitmap->lock, flags);
1194 daemon = *ptr;
1195 *ptr = NULL;
1196 spin_unlock_irqrestore(&bitmap->lock, flags);
1197 if (daemon)
1198 md_unregister_thread(daemon); /* destroy the thread */
1199}
1200
1201static void bitmap_stop_daemons(struct bitmap *bitmap)
1202{
1203 /* the daemons can't stop themselves... they'll just exit instead... */
1204 if (bitmap->writeback_daemon &&
1205 current->pid != bitmap->writeback_daemon->tsk->pid)
1206 bitmap_stop_daemon(bitmap, &bitmap->writeback_daemon);
1207}
1208
1209static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
1210 sector_t offset, int *blocks,
1211 int create)
1212{
1213 /* If 'create', we might release the lock and reclaim it.
1214 * The lock must have been taken with interrupts enabled.
1215 * If !create, we don't release the lock.
1216 */
1217 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
1218 unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
1219 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
1220 sector_t csize;
1221
1222 if (bitmap_checkpage(bitmap, page, create) < 0) {
1223 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1224 *blocks = csize - (offset & (csize- 1));
1225 return NULL;
1226 }
1227 /* now locked ... */
1228
1229 if (bitmap->bp[page].hijacked) { /* hijacked pointer */
1230 /* should we use the first or second counter field
1231 * of the hijacked pointer? */
1232 int hi = (pageoff > PAGE_COUNTER_MASK);
1233 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
1234 PAGE_COUNTER_SHIFT - 1);
1235 *blocks = csize - (offset & (csize- 1));
1236 return &((bitmap_counter_t *)
1237 &bitmap->bp[page].map)[hi];
1238 } else { /* page is allocated */
1239 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
1240 *blocks = csize - (offset & (csize- 1));
1241 return (bitmap_counter_t *)
1242 &(bitmap->bp[page].map[pageoff]);
1243 }
1244}
1245
1246int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors)
1247{
1248 if (!bitmap) return 0;
1249 while (sectors) {
1250 int blocks;
1251 bitmap_counter_t *bmc;
1252
1253 spin_lock_irq(&bitmap->lock);
1254 bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
1255 if (!bmc) {
1256 spin_unlock_irq(&bitmap->lock);
1257 return 0;
1258 }
1259
1260 switch(*bmc) {
1261 case 0:
1262 bitmap_file_set_bit(bitmap, offset);
1263 bitmap_count_page(bitmap,offset, 1);
1264 blk_plug_device(bitmap->mddev->queue);
1265 /* fall through */
1266 case 1:
1267 *bmc = 2;
1268 }
1269 if ((*bmc & COUNTER_MAX) == COUNTER_MAX) BUG();
1270 (*bmc)++;
1271
1272 spin_unlock_irq(&bitmap->lock);
1273
1274 offset += blocks;
1275 if (sectors > blocks)
1276 sectors -= blocks;
1277 else sectors = 0;
1278 }
1279 return 0;
1280}
1281
1282void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
1283 int success)
1284{
1285 if (!bitmap) return;
1286 while (sectors) {
1287 int blocks;
1288 unsigned long flags;
1289 bitmap_counter_t *bmc;
1290
1291 spin_lock_irqsave(&bitmap->lock, flags);
1292 bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
1293 if (!bmc) {
1294 spin_unlock_irqrestore(&bitmap->lock, flags);
1295 return;
1296 }
1297
1298 if (!success && ! (*bmc & NEEDED_MASK))
1299 *bmc |= NEEDED_MASK;
1300
1301 (*bmc)--;
1302 if (*bmc <= 2) {
1303 set_page_attr(bitmap,
1304 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1305 BITMAP_PAGE_CLEAN);
1306 }
1307 spin_unlock_irqrestore(&bitmap->lock, flags);
1308 offset += blocks;
1309 if (sectors > blocks)
1310 sectors -= blocks;
1311 else sectors = 0;
1312 }
1313}
1314
1315int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks)
1316{
1317 bitmap_counter_t *bmc;
1318 int rv;
1319 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
1320 *blocks = 1024;
1321 return 1; /* always resync if no bitmap */
1322 }
1323 spin_lock_irq(&bitmap->lock);
1324 bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1325 rv = 0;
1326 if (bmc) {
1327 /* locked */
1328 if (RESYNC(*bmc))
1329 rv = 1;
1330 else if (NEEDED(*bmc)) {
1331 rv = 1;
1332 *bmc |= RESYNC_MASK;
1333 *bmc &= ~NEEDED_MASK;
1334 }
1335 }
1336 spin_unlock_irq(&bitmap->lock);
1337 return rv;
1338}
1339
1340void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
1341{
1342 bitmap_counter_t *bmc;
1343 unsigned long flags;
1344/*
1345 if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted);
1346*/ if (bitmap == NULL) {
1347 *blocks = 1024;
1348 return;
1349 }
1350 spin_lock_irqsave(&bitmap->lock, flags);
1351 bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
1352 if (bmc == NULL)
1353 goto unlock;
1354 /* locked */
1355/*
1356 if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks);
1357*/
1358 if (RESYNC(*bmc)) {
1359 *bmc &= ~RESYNC_MASK;
1360
1361 if (!NEEDED(*bmc) && aborted)
1362 *bmc |= NEEDED_MASK;
1363 else {
1364 if (*bmc <= 2) {
1365 set_page_attr(bitmap,
1366 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
1367 BITMAP_PAGE_CLEAN);
1368 }
1369 }
1370 }
1371 unlock:
1372 spin_unlock_irqrestore(&bitmap->lock, flags);
1373}
1374
1375void bitmap_close_sync(struct bitmap *bitmap)
1376{
1377 /* Sync has finished, and any bitmap chunks that weren't synced
1378 * properly have been aborted. It remains to us to clear the
1379 * RESYNC bit wherever it is still on
1380 */
1381 sector_t sector = 0;
1382 int blocks;
1383 if (!bitmap) return;
1384 while (sector < bitmap->mddev->resync_max_sectors) {
1385 bitmap_end_sync(bitmap, sector, &blocks, 0);
1386/*
1387 if (sector < 500) printk("bitmap_close_sync: sec %llu blks %d\n",
1388 (unsigned long long)sector, blocks);
1389*/ sector += blocks;
1390 }
1391}
1392
1393static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset,
cdbb4cc2 1394 unsigned long sectors, int in_sync)
32a7627c
N
1395{
1396 /* For each chunk covered by any of these sectors, set the
cdbb4cc2 1397 * counter to 1 and set resync_needed unless in_sync. They should all
32a7627c
N
1398 * be 0 at this point
1399 */
1400 while (sectors) {
1401 int secs;
1402 bitmap_counter_t *bmc;
1403 spin_lock_irq(&bitmap->lock);
1404 bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
1405 if (!bmc) {
1406 spin_unlock_irq(&bitmap->lock);
1407 return;
1408 }
cdbb4cc2
N
1409 if (! *bmc) {
1410 struct page *page;
1411 *bmc = 1 | (in_sync? 0 : NEEDED_MASK);
32a7627c 1412 bitmap_count_page(bitmap, offset, 1);
cdbb4cc2
N
1413 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
1414 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
32a7627c
N
1415 }
1416 spin_unlock_irq(&bitmap->lock);
1417 if (sectors > secs)
1418 sectors -= secs;
1419 else
1420 sectors = 0;
1421 }
1422}
1423
1424/* dirty the entire bitmap */
1425int bitmap_setallbits(struct bitmap *bitmap)
1426{
1427 unsigned long flags;
1428 unsigned long j;
1429
1430 /* dirty the in-memory bitmap */
1431 bitmap_set_memory_bits(bitmap, 0, bitmap->chunks << CHUNK_BLOCK_SHIFT(bitmap), 1);
1432
1433 /* dirty the bitmap file */
1434 for (j = 0; j < bitmap->file_pages; j++) {
1435 struct page *page = bitmap->filemap[j];
1436
1437 spin_lock_irqsave(&bitmap->lock, flags);
1438 page_cache_get(page);
1439 spin_unlock_irqrestore(&bitmap->lock, flags);
1440 memset(kmap(page), 0xff, PAGE_SIZE);
1441 kunmap(page);
77ad4bc7 1442 if (write_page(bitmap, page, 0))
bfb39fba 1443 return 1;
32a7627c
N
1444 }
1445
1446 return 0;
1447}
1448
1449/*
1450 * free memory that was allocated
1451 */
1452void bitmap_destroy(mddev_t *mddev)
1453{
1454 unsigned long k, pages;
1455 struct bitmap_page *bp;
1456 struct bitmap *bitmap = mddev->bitmap;
1457
1458 if (!bitmap) /* there was no bitmap */
1459 return;
1460
1461 mddev->bitmap = NULL; /* disconnect from the md device */
1462
1463 /* release the bitmap file and kill the daemon */
1464 bitmap_file_put(bitmap);
1465
1466 bp = bitmap->bp;
1467 pages = bitmap->pages;
1468
1469 /* free all allocated memory */
1470
1471 mempool_destroy(bitmap->write_pool);
1472
1473 if (bp) /* deallocate the page memory */
1474 for (k = 0; k < pages; k++)
1475 if (bp[k].map && !bp[k].hijacked)
1476 kfree(bp[k].map);
1477 kfree(bp);
1478 kfree(bitmap);
1479}
1480
1481/*
1482 * initialize the bitmap structure
1483 * if this returns an error, bitmap_destroy must be called to do clean up
1484 */
1485int bitmap_create(mddev_t *mddev)
1486{
1487 struct bitmap *bitmap;
1488 unsigned long blocks = mddev->resync_max_sectors;
1489 unsigned long chunks;
1490 unsigned long pages;
1491 struct file *file = mddev->bitmap_file;
1492 int err;
1493
1494 BUG_ON(sizeof(bitmap_super_t) != 256);
1495
a654b9d8 1496 if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
32a7627c
N
1497 return 0;
1498
a654b9d8
N
1499 BUG_ON(file && mddev->bitmap_offset);
1500
32a7627c
N
1501 bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
1502 if (!bitmap)
1503 return -ENOMEM;
1504
1505 memset(bitmap, 0, sizeof(*bitmap));
1506
1507 spin_lock_init(&bitmap->lock);
1508 bitmap->mddev = mddev;
1509 mddev->bitmap = bitmap;
1510
1511 spin_lock_init(&bitmap->write_lock);
32a7627c
N
1512 INIT_LIST_HEAD(&bitmap->complete_pages);
1513 init_waitqueue_head(&bitmap->write_wait);
1514 bitmap->write_pool = mempool_create(WRITE_POOL_SIZE, write_pool_alloc,
1515 write_pool_free, NULL);
1516 if (!bitmap->write_pool)
1517 return -ENOMEM;
1518
1519 bitmap->file = file;
a654b9d8
N
1520 bitmap->offset = mddev->bitmap_offset;
1521 if (file) get_file(file);
32a7627c
N
1522 /* read superblock from bitmap file (this sets bitmap->chunksize) */
1523 err = bitmap_read_sb(bitmap);
1524 if (err)
1525 return err;
1526
1527 bitmap->chunkshift = find_first_bit(&bitmap->chunksize,
1528 sizeof(bitmap->chunksize));
1529
1530 /* now that chunksize and chunkshift are set, we can use these macros */
1531 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) /
1532 CHUNK_BLOCK_RATIO(bitmap);
1533 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;
1534
1535 BUG_ON(!pages);
1536
1537 bitmap->chunks = chunks;
1538 bitmap->pages = pages;
1539 bitmap->missing_pages = pages;
1540 bitmap->counter_bits = COUNTER_BITS;
1541
1542 bitmap->syncchunk = ~0UL;
1543
1544#if INJECT_FATAL_FAULT_1
1545 bitmap->bp = NULL;
1546#else
1547 bitmap->bp = kmalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL);
1548#endif
1549 if (!bitmap->bp)
1550 return -ENOMEM;
1551 memset(bitmap->bp, 0, pages * sizeof(*bitmap->bp));
1552
1553 bitmap->flags |= BITMAP_ACTIVE;
1554
1555 /* now that we have some pages available, initialize the in-memory
1556 * bitmap from the on-disk bitmap */
cdbb4cc2 1557 err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector);
32a7627c
N
1558 if (err)
1559 return err;
1560
1561 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
1562 pages, bmname(bitmap));
1563
1564 /* kick off the bitmap daemons */
1565 err = bitmap_start_daemons(bitmap);
1566 if (err)
1567 return err;
1568 return bitmap_update_sb(bitmap);
1569}
1570
1571/* the bitmap API -- for raid personalities */
1572EXPORT_SYMBOL(bitmap_startwrite);
1573EXPORT_SYMBOL(bitmap_endwrite);
1574EXPORT_SYMBOL(bitmap_start_sync);
1575EXPORT_SYMBOL(bitmap_end_sync);
1576EXPORT_SYMBOL(bitmap_unplug);
1577EXPORT_SYMBOL(bitmap_close_sync);
1578EXPORT_SYMBOL(bitmap_daemon_work);