1/*
2 * dm-snapshot.c
3 *
4 * Copyright (C) 2001-2002 Sistina Software (UK) Limited.
5 *
6 * This file is released under the GPL.
7 */
8
9#include <linux/blkdev.h>
10#include <linux/device-mapper.h>
11#include <linux/delay.h>
12#include <linux/fs.h>
13#include <linux/init.h>
14#include <linux/kdev_t.h>
15#include <linux/list.h>
16#include <linux/list_bl.h>
17#include <linux/mempool.h>
18#include <linux/module.h>
19#include <linux/slab.h>
20#include <linux/vmalloc.h>
21#include <linux/log2.h>
22#include <linux/dm-kcopyd.h>
23#include <linux/semaphore.h>
24
25#include "dm.h"
26
27#include "dm-exception-store.h"
28
29#define DM_MSG_PREFIX "snapshots"
30
31static const char dm_snapshot_merge_target_name[] = "snapshot-merge";
32
33#define dm_target_is_snapshot_merge(ti) \
34 ((ti)->type->name == dm_snapshot_merge_target_name)
35
36/*
37 * The size of the mempool used to track chunks in use.
38 */
39#define MIN_IOS 256
40
41#define DM_TRACKED_CHUNK_HASH_SIZE 16
42#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
43 (DM_TRACKED_CHUNK_HASH_SIZE - 1))
44
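/*
 * Note: bios passed straight through to the origin are recorded in
 * tracked_chunk_hash (see track_chunk() below) so that a copy-out of the
 * same chunk can wait for them to drain via __check_for_conflicting_io().
 * The macro above is a simple power-of-two bucket index over the chunk
 * number.
 */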
45struct dm_exception_table {
46 uint32_t hash_mask;
47 unsigned hash_shift;
48 struct hlist_bl_head *table;
49};
50
51struct dm_snapshot {
52 struct rw_semaphore lock;
53
54 struct dm_dev *origin;
55 struct dm_dev *cow;
56
57 struct dm_target *ti;
58
59 /* List of snapshots per Origin */
60 struct list_head list;
61
62 /*
63 * You can't use a snapshot if this is 0 (e.g. if full).
64 * A snapshot-merge target never clears this.
65 */
66 int valid;
67
68 /*
69 * The snapshot overflowed because of a write to the snapshot device.
70 * We don't have to invalidate the snapshot in this case, but we need
71 * to prevent further writes.
72 */
73 int snapshot_overflowed;
74
75 /* Origin writes don't trigger exceptions until this is set */
76 int active;
77
78 atomic_t pending_exceptions_count;
79
80 spinlock_t pe_allocation_lock;
81
82 /* Protected by "pe_allocation_lock" */
83 sector_t exception_start_sequence;
84
85 /* Protected by kcopyd single-threaded callback */
86 sector_t exception_complete_sequence;
87
88 /*
89 * A list of pending exceptions that completed out of order.
90 * Protected by kcopyd single-threaded callback.
91 */
92 struct rb_root out_of_order_tree;
93
94 mempool_t pending_pool;
95
96 struct dm_exception_table pending;
97 struct dm_exception_table complete;
98
99 /*
100 * pe_lock protects all pending_exception operations and access
101 * as well as the snapshot_bios list.
102 */
103 spinlock_t pe_lock;
104
105 /* Chunks with outstanding reads */
106 spinlock_t tracked_chunk_lock;
107 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
108
109 /* The on disk metadata handler */
110 struct dm_exception_store *store;
111
112 /* Maximum number of in-flight COW jobs. */
113 struct semaphore cow_count;
114
115 struct dm_kcopyd_client *kcopyd_client;
116
117 /* Wait for events based on state_bits */
118 unsigned long state_bits;
119
120 /* Range of chunks currently being merged. */
121 chunk_t first_merging_chunk;
122 int num_merging_chunks;
123
124 /*
125 * The merge operation failed if this flag is set.
126 * Failure modes are handled as follows:
127 * - I/O error reading the header
128 * => don't load the target; abort.
129 * - Header does not have "valid" flag set
130 * => use the origin; forget about the snapshot.
131 * - I/O error when reading exceptions
132 * => don't load the target; abort.
133 * (We can't use the intermediate origin state.)
134 * - I/O error while merging
135 * => stop merging; set merge_failed; process I/O normally.
136 */
137 int merge_failed;
138
139 /*
140 * Incoming bios that overlap with chunks being merged must wait
141 * for them to be committed.
142 */
143 struct bio_list bios_queued_during_merge;
144};
145
146/*
147 * state_bits:
148 * RUNNING_MERGE - Merge operation is in progress.
149 * SHUTDOWN_MERGE - Set to signal that merge needs to be stopped;
150 * cleared afterwards.
151 */
152#define RUNNING_MERGE 0
153#define SHUTDOWN_MERGE 1
154
155/*
156 * Maximum number of chunks being copied on write.
157 *
158 * The value was decided experimentally as a trade-off between memory
159 * consumption, stalling the kernel's workqueues and maintaining a high enough
160 * throughput.
161 */
162#define DEFAULT_COW_THRESHOLD 2048
163
164static int cow_threshold = DEFAULT_COW_THRESHOLD;
165module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644);
166MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write");
167
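/*
 * cow_threshold bounds how many chunks may be in flight to the COW device at
 * once: start_copy() and start_full_bio() down s->cow_count before handing a
 * chunk to kcopyd, and copy_callback() ups it again when the copy completes.
 */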
168DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle,
169 "A percentage of time allocated for copy on write");
170
171struct dm_dev *dm_snap_origin(struct dm_snapshot *s)
172{
173 return s->origin;
174}
175EXPORT_SYMBOL(dm_snap_origin);
176
177struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
178{
179 return s->cow;
180}
181EXPORT_SYMBOL(dm_snap_cow);
182
183static sector_t chunk_to_sector(struct dm_exception_store *store,
184 chunk_t chunk)
185{
186 return chunk << store->chunk_shift;
187}
188
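/*
 * chunk_to_sector() is a plain shift: e.g. with 8KiB chunks (16 sectors,
 * chunk_shift == 4), chunk 3 starts at sector 48.  The numbers here are
 * illustrative only.
 */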
189static int bdev_equal(struct block_device *lhs, struct block_device *rhs)
190{
191 /*
192 * There is only ever one instance of a particular block
193 * device so we can compare pointers safely.
194 */
195 return lhs == rhs;
196}
197
198struct dm_snap_pending_exception {
199 struct dm_exception e;
200
201 /*
202 * Origin buffers waiting for this to complete are held
203 * in a bio list
204 */
205 struct bio_list origin_bios;
206 struct bio_list snapshot_bios;
207
208 /* Pointer back to snapshot context */
209 struct dm_snapshot *snap;
210
211 /*
212 * 1 indicates the exception has already been sent to
213 * kcopyd.
214 */
215 int started;
216
217 /* There was copying error. */
218 int copy_error;
219
220 /* A sequence number, it is used for in-order completion. */
221 sector_t exception_sequence;
222
223 struct rb_node out_of_order_node;
224
225 /*
226 * For writing a complete chunk, bypassing the copy.
227 */
228 struct bio *full_bio;
229 bio_end_io_t *full_bio_end_io;
230};
231
232/*
233 * Hash table mapping origin volumes to lists of snapshots and
234 * a lock to protect it
235 */
236static struct kmem_cache *exception_cache;
237static struct kmem_cache *pending_cache;
238
239struct dm_snap_tracked_chunk {
240 struct hlist_node node;
241 chunk_t chunk;
242};
243
244static void init_tracked_chunk(struct bio *bio)
245{
246 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
247 INIT_HLIST_NODE(&c->node);
248}
249
250static bool is_bio_tracked(struct bio *bio)
251{
252 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
253 return !hlist_unhashed(&c->node);
254}
255
256static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk)
257{
258 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
259
260 c->chunk = chunk;
261
262 spin_lock_irq(&s->tracked_chunk_lock);
263 hlist_add_head(&c->node,
264 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]);
265 spin_unlock_irq(&s->tracked_chunk_lock);
266}
267
268static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio)
269{
270 struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
271 unsigned long flags;
272
273 spin_lock_irqsave(&s->tracked_chunk_lock, flags);
274 hlist_del(&c->node);
275 spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);
276}
277
278static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk)
279{
280 struct dm_snap_tracked_chunk *c;
281 int found = 0;
282
283 spin_lock_irq(&s->tracked_chunk_lock);
284
285 hlist_for_each_entry(c,
286 &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)], node) {
287 if (c->chunk == chunk) {
288 found = 1;
289 break;
290 }
291 }
292
293 spin_unlock_irq(&s->tracked_chunk_lock);
294
295 return found;
296}
297
298/*
299 * This conflicting I/O is extremely improbable in the caller,
300 * so msleep(1) is sufficient and there is no need for a wait queue.
301 */
302static void __check_for_conflicting_io(struct dm_snapshot *s, chunk_t chunk)
303{
304 while (__chunk_is_tracked(s, chunk))
305 msleep(1);
306}
307
308/*
309 * One of these per registered origin, held in the snapshot_origins hash
310 */
311struct origin {
312 /* The origin device */
313 struct block_device *bdev;
314
315 struct list_head hash_list;
316
317 /* List of snapshots for this origin */
318 struct list_head snapshots;
319};
320
321/*
322 * This structure is allocated for each origin target
323 */
324struct dm_origin {
325 struct dm_dev *dev;
326 struct dm_target *ti;
327 unsigned split_boundary;
328 struct list_head hash_list;
329};
330
331/*
332 * Size of the hash table for origin volumes. If we make this
333 * the size of the minors list then it should be nearly perfect
334 */
335#define ORIGIN_HASH_SIZE 256
336#define ORIGIN_MASK 0xFF
337static struct list_head *_origins;
338static struct list_head *_dm_origins;
339static struct rw_semaphore _origins_lock;
340
341static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
342static DEFINE_SPINLOCK(_pending_exceptions_done_spinlock);
343static uint64_t _pending_exceptions_done_count;
344
345static int init_origin_hash(void)
346{
347 int i;
348
349 _origins = kmalloc_array(ORIGIN_HASH_SIZE, sizeof(struct list_head),
350 GFP_KERNEL);
351 if (!_origins) {
352 DMERR("unable to allocate memory for _origins");
353 return -ENOMEM;
354 }
355 for (i = 0; i < ORIGIN_HASH_SIZE; i++)
356 INIT_LIST_HEAD(_origins + i);
357
358 _dm_origins = kmalloc_array(ORIGIN_HASH_SIZE,
359 sizeof(struct list_head),
360 GFP_KERNEL);
361 if (!_dm_origins) {
362 DMERR("unable to allocate memory for _dm_origins");
363 kfree(_origins);
364 return -ENOMEM;
365 }
366 for (i = 0; i < ORIGIN_HASH_SIZE; i++)
367 INIT_LIST_HEAD(_dm_origins + i);
368
369 init_rwsem(&_origins_lock);
370
371 return 0;
372}
373
374static void exit_origin_hash(void)
375{
376 kfree(_origins);
377 kfree(_dm_origins);
378}
379
380static unsigned origin_hash(struct block_device *bdev)
381{
382 return bdev->bd_dev & ORIGIN_MASK;
383}
384
385static struct origin *__lookup_origin(struct block_device *origin)
386{
387 struct list_head *ol;
388 struct origin *o;
389
390 ol = &_origins[origin_hash(origin)];
391 list_for_each_entry (o, ol, hash_list)
392 if (bdev_equal(o->bdev, origin))
393 return o;
394
395 return NULL;
396}
397
398static void __insert_origin(struct origin *o)
399{
400 struct list_head *sl = &_origins[origin_hash(o->bdev)];
401 list_add_tail(&o->hash_list, sl);
402}
403
404static struct dm_origin *__lookup_dm_origin(struct block_device *origin)
405{
406 struct list_head *ol;
407 struct dm_origin *o;
408
409 ol = &_dm_origins[origin_hash(origin)];
410 list_for_each_entry (o, ol, hash_list)
411 if (bdev_equal(o->dev->bdev, origin))
412 return o;
413
414 return NULL;
415}
416
417static void __insert_dm_origin(struct dm_origin *o)
418{
419 struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)];
420 list_add_tail(&o->hash_list, sl);
421}
422
423static void __remove_dm_origin(struct dm_origin *o)
424{
425 list_del(&o->hash_list);
426}
427
428/*
429 * _origins_lock must be held when calling this function.
430 * Returns number of snapshots registered using the supplied cow device, plus:
431 * snap_src - a snapshot suitable for use as a source of exception handover
432 * snap_dest - a snapshot capable of receiving exception handover.
433 * snap_merge - an existing snapshot-merge target linked to the same origin.
434 * There can be at most one snapshot-merge target. The parameter is optional.
435 *
436 * Possible return values and states of snap_src and snap_dest.
437 * 0: NULL, NULL - first new snapshot
438 * 1: snap_src, NULL - normal snapshot
439 * 2: snap_src, snap_dest - waiting for handover
440 * 2: snap_src, NULL - handed over, waiting for old to be deleted
441 * 1: NULL, snap_dest - source got destroyed without handover
442 */
443static int __find_snapshots_sharing_cow(struct dm_snapshot *snap,
444 struct dm_snapshot **snap_src,
445 struct dm_snapshot **snap_dest,
446 struct dm_snapshot **snap_merge)
447{
448 struct dm_snapshot *s;
449 struct origin *o;
450 int count = 0;
451 int active;
452
453 o = __lookup_origin(snap->origin->bdev);
454 if (!o)
455 goto out;
456
457 list_for_each_entry(s, &o->snapshots, list) {
458 if (dm_target_is_snapshot_merge(s->ti) && snap_merge)
459 *snap_merge = s;
460 if (!bdev_equal(s->cow->bdev, snap->cow->bdev))
461 continue;
462
463 down_read(&s->lock);
464 active = s->active;
465 up_read(&s->lock);
466
467 if (active) {
468 if (snap_src)
469 *snap_src = s;
470 } else if (snap_dest)
471 *snap_dest = s;
472
473 count++;
474 }
475
476out:
477 return count;
478}
479
480/*
481 * On success, returns 1 if this snapshot is a handover destination,
482 * otherwise returns 0.
483 */
484static int __validate_exception_handover(struct dm_snapshot *snap)
485{
486 struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
487 struct dm_snapshot *snap_merge = NULL;
488
489 /* Does snapshot need exceptions handed over to it? */
490 if ((__find_snapshots_sharing_cow(snap, &snap_src, &snap_dest,
491 &snap_merge) == 2) ||
492 snap_dest) {
493 snap->ti->error = "Snapshot cow pairing for exception "
494 "table handover failed";
495 return -EINVAL;
496 }
497
498 /*
499 * If no snap_src was found, snap cannot become a handover
500 * destination.
501 */
502 if (!snap_src)
503 return 0;
504
505 /*
506 * Non-snapshot-merge handover?
507 */
508 if (!dm_target_is_snapshot_merge(snap->ti))
509 return 1;
510
511 /*
512 * Do not allow more than one merging snapshot.
513 */
514 if (snap_merge) {
515 snap->ti->error = "A snapshot is already merging.";
516 return -EINVAL;
517 }
518
519 if (!snap_src->store->type->prepare_merge ||
520 !snap_src->store->type->commit_merge) {
521 snap->ti->error = "Snapshot exception store does not "
522 "support snapshot-merge.";
523 return -EINVAL;
524 }
525
526 return 1;
527}
528
529static void __insert_snapshot(struct origin *o, struct dm_snapshot *s)
530{
531 struct dm_snapshot *l;
532
533 /* Sort the list according to chunk size, largest-first smallest-last */
534 list_for_each_entry(l, &o->snapshots, list)
535 if (l->store->chunk_size < s->store->chunk_size)
536 break;
537 list_add_tail(&s->list, &l->list);
538}
539
540/*
541 * Make a note of the snapshot and its origin so we can look it
542 * up when the origin has a write on it.
543 *
544 * Also validate snapshot exception store handovers.
545 * On success, returns 1 if this registration is a handover destination,
546 * otherwise returns 0.
547 */
548static int register_snapshot(struct dm_snapshot *snap)
549{
550 struct origin *o, *new_o = NULL;
551 struct block_device *bdev = snap->origin->bdev;
552 int r = 0;
553
554 new_o = kmalloc(sizeof(*new_o), GFP_KERNEL);
555 if (!new_o)
556 return -ENOMEM;
557
558 down_write(&_origins_lock);
559
560 r = __validate_exception_handover(snap);
561 if (r < 0) {
562 kfree(new_o);
563 goto out;
564 }
565
566 o = __lookup_origin(bdev);
567 if (o)
568 kfree(new_o);
569 else {
570 /* New origin */
571 o = new_o;
572
573 /* Initialise the struct */
574 INIT_LIST_HEAD(&o->snapshots);
575 o->bdev = bdev;
576
577 __insert_origin(o);
578 }
579
580 __insert_snapshot(o, snap);
581
582out:
583 up_write(&_origins_lock);
584
585 return r;
586}
587
588/*
589 * Move snapshot to correct place in list according to chunk size.
590 */
591static void reregister_snapshot(struct dm_snapshot *s)
592{
593 struct block_device *bdev = s->origin->bdev;
594
595 down_write(&_origins_lock);
596
597 list_del(&s->list);
598 __insert_snapshot(__lookup_origin(bdev), s);
599
600 up_write(&_origins_lock);
601}
602
603static void unregister_snapshot(struct dm_snapshot *s)
604{
605 struct origin *o;
606
607 down_write(&_origins_lock);
608 o = __lookup_origin(s->origin->bdev);
609
610 list_del(&s->list);
611 if (o && list_empty(&o->snapshots)) {
612 list_del(&o->hash_list);
613 kfree(o);
614 }
615
616 up_write(&_origins_lock);
617}
618
619/*
620 * Implementation of the exception hash tables.
621 * The lowest hash_shift bits of the chunk number are ignored, allowing
622 * some consecutive chunks to be grouped together.
623 */
624static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk);
625
626/* Lock to protect access to the completed and pending exception hash tables. */
627struct dm_exception_table_lock {
628 struct hlist_bl_head *complete_slot;
629 struct hlist_bl_head *pending_slot;
630};
631
632static void dm_exception_table_lock_init(struct dm_snapshot *s, chunk_t chunk,
633 struct dm_exception_table_lock *lock)
634{
635 struct dm_exception_table *complete = &s->complete;
636 struct dm_exception_table *pending = &s->pending;
637
638 lock->complete_slot = &complete->table[exception_hash(complete, chunk)];
639 lock->pending_slot = &pending->table[exception_hash(pending, chunk)];
640}
641
642static void dm_exception_table_lock(struct dm_exception_table_lock *lock)
643{
644 hlist_bl_lock(lock->complete_slot);
645 hlist_bl_lock(lock->pending_slot);
646}
647
648static void dm_exception_table_unlock(struct dm_exception_table_lock *lock)
649{
650 hlist_bl_unlock(lock->pending_slot);
651 hlist_bl_unlock(lock->complete_slot);
652}
653
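/*
 * Both slots are always taken in the same order (complete first, then
 * pending) and released in reverse, so two users contending on the same
 * chunk cannot deadlock against each other.
 */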
654static int dm_exception_table_init(struct dm_exception_table *et,
655 uint32_t size, unsigned hash_shift)
656{
657 unsigned int i;
658
659 et->hash_shift = hash_shift;
660 et->hash_mask = size - 1;
661 et->table = dm_vcalloc(size, sizeof(struct hlist_bl_head));
662 if (!et->table)
663 return -ENOMEM;
664
665 for (i = 0; i < size; i++)
666 INIT_HLIST_BL_HEAD(et->table + i);
667
668 return 0;
669}
670
671static void dm_exception_table_exit(struct dm_exception_table *et,
672 struct kmem_cache *mem)
673{
674 struct hlist_bl_head *slot;
675 struct dm_exception *ex;
676 struct hlist_bl_node *pos, *n;
677 int i, size;
678
679 size = et->hash_mask + 1;
680 for (i = 0; i < size; i++) {
681 slot = et->table + i;
682
683 hlist_bl_for_each_entry_safe(ex, pos, n, slot, hash_list)
684 kmem_cache_free(mem, ex);
685 }
686
687 vfree(et->table);
688}
689
690static uint32_t exception_hash(struct dm_exception_table *et, chunk_t chunk)
691{
692 return (chunk >> et->hash_shift) & et->hash_mask;
693}
694
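/*
 * Because the low hash_shift bits are shifted away, a run of consecutive
 * chunks maps to the same bucket, which is what allows dm_insert_exception()
 * below to coalesce them into a single dm_exception.
 */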
695static void dm_remove_exception(struct dm_exception *e)
696{
697 hlist_bl_del(&e->hash_list);
698}
699
700/*
701 * Return the exception data for a sector, or NULL if not
702 * remapped.
703 */
704static struct dm_exception *dm_lookup_exception(struct dm_exception_table *et,
705 chunk_t chunk)
706{
707 struct hlist_bl_head *slot;
708 struct hlist_bl_node *pos;
709 struct dm_exception *e;
710
711 slot = &et->table[exception_hash(et, chunk)];
712 hlist_bl_for_each_entry(e, pos, slot, hash_list)
713 if (chunk >= e->old_chunk &&
714 chunk <= e->old_chunk + dm_consecutive_chunk_count(e))
715 return e;
716
717 return NULL;
718}
719
720static struct dm_exception *alloc_completed_exception(gfp_t gfp)
721{
722 struct dm_exception *e;
723
724 e = kmem_cache_alloc(exception_cache, gfp);
725 if (!e && gfp == GFP_NOIO)
726 e = kmem_cache_alloc(exception_cache, GFP_ATOMIC);
727
728 return e;
729}
730
731static void free_completed_exception(struct dm_exception *e)
732{
733 kmem_cache_free(exception_cache, e);
734}
735
736static struct dm_snap_pending_exception *alloc_pending_exception(struct dm_snapshot *s)
737{
738 struct dm_snap_pending_exception *pe = mempool_alloc(&s->pending_pool,
739 GFP_NOIO);
740
741 atomic_inc(&s->pending_exceptions_count);
742 pe->snap = s;
743
744 return pe;
745}
746
747static void free_pending_exception(struct dm_snap_pending_exception *pe)
748{
749 struct dm_snapshot *s = pe->snap;
750
751 mempool_free(pe, &s->pending_pool);
752 smp_mb__before_atomic();
753 atomic_dec(&s->pending_exceptions_count);
754}
755
756static void dm_insert_exception(struct dm_exception_table *eh,
757 struct dm_exception *new_e)
758{
759 struct hlist_bl_head *l;
760 struct hlist_bl_node *pos;
761 struct dm_exception *e = NULL;
762
763 l = &eh->table[exception_hash(eh, new_e->old_chunk)];
764
765 /* Add immediately if this table doesn't support consecutive chunks */
766 if (!eh->hash_shift)
767 goto out;
768
769 /* List is ordered by old_chunk */
770 hlist_bl_for_each_entry(e, pos, l, hash_list) {
771 /* Insert after an existing chunk? */
772 if (new_e->old_chunk == (e->old_chunk +
773 dm_consecutive_chunk_count(e) + 1) &&
774 new_e->new_chunk == (dm_chunk_number(e->new_chunk) +
775 dm_consecutive_chunk_count(e) + 1)) {
776 dm_consecutive_chunk_count_inc(e);
777 free_completed_exception(new_e);
778 return;
779 }
780
781 /* Insert before an existing chunk? */
782 if (new_e->old_chunk == (e->old_chunk - 1) &&
783 new_e->new_chunk == (dm_chunk_number(e->new_chunk) - 1)) {
784 dm_consecutive_chunk_count_inc(e);
785 e->old_chunk--;
786 e->new_chunk--;
787 free_completed_exception(new_e);
788 return;
789 }
790
791 if (new_e->old_chunk < e->old_chunk)
792 break;
793 }
794
795out:
796 if (!e) {
797 /*
798 * Either the table doesn't support consecutive chunks or slot
799 * l is empty.
800 */
801 hlist_bl_add_head(&new_e->hash_list, l);
802 } else if (new_e->old_chunk < e->old_chunk) {
803 /* Add before an existing exception */
804 hlist_bl_add_before(&new_e->hash_list, &e->hash_list);
805 } else {
806 /* Add to l's tail: e is the last exception in this slot */
807 hlist_bl_add_behind(&new_e->hash_list, &e->hash_list);
808 }
809}
810
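/*
 * Illustrative example of the coalescing above: if a bucket already holds an
 * exception old_chunk 10 -> new_chunk 20 with a consecutive count of 1
 * (i.e. chunks 10-11 map to 20-21), inserting old 12 -> new 22 simply bumps
 * the count instead of adding a new entry.
 */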
811/*
812 * Callback used by the exception stores to load exceptions when
813 * initialising.
814 */
815static int dm_add_exception(void *context, chunk_t old, chunk_t new)
816{
817 struct dm_exception_table_lock lock;
818 struct dm_snapshot *s = context;
819 struct dm_exception *e;
820
821 e = alloc_completed_exception(GFP_KERNEL);
822 if (!e)
823 return -ENOMEM;
824
825 e->old_chunk = old;
826
827 /* Consecutive_count is implicitly initialised to zero */
828 e->new_chunk = new;
829
830 /*
831 * Although there is no need to lock access to the exception tables
832 * here, if we don't then hlist_bl_add_head(), called by
833 * dm_insert_exception(), will complain about accessing the
834 * corresponding list without locking it first.
835 */
836 dm_exception_table_lock_init(s, old, &lock);
837
838 dm_exception_table_lock(&lock);
839 dm_insert_exception(&s->complete, e);
840 dm_exception_table_unlock(&lock);
841
842 return 0;
843}
844
845/*
846 * Return a minimum chunk size of all snapshots that have the specified origin.
847 * Return zero if the origin has no snapshots.
848 */
849static uint32_t __minimum_chunk_size(struct origin *o)
850{
851 struct dm_snapshot *snap;
852 unsigned chunk_size = 0;
853
854 if (o)
855 list_for_each_entry(snap, &o->snapshots, list)
856 chunk_size = min_not_zero(chunk_size,
857 snap->store->chunk_size);
858
859 return (uint32_t) chunk_size;
860}
861
862/*
863 * Hard coded magic.
864 */
865static int calc_max_buckets(void)
866{
867 /* use a fixed size of 2MB */
868 unsigned long mem = 2 * 1024 * 1024;
869 mem /= sizeof(struct hlist_bl_head);
870
871 return mem;
872}
873
874/*
875 * Allocate room for a suitable hash table.
876 */
877static int init_hash_tables(struct dm_snapshot *s)
878{
879 sector_t hash_size, cow_dev_size, max_buckets;
880
881 /*
882 * Calculate based on the size of the original volume or
883 * the COW volume...
884 */
885 cow_dev_size = get_dev_size(s->cow->bdev);
886 max_buckets = calc_max_buckets();
887
888 hash_size = cow_dev_size >> s->store->chunk_shift;
889 hash_size = min(hash_size, max_buckets);
890
891 if (hash_size < 64)
892 hash_size = 64;
893 hash_size = rounddown_pow_of_two(hash_size);
894 if (dm_exception_table_init(&s->complete, hash_size,
895 DM_CHUNK_CONSECUTIVE_BITS))
896 return -ENOMEM;
897
898 /*
899 * Allocate hash table for in-flight exceptions
900 * Make this smaller than the real hash table
901 */
902 hash_size >>= 3;
903 if (hash_size < 64)
904 hash_size = 64;
905
906 if (dm_exception_table_init(&s->pending, hash_size, 0)) {
907 dm_exception_table_exit(&s->complete, exception_cache);
908 return -ENOMEM;
909 }
910
911 return 0;
912}
913
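/*
 * Rough sizing example (illustrative, assuming an 8-byte hlist_bl_head): a
 * 1GiB COW device with 8KiB chunks yields 131072 chunks, below the
 * calc_max_buckets() cap of 2MiB worth of heads, so the completed-exception
 * table gets 131072 buckets and the pending table 131072 >> 3 = 16384.
 */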
914static void merge_shutdown(struct dm_snapshot *s)
915{
916 clear_bit_unlock(RUNNING_MERGE, &s->state_bits);
917 smp_mb__after_atomic();
918 wake_up_bit(&s->state_bits, RUNNING_MERGE);
919}
920
921static struct bio *__release_queued_bios_after_merge(struct dm_snapshot *s)
922{
923 s->first_merging_chunk = 0;
924 s->num_merging_chunks = 0;
925
926 return bio_list_get(&s->bios_queued_during_merge);
927}
928
929/*
930 * Remove one chunk from the index of completed exceptions.
931 */
932static int __remove_single_exception_chunk(struct dm_snapshot *s,
933 chunk_t old_chunk)
934{
935 struct dm_exception *e;
936
937 e = dm_lookup_exception(&s->complete, old_chunk);
938 if (!e) {
939 DMERR("Corruption detected: exception for block %llu is "
940 "on disk but not in memory",
941 (unsigned long long)old_chunk);
942 return -EINVAL;
943 }
944
945 /*
946 * If this is the only chunk using this exception, remove exception.
947 */
948 if (!dm_consecutive_chunk_count(e)) {
949 dm_remove_exception(e);
950 free_completed_exception(e);
951 return 0;
952 }
953
954 /*
955 * The chunk may be either at the beginning or the end of a
956 * group of consecutive chunks - never in the middle. We are
957 * removing chunks in the opposite order to that in which they
958 * were added, so this should always be true.
959 * Decrement the consecutive chunk counter and adjust the
960 * starting point if necessary.
961 */
962 if (old_chunk == e->old_chunk) {
963 e->old_chunk++;
964 e->new_chunk++;
965 } else if (old_chunk != e->old_chunk +
966 dm_consecutive_chunk_count(e)) {
967 DMERR("Attempt to merge block %llu from the "
968 "middle of a chunk range [%llu - %llu]",
969 (unsigned long long)old_chunk,
970 (unsigned long long)e->old_chunk,
971 (unsigned long long)
972 e->old_chunk + dm_consecutive_chunk_count(e));
973 return -EINVAL;
974 }
975
976 dm_consecutive_chunk_count_dec(e);
977
978 return 0;
979}
980
981static void flush_bios(struct bio *bio);
982
983static int remove_single_exception_chunk(struct dm_snapshot *s)
984{
985 struct bio *b = NULL;
986 int r;
987 chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1;
988
989 down_write(&s->lock);
990
991 /*
992 * Process chunks (and associated exceptions) in reverse order
993 * so that dm_consecutive_chunk_count_dec() accounting works.
994 */
995 do {
996 r = __remove_single_exception_chunk(s, old_chunk);
997 if (r)
998 goto out;
999 } while (old_chunk-- > s->first_merging_chunk);
1000
1001 b = __release_queued_bios_after_merge(s);
1002
1003out:
1004 up_write(&s->lock);
1005 if (b)
1006 flush_bios(b);
1007
1008 return r;
1009}
1010
1011static int origin_write_extent(struct dm_snapshot *merging_snap,
1012 sector_t sector, unsigned chunk_size);
1013
1014static void merge_callback(int read_err, unsigned long write_err,
1015 void *context);
1016
1017static uint64_t read_pending_exceptions_done_count(void)
1018{
1019 uint64_t pending_exceptions_done;
1020
1021 spin_lock(&_pending_exceptions_done_spinlock);
1022 pending_exceptions_done = _pending_exceptions_done_count;
1023 spin_unlock(&_pending_exceptions_done_spinlock);
1024
1025 return pending_exceptions_done;
1026}
1027
1028static void increment_pending_exceptions_done_count(void)
1029{
1030 spin_lock(&_pending_exceptions_done_spinlock);
1031 _pending_exceptions_done_count++;
1032 spin_unlock(&_pending_exceptions_done_spinlock);
1033
1034 wake_up_all(&_pending_exceptions_done);
1035}
1036
1037static void snapshot_merge_next_chunks(struct dm_snapshot *s)
1038{
1039 int i, linear_chunks;
1040 chunk_t old_chunk, new_chunk;
1041 struct dm_io_region src, dest;
1042 sector_t io_size;
1043 uint64_t previous_count;
1044
1045 BUG_ON(!test_bit(RUNNING_MERGE, &s->state_bits));
1046 if (unlikely(test_bit(SHUTDOWN_MERGE, &s->state_bits)))
1047 goto shut;
1048
1049 /*
1050 * valid flag never changes during merge, so no lock required.
1051 */
1052 if (!s->valid) {
1053 DMERR("Snapshot is invalid: can't merge");
1054 goto shut;
1055 }
1056
1057 linear_chunks = s->store->type->prepare_merge(s->store, &old_chunk,
1058 &new_chunk);
1059 if (linear_chunks <= 0) {
1060 if (linear_chunks < 0) {
1061 DMERR("Read error in exception store: "
1062 "shutting down merge");
1063 down_write(&s->lock);
1064 s->merge_failed = 1;
1065 up_write(&s->lock);
1066 }
1067 goto shut;
1068 }
1069
1070 /* Adjust old_chunk and new_chunk to reflect start of linear region */
1071 old_chunk = old_chunk + 1 - linear_chunks;
1072 new_chunk = new_chunk + 1 - linear_chunks;
1073
1074 /*
1075 * Use one (potentially large) I/O to copy all 'linear_chunks'
1076 * from the exception store to the origin
1077 */
1078 io_size = linear_chunks * s->store->chunk_size;
1079
1080 dest.bdev = s->origin->bdev;
1081 dest.sector = chunk_to_sector(s->store, old_chunk);
1082 dest.count = min(io_size, get_dev_size(dest.bdev) - dest.sector);
1083
1084 src.bdev = s->cow->bdev;
1085 src.sector = chunk_to_sector(s->store, new_chunk);
1086 src.count = dest.count;
1087
1088 /*
1089 * Reallocate any exceptions needed in other snapshots then
1090 * wait for the pending exceptions to complete.
1091 * Each time any pending exception (globally on the system)
1092 * completes we are woken and repeat the process to find out
1093 * if we can proceed. While this may not seem a particularly
1094 * efficient algorithm, it is not expected to have any
1095 * significant impact on performance.
1096 */
1097 previous_count = read_pending_exceptions_done_count();
1098 while (origin_write_extent(s, dest.sector, io_size)) {
1099 wait_event(_pending_exceptions_done,
1100 (read_pending_exceptions_done_count() !=
1101 previous_count));
1102 /* Retry after the wait, until all exceptions are done. */
1103 previous_count = read_pending_exceptions_done_count();
1104 }
1105
1106 down_write(&s->lock);
1107 s->first_merging_chunk = old_chunk;
1108 s->num_merging_chunks = linear_chunks;
1109 up_write(&s->lock);
1110
1111 /* Wait until writes to all 'linear_chunks' drain */
1112 for (i = 0; i < linear_chunks; i++)
1113 __check_for_conflicting_io(s, old_chunk + i);
1114
1115 dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, merge_callback, s);
1116 return;
1117
1118shut:
1119 merge_shutdown(s);
1120}
1121
1122static void error_bios(struct bio *bio);
1123
1124static void merge_callback(int read_err, unsigned long write_err, void *context)
1125{
1126 struct dm_snapshot *s = context;
9fe86254 1127 struct bio *b = NULL;
1e03f97e
MP
1128
1129 if (read_err || write_err) {
1130 if (read_err)
1131 DMERR("Read error: shutting down merge.");
1132 else
1133 DMERR("Write error: shutting down merge.");
1134 goto shut;
1135 }
1136
1137 if (s->store->type->commit_merge(s->store,
1138 s->num_merging_chunks) < 0) {
1139 DMERR("Write error in exception store: shutting down merge");
1140 goto shut;
1141 }
1142
1143 if (remove_single_exception_chunk(s) < 0)
1144 goto shut;
1145
1146 snapshot_merge_next_chunks(s);
1147
1148 return;
1149
1150shut:
1151 down_write(&s->lock);
1152 s->merge_failed = 1;
1153 b = __release_queued_bios_after_merge(s);
1154 up_write(&s->lock);
1155 error_bios(b);
1156
1157 merge_shutdown(s);
1158}
1159
1160static void start_merge(struct dm_snapshot *s)
1161{
1162 if (!test_and_set_bit(RUNNING_MERGE, &s->state_bits))
1163 snapshot_merge_next_chunks(s);
1164}
1165
1e03f97e
MP
1166/*
1167 * Stop the merging process and wait until it finishes.
1168 */
1169static void stop_merge(struct dm_snapshot *s)
1170{
1171 set_bit(SHUTDOWN_MERGE, &s->state_bits);
1172 wait_on_bit(&s->state_bits, RUNNING_MERGE, TASK_UNINTERRUPTIBLE);
1173 clear_bit(SHUTDOWN_MERGE, &s->state_bits);
1174}
1175
1176/*
1177 * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
1178 */
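/*
 * An illustrative invocation (device names assumed), following the argument
 * order above:
 *   dmsetup create snap --table "0 <origin_sectors> snapshot /dev/vg/base /dev/vg/base-cow p 16"
 * i.e. a persistent snapshot with 16-sector (8KiB) chunks.
 */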
1179static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1180{
1181 struct dm_snapshot *s;
cd45daff 1182 int i;
1183 int r = -EINVAL;
1184 char *origin_path, *cow_path;
1185 dev_t origin_dev, cow_dev;
1186 unsigned args_used, num_flush_bios = 1;
1187 fmode_t origin_mode = FMODE_READ;
1188
1189 if (argc != 4) {
1190 ti->error = "requires exactly 4 arguments";
1191 r = -EINVAL;
1192 goto bad;
1193 }
1194
1195 if (dm_target_is_snapshot_merge(ti)) {
1196 num_flush_bios = 2;
1197 origin_mode = FMODE_WRITE;
1198 }
1199
1200 s = kzalloc(sizeof(*s), GFP_KERNEL);
1201 if (!s) {
1202 ti->error = "Cannot allocate private snapshot structure";
1203 r = -ENOMEM;
1204 goto bad;
1205 }
1206
1207 origin_path = argv[0];
1208 argv++;
1209 argc--;
1210
1211 r = dm_get_device(ti, origin_path, origin_mode, &s->origin);
1212 if (r) {
1213 ti->error = "Cannot get origin device";
1214 goto bad_origin;
1215 }
1216 origin_dev = s->origin->bdev->bd_dev;
1217
1218 cow_path = argv[0];
1219 argv++;
1220 argc--;
1221
1222 cow_dev = dm_get_dev_t(cow_path);
1223 if (cow_dev && cow_dev == origin_dev) {
1224 ti->error = "COW device cannot be the same as origin device";
1225 r = -EINVAL;
1226 goto bad_cow;
1227 }
1228
1229 r = dm_get_device(ti, cow_path, dm_table_get_mode(ti->table), &s->cow);
1230 if (r) {
1231 ti->error = "Cannot get COW device";
1232 goto bad_cow;
1233 }
1234
1235 r = dm_exception_store_create(ti, argc, argv, s, &args_used, &s->store);
1236 if (r) {
1237 ti->error = "Couldn't create exception store";
1238 r = -EINVAL;
1239 goto bad_store;
1240 }
1241
1242 argv += args_used;
1243 argc -= args_used;
1244
1245 s->ti = ti;
1246 s->valid = 1;
1247 s->snapshot_overflowed = 0;
1248 s->active = 0;
1249 atomic_set(&s->pending_exceptions_count, 0);
1250 spin_lock_init(&s->pe_allocation_lock);
1251 s->exception_start_sequence = 0;
1252 s->exception_complete_sequence = 0;
1253 s->out_of_order_tree = RB_ROOT;
1254 init_rwsem(&s->lock);
1255 INIT_LIST_HEAD(&s->list);
1256 spin_lock_init(&s->pe_lock);
1257 s->state_bits = 0;
1258 s->merge_failed = 0;
1259 s->first_merging_chunk = 0;
1260 s->num_merging_chunks = 0;
1261 bio_list_init(&s->bios_queued_during_merge);
1262
1263 /* Allocate hash table for COW data */
1264 if (init_hash_tables(s)) {
1265 ti->error = "Unable to allocate hash table space";
1266 r = -ENOMEM;
1267 goto bad_hash_tables;
1268 }
1269
1270 sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX);
1271
1272 s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle);
1273 if (IS_ERR(s->kcopyd_client)) {
1274 r = PTR_ERR(s->kcopyd_client);
1275 ti->error = "Could not create kcopyd client";
1276 goto bad_kcopyd;
1277 }
1278
1279 r = mempool_init_slab_pool(&s->pending_pool, MIN_IOS, pending_cache);
1280 if (r) {
1281 ti->error = "Could not allocate mempool for pending exceptions";
1282 goto bad_pending_pool;
1283 }
1284
1285 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1286 INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]);
1287
1288 spin_lock_init(&s->tracked_chunk_lock);
1289
1290 ti->private = s;
1291 ti->num_flush_bios = num_flush_bios;
1292 ti->per_io_data_size = sizeof(struct dm_snap_tracked_chunk);
1293
1294 /* Add snapshot to the list of snapshots for this origin */
1295 /* Exceptions aren't triggered till snapshot_resume() is called */
1296 r = register_snapshot(s);
1297 if (r == -ENOMEM) {
1298 ti->error = "Snapshot origin struct allocation failed";
1299 goto bad_load_and_register;
1300 } else if (r < 0) {
1301 /* invalid handover, register_snapshot has set ti->error */
1302 goto bad_load_and_register;
1303 }
1304
1305 /*
1306 * Metadata must only be loaded into one table at once, so skip this
1307 * if metadata will be handed over during resume.
1308 * Chunk size will be set during the handover - set it to zero to
1309 * ensure it's ignored.
1310 */
1311 if (r > 0) {
1312 s->store->chunk_size = 0;
1313 return 0;
1314 }
1315
1316 r = s->store->type->read_metadata(s->store, dm_add_exception,
1317 (void *)s);
1318 if (r < 0) {
1319 ti->error = "Failed to read snapshot metadata";
1320 goto bad_read_metadata;
1321 } else if (r > 0) {
1322 s->valid = 0;
1323 DMWARN("Snapshot is marked invalid.");
1324 }
1325
1326 if (!s->store->chunk_size) {
1327 ti->error = "Chunk size not set";
1328 goto bad_read_metadata;
1329 }
1330
1331 r = dm_set_target_max_io_len(ti, s->store->chunk_size);
1332 if (r)
1333 goto bad_read_metadata;
1334
1335 return 0;
1336
1337bad_read_metadata:
1338 unregister_snapshot(s);
1339
1340bad_load_and_register:
1341 mempool_exit(&s->pending_pool);
1342
1343bad_pending_pool:
1344 dm_kcopyd_client_destroy(s->kcopyd_client);
1345
1346bad_kcopyd:
1347 dm_exception_table_exit(&s->pending, pending_cache);
1348 dm_exception_table_exit(&s->complete, exception_cache);
1349
1350bad_hash_tables:
1351 dm_exception_store_destroy(s->store);
1352
1353bad_store:
1354 dm_put_device(ti, s->cow);
1355
1356bad_cow:
1357 dm_put_device(ti, s->origin);
1358
1359bad_origin:
1360 kfree(s);
1361
1362bad:
1363 return r;
1364}
1365
1366static void __free_exceptions(struct dm_snapshot *s)
1367{
1368 dm_kcopyd_client_destroy(s->kcopyd_client);
1369 s->kcopyd_client = NULL;
1370
1371 dm_exception_table_exit(&s->pending, pending_cache);
1372 dm_exception_table_exit(&s->complete, exception_cache);
1373}
1374
1375static void __handover_exceptions(struct dm_snapshot *snap_src,
1376 struct dm_snapshot *snap_dest)
1377{
1378 union {
1379 struct dm_exception_table table_swap;
1380 struct dm_exception_store *store_swap;
1381 } u;
1382
1383 /*
1384 * Swap all snapshot context information between the two instances.
1385 */
1386 u.table_swap = snap_dest->complete;
1387 snap_dest->complete = snap_src->complete;
1388 snap_src->complete = u.table_swap;
1389
1390 u.store_swap = snap_dest->store;
1391 snap_dest->store = snap_src->store;
1392 snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
1393 snap_src->store = u.store_swap;
1394
1395 snap_dest->store->snap = snap_dest;
1396 snap_src->store->snap = snap_src;
1397
1398 snap_dest->ti->max_io_len = snap_dest->store->chunk_size;
1399 snap_dest->valid = snap_src->valid;
1400 snap_dest->snapshot_overflowed = snap_src->snapshot_overflowed;
1401
1402 /*
1403 * Set source invalid to ensure it receives no further I/O.
1404 */
1405 snap_src->valid = 0;
1406}
1407
1408static void snapshot_dtr(struct dm_target *ti)
1409{
1410#ifdef CONFIG_DM_DEBUG
1411 int i;
1412#endif
1413 struct dm_snapshot *s = ti->private;
1414 struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
1415
1416 down_read(&_origins_lock);
1417 /* Check whether exception handover must be cancelled */
1418 (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
1419 if (snap_src && snap_dest && (s == snap_src)) {
1420 down_write(&snap_dest->lock);
1421 snap_dest->valid = 0;
1422 up_write(&snap_dest->lock);
1423 DMERR("Cancelling snapshot handover.");
1424 }
1425 up_read(&_origins_lock);
1426
1427 if (dm_target_is_snapshot_merge(ti))
1428 stop_merge(s);
1429
1430 /* Prevent further origin writes from using this snapshot. */
1431 /* After this returns there can be no new kcopyd jobs. */
1432 unregister_snapshot(s);
1433
1434 while (atomic_read(&s->pending_exceptions_count))
1435 msleep(1);
1436 /*
1437 * Ensure instructions in mempool_exit aren't reordered
1438 * before atomic_read.
1439 */
1440 smp_mb();
1441
1442#ifdef CONFIG_DM_DEBUG
1443 for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++)
1444 BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i]));
1445#endif
1446
1447 __free_exceptions(s);
1448
1449 mempool_exit(&s->pending_pool);
1450
1451 dm_exception_store_destroy(s->store);
1452
1453 dm_put_device(ti, s->cow);
1454
1455 dm_put_device(ti, s->origin);
1456
1457 kfree(s);
1458}
1459
1460/*
1461 * Flush a list of buffers.
1462 */
1463static void flush_bios(struct bio *bio)
1464{
1465 struct bio *n;
1466
1467 while (bio) {
1468 n = bio->bi_next;
1469 bio->bi_next = NULL;
1470 generic_make_request(bio);
1471 bio = n;
1472 }
1473}
1474
1475static int do_origin(struct dm_dev *origin, struct bio *bio);
1476
1477/*
1478 * Flush a list of buffers.
1479 */
1480static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio)
1481{
1482 struct bio *n;
1483 int r;
1484
1485 while (bio) {
1486 n = bio->bi_next;
1487 bio->bi_next = NULL;
1488 r = do_origin(s->origin, bio);
1489 if (r == DM_MAPIO_REMAPPED)
1490 generic_make_request(bio);
1491 bio = n;
1492 }
1493}
1494
1495/*
1496 * Error a list of buffers.
1497 */
1498static void error_bios(struct bio *bio)
1499{
1500 struct bio *n;
1501
1502 while (bio) {
1503 n = bio->bi_next;
1504 bio->bi_next = NULL;
1505 bio_io_error(bio);
1506 bio = n;
1507 }
1508}
1509
1510static void __invalidate_snapshot(struct dm_snapshot *s, int err)
1511{
1512 if (!s->valid)
1513 return;
1514
1515 if (err == -EIO)
1516 DMERR("Invalidating snapshot: Error reading/writing.");
1517 else if (err == -ENOMEM)
1518 DMERR("Invalidating snapshot: Unable to allocate exception.");
1519
1520 if (s->store->type->drop_snapshot)
1521 s->store->type->drop_snapshot(s->store);
1522
1523 s->valid = 0;
1524
1525 dm_table_event(s->ti->table);
1526}
1527
1528static void invalidate_snapshot(struct dm_snapshot *s, int err)
1529{
1530 down_write(&s->lock);
1531 __invalidate_snapshot(s, err);
1532 up_write(&s->lock);
1533}
1534
1535static void pending_complete(void *context, int success)
1536{
1537 struct dm_snap_pending_exception *pe = context;
1538 struct dm_exception *e;
1539 struct dm_snapshot *s = pe->snap;
1540 struct bio *origin_bios = NULL;
1541 struct bio *snapshot_bios = NULL;
1542 struct bio *full_bio = NULL;
1543 struct dm_exception_table_lock lock;
1544 int error = 0;
1545
1546 dm_exception_table_lock_init(s, pe->e.old_chunk, &lock);
1547
1548 if (!success) {
1549 /* Read/write error - snapshot is unusable */
1550 invalidate_snapshot(s, -EIO);
1551 error = 1;
1552
1553 dm_exception_table_lock(&lock);
1554 goto out;
1555 }
1556
1557 e = alloc_completed_exception(GFP_NOIO);
1558 if (!e) {
1559 invalidate_snapshot(s, -ENOMEM);
1560 error = 1;
1561
1562 dm_exception_table_lock(&lock);
76df1c65
AK
1563 goto out;
1564 }
1565 *e = pe->e;
1566
1567 down_read(&s->lock);
1568 dm_exception_table_lock(&lock);
1569 if (!s->valid) {
1570 up_read(&s->lock);
1571 free_completed_exception(e);
1572 error = 1;
1573
1574 goto out;
1575 }
1576
1577 /*
1578 * Add a proper exception. After inserting the completed exception all
1579 * subsequent snapshot reads to this chunk will be redirected to the
1580 * COW device. This ensures that we do not starve. Moreover, as long
1581 * as the pending exception exists, neither origin writes nor snapshot
1582 * merging can overwrite the chunk in origin.
1583 */
1584 dm_insert_exception(&s->complete, e);
1585 up_read(&s->lock);
1586
1587 /* Wait for conflicting reads to drain */
1588 if (__chunk_is_tracked(s, pe->e.old_chunk)) {
1589 dm_exception_table_unlock(&lock);
1590 __check_for_conflicting_io(s, pe->e.old_chunk);
1591 dm_exception_table_lock(&lock);
1592 }
1593
1594out:
1595 /* Remove the in-flight exception from the list */
1596 dm_remove_exception(&pe->e);
1597
1598 dm_exception_table_unlock(&lock);
1599
1600 snapshot_bios = bio_list_get(&pe->snapshot_bios);
1601 origin_bios = bio_list_get(&pe->origin_bios);
1602 full_bio = pe->full_bio;
1603 if (full_bio)
1604 full_bio->bi_end_io = pe->full_bio_end_io;
1605 increment_pending_exceptions_done_count();
1606
1607 /* Submit any pending write bios */
1608 if (error) {
1609 if (full_bio)
1610 bio_io_error(full_bio);
1611 error_bios(snapshot_bios);
1612 } else {
1613 if (full_bio)
1614 bio_endio(full_bio);
1615 flush_bios(snapshot_bios);
1616 }
1617
1618 retry_origin_bios(s, origin_bios);
1619
1620 free_pending_exception(pe);
1621}
1622
1623static void complete_exception(struct dm_snap_pending_exception *pe)
1624{
1625 struct dm_snapshot *s = pe->snap;
1626
1627 /* Update the metadata if we are persistent */
1628 s->store->type->commit_exception(s->store, &pe->e, !pe->copy_error,
1629 pending_complete, pe);
1630}
1631
1632/*
1633 * Called when the copy I/O has finished. kcopyd actually runs
1634 * this code so don't block.
1635 */
1636static void copy_callback(int read_err, unsigned long write_err, void *context)
1637{
1638 struct dm_snap_pending_exception *pe = context;
1639 struct dm_snapshot *s = pe->snap;
1640
1641 pe->copy_error = read_err || write_err;
1642
1643 if (pe->exception_sequence == s->exception_complete_sequence) {
1644 struct rb_node *next;
1645
1646 s->exception_complete_sequence++;
1647 complete_exception(pe);
1648
1649 next = rb_first(&s->out_of_order_tree);
1650 while (next) {
1651 pe = rb_entry(next, struct dm_snap_pending_exception,
1652 out_of_order_node);
1653 if (pe->exception_sequence != s->exception_complete_sequence)
1654 break;
1655 next = rb_next(next);
1656 s->exception_complete_sequence++;
1657 rb_erase(&pe->out_of_order_node, &s->out_of_order_tree);
1658 complete_exception(pe);
1659 cond_resched();
1660 }
1661 } else {
1662 struct rb_node *parent = NULL;
1663 struct rb_node **p = &s->out_of_order_tree.rb_node;
1664 struct dm_snap_pending_exception *pe2;
1665
1666 while (*p) {
1667 pe2 = rb_entry(*p, struct dm_snap_pending_exception, out_of_order_node);
1668 parent = *p;
1669
1670 BUG_ON(pe->exception_sequence == pe2->exception_sequence);
1671 if (pe->exception_sequence < pe2->exception_sequence)
1672 p = &((*p)->rb_left);
1673 else
1674 p = &((*p)->rb_right);
1675 }
1676
1677 rb_link_node(&pe->out_of_order_node, parent, p);
1678 rb_insert_color(&pe->out_of_order_node, &s->out_of_order_tree);
1679 }
1680 up(&s->cow_count);
1681}
1682
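/*
 * Exceptions must be committed in the order they were allocated (their
 * exception_sequence).  A completion that arrives early is parked in
 * out_of_order_tree, keyed by sequence number, and drained by the loop above
 * once the missing sequence finally completes.
 */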
1683/*
1684 * Dispatches the copy operation to kcopyd.
1685 */
1686static void start_copy(struct dm_snap_pending_exception *pe)
1687{
1688 struct dm_snapshot *s = pe->snap;
1689 struct dm_io_region src, dest;
1690 struct block_device *bdev = s->origin->bdev;
1691 sector_t dev_size;
1692
1693 dev_size = get_dev_size(bdev);
1694
1695 src.bdev = bdev;
1696 src.sector = chunk_to_sector(s->store, pe->e.old_chunk);
1697 src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector);
1698
1699 dest.bdev = s->cow->bdev;
1700 dest.sector = chunk_to_sector(s->store, pe->e.new_chunk);
1701 dest.count = src.count;
1702
1703 /* Hand over to kcopyd */
1704 down(&s->cow_count);
1705 dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe);
1706}
1707
1708static void full_bio_end_io(struct bio *bio)
1709{
1710 void *callback_data = bio->bi_private;
1711
1712 dm_kcopyd_do_callback(callback_data, 0, bio->bi_status ? 1 : 0);
1713}
1714
1715static void start_full_bio(struct dm_snap_pending_exception *pe,
1716 struct bio *bio)
1717{
1718 struct dm_snapshot *s = pe->snap;
1719 void *callback_data;
1720
1721 pe->full_bio = bio;
1722 pe->full_bio_end_io = bio->bi_end_io;
1723
1724 down(&s->cow_count);
1725 callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client,
1726 copy_callback, pe);
1727
1728 bio->bi_end_io = full_bio_end_io;
1729 bio->bi_private = callback_data;
1730
1731 generic_make_request(bio);
1732}
1733
1734static struct dm_snap_pending_exception *
1735__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk)
1736{
1737 struct dm_exception *e = dm_lookup_exception(&s->pending, chunk);
1738
1739 if (!e)
1740 return NULL;
1741
1742 return container_of(e, struct dm_snap_pending_exception, e);
1743}
1744
1745/*
1746 * Inserts a pending exception into the pending table.
1747 *
1748 * NOTE: a write lock must be held on the chunk's pending exception table slot
1749 * before calling this.
1750 */
1751static struct dm_snap_pending_exception *
1752__insert_pending_exception(struct dm_snapshot *s,
1753 struct dm_snap_pending_exception *pe, chunk_t chunk)
1754{
1755 pe->e.old_chunk = chunk;
1756 bio_list_init(&pe->origin_bios);
1757 bio_list_init(&pe->snapshot_bios);
1758 pe->started = 0;
1759 pe->full_bio = NULL;
1760
1761 spin_lock(&s->pe_allocation_lock);
1762 if (s->store->type->prepare_exception(s->store, &pe->e)) {
1763 spin_unlock(&s->pe_allocation_lock);
1764 free_pending_exception(pe);
1765 return NULL;
1766 }
1767
1768 pe->exception_sequence = s->exception_start_sequence++;
1769 spin_unlock(&s->pe_allocation_lock);
1770
1771 dm_insert_exception(&s->pending, &pe->e);
1772
1773 return pe;
1774}
1775
1776/*
1777 * Looks to see if this snapshot already has a pending exception
1778 * for this chunk, otherwise it allocates a new one and inserts
1779 * it into the pending table.
1780 *
1781 * NOTE: a write lock must be held on the chunk's pending exception table slot
1782 * before calling this.
1783 */
1784static struct dm_snap_pending_exception *
1785__find_pending_exception(struct dm_snapshot *s,
1786 struct dm_snap_pending_exception *pe, chunk_t chunk)
1787{
1788 struct dm_snap_pending_exception *pe2;
1789
1790 pe2 = __lookup_pending_exception(s, chunk);
1791 if (pe2) {
1792 free_pending_exception(pe);
1793 return pe2;
1794 }
1795
1796 return __insert_pending_exception(s, pe, chunk);
1797}
1798
1d4989c8 1799static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
d74f81f8 1800 struct bio *bio, chunk_t chunk)
1da177e4 1801{
74d46992 1802 bio_set_dev(bio, s->cow->bdev);
4f024f37
KO
1803 bio->bi_iter.bi_sector =
1804 chunk_to_sector(s->store, dm_chunk_number(e->new_chunk) +
1805 (chunk - e->old_chunk)) +
1806 (bio->bi_iter.bi_sector & s->store->chunk_mask);
1807}
1808
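/*
 * Worked example (illustrative numbers): with 16-sector chunks and an
 * exception covering old chunks 10-11 -> new chunks 4-5, a bio at sector 189
 * (old chunk 11, offset 13) is redirected to sector 5*16 + 13 = 93 of the
 * COW device.
 */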
1809static int snapshot_map(struct dm_target *ti, struct bio *bio)
1810{
1811 struct dm_exception *e;
1812 struct dm_snapshot *s = ti->private;
1813 int r = DM_MAPIO_REMAPPED;
1814 chunk_t chunk;
1815 struct dm_snap_pending_exception *pe = NULL;
1816 struct dm_exception_table_lock lock;
1817
1818 init_tracked_chunk(bio);
1819
1820 if (bio->bi_opf & REQ_PREFLUSH) {
1821 bio_set_dev(bio, s->cow->bdev);
1822 return DM_MAPIO_REMAPPED;
1823 }
1824
1825 chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
1826 dm_exception_table_lock_init(s, chunk, &lock);
1827
1828 /* Full snapshots are not usable */
76df1c65 1829 /* To get here the table must be live so s->active is always set. */
1da177e4 1830 if (!s->valid)
846785e6 1831 return DM_MAPIO_KILL;
1da177e4 1832
3f1637f2 1833 down_read(&s->lock);
f79ae415 1834 dm_exception_table_lock(&lock);
ba40a2aa 1835
70246286
CH
1836 if (!s->valid || (unlikely(s->snapshot_overflowed) &&
1837 bio_data_dir(bio) == WRITE)) {
846785e6 1838 r = DM_MAPIO_KILL;
ba40a2aa
AK
1839 goto out_unlock;
1840 }
1841
1842 /* If the block is already remapped - use that, else remap it */
3510cb94 1843 e = dm_lookup_exception(&s->complete, chunk);
ba40a2aa 1844 if (e) {
d74f81f8 1845 remap_exception(s, e, bio, chunk);
ba40a2aa
AK
1846 goto out_unlock;
1847 }
1848
1da177e4
LT
1849 /*
1850 * Write to snapshot - the higher layers take care of the
1851 * RW/RO flags, so we should only get here if the snapshot
1852 * is writable.
1853 */
70246286 1854 if (bio_data_dir(bio) == WRITE) {
2913808e 1855 pe = __lookup_pending_exception(s, chunk);
76df1c65 1856 if (!pe) {
f79ae415 1857 dm_exception_table_unlock(&lock);
c6621392 1858 pe = alloc_pending_exception(s);
f79ae415 1859 dm_exception_table_lock(&lock);
c6621392 1860
3510cb94 1861 e = dm_lookup_exception(&s->complete, chunk);
35bf659b
MP
1862 if (e) {
1863 free_pending_exception(pe);
1864 remap_exception(s, e, bio, chunk);
1865 goto out_unlock;
1866 }
1867
c6621392 1868 pe = __find_pending_exception(s, pe, chunk);
2913808e 1869 if (!pe) {
f79ae415 1870 dm_exception_table_unlock(&lock);
3f1637f2
NT
1871 up_read(&s->lock);
1872
1873 down_write(&s->lock);
f79ae415 1874
b0d3cc01 1875 if (s->store->userspace_supports_overflow) {
3f1637f2
NT
1876 if (s->valid && !s->snapshot_overflowed) {
1877 s->snapshot_overflowed = 1;
1878 DMERR("Snapshot overflowed: Unable to allocate exception.");
1879 }
b0d3cc01
MS
1880 } else
1881 __invalidate_snapshot(s, -ENOMEM);
f79ae415
NT
1882 up_write(&s->lock);
1883
846785e6 1884 r = DM_MAPIO_KILL;
f79ae415 1885 goto out;
2913808e 1886 }
1da177e4
LT
1887 }
1888
d74f81f8 1889 remap_exception(s, &pe->e, bio, chunk);
76df1c65 1890
d2a7ad29 1891 r = DM_MAPIO_SUBMITTED;
ba40a2aa 1892
a6e50b40 1893 if (!pe->started &&
4f024f37
KO
1894 bio->bi_iter.bi_size ==
1895 (s->store->chunk_size << SECTOR_SHIFT)) {
a6e50b40 1896 pe->started = 1;
3f1637f2 1897
f79ae415 1898 dm_exception_table_unlock(&lock);
3f1637f2
NT
1899 up_read(&s->lock);
1900
a6e50b40
MP
1901 start_full_bio(pe, bio);
1902 goto out;
1903 }
1904
1905 bio_list_add(&pe->snapshot_bios, bio);
1906
76df1c65 1907 if (!pe->started) {
3f1637f2 1908 /* this is protected by the exception table lock */
76df1c65 1909 pe->started = 1;
3f1637f2 1910
f79ae415 1911 dm_exception_table_unlock(&lock);
3f1637f2
NT
1912 up_read(&s->lock);
1913
76df1c65 1914 start_copy(pe);
ba40a2aa
AK
1915 goto out;
1916 }
cd45daff 1917 } else {
74d46992 1918 bio_set_dev(bio, s->origin->bdev);
ee18026a 1919 track_chunk(s, bio, chunk);
cd45daff 1920 }
1da177e4 1921
a2d2b034 1922out_unlock:
f79ae415 1923 dm_exception_table_unlock(&lock);
3f1637f2 1924 up_read(&s->lock);
a2d2b034 1925out:
1da177e4
LT
1926 return r;
1927}
1928
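/*
 * Editor's note: summarising the outcomes of snapshot_map() above as a
 * reading aid (no new behaviour):
 *
 *   REQ_PREFLUSH                      -> remap to cow, DM_MAPIO_REMAPPED
 *   invalid, or overflowed on a write -> DM_MAPIO_KILL
 *   chunk already in complete table   -> remap to cow, DM_MAPIO_REMAPPED
 *   write to an unremapped chunk      -> queue the bio on the pending
 *                                        exception, start the copy (or a
 *                                        whole-chunk direct write),
 *                                        DM_MAPIO_SUBMITTED
 *   read of an unremapped chunk       -> remap to origin, track the chunk,
 *                                        DM_MAPIO_REMAPPED
 */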
3452c2a1
MP
1929/*
1930 * A snapshot-merge target behaves like a combination of a snapshot
1931 * target and a snapshot-origin target. It only generates new
1932 * exceptions in other snapshots and not in the one that is being
1933 * merged.
1934 *
1935 * For each chunk, if there is an existing exception, it is used to
1936 * redirect I/O to the cow device. Otherwise I/O is sent to the origin,
1937 * which in turn might generate exceptions in other snapshots.
9fe86254
MP
1938 * If merging is currently taking place on the chunk in question, the
1939 * I/O is deferred by adding it to s->bios_queued_during_merge.
3452c2a1 1940 */
7de3ee57 1941static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
3452c2a1
MP
1942{
1943 struct dm_exception *e;
1944 struct dm_snapshot *s = ti->private;
1945 int r = DM_MAPIO_REMAPPED;
1946 chunk_t chunk;
1947
ee18026a
MP
1948 init_tracked_chunk(bio);
1949
1eff9d32 1950 if (bio->bi_opf & REQ_PREFLUSH) {
55a62eef 1951 if (!dm_bio_get_target_bio_nr(bio))
74d46992 1952 bio_set_dev(bio, s->origin->bdev);
10b8106a 1953 else
74d46992 1954 bio_set_dev(bio, s->cow->bdev);
10b8106a
MS
1955 return DM_MAPIO_REMAPPED;
1956 }
1957
4f024f37 1958 chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector);
3452c2a1 1959
4ad8d880 1960 down_write(&s->lock);
3452c2a1 1961
d2fdb776
MP
1962 /* Full merging snapshots are redirected to the origin */
1963 if (!s->valid)
1964 goto redirect_to_origin;
3452c2a1
MP
1965
1966 /* If the block is already remapped - use that */
1967 e = dm_lookup_exception(&s->complete, chunk);
1968 if (e) {
9fe86254 1969 /* Queue writes overlapping with chunks being merged */
70246286 1970 if (bio_data_dir(bio) == WRITE &&
9fe86254
MP
1971 chunk >= s->first_merging_chunk &&
1972 chunk < (s->first_merging_chunk +
1973 s->num_merging_chunks)) {
74d46992 1974 bio_set_dev(bio, s->origin->bdev);
9fe86254
MP
1975 bio_list_add(&s->bios_queued_during_merge, bio);
1976 r = DM_MAPIO_SUBMITTED;
1977 goto out_unlock;
1978 }
17aa0332 1979
3452c2a1 1980 remap_exception(s, e, bio, chunk);
17aa0332 1981
70246286 1982 if (bio_data_dir(bio) == WRITE)
ee18026a 1983 track_chunk(s, bio, chunk);
3452c2a1
MP
1984 goto out_unlock;
1985 }
1986
d2fdb776 1987redirect_to_origin:
74d46992 1988 bio_set_dev(bio, s->origin->bdev);
3452c2a1 1989
70246286 1990 if (bio_data_dir(bio) == WRITE) {
4ad8d880 1991 up_write(&s->lock);
3452c2a1
MP
1992 return do_origin(s->origin, bio);
1993 }
1994
1995out_unlock:
4ad8d880 1996 up_write(&s->lock);
3452c2a1
MP
1997
1998 return r;
1999}
2000
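/*
 * Editor's note: the deferral test in snapshot_merge_map() above is a
 * half-open interval check on the window currently being merged.  For
 * example, with first_merging_chunk = 100 and num_merging_chunks = 8,
 * a write to chunk 103 that already has a completed exception is queued
 * on bios_queued_during_merge until that window has been merged, while
 * a write to chunk 108, or any read of a remapped chunk, is still
 * redirected to the cow device.
 */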
4e4cbee9
CH
2001static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
2002 blk_status_t *error)
cd45daff
MP
2003{
2004 struct dm_snapshot *s = ti->private;
cd45daff 2005
ee18026a
MP
2006 if (is_bio_tracked(bio))
2007 stop_tracking_chunk(s, bio);
cd45daff 2008
1be56909 2009 return DM_ENDIO_DONE;
cd45daff
MP
2010}
2011
1e03f97e
MP
2012static void snapshot_merge_presuspend(struct dm_target *ti)
2013{
2014 struct dm_snapshot *s = ti->private;
2015
2016 stop_merge(s);
2017}
2018
c1f0c183
MS
2019static int snapshot_preresume(struct dm_target *ti)
2020{
2021 int r = 0;
2022 struct dm_snapshot *s = ti->private;
2023 struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
2024
2025 down_read(&_origins_lock);
9d3b15c4 2026 (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
c1f0c183 2027 if (snap_src && snap_dest) {
4ad8d880 2028 down_read(&snap_src->lock);
c1f0c183
MS
2029 if (s == snap_src) {
2030 DMERR("Unable to resume snapshot source until "
2031 "handover completes.");
2032 r = -EINVAL;
b83b2f29 2033 } else if (!dm_suspended(snap_src->ti)) {
c1f0c183
MS
2034 DMERR("Unable to perform snapshot handover until "
2035 "source is suspended.");
2036 r = -EINVAL;
2037 }
4ad8d880 2038 up_read(&snap_src->lock);
c1f0c183
MS
2039 }
2040 up_read(&_origins_lock);
2041
2042 return r;
2043}
2044
1da177e4
LT
2045static void snapshot_resume(struct dm_target *ti)
2046{
028867ac 2047 struct dm_snapshot *s = ti->private;
09ee96b2 2048 struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
b735fede
MP
2049 struct dm_origin *o;
2050 struct mapped_device *origin_md = NULL;
09ee96b2 2051 bool must_restart_merging = false;
c1f0c183
MS
2052
2053 down_read(&_origins_lock);
b735fede
MP
2054
2055 o = __lookup_dm_origin(s->origin->bdev);
2056 if (o)
2057 origin_md = dm_table_get_md(o->ti->table);
09ee96b2
MP
2058 if (!origin_md) {
2059 (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
2060 if (snap_merging)
2061 origin_md = dm_table_get_md(snap_merging->ti->table);
2062 }
b735fede
MP
2063 if (origin_md == dm_table_get_md(ti->table))
2064 origin_md = NULL;
09ee96b2
MP
2065 if (origin_md) {
2066 if (dm_hold(origin_md))
2067 origin_md = NULL;
2068 }
b735fede 2069
09ee96b2
MP
2070 up_read(&_origins_lock);
2071
2072 if (origin_md) {
b735fede 2073 dm_internal_suspend_fast(origin_md);
09ee96b2
MP
2074 if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
2075 must_restart_merging = true;
2076 stop_merge(snap_merging);
2077 }
2078 }
2079
2080 down_read(&_origins_lock);
b735fede 2081
9d3b15c4 2082 (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
c1f0c183 2083 if (snap_src && snap_dest) {
4ad8d880
NT
2084 down_write(&snap_src->lock);
2085 down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING);
c1f0c183 2086 __handover_exceptions(snap_src, snap_dest);
4ad8d880
NT
2087 up_write(&snap_dest->lock);
2088 up_write(&snap_src->lock);
c1f0c183 2089 }
b735fede 2090
c1f0c183
MS
2091 up_read(&_origins_lock);
2092
09ee96b2
MP
2093 if (origin_md) {
2094 if (must_restart_merging)
2095 start_merge(snap_merging);
2096 dm_internal_resume_fast(origin_md);
2097 dm_put(origin_md);
2098 }
2099
c1f0c183
MS
2100 /* Now we have correct chunk size, reregister */
2101 reregister_snapshot(s);
1da177e4 2102
4ad8d880 2103 down_write(&s->lock);
aa14edeb 2104 s->active = 1;
4ad8d880 2105 up_write(&s->lock);
1da177e4
LT
2106}
2107
542f9038 2108static uint32_t get_origin_minimum_chunksize(struct block_device *bdev)
1e03f97e 2109{
542f9038 2110 uint32_t min_chunksize;
1e03f97e
MP
2111
2112 down_read(&_origins_lock);
2113 min_chunksize = __minimum_chunk_size(__lookup_origin(bdev));
2114 up_read(&_origins_lock);
2115
2116 return min_chunksize;
2117}
2118
2119static void snapshot_merge_resume(struct dm_target *ti)
2120{
2121 struct dm_snapshot *s = ti->private;
2122
2123 /*
2124 * Handover exceptions from existing snapshot.
2125 */
2126 snapshot_resume(ti);
2127
2128 /*
542f9038 2129 * snapshot-merge acts as an origin, so set ti->max_io_len
1e03f97e 2130 */
542f9038 2131 ti->max_io_len = get_origin_minimum_chunksize(s->origin->bdev);
1e03f97e
MP
2132
2133 start_merge(s);
2134}
2135
fd7c092e
MP
2136static void snapshot_status(struct dm_target *ti, status_type_t type,
2137 unsigned status_flags, char *result, unsigned maxlen)
1da177e4 2138{
2e4a31df 2139 unsigned sz = 0;
028867ac 2140 struct dm_snapshot *snap = ti->private;
1da177e4
LT
2141
2142 switch (type) {
2143 case STATUSTYPE_INFO:
94e76572 2144
4ad8d880 2145 down_write(&snap->lock);
94e76572 2146
1da177e4 2147 if (!snap->valid)
2e4a31df 2148 DMEMIT("Invalid");
d8ddb1cf
MS
2149 else if (snap->merge_failed)
2150 DMEMIT("Merge failed");
76c44f6d
MP
2151 else if (snap->snapshot_overflowed)
2152 DMEMIT("Overflow");
1da177e4 2153 else {
985903bb
MS
2154 if (snap->store->type->usage) {
2155 sector_t total_sectors, sectors_allocated,
2156 metadata_sectors;
2157 snap->store->type->usage(snap->store,
2158 &total_sectors,
2159 &sectors_allocated,
2160 &metadata_sectors);
2161 DMEMIT("%llu/%llu %llu",
2162 (unsigned long long)sectors_allocated,
2163 (unsigned long long)total_sectors,
2164 (unsigned long long)metadata_sectors);
1da177e4
LT
2165 }
2166 else
2e4a31df 2167 DMEMIT("Unknown");
1da177e4 2168 }
94e76572 2169
4ad8d880 2170 up_write(&snap->lock);
94e76572 2171
1da177e4
LT
2172 break;
2173
2174 case STATUSTYPE_TABLE:
2175 /*
2176 * kdevname returns a static pointer so we need
2177 * to make private copies if the output is to
2178 * make sense.
2179 */
fc56f6fb 2180 DMEMIT("%s %s", snap->origin->name, snap->cow->name);
1e302a92
JB
2181 snap->store->type->status(snap->store, type, result + sz,
2182 maxlen - sz);
1da177e4
LT
2183 break;
2184 }
1da177e4
LT
2185}
2186
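/*
 * Editor's note: an illustrative STATUSTYPE_INFO line produced above when
 * the exception store implements ->usage (the numbers are made up):
 *
 *   "8192/204800 24"
 *
 * i.e. 8192 of the cow device's 204800 sectors are allocated to
 * exceptions and 24 sectors hold exception-store metadata.  "Invalid",
 * "Merge failed", "Overflow" or "Unknown" are emitted instead for the
 * corresponding states.
 */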
8811f46c
MS
2187static int snapshot_iterate_devices(struct dm_target *ti,
2188 iterate_devices_callout_fn fn, void *data)
2189{
2190 struct dm_snapshot *snap = ti->private;
1e5554c8
MP
2191 int r;
2192
2193 r = fn(ti, snap->origin, 0, ti->len, data);
8811f46c 2194
1e5554c8
MP
2195 if (!r)
2196 r = fn(ti, snap->cow, 0, get_dev_size(snap->cow->bdev), data);
2197
2198 return r;
8811f46c
MS
2199}
2200
2201
1da177e4
LT
2202/*-----------------------------------------------------------------
2203 * Origin methods
2204 *---------------------------------------------------------------*/
9eaae8ff
MP
2205
2206/*
2207 * If no exceptions need creating, DM_MAPIO_REMAPPED is returned and any
2208 * supplied bio was ignored. The caller may submit it immediately.
2209 * (No remapping actually occurs as the origin is always a direct linear
2210 * map.)
2211 *
2212 * If further exceptions are required, DM_MAPIO_SUBMITTED is returned
2213 * and any supplied bio is added to a list to be submitted once all
2214 * the necessary exceptions exist.
2215 */
2216static int __origin_write(struct list_head *snapshots, sector_t sector,
2217 struct bio *bio)
1da177e4 2218{
515ad66c 2219 int r = DM_MAPIO_REMAPPED;
1da177e4 2220 struct dm_snapshot *snap;
1d4989c8 2221 struct dm_exception *e;
65fc7c37 2222 struct dm_snap_pending_exception *pe, *pe2;
515ad66c
MP
2223 struct dm_snap_pending_exception *pe_to_start_now = NULL;
2224 struct dm_snap_pending_exception *pe_to_start_last = NULL;
f79ae415 2225 struct dm_exception_table_lock lock;
1da177e4
LT
2226 chunk_t chunk;
2227
2228 /* Do all the snapshots on this origin */
2229 list_for_each_entry (snap, snapshots, list) {
3452c2a1
MP
2230 /*
2231 * Don't make new exceptions in a merging snapshot
2232 * because it has effectively been deleted
2233 */
2234 if (dm_target_is_snapshot_merge(snap->ti))
2235 continue;
2236
d5e404c1 2237 /* Nothing to do if writing beyond end of snapshot */
9eaae8ff 2238 if (sector >= dm_table_get_size(snap->ti->table))
f79ae415 2239 continue;
1da177e4
LT
2240
2241 /*
2242 * Remember, different snapshots can have
2243 * different chunk sizes.
2244 */
9eaae8ff 2245 chunk = sector_to_chunk(snap->store, sector);
f79ae415
NT
2246 dm_exception_table_lock_init(snap, chunk, &lock);
2247
3f1637f2 2248 down_read(&snap->lock);
f79ae415
NT
2249 dm_exception_table_lock(&lock);
2250
2251 /* Only deal with valid and active snapshots */
2252 if (!snap->valid || !snap->active)
2253 goto next_snapshot;
1da177e4 2254
2913808e 2255 pe = __lookup_pending_exception(snap, chunk);
76df1c65 2256 if (!pe) {
65fc7c37
NT
2257 /*
2258 * Check exception table to see if block is already
2259 * remapped in this snapshot and trigger an exception
2260 * if not.
2261 */
2262 e = dm_lookup_exception(&snap->complete, chunk);
2263 if (e)
2264 goto next_snapshot;
2265
f79ae415 2266 dm_exception_table_unlock(&lock);
c6621392 2267 pe = alloc_pending_exception(snap);
f79ae415 2268 dm_exception_table_lock(&lock);
c6621392 2269
65fc7c37
NT
2270 pe2 = __lookup_pending_exception(snap, chunk);
2271
2272 if (!pe2) {
2273 e = dm_lookup_exception(&snap->complete, chunk);
2274 if (e) {
2275 free_pending_exception(pe);
2276 goto next_snapshot;
2277 }
2278
2279 pe = __insert_pending_exception(snap, pe, chunk);
2280 if (!pe) {
f79ae415 2281 dm_exception_table_unlock(&lock);
3f1637f2 2282 up_read(&snap->lock);
f79ae415 2283
3f1637f2 2284 invalidate_snapshot(snap, -ENOMEM);
f79ae415 2285 continue;
65fc7c37
NT
2286 }
2287 } else {
35bf659b 2288 free_pending_exception(pe);
65fc7c37 2289 pe = pe2;
2913808e 2290 }
76df1c65
AK
2291 }
2292
515ad66c 2293 r = DM_MAPIO_SUBMITTED;
76df1c65 2294
515ad66c
MP
2295 /*
2296 * If an origin bio was supplied, queue it to wait for the
2297 * completion of this exception, and start this one last,
2298 * at the end of the function.
2299 */
2300 if (bio) {
2301 bio_list_add(&pe->origin_bios, bio);
2302 bio = NULL;
76df1c65 2303
515ad66c
MP
2304 if (!pe->started) {
2305 pe->started = 1;
2306 pe_to_start_last = pe;
2307 }
76df1c65
AK
2308 }
2309
2310 if (!pe->started) {
2311 pe->started = 1;
515ad66c 2312 pe_to_start_now = pe;
1da177e4
LT
2313 }
2314
a2d2b034 2315next_snapshot:
f79ae415 2316 dm_exception_table_unlock(&lock);
3f1637f2 2317 up_read(&snap->lock);
1da177e4 2318
515ad66c
MP
2319 if (pe_to_start_now) {
2320 start_copy(pe_to_start_now);
2321 pe_to_start_now = NULL;
2322 }
b4b610f6
AK
2323 }
2324
1da177e4 2325 /*
515ad66c
MP
2326 * Submit the exception against which the bio is queued last,
2327 * to give the other exceptions a head start.
1da177e4 2328 */
515ad66c
MP
2329 if (pe_to_start_last)
2330 start_copy(pe_to_start_last);
1da177e4
LT
2331
2332 return r;
2333}
2334
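/*
 * Editor's note: a worked example of the per-snapshot chunking in
 * __origin_write() above, again assuming power-of-two chunk sizes.
 * For an origin write at sector 100 shared by two snapshots:
 *
 *   snapshot A, chunk_size 16 sectors:  chunk = 100 >> 4 = 6
 *   snapshot B, chunk_size 32 sectors:  chunk = 100 >> 5 = 3
 *
 * so the same write may trigger exceptions for chunk 6 in A and chunk 3
 * in B, each chunk being copied from the origin before the queued write
 * is released.
 */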
2335/*
2336 * Called on a write from the origin driver.
2337 */
2338static int do_origin(struct dm_dev *origin, struct bio *bio)
2339{
2340 struct origin *o;
d2a7ad29 2341 int r = DM_MAPIO_REMAPPED;
1da177e4
LT
2342
2343 down_read(&_origins_lock);
2344 o = __lookup_origin(origin->bdev);
2345 if (o)
4f024f37 2346 r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio);
1da177e4
LT
2347 up_read(&_origins_lock);
2348
2349 return r;
2350}
2351
73dfd078
MP
2352/*
2353 * Trigger exceptions in all non-merging snapshots.
2354 *
2355 * The chunk size of the merging snapshot may be larger than the chunk
2356 * size of some other snapshot so we may need to reallocate multiple
2357 * chunks in other snapshots.
2358 *
2359 * We scan all the overlapping exceptions in the other snapshots.
2360 * Returns 1 if anything was reallocated and must be waited for,
2361 * otherwise returns 0.
2362 *
2363 * size must be a multiple of merging_snap's chunk_size.
2364 */
2365static int origin_write_extent(struct dm_snapshot *merging_snap,
2366 sector_t sector, unsigned size)
2367{
2368 int must_wait = 0;
2369 sector_t n;
2370 struct origin *o;
2371
2372 /*
542f9038 2373 * The origin's __minimum_chunk_size() got stored in max_io_len
73dfd078
MP
2374 * by snapshot_merge_resume().
2375 */
2376 down_read(&_origins_lock);
2377 o = __lookup_origin(merging_snap->origin->bdev);
542f9038 2378 for (n = 0; n < size; n += merging_snap->ti->max_io_len)
73dfd078
MP
2379 if (__origin_write(&o->snapshots, sector + n, NULL) ==
2380 DM_MAPIO_SUBMITTED)
2381 must_wait = 1;
2382 up_read(&_origins_lock);
2383
2384 return must_wait;
2385}
2386
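/*
 * Editor's note: origin_write_extent() above walks the extent in
 * ti->max_io_len steps (the origin's minimum chunk size, set by
 * snapshot_merge_resume()).  For example, with max_io_len = 16 sectors
 * and a 64-sector extent starting at sector 512, __origin_write() is
 * called for sectors 512, 528, 544 and 560, and must_wait is returned
 * as 1 if any of those calls had to reallocate a chunk in another
 * snapshot.
 */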
1da177e4
LT
2387/*
2388 * Origin: maps a linear range of a device, with hooks for snapshotting.
2389 */
2390
2391/*
2392 * Construct an origin mapping: <dev_path>
2393 * The context for an origin is a 'struct dm_origin *' wrapping
2394 * the 'struct dm_dev *' that points to the real device.
2395 */
2396static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
2397{
2398 int r;
599cdf3b 2399 struct dm_origin *o;
1da177e4
LT
2400
2401 if (argc != 1) {
72d94861 2402 ti->error = "origin: incorrect number of arguments";
1da177e4
LT
2403 return -EINVAL;
2404 }
2405
599cdf3b
MP
2406 o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL);
2407 if (!o) {
2408 ti->error = "Cannot allocate private origin structure";
2409 r = -ENOMEM;
2410 goto bad_alloc;
2411 }
2412
2413 r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev);
1da177e4
LT
2414 if (r) {
2415 ti->error = "Cannot get target device";
599cdf3b 2416 goto bad_open;
1da177e4
LT
2417 }
2418
b735fede 2419 o->ti = ti;
599cdf3b 2420 ti->private = o;
55a62eef 2421 ti->num_flush_bios = 1;
494b3ee7 2422
1da177e4 2423 return 0;
599cdf3b
MP
2424
2425bad_open:
2426 kfree(o);
2427bad_alloc:
2428 return r;
1da177e4
LT
2429}
2430
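/*
 * Editor's note: the constructor above takes exactly one argument, the
 * origin device.  A typical table line (device names and sizes are
 * illustrative) looks like:
 *
 *   0 2097152 snapshot-origin /dev/vg0/base
 *
 * while the companion "snapshot" target, per the kernel's snapshot
 * documentation, takes <origin> <COW device> <persistent?> <chunksize>,
 * e.g.:
 *
 *   0 2097152 snapshot /dev/vg0/base /dev/vg0/snap P 16
 */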
2431static void origin_dtr(struct dm_target *ti)
2432{
599cdf3b 2433 struct dm_origin *o = ti->private;
b735fede 2434
599cdf3b
MP
2435 dm_put_device(ti, o->dev);
2436 kfree(o);
1da177e4
LT
2437}
2438
7de3ee57 2439static int origin_map(struct dm_target *ti, struct bio *bio)
1da177e4 2440{
599cdf3b 2441 struct dm_origin *o = ti->private;
298eaa89 2442 unsigned available_sectors;
1da177e4 2443
74d46992 2444 bio_set_dev(bio, o->dev->bdev);
1da177e4 2445
1eff9d32 2446 if (unlikely(bio->bi_opf & REQ_PREFLUSH))
494b3ee7
MP
2447 return DM_MAPIO_REMAPPED;
2448
70246286 2449 if (bio_data_dir(bio) != WRITE)
494b3ee7
MP
2450 return DM_MAPIO_REMAPPED;
2451
298eaa89
MP
2452 available_sectors = o->split_boundary -
2453 ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1));
2454
2455 if (bio_sectors(bio) > available_sectors)
2456 dm_accept_partial_bio(bio, available_sectors);
2457
1da177e4 2458 /* Only tell snapshots if this is a write */
298eaa89 2459 return do_origin(o->dev, bio);
1da177e4
LT
2460}
2461
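/*
 * Editor's note: a worked example of the split arithmetic in origin_map()
 * above, assuming split_boundary (the origin's minimum chunk size) is a
 * power of two:
 *
 *   split_boundary = 16, bio->bi_iter.bi_sector = 100, bio_sectors = 24
 *
 *   offset into chunk = 100 & 15 = 4
 *   available_sectors = 16 - 4   = 12
 *
 * 24 > 12, so dm_accept_partial_bio() trims this bio to 12 sectors and
 * the remainder is issued again by the device-mapper core, keeping each
 * write within a single chunk of every snapshot on the origin.
 */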
1da177e4 2462/*
542f9038 2463 * Set the target "max_io_len" field to the minimum of all the snapshots'
1da177e4
LT
2464 * chunk sizes.
2465 */
2466static void origin_resume(struct dm_target *ti)
2467{
599cdf3b 2468 struct dm_origin *o = ti->private;
1da177e4 2469
298eaa89 2470 o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
b735fede
MP
2471
2472 down_write(&_origins_lock);
2473 __insert_dm_origin(o);
2474 up_write(&_origins_lock);
2475}
2476
2477static void origin_postsuspend(struct dm_target *ti)
2478{
2479 struct dm_origin *o = ti->private;
2480
2481 down_write(&_origins_lock);
2482 __remove_dm_origin(o);
2483 up_write(&_origins_lock);
1da177e4
LT
2484}
2485
fd7c092e
MP
2486static void origin_status(struct dm_target *ti, status_type_t type,
2487 unsigned status_flags, char *result, unsigned maxlen)
1da177e4 2488{
599cdf3b 2489 struct dm_origin *o = ti->private;
1da177e4
LT
2490
2491 switch (type) {
2492 case STATUSTYPE_INFO:
2493 result[0] = '\0';
2494 break;
2495
2496 case STATUSTYPE_TABLE:
599cdf3b 2497 snprintf(result, maxlen, "%s", o->dev->name);
1da177e4
LT
2498 break;
2499 }
1da177e4
LT
2500}
2501
8811f46c
MS
2502static int origin_iterate_devices(struct dm_target *ti,
2503 iterate_devices_callout_fn fn, void *data)
2504{
599cdf3b 2505 struct dm_origin *o = ti->private;
8811f46c 2506
599cdf3b 2507 return fn(ti, o->dev, 0, ti->len, data);
8811f46c
MS
2508}
2509
1da177e4
LT
2510static struct target_type origin_target = {
2511 .name = "snapshot-origin",
b735fede 2512 .version = {1, 9, 0},
1da177e4
LT
2513 .module = THIS_MODULE,
2514 .ctr = origin_ctr,
2515 .dtr = origin_dtr,
2516 .map = origin_map,
2517 .resume = origin_resume,
b735fede 2518 .postsuspend = origin_postsuspend,
1da177e4 2519 .status = origin_status,
8811f46c 2520 .iterate_devices = origin_iterate_devices,
1da177e4
LT
2521};
2522
2523static struct target_type snapshot_target = {
2524 .name = "snapshot",
b0d3cc01 2525 .version = {1, 15, 0},
1da177e4
LT
2526 .module = THIS_MODULE,
2527 .ctr = snapshot_ctr,
2528 .dtr = snapshot_dtr,
2529 .map = snapshot_map,
cd45daff 2530 .end_io = snapshot_end_io,
c1f0c183 2531 .preresume = snapshot_preresume,
1da177e4
LT
2532 .resume = snapshot_resume,
2533 .status = snapshot_status,
8811f46c 2534 .iterate_devices = snapshot_iterate_devices,
1da177e4
LT
2535};
2536
d698aa45
MP
2537static struct target_type merge_target = {
2538 .name = dm_snapshot_merge_target_name,
b0d3cc01 2539 .version = {1, 4, 0},
d698aa45
MP
2540 .module = THIS_MODULE,
2541 .ctr = snapshot_ctr,
2542 .dtr = snapshot_dtr,
3452c2a1 2543 .map = snapshot_merge_map,
d698aa45 2544 .end_io = snapshot_end_io,
1e03f97e 2545 .presuspend = snapshot_merge_presuspend,
d698aa45 2546 .preresume = snapshot_preresume,
1e03f97e 2547 .resume = snapshot_merge_resume,
d698aa45
MP
2548 .status = snapshot_status,
2549 .iterate_devices = snapshot_iterate_devices,
2550};
2551
1da177e4
LT
2552static int __init dm_snapshot_init(void)
2553{
2554 int r;
2555
4db6bfe0
AK
2556 r = dm_exception_store_init();
2557 if (r) {
2558 DMERR("Failed to initialize exception stores");
2559 return r;
2560 }
2561
1da177e4
LT
2562 r = init_origin_hash();
2563 if (r) {
2564 DMERR("init_origin_hash failed.");
d698aa45 2565 goto bad_origin_hash;
1da177e4
LT
2566 }
2567
1d4989c8 2568 exception_cache = KMEM_CACHE(dm_exception, 0);
1da177e4
LT
2569 if (!exception_cache) {
2570 DMERR("Couldn't create exception cache.");
2571 r = -ENOMEM;
d698aa45 2572 goto bad_exception_cache;
1da177e4
LT
2573 }
2574
028867ac 2575 pending_cache = KMEM_CACHE(dm_snap_pending_exception, 0);
1da177e4
LT
2576 if (!pending_cache) {
2577 DMERR("Couldn't create pending cache.");
2578 r = -ENOMEM;
d698aa45 2579 goto bad_pending_cache;
1da177e4
LT
2580 }
2581
7e6358d2 2582 r = dm_register_target(&snapshot_target);
2583 if (r < 0) {
2584 DMERR("snapshot target register failed %d", r);
2585 goto bad_register_snapshot_target;
2586 }
2587
2588 r = dm_register_target(&origin_target);
2589 if (r < 0) {
2590 DMERR("Origin target register failed %d", r);
2591 goto bad_register_origin_target;
2592 }
2593
2594 r = dm_register_target(&merge_target);
2595 if (r < 0) {
2596 DMERR("Merge target register failed %d", r);
2597 goto bad_register_merge_target;
2598 }
2599
1da177e4
LT
2600 return 0;
2601
d698aa45 2602bad_register_merge_target:
1da177e4 2603 dm_unregister_target(&origin_target);
d698aa45 2604bad_register_origin_target:
1da177e4 2605 dm_unregister_target(&snapshot_target);
034a186d 2606bad_register_snapshot_target:
7e6358d2 2607 kmem_cache_destroy(pending_cache);
2608bad_pending_cache:
2609 kmem_cache_destroy(exception_cache);
2610bad_exception_cache:
2611 exit_origin_hash();
2612bad_origin_hash:
034a186d 2613 dm_exception_store_exit();
d698aa45 2614
1da177e4
LT
2615 return r;
2616}
2617
2618static void __exit dm_snapshot_exit(void)
2619{
10d3bd09
MP
2620 dm_unregister_target(&snapshot_target);
2621 dm_unregister_target(&origin_target);
d698aa45 2622 dm_unregister_target(&merge_target);
1da177e4
LT
2623
2624 exit_origin_hash();
1da177e4
LT
2625 kmem_cache_destroy(pending_cache);
2626 kmem_cache_destroy(exception_cache);
4db6bfe0
AK
2627
2628 dm_exception_store_exit();
1da177e4
LT
2629}
2630
2631/* Module hooks */
2632module_init(dm_snapshot_init);
2633module_exit(dm_snapshot_exit);
2634
2635MODULE_DESCRIPTION(DM_NAME " snapshot target");
2636MODULE_AUTHOR("Joe Thornber");
2637MODULE_LICENSE("GPL");
23cb2109
MP
2638MODULE_ALIAS("dm-snapshot-origin");
2639MODULE_ALIAS("dm-snapshot-merge");