Commit | Line | Data |
---|---|---|
3bd94003 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
3241b1d3 JT |
2 | /* |
3 | * Copyright (C) 2011 Red Hat, Inc. | |
4 | * | |
5 | * This file is released under the GPL. | |
6 | */ | |
7 | #ifndef _LINUX_DM_BTREE_H | |
8 | #define _LINUX_DM_BTREE_H | |
9 | ||
10 | #include "dm-block-manager.h" | |
11 | ||
12 | struct dm_transaction_manager; | |
13 | ||
14 | /*----------------------------------------------------------------*/ | |
15 | ||
16 | /* | |
17 | * Annotations used to check on-disk metadata is handled as little-endian. | |
18 | */ | |
19 | #ifdef __CHECKER__ | |
20 | # define __dm_written_to_disk(x) __releases(x) | |
21 | # define __dm_reads_from_disk(x) __acquires(x) | |
22 | # define __dm_bless_for_disk(x) __acquire(x) | |
23 | # define __dm_unbless_for_disk(x) __release(x) | |
24 | #else | |
25 | # define __dm_written_to_disk(x) | |
26 | # define __dm_reads_from_disk(x) | |
27 | # define __dm_bless_for_disk(x) | |
28 | # define __dm_unbless_for_disk(x) | |
29 | #endif | |
30 | ||
31 | /*----------------------------------------------------------------*/ | |
32 | ||
33 | /* | |
34 | * Manipulates hierarchical B+ trees with 64-bit keys and arbitrary-sized | |
35 | * values. | |
36 | */ | |
37 | ||
38 | /* | |
83f0d77a | 39 | * Information about the values stored within the btree. |
3241b1d3 JT |
40 | */ |
41 | struct dm_btree_value_type { | |
42 | void *context; | |
43 | ||
44 | /* | |
45 | * The size in bytes of each value. | |
46 | */ | |
47 | uint32_t size; | |
48 | ||
49 | /* | |
50 | * Any of these methods can be safely set to NULL if you do not | |
51 | * need the corresponding feature. | |
52 | */ | |
53 | ||
54 | /* | |
be500ed7 | 55 | * The btree is making a duplicate of a run of values, for instance |
3241b1d3 JT |
56 | * because previously-shared btree nodes have now diverged. |
57 | * @value argument is the new copy that the copy function may modify. | |
58 | * (Probably it just wants to increment a reference count | |
59 | * somewhere.) This method is _not_ called for insertion of a new | |
60 | * value: It is assumed the ref count is already 1. | |
61 | */ | |
86a3238c | 62 | void (*inc)(void *context, const void *value, unsigned int count); |
3241b1d3 JT |
63 | |
64 | /* | |
be500ed7 | 65 | * These values are being deleted. The btree takes care of freeing |
3241b1d3 | 66 | * the memory pointed to by @value. Often the del function just |
be500ed7 | 67 | * needs to decrement a reference counts somewhere. |
3241b1d3 | 68 | */ |
86a3238c | 69 | void (*dec)(void *context, const void *value, unsigned int count); |
3241b1d3 JT |
70 | |
71 | /* | |
72 | * A test for equality between two values. When a value is | |
73 | * overwritten with a new one, the old one has the dec method | |
74 | * called _unless_ the new and old value are deemed equal. | |
75 | */ | |
018cede9 | 76 | int (*equal)(void *context, const void *value1, const void *value2); |
3241b1d3 JT |
77 | }; |
78 | ||
79 | /* | |
80 | * The shape and contents of a btree. | |
81 | */ | |
82 | struct dm_btree_info { | |
83 | struct dm_transaction_manager *tm; | |
84 | ||
85 | /* | |
86 | * Number of nested btrees. (Not the depth of a single tree.) | |
87 | */ | |
86a3238c | 88 | unsigned int levels; |
3241b1d3 JT |
89 | struct dm_btree_value_type value_type; |
90 | }; | |
91 | ||
92 | /* | |
93 | * Set up an empty tree. O(1). | |
94 | */ | |
95 | int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root); | |
96 | ||
97 | /* | |
98 | * Delete a tree. O(n) - this is the slow one! It can also block, so | |
99 | * please don't call it on an IO path. | |
100 | */ | |
101 | int dm_btree_del(struct dm_btree_info *info, dm_block_t root); | |
102 | ||
103 | /* | |
104 | * All the lookup functions return -ENODATA if the key cannot be found. | |
105 | */ | |
106 | ||
107 | /* | |
108 | * Tries to find a key that matches exactly. O(ln(n)) | |
109 | */ | |
110 | int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, | |
111 | uint64_t *keys, void *value_le); | |
112 | ||
993ceab9 JT |
113 | /* |
114 | * Tries to find the first key where the bottom level key is >= to that | |
115 | * given. Useful for skipping empty sections of the btree. | |
116 | */ | |
117 | int dm_btree_lookup_next(struct dm_btree_info *info, dm_block_t root, | |
118 | uint64_t *keys, uint64_t *rkey, void *value_le); | |
119 | ||
3241b1d3 JT |
120 | /* |
121 | * Insertion (or overwrite an existing value). O(ln(n)) | |
122 | */ | |
123 | int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, | |
124 | uint64_t *keys, void *value, dm_block_t *new_root) | |
125 | __dm_written_to_disk(value); | |
126 | ||
127 | /* | |
128 | * A variant of insert that indicates whether it actually inserted or just | |
129 | * overwrote. Useful if you're keeping track of the number of entries in a | |
130 | * tree. | |
131 | */ | |
132 | int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, | |
133 | uint64_t *keys, void *value, dm_block_t *new_root, | |
134 | int *inserted) | |
135 | __dm_written_to_disk(value); | |
136 | ||
137 | /* | |
138 | * Remove a key if present. This doesn't remove empty sub trees. Normally | |
139 | * subtrees represent a separate entity, like a snapshot map, so this is | |
140 | * correct behaviour. O(ln(n)). | |
141 | */ | |
142 | int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, | |
143 | uint64_t *keys, dm_block_t *new_root); | |
144 | ||
4ec331c3 | 145 | /* |
993ceab9 JT |
146 | * Removes a _contiguous_ run of values starting from 'keys' and not |
147 | * reaching keys2 (where keys2 is keys with the final key replaced with | |
148 | * 'end_key'). 'end_key' is the one-past-the-end value. 'keys' may be | |
149 | * altered. | |
4ec331c3 JT |
150 | */ |
151 | int dm_btree_remove_leaves(struct dm_btree_info *info, dm_block_t root, | |
152 | uint64_t *keys, uint64_t end_key, | |
86a3238c | 153 | dm_block_t *new_root, unsigned int *nr_removed); |
4ec331c3 | 154 | |
f164e690 JT |
155 | /* |
156 | * Returns < 0 on failure. Otherwise the number of key entries that have | |
157 | * been filled out. Remember trees can have zero entries, and as such have | |
158 | * no lowest key. | |
159 | */ | |
160 | int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, | |
161 | uint64_t *result_keys); | |
162 | ||
3241b1d3 JT |
163 | /* |
164 | * Returns < 0 on failure. Otherwise the number of key entries that have | |
165 | * been filled out. Remember trees can have zero entries, and as such have | |
166 | * no highest key. | |
167 | */ | |
168 | int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, | |
169 | uint64_t *result_keys); | |
170 | ||
4e7f1f90 JT |
171 | /* |
172 | * Iterate through the a btree, calling fn() on each entry. | |
173 | * It only works for single level trees and is internally recursive, so | |
174 | * monitor stack usage carefully. | |
175 | */ | |
176 | int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, | |
177 | int (*fn)(void *context, uint64_t *keys, void *leaf), | |
178 | void *context); | |
179 | ||
7d111c81 JT |
180 | |
181 | /*----------------------------------------------------------------*/ | |
182 | ||
183 | /* | |
184 | * Cursor API. This does not follow the rolling lock convention. Since we | |
185 | * know the order that values are required we can issue prefetches to speed | |
186 | * up iteration. Use on a single level btree only. | |
187 | */ | |
188 | #define DM_BTREE_CURSOR_MAX_DEPTH 16 | |
189 | ||
190 | struct cursor_node { | |
191 | struct dm_block *b; | |
86a3238c | 192 | unsigned int index; |
7d111c81 JT |
193 | }; |
194 | ||
195 | struct dm_btree_cursor { | |
196 | struct dm_btree_info *info; | |
197 | dm_block_t root; | |
198 | ||
199 | bool prefetch_leaves; | |
86a3238c | 200 | unsigned int depth; |
7d111c81 JT |
201 | struct cursor_node nodes[DM_BTREE_CURSOR_MAX_DEPTH]; |
202 | }; | |
203 | ||
204 | /* | |
205 | * Creates a fresh cursor. If prefetch_leaves is set then it is assumed | |
206 | * the btree contains block indexes that will be prefetched. The cursor is | |
207 | * quite large, so you probably don't want to put it on the stack. | |
208 | */ | |
209 | int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root, | |
210 | bool prefetch_leaves, struct dm_btree_cursor *c); | |
211 | void dm_btree_cursor_end(struct dm_btree_cursor *c); | |
212 | int dm_btree_cursor_next(struct dm_btree_cursor *c); | |
9b696229 | 213 | int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count); |
7d111c81 JT |
214 | int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le); |
215 | ||
3241b1d3 | 216 | #endif /* _LINUX_DM_BTREE_H */ |