Commit | Line | Data |
---|---|---|
9888c340 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
a542ad1b JS |
2 | /* |
3 | * Copyright (C) 2011 STRATO. All rights reserved. | |
a542ad1b JS |
4 | */ |
5 | ||
9888c340 DS |
6 | #ifndef BTRFS_BACKREF_H |
7 | #define BTRFS_BACKREF_H | |
a542ad1b | 8 | |
55e301fd | 9 | #include <linux/btrfs.h> |
9b569ea0 | 10 | #include "messages.h" |
8da6d581 | 11 | #include "ulist.h" |
741188d3 | 12 | #include "disk-io.h" |
91cb916c | 13 | #include "extent_io.h" |
a542ad1b JS |
14 | |
15 | struct inode_fs_paths { | |
16 | struct btrfs_path *btrfs_path; | |
17 | struct btrfs_root *fs_root; | |
18 | struct btrfs_data_container *fspath; | |
19 | }; | |
20 | ||
12a824dc FM |
21 | struct btrfs_backref_shared_cache_entry { |
22 | u64 bytenr; | |
23 | u64 gen; | |
24 | bool is_shared; | |
25 | }; | |
26 | ||
73e339e6 FM |
27 | #define BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE 8 |
28 | ||
61dbb952 | 29 | struct btrfs_backref_share_check_ctx { |
84a7949d FM |
30 | /* Ulists used during backref walking. */ |
31 | struct ulist refs; | |
877c1476 FM |
32 | /* |
33 | * The current leaf the caller of btrfs_is_data_extent_shared() is at. | |
34 | * Typically the caller (at the moment only fiemap) tries to determine | |
35 | * the sharedness of data extents point by file extent items from entire | |
36 | * leaves. | |
37 | */ | |
38 | u64 curr_leaf_bytenr; | |
39 | /* | |
40 | * The previous leaf the caller was at in the previous call to | |
41 | * btrfs_is_data_extent_shared(). This may be the same as the current | |
42 | * leaf. On the first call it must be 0. | |
43 | */ | |
44 | u64 prev_leaf_bytenr; | |
12a824dc FM |
45 | /* |
46 | * A path from a root to a leaf that has a file extent item pointing to | |
47 | * a given data extent should never exceed the maximum b+tree height. | |
48 | */ | |
61dbb952 FM |
49 | struct btrfs_backref_shared_cache_entry path_cache_entries[BTRFS_MAX_LEVEL]; |
50 | bool use_path_cache; | |
73e339e6 FM |
51 | /* |
52 | * Cache the sharedness result for the last few extents we have found, | |
53 | * but only for extents for which we have multiple file extent items | |
54 | * that point to them. | |
55 | * It's very common to have several file extent items that point to the | |
56 | * same extent (bytenr) but with different offsets and lengths. This | |
57 | * typically happens for COW writes, partial writes into prealloc | |
58 | * extents, NOCOW writes after snapshoting a root, hole punching or | |
59 | * reflinking within the same file (less common perhaps). | |
60 | * So keep a small cache with the lookup results for the extent pointed | |
61 | * by the last few file extent items. This cache is checked, with a | |
62 | * linear scan, whenever btrfs_is_data_extent_shared() is called, so | |
63 | * it must be small so that it does not negatively affect performance in | |
64 | * case we don't have multiple file extent items that point to the same | |
65 | * data extent. | |
66 | */ | |
67 | struct { | |
68 | u64 bytenr; | |
69 | bool is_shared; | |
70 | } prev_extents_cache[BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE]; | |
71 | /* | |
72 | * The slot in the prev_extents_cache array that will be used for | |
73 | * storing the sharedness result of a new data extent. | |
74 | */ | |
75 | int prev_extents_cache_slot; | |
12a824dc FM |
76 | }; |
77 | ||
a542ad1b JS |
78 | typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, |
79 | void *ctx); | |
a542ad1b | 80 | |
84a7949d FM |
81 | struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void); |
82 | void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx); | |
83 | ||
a542ad1b | 84 | int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, |
69917e43 LB |
85 | struct btrfs_path *path, struct btrfs_key *found_key, |
86 | u64 *flags); | |
a542ad1b JS |
87 | |
88 | int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, | |
6eda71d0 LB |
89 | struct btrfs_key *key, struct btrfs_extent_item *ei, |
90 | u32 item_size, u64 *out_root, u8 *out_level); | |
a542ad1b JS |
91 | |
92 | int iterate_extent_inodes(struct btrfs_fs_info *fs_info, | |
a542ad1b | 93 | u64 extent_item_objectid, |
7a3ae2f8 | 94 | u64 extent_offset, int search_commit_root, |
c995ab3c ZB |
95 | iterate_extent_inodes_t *iterate, void *ctx, |
96 | bool ignore_offset); | |
a542ad1b JS |
97 | |
98 | int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, | |
e3059ec0 | 99 | struct btrfs_path *path, void *ctx, |
c995ab3c | 100 | bool ignore_offset); |
a542ad1b JS |
101 | |
102 | int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); | |
103 | ||
19b546d7 QW |
104 | int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, |
105 | struct btrfs_fs_info *fs_info, u64 bytenr, | |
106 | u64 time_seq, struct ulist **leafs, | |
107 | const u64 *extent_item_pos, bool ignore_offset); | |
8da6d581 | 108 | int btrfs_find_all_roots(struct btrfs_trans_handle *trans, |
fcebe456 | 109 | struct btrfs_fs_info *fs_info, u64 bytenr, |
c7bcbb21 | 110 | u64 time_seq, struct ulist **roots, |
8949b9a1 | 111 | bool skip_commit_root_sem); |
96b5bd77 JS |
112 | char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, |
113 | u32 name_len, unsigned long name_off, | |
114 | struct extent_buffer *eb_in, u64 parent, | |
115 | char *dest, u32 size); | |
8da6d581 | 116 | |
a542ad1b JS |
117 | struct btrfs_data_container *init_data_container(u32 total_bytes); |
118 | struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, | |
119 | struct btrfs_path *path); | |
120 | void free_ipath(struct inode_fs_paths *ipath); | |
121 | ||
f186373f MF |
122 | int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, |
123 | u64 start_off, struct btrfs_path *path, | |
124 | struct btrfs_inode_extref **ret_extref, | |
125 | u64 *found_off); | |
ceb707da | 126 | int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, |
b8f164e3 | 127 | u64 extent_gen, |
61dbb952 | 128 | struct btrfs_backref_share_check_ctx *ctx); |
f186373f | 129 | |
b9e9a6cb | 130 | int __init btrfs_prelim_ref_init(void); |
e67c718b | 131 | void __cold btrfs_prelim_ref_exit(void); |
00142756 JM |
132 | |
133 | struct prelim_ref { | |
134 | struct rb_node rbnode; | |
135 | u64 root_id; | |
136 | struct btrfs_key key_for_search; | |
137 | int level; | |
138 | int count; | |
139 | struct extent_inode_elem *inode_list; | |
140 | u64 parent; | |
141 | u64 wanted_disk_byte; | |
142 | }; | |
143 | ||
a37f232b QW |
144 | /* |
145 | * Iterate backrefs of one extent. | |
146 | * | |
147 | * Now it only supports iteration of tree block in commit root. | |
148 | */ | |
149 | struct btrfs_backref_iter { | |
150 | u64 bytenr; | |
151 | struct btrfs_path *path; | |
152 | struct btrfs_fs_info *fs_info; | |
153 | struct btrfs_key cur_key; | |
154 | u32 item_ptr; | |
155 | u32 cur_ptr; | |
156 | u32 end_ptr; | |
157 | }; | |
158 | ||
159 | struct btrfs_backref_iter *btrfs_backref_iter_alloc( | |
160 | struct btrfs_fs_info *fs_info, gfp_t gfp_flag); | |
161 | ||
162 | static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter) | |
163 | { | |
164 | if (!iter) | |
165 | return; | |
166 | btrfs_free_path(iter->path); | |
167 | kfree(iter); | |
168 | } | |
169 | ||
c39c2ddc QW |
170 | static inline struct extent_buffer *btrfs_backref_get_eb( |
171 | struct btrfs_backref_iter *iter) | |
172 | { | |
173 | if (!iter) | |
174 | return NULL; | |
175 | return iter->path->nodes[0]; | |
176 | } | |
177 | ||
178 | /* | |
179 | * For metadata with EXTENT_ITEM key (non-skinny) case, the first inline data | |
180 | * is btrfs_tree_block_info, without a btrfs_extent_inline_ref header. | |
181 | * | |
182 | * This helper determines if that's the case. | |
183 | */ | |
184 | static inline bool btrfs_backref_has_tree_block_info( | |
185 | struct btrfs_backref_iter *iter) | |
186 | { | |
187 | if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY && | |
188 | iter->cur_ptr - iter->item_ptr == sizeof(struct btrfs_extent_item)) | |
189 | return true; | |
190 | return false; | |
191 | } | |
192 | ||
a37f232b QW |
193 | int btrfs_backref_iter_start(struct btrfs_backref_iter *iter, u64 bytenr); |
194 | ||
c39c2ddc QW |
195 | int btrfs_backref_iter_next(struct btrfs_backref_iter *iter); |
196 | ||
197 | static inline bool btrfs_backref_iter_is_inline_ref( | |
198 | struct btrfs_backref_iter *iter) | |
199 | { | |
200 | if (iter->cur_key.type == BTRFS_EXTENT_ITEM_KEY || | |
201 | iter->cur_key.type == BTRFS_METADATA_ITEM_KEY) | |
202 | return true; | |
203 | return false; | |
204 | } | |
205 | ||
a37f232b QW |
206 | static inline void btrfs_backref_iter_release(struct btrfs_backref_iter *iter) |
207 | { | |
208 | iter->bytenr = 0; | |
209 | iter->item_ptr = 0; | |
210 | iter->cur_ptr = 0; | |
211 | iter->end_ptr = 0; | |
212 | btrfs_release_path(iter->path); | |
213 | memset(&iter->cur_key, 0, sizeof(iter->cur_key)); | |
214 | } | |
215 | ||
70535441 QW |
216 | /* |
217 | * Backref cache related structures | |
218 | * | |
219 | * The whole objective of backref_cache is to build a bi-directional map | |
220 | * of tree blocks (represented by backref_node) and all their parents. | |
221 | */ | |
222 | ||
223 | /* | |
224 | * Represent a tree block in the backref cache | |
225 | */ | |
226 | struct btrfs_backref_node { | |
e9a28dc5 QW |
227 | struct { |
228 | struct rb_node rb_node; | |
229 | u64 bytenr; | |
230 | }; /* Use rb_simple_node for search/insert */ | |
70535441 QW |
231 | |
232 | u64 new_bytenr; | |
233 | /* Objectid of tree block owner, can be not uptodate */ | |
234 | u64 owner; | |
235 | /* Link to pending, changed or detached list */ | |
236 | struct list_head list; | |
237 | ||
238 | /* List of upper level edges, which link this node to its parents */ | |
239 | struct list_head upper; | |
240 | /* List of lower level edges, which link this node to its children */ | |
241 | struct list_head lower; | |
242 | ||
243 | /* NULL if this node is not tree root */ | |
244 | struct btrfs_root *root; | |
245 | /* Extent buffer got by COWing the block */ | |
246 | struct extent_buffer *eb; | |
247 | /* Level of the tree block */ | |
248 | unsigned int level:8; | |
92a7cc42 | 249 | /* Is the block in a non-shareable tree */ |
70535441 QW |
250 | unsigned int cowonly:1; |
251 | /* 1 if no child node is in the cache */ | |
252 | unsigned int lowest:1; | |
253 | /* Is the extent buffer locked */ | |
254 | unsigned int locked:1; | |
255 | /* Has the block been processed */ | |
256 | unsigned int processed:1; | |
257 | /* Have backrefs of this block been checked */ | |
258 | unsigned int checked:1; | |
259 | /* | |
260 | * 1 if corresponding block has been COWed but some upper level block | |
261 | * pointers may not point to the new location | |
262 | */ | |
263 | unsigned int pending:1; | |
264 | /* 1 if the backref node isn't connected to any other backref node */ | |
265 | unsigned int detached:1; | |
266 | ||
267 | /* | |
268 | * For generic purpose backref cache, where we only care if it's a reloc | |
269 | * root, doesn't care the source subvolid. | |
270 | */ | |
271 | unsigned int is_reloc_root:1; | |
272 | }; | |
273 | ||
274 | #define LOWER 0 | |
275 | #define UPPER 1 | |
276 | ||
277 | /* | |
278 | * Represent an edge connecting upper and lower backref nodes. | |
279 | */ | |
280 | struct btrfs_backref_edge { | |
281 | /* | |
282 | * list[LOWER] is linked to btrfs_backref_node::upper of lower level | |
283 | * node, and list[UPPER] is linked to btrfs_backref_node::lower of | |
284 | * upper level node. | |
285 | * | |
286 | * Also, build_backref_tree() uses list[UPPER] for pending edges, before | |
287 | * linking list[UPPER] to its upper level nodes. | |
288 | */ | |
289 | struct list_head list[2]; | |
290 | ||
291 | /* Two related nodes */ | |
292 | struct btrfs_backref_node *node[2]; | |
293 | }; | |
294 | ||
295 | struct btrfs_backref_cache { | |
296 | /* Red black tree of all backref nodes in the cache */ | |
297 | struct rb_root rb_root; | |
298 | /* For passing backref nodes to btrfs_reloc_cow_block */ | |
299 | struct btrfs_backref_node *path[BTRFS_MAX_LEVEL]; | |
300 | /* | |
301 | * List of blocks that have been COWed but some block pointers in upper | |
302 | * level blocks may not reflect the new location | |
303 | */ | |
304 | struct list_head pending[BTRFS_MAX_LEVEL]; | |
305 | /* List of backref nodes with no child node */ | |
306 | struct list_head leaves; | |
307 | /* List of blocks that have been COWed in current transaction */ | |
308 | struct list_head changed; | |
309 | /* List of detached backref node. */ | |
310 | struct list_head detached; | |
311 | ||
312 | u64 last_trans; | |
313 | ||
314 | int nr_nodes; | |
315 | int nr_edges; | |
316 | ||
317 | /* List of unchecked backref edges during backref cache build */ | |
318 | struct list_head pending_edge; | |
319 | ||
320 | /* List of useless backref nodes during backref cache build */ | |
321 | struct list_head useless_node; | |
322 | ||
323 | struct btrfs_fs_info *fs_info; | |
324 | ||
325 | /* | |
326 | * Whether this cache is for relocation | |
327 | * | |
328 | * Reloction backref cache require more info for reloc root compared | |
329 | * to generic backref cache. | |
330 | */ | |
331 | unsigned int is_reloc; | |
332 | }; | |
333 | ||
584fb121 QW |
334 | void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info, |
335 | struct btrfs_backref_cache *cache, int is_reloc); | |
b1818dab QW |
336 | struct btrfs_backref_node *btrfs_backref_alloc_node( |
337 | struct btrfs_backref_cache *cache, u64 bytenr, int level); | |
47254d07 QW |
338 | struct btrfs_backref_edge *btrfs_backref_alloc_edge( |
339 | struct btrfs_backref_cache *cache); | |
584fb121 | 340 | |
f39911e5 QW |
341 | #define LINK_LOWER (1 << 0) |
342 | #define LINK_UPPER (1 << 1) | |
343 | static inline void btrfs_backref_link_edge(struct btrfs_backref_edge *edge, | |
344 | struct btrfs_backref_node *lower, | |
345 | struct btrfs_backref_node *upper, | |
346 | int link_which) | |
347 | { | |
348 | ASSERT(upper && lower && upper->level == lower->level + 1); | |
349 | edge->node[LOWER] = lower; | |
350 | edge->node[UPPER] = upper; | |
351 | if (link_which & LINK_LOWER) | |
352 | list_add_tail(&edge->list[LOWER], &lower->upper); | |
353 | if (link_which & LINK_UPPER) | |
354 | list_add_tail(&edge->list[UPPER], &upper->lower); | |
355 | } | |
356 | ||
741188d3 QW |
357 | static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache, |
358 | struct btrfs_backref_node *node) | |
359 | { | |
360 | if (node) { | |
eddda68d JB |
361 | ASSERT(list_empty(&node->list)); |
362 | ASSERT(list_empty(&node->lower)); | |
363 | ASSERT(node->eb == NULL); | |
741188d3 QW |
364 | cache->nr_nodes--; |
365 | btrfs_put_root(node->root); | |
366 | kfree(node); | |
367 | } | |
368 | } | |
369 | ||
370 | static inline void btrfs_backref_free_edge(struct btrfs_backref_cache *cache, | |
371 | struct btrfs_backref_edge *edge) | |
372 | { | |
373 | if (edge) { | |
374 | cache->nr_edges--; | |
375 | kfree(edge); | |
376 | } | |
377 | } | |
378 | ||
b0fe7078 QW |
379 | static inline void btrfs_backref_unlock_node_buffer( |
380 | struct btrfs_backref_node *node) | |
381 | { | |
382 | if (node->locked) { | |
383 | btrfs_tree_unlock(node->eb); | |
384 | node->locked = 0; | |
385 | } | |
386 | } | |
387 | ||
388 | static inline void btrfs_backref_drop_node_buffer( | |
389 | struct btrfs_backref_node *node) | |
390 | { | |
391 | if (node->eb) { | |
392 | btrfs_backref_unlock_node_buffer(node); | |
393 | free_extent_buffer(node->eb); | |
394 | node->eb = NULL; | |
395 | } | |
396 | } | |
397 | ||
398 | /* | |
399 | * Drop the backref node from cache without cleaning up its children | |
400 | * edges. | |
401 | * | |
402 | * This can only be called on node without parent edges. | |
403 | * The children edges are still kept as is. | |
404 | */ | |
405 | static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree, | |
406 | struct btrfs_backref_node *node) | |
407 | { | |
eddda68d | 408 | ASSERT(list_empty(&node->upper)); |
b0fe7078 QW |
409 | |
410 | btrfs_backref_drop_node_buffer(node); | |
eddda68d JB |
411 | list_del_init(&node->list); |
412 | list_del_init(&node->lower); | |
b0fe7078 QW |
413 | if (!RB_EMPTY_NODE(&node->rb_node)) |
414 | rb_erase(&node->rb_node, &tree->rb_root); | |
415 | btrfs_backref_free_node(tree, node); | |
416 | } | |
417 | ||
023acb07 QW |
418 | void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, |
419 | struct btrfs_backref_node *node); | |
420 | ||
13fe1bdb QW |
421 | void btrfs_backref_release_cache(struct btrfs_backref_cache *cache); |
422 | ||
982c92cb QW |
423 | static inline void btrfs_backref_panic(struct btrfs_fs_info *fs_info, |
424 | u64 bytenr, int errno) | |
425 | { | |
426 | btrfs_panic(fs_info, errno, | |
427 | "Inconsistency in backref cache found at offset %llu", | |
428 | bytenr); | |
429 | } | |
430 | ||
1b60d2ec QW |
431 | int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache, |
432 | struct btrfs_path *path, | |
433 | struct btrfs_backref_iter *iter, | |
434 | struct btrfs_key *node_key, | |
435 | struct btrfs_backref_node *cur); | |
436 | ||
fc997ed0 QW |
437 | int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache, |
438 | struct btrfs_backref_node *start); | |
439 | ||
1b23ea18 QW |
440 | void btrfs_backref_error_cleanup(struct btrfs_backref_cache *cache, |
441 | struct btrfs_backref_node *node); | |
442 | ||
a542ad1b | 443 | #endif |