Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
199a31c6 CH |
2 | #ifndef LINUX_IOMAP_H |
3 | #define LINUX_IOMAP_H 1 | |
4 | ||
9dc55f13 CH |
5 | #include <linux/atomic.h> |
6 | #include <linux/bitmap.h> | |
598ecfba | 7 | #include <linux/blk_types.h> |
9dc55f13 | 8 | #include <linux/mm.h> |
199a31c6 | 9 | #include <linux/types.h> |
5780a02f | 10 | #include <linux/mm_types.h> |
db074436 | 11 | #include <linux/blkdev.h> |
199a31c6 | 12 | |
89eb1906 | 13 | struct address_space; |
8be9f564 | 14 | struct fiemap_extent_info; |
ae259a9c | 15 | struct inode; |
9060bc4d | 16 | struct iomap_iter; |
c3d4ed1a | 17 | struct iomap_dio; |
598ecfba | 18 | struct iomap_writepage_ctx; |
ae259a9c CH |
19 | struct iov_iter; |
20 | struct kiocb; | |
63899c6f | 21 | struct page; |
ae259a9c CH |
22 | struct vm_area_struct; |
23 | struct vm_fault; | |
24 | ||
25 | /* | |
26 | * Types of block ranges for iomap mappings: | |
27 | */ | |
eb81cf9d CH |
28 | #define IOMAP_HOLE 0 /* no blocks allocated, need allocation */ |
29 | #define IOMAP_DELALLOC 1 /* delayed allocation blocks */ | |
30 | #define IOMAP_MAPPED 2 /* blocks allocated at @addr */ | |
31 | #define IOMAP_UNWRITTEN 3 /* blocks allocated at @addr in unwritten state */ | |
32 | #define IOMAP_INLINE 4 /* data inline in the inode */ | |
199a31c6 | 33 | |
17de0a9f | 34 | /* |
65a60e86 CH |
35 | * Flags reported by the file system from iomap_begin: |
36 | * | |
37 | * IOMAP_F_NEW indicates that the blocks have been newly allocated and need | |
38 | * zeroing for areas that no data is copied to. | |
a3841f94 | 39 | * |
caa51d26 JK |
40 | * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access |
41 | * written data and requires fdatasync to commit them to persistent storage. | |
7684e2c4 DC |
42 | * This needs to take into account metadata changes that *may* be made at IO |
43 | * completion, such as file size updates from direct IO. | |
65a60e86 CH |
44 | * |
45 | * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be | |
46 | * unshared as part of a write. | |
47 | * | |
48 | * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block | |
49 | * mappings. | |
50 | * | |
51 | * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of | |
52 | * buffer heads for this mapping. | |
d7b64041 DC |
53 | * |
54 | * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent | |
55 | * rather than a file data extent. | |
17de0a9f | 56 | */ |
d7b64041 DC |
57 | #define IOMAP_F_NEW (1U << 0) |
58 | #define IOMAP_F_DIRTY (1U << 1) | |
59 | #define IOMAP_F_SHARED (1U << 2) | |
60 | #define IOMAP_F_MERGED (1U << 3) | |
925c86a1 | 61 | #ifdef CONFIG_BUFFER_HEAD |
d7b64041 | 62 | #define IOMAP_F_BUFFER_HEAD (1U << 4) |
925c86a1 CH |
63 | #else |
64 | #define IOMAP_F_BUFFER_HEAD 0 | |
65 | #endif /* CONFIG_BUFFER_HEAD */ | |
8e81aa16 | 66 | #define IOMAP_F_XATTR (1U << 5) |
d33fd776 CH |
67 | |
68 | /* | |
65a60e86 CH |
69 | * Flags set by the core iomap code during operations: |
70 | * | |
71 | * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size | |
72 | * has changed as the result of this write operation. | |
d7b64041 DC |
73 | * |
74 | * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file | |
75 | * range it covers needs to be remapped by the high level before the operation | |
76 | * can proceed. | |
d33fd776 | 77 | */ |
d7b64041 DC |
78 | #define IOMAP_F_SIZE_CHANGED (1U << 8) |
79 | #define IOMAP_F_STALE (1U << 9) | |
17de0a9f | 80 | |
7ee66c03 CH |
81 | /* |
82 | * Flags from 0x1000 up are for file system specific usage: | |
83 | */ | |
d7b64041 | 84 | #define IOMAP_F_PRIVATE (1U << 12) |
7ee66c03 CH |
85 | |
86 | ||
ae259a9c | 87 | /* |
19fe5f64 | 88 | * Magic value for addr: |
ae259a9c | 89 | */ |
19fe5f64 | 90 | #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ |
199a31c6 | 91 | |
471859f5 | 92 | struct iomap_folio_ops; |
df0db3ec | 93 | |
199a31c6 | 94 | struct iomap { |
19fe5f64 | 95 | u64 addr; /* disk offset of mapping, bytes */ |
ae259a9c CH |
96 | loff_t offset; /* file offset of mapping, bytes */ |
97 | u64 length; /* length of mapping, bytes */ | |
17de0a9f CH |
98 | u16 type; /* type of mapping */ |
99 | u16 flags; /* flags for mapping */ | |
ae259a9c | 100 | struct block_device *bdev; /* block device for I/O */ |
fa5d932c | 101 | struct dax_device *dax_dev; /* dax_dev for dax operations */ |
19e0c58f | 102 | void *inline_data; |
e184fde6 | 103 | void *private; /* filesystem private */ |
471859f5 | 104 | const struct iomap_folio_ops *folio_ops; |
d7b64041 | 105 | u64 validity_cookie; /* used with .iomap_valid() */ |
df0db3ec | 106 | }; |
63899c6f | 107 | |
66b8165e | 108 | static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) |
db074436 DW |
109 | { |
110 | return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; | |
111 | } | |
112 | ||
69f4a26c GX |
113 | /* |
114 | * Returns the inline data pointer for logical offset @pos. | |
115 | */ | |
4495c33e | 116 | static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos) |
69f4a26c GX |
117 | { |
118 | return iomap->inline_data + pos - iomap->offset; | |
119 | } | |
120 | ||
121 | /* | |
122 | * Check if the mapping's length is within the valid range for inline data. | |
123 | * This is used to guard against accessing data beyond the page inline_data | |
124 | * points at. | |
125 | */ | |
e3c4ffb0 | 126 | static inline bool iomap_inline_data_valid(const struct iomap *iomap) |
69f4a26c GX |
127 | { |
128 | return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); | |
129 | } | |
130 | ||
df0db3ec | 131 | /* |
471859f5 | 132 | * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio |
40405ddd AG |
133 | * and put_folio will be called for each folio written to. This only applies |
134 | * to buffered writes as unbuffered writes will not typically have folios | |
df0db3ec AG |
135 | * associated with them. |
136 | * | |
c82abc23 | 137 | * When get_folio succeeds, put_folio will always be called to do any |
9060bc4d AG |
138 | * cleanup work necessary. put_folio is responsible for unlocking and putting |
139 | * @folio. | |
df0db3ec | 140 | */ |
471859f5 | 141 | struct iomap_folio_ops { |
c82abc23 | 142 | struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, |
9060bc4d | 143 | unsigned len); |
40405ddd | 144 | void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, |
80baab88 | 145 | struct folio *folio); |
d7b64041 DC |
146 | |
147 | /* | |
148 | * Check that the cached iomap still maps correctly to the filesystem's | |
149 | * internal extent map. FS internal extent maps can change while iomap | |
150 | * is iterating a cached iomap, so this hook allows iomap to detect that | |
151 | * the iomap needs to be refreshed during a long running write | |
152 | * operation. | |
153 | * | |
154 | * The filesystem can store internal state (e.g. a sequence number) in | |
155 | * iomap->validity_cookie when the iomap is first mapped to be able to | |
156 | * detect changes between mapping time and whenever .iomap_valid() is | |
157 | * called. | |
158 | * | |
159 | * This is called with the folio over the specified file position held | |
160 | * locked by the iomap code. | |
161 | */ | |
162 | bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); | |
ae259a9c CH |
163 | }; |
164 | ||
165 | /* | |
166 | * Flags for iomap_begin / iomap_end. No flag implies a read. | |
167 | */ | |
d33fd776 CH |
168 | #define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */ |
169 | #define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ | |
170 | #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ | |
9484ab1b | 171 | #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ |
ff6a9292 | 172 | #define IOMAP_DIRECT (1 << 4) /* direct I/O */ |
9ecac0ef | 173 | #define IOMAP_NOWAIT (1 << 5) /* do not block */ |
213f6271 | 174 | #define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */ |
b74b1293 | 175 | #define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */ |
952da063 CH |
176 | #ifdef CONFIG_FS_DAX |
177 | #define IOMAP_DAX (1 << 8) /* DAX mapping */ | |
178 | #else | |
179 | #define IOMAP_DAX 0 | |
180 | #endif /* CONFIG_FS_DAX */ | |
ae259a9c CH |
181 | |
182 | struct iomap_ops { | |
183 | /* | |
184 | * Return the existing mapping at pos, or reserve space starting at | |
185 | * pos for up to length, as long as we can do it as a single mapping. | |
186 | * The actual length is returned in iomap->length. | |
187 | */ | |
188 | int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, | |
c039b997 GR |
189 | unsigned flags, struct iomap *iomap, |
190 | struct iomap *srcmap); | |
ae259a9c CH |
191 | |
192 | /* | |
193 | * Commit and/or unreserve space previously allocated using iomap_begin. | |
194 | * Written indicates the length of the successful write operation which | |
195 | * needs to be committed, while the rest needs to be unreserved. | |
196 | * Written might be zero if no data was written. | |
197 | */ | |
198 | int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length, | |
199 | ssize_t written, unsigned flags, struct iomap *iomap); | |
199a31c6 CH |
200 | }; |
201 | ||
f4b896c2 CH |
202 | /** |
203 | * struct iomap_iter - Iterate through a range of a file | |
204 | * @inode: Set at the start of the iteration and should not change. | |
205 | * @pos: The current file position we are operating on. It is updated by | |
206 | * calls to iomap_iter(). Treat as read-only in the body. | |
207 | * @len: The remaining length of the file segment we're operating on. | |
208 | * It is updated at the same time as @pos. | |
209 | * @processed: The number of bytes processed by the body in the most recent | |
210 | * iteration, or a negative errno. 0 causes the iteration to stop. | |
211 | * @flags: Zero or more of the iomap_begin flags above. | |
212 | * @iomap: Map describing the I/O iteration | |
213 | * @srcmap: Source map for COW operations | |
214 | */ | |
215 | struct iomap_iter { | |
216 | struct inode *inode; | |
217 | loff_t pos; | |
218 | u64 len; | |
219 | s64 processed; | |
220 | unsigned flags; | |
221 | struct iomap iomap; | |
222 | struct iomap srcmap; | |
786f847f | 223 | void *private; |
f4b896c2 CH |
224 | }; |
225 | ||
226 | int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops); | |
227 | ||
228 | /** | |
229 | * iomap_length - length of the current iomap iteration | |
230 | * @iter: iteration structure | |
231 | * | |
232 | * Returns the length that the operation applies to for the current iteration. | |
233 | */ | |
234 | static inline u64 iomap_length(const struct iomap_iter *iter) | |
235 | { | |
236 | u64 end = iter->iomap.offset + iter->iomap.length; | |
237 | ||
238 | if (iter->srcmap.type != IOMAP_HOLE) | |
239 | end = min(end, iter->srcmap.offset + iter->srcmap.length); | |
240 | return min(iter->len, end - iter->pos); | |
241 | } | |
242 | ||
243 | /** | |
244 | * iomap_iter_srcmap - return the source map for the current iomap iteration | |
245 | * @i: iteration structure | |
246 | * | |
247 | * Write operations on file systems with reflink support might require a | |
248 | * source and a destination map. This function returns the source map | |
249 | * for a given operation, which may or may not be identical to the destination | |
250 | * map in &i->iomap. | |
251 | */ | |
fad0a1ab | 252 | static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) |
f4b896c2 CH |
253 | { |
254 | if (i->srcmap.type != IOMAP_HOLE) | |
255 | return &i->srcmap; | |
256 | return &i->iomap; | |
257 | } | |
258 | ||
ae259a9c | 259 | ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, |
31754ea6 | 260 | const struct iomap_ops *ops, void *private); |
7479c505 | 261 | int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); |
9d24a13a | 262 | void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); |
2e7e80f7 | 263 | bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); |
d6bb59a9 | 264 | struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); |
8597447d | 265 | bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); |
8306a5f5 | 266 | void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); |
4ce02c67 | 267 | bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); |
3590c4d8 | 268 | int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, |
8ff6daa1 | 269 | const struct iomap_ops *ops); |
ae259a9c | 270 | int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, |
8ff6daa1 | 271 | bool *did_zero, const struct iomap_ops *ops); |
ae259a9c | 272 | int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, |
8ff6daa1 | 273 | const struct iomap_ops *ops); |
5780a02f SJ |
274 | vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, |
275 | const struct iomap_ops *ops); | |
11596dc3 | 276 | |
4bceb9ba | 277 | typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, |
492f5375 | 278 | struct iomap *iomap); |
4bceb9ba | 279 | void iomap_file_buffered_write_punch_delalloc(struct inode *inode, loff_t pos, |
11596dc3 CH |
280 | loff_t length, ssize_t written, unsigned flag, |
281 | struct iomap *iomap, iomap_punch_t punch); | |
282 | ||
8be9f564 | 283 | int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
27328818 | 284 | u64 start, u64 len, const struct iomap_ops *ops); |
0ed3b0d4 AG |
285 | loff_t iomap_seek_hole(struct inode *inode, loff_t offset, |
286 | const struct iomap_ops *ops); | |
287 | loff_t iomap_seek_data(struct inode *inode, loff_t offset, | |
288 | const struct iomap_ops *ops); | |
89eb1906 CH |
289 | sector_t iomap_bmap(struct address_space *mapping, sector_t bno, |
290 | const struct iomap_ops *ops); | |
ae259a9c | 291 | |
598ecfba CH |
292 | /* |
293 | * Structure for writeback I/O completions. | |
294 | */ | |
295 | struct iomap_ioend { | |
296 | struct list_head io_list; /* next ioend in chain */ | |
297 | u16 io_type; | |
298 | u16 io_flags; /* IOMAP_F_* */ | |
299 | struct inode *io_inode; /* file being written to */ | |
300 | size_t io_size; /* size of the extent */ | |
301 | loff_t io_offset; /* offset in the file */ | |
ebb7fb15 | 302 | sector_t io_sector; /* start sector of ioend */ |
ae5535ef | 303 | struct bio io_bio; /* MUST BE LAST! */ |
598ecfba CH |
304 | }; |
305 | ||
ae5535ef CH |
306 | static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) |
307 | { | |
308 | return container_of(bio, struct iomap_ioend, io_bio); | |
309 | } | |
310 | ||
598ecfba CH |
311 | struct iomap_writeback_ops { |
312 | /* | |
313 | * Required, maps the blocks so that writeback can be performed on | |
314 | * the range starting at offset. | |
30deff85 CH |
315 | * |
316 | * Can return arbitrarily large regions, but we need to call into it at | |
317 | * least once per folio to allow the file systems to synchronize with | |
318 | * the write path that could be invalidating mappings. | |
319 | * | |
320 | * An existing mapping from a previous call to this method can be reused | |
321 | * by the file system if it is still valid. | |
598ecfba CH |
322 | */ |
323 | int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, | |
19871b5c | 324 | loff_t offset, unsigned len); |
598ecfba CH |
325 | |
326 | /* | |
327 | * Optional, allows the file systems to perform actions just before | |
328 | * submitting the bio and/or override the bio end_io handler for complex | |
329 | * operations like copy on write extent manipulation or unwritten extent | |
330 | * conversions. | |
331 | */ | |
332 | int (*prepare_ioend)(struct iomap_ioend *ioend, int status); | |
333 | ||
334 | /* | |
335 | * Optional, allows the file system to discard state on a page where | |
336 | * we failed to submit any I/O. | |
337 | */ | |
6e478521 | 338 | void (*discard_folio)(struct folio *folio, loff_t pos); |
598ecfba CH |
339 | }; |
340 | ||
341 | struct iomap_writepage_ctx { | |
342 | struct iomap iomap; | |
343 | struct iomap_ioend *ioend; | |
344 | const struct iomap_writeback_ops *ops; | |
432acd55 | 345 | u32 nr_folios; /* folios added to the ioend */ |
598ecfba CH |
346 | }; |
347 | ||
348 | void iomap_finish_ioends(struct iomap_ioend *ioend, int error); | |
349 | void iomap_ioend_try_merge(struct iomap_ioend *ioend, | |
6e552494 | 350 | struct list_head *more_ioends); |
598ecfba | 351 | void iomap_sort_ioends(struct list_head *ioend_list); |
598ecfba CH |
352 | int iomap_writepages(struct address_space *mapping, |
353 | struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, | |
354 | const struct iomap_writeback_ops *ops); | |
355 | ||
ff6a9292 CH |
356 | /* |
357 | * Flags for direct I/O ->end_io: | |
358 | */ | |
359 | #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ | |
360 | #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ | |
838c4f3d CH |
361 | |
362 | struct iomap_dio_ops { | |
363 | int (*end_io)(struct kiocb *iocb, ssize_t size, int error, | |
364 | unsigned flags); | |
3e08773c CH |
365 | void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, |
366 | loff_t file_offset); | |
908c5490 CH |
367 | |
368 | /* | |
369 | * Filesystems wishing to attach private information to a direct io bio | |
370 | * must provide a ->submit_io method that attaches the additional | |
371 | * information to the bio and changes the ->bi_end_io callback to a | |
372 | * custom function. This function should, at a minimum, perform any | |
373 | * relevant post-processing of the bio and end with a call to | |
374 | * iomap_dio_bio_end_io. | |
375 | */ | |
376 | struct bio_set *bio_set; | |
838c4f3d CH |
377 | }; |
378 | ||
2f632965 CH |
379 | /* |
380 | * Wait for the I/O to complete in iomap_dio_rw even if the kiocb is not | |
381 | * synchronous. | |
382 | */ | |
383 | #define IOMAP_DIO_FORCE_WAIT (1 << 0) | |
384 | ||
213f6271 CH |
385 | /* |
386 | * Do not allocate blocks or zero partial blocks, but instead fall back to | |
387 | * the caller by returning -EAGAIN. Used to optimize direct I/O writes that | |
388 | * are not aligned to the file system block size. | |
389 | */ | |
390 | #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1) | |
391 | ||
97308f8b AG |
392 | /* |
393 | * When a page fault occurs, return a partial synchronous result and allow | |
394 | * the caller to retry the rest of the operation after dealing with the page | |
395 | * fault. | |
396 | */ | |
397 | #define IOMAP_DIO_PARTIAL (1 << 2) | |
398 | ||
ff6a9292 | 399 | ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, |
13ef9544 | 400 | const struct iomap_ops *ops, const struct iomap_dio_ops *dops, |
786f847f | 401 | unsigned int dio_flags, void *private, size_t done_before); |
c3d4ed1a CH |
402 | struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, |
403 | const struct iomap_ops *ops, const struct iomap_dio_ops *dops, | |
786f847f | 404 | unsigned int dio_flags, void *private, size_t done_before); |
c3d4ed1a | 405 | ssize_t iomap_dio_complete(struct iomap_dio *dio); |
908c5490 | 406 | void iomap_dio_bio_end_io(struct bio *bio); |
ff6a9292 | 407 | |
67482129 DW |
408 | #ifdef CONFIG_SWAP |
409 | struct file; | |
410 | struct swap_info_struct; | |
411 | ||
412 | int iomap_swapfile_activate(struct swap_info_struct *sis, | |
413 | struct file *swap_file, sector_t *pagespan, | |
414 | const struct iomap_ops *ops); | |
415 | #else | |
416 | # define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO) | |
417 | #endif /* CONFIG_SWAP */ | |
418 | ||
199a31c6 | 419 | #endif /* LINUX_IOMAP_H */ |