netfs: Implement unbuffered/DIO write support
[linux-2.6-block.git] / include/linux/netfs.h
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Network filesystem support services.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * See:
 *
 *	Documentation/filesystems/netfs_library.rst
 *
 * for a description of the network filesystem interface declared here.
 */

#ifndef _LINUX_NETFS_H
#define _LINUX_NETFS_H

#include <linux/workqueue.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

enum netfs_sreq_ref_trace;

/*
 * Overload PG_private_2 to give us PG_fscache - this is used to indicate that
 * a page is currently backed by a local disk cache.
 */
#define folio_test_fscache(folio)	folio_test_private_2(folio)
#define PageFsCache(page)		PagePrivate2((page))
#define SetPageFsCache(page)		SetPagePrivate2((page))
#define ClearPageFsCache(page)		ClearPagePrivate2((page))
#define TestSetPageFsCache(page)	TestSetPagePrivate2((page))
#define TestClearPageFsCache(page)	TestClearPagePrivate2((page))

/**
 * folio_start_fscache - Start an fscache write on a folio.
 * @folio: The folio.
 *
 * Call this function before writing a folio to a local cache.  Starting a
 * second write before the first one finishes is not allowed.
 */
static inline void folio_start_fscache(struct folio *folio)
{
	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
	folio_get(folio);
	folio_set_private_2(folio);
}

/**
 * folio_end_fscache - End an fscache write on a folio.
 * @folio: The folio.
 *
 * Call this function after the folio has been written to the local cache.
 * This will wake any sleepers waiting on this folio.
 */
static inline void folio_end_fscache(struct folio *folio)
{
	folio_end_private_2(folio);
}

/**
 * folio_wait_fscache - Wait for an fscache write on this folio to end.
 * @folio: The folio.
 *
 * If this folio is currently being written to a local cache, wait for
 * the write to finish.  Another write may start after this one finishes,
 * unless the caller holds the folio lock.
 */
static inline void folio_wait_fscache(struct folio *folio)
{
	folio_wait_private_2(folio);
}

/**
 * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
 * @folio: The folio.
 *
 * If this folio is currently being written to a local cache, wait
 * for the write to finish or for a fatal signal to be received.
 * Another write may start after this one finishes, unless the caller
 * holds the folio lock.
 *
 * Return:
 * - 0 if successful.
 * - -EINTR if a fatal signal was encountered.
 */
static inline int folio_wait_fscache_killable(struct folio *folio)
{
	return folio_wait_private_2_killable(folio);
}

/* Legacy page-based wrappers for the folio functions above. */
static inline void set_page_fscache(struct page *page)
{
	folio_start_fscache(page_folio(page));
}

static inline void end_page_fscache(struct page *page)
{
	folio_end_private_2(page_folio(page));
}

static inline void wait_on_page_fscache(struct page *page)
{
	folio_wait_private_2(page_folio(page));
}

static inline int wait_on_page_fscache_killable(struct page *page)
{
	return folio_wait_private_2_killable(page_folio(page));
}

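/*
 * Illustrative lifecycle sketch (not additional API): a folio being copied
 * to the cache is marked before the cache write is issued and unmarked from
 * the completion path; anyone about to modify the folio waits first, as a
 * second write may not be started while the first is in flight.
 *
 *	folio_start_fscache(folio);
 *	// ...submit the folio to the cache backend...
 *	// ...later, in the I/O completion handler:
 *	folio_end_fscache(folio);
 *
 *	// e.g. in ->invalidate_folio() or before a new write:
 *	folio_wait_fscache(folio);
 */
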
/* Marks used on xarray-based buffers */
#define NETFS_BUF_PUT_MARK	XA_MARK_0	/* - Page needs putting */
#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1	/* - Page needs wb/dirty flag wrangling */

enum netfs_io_source {
	NETFS_FILL_WITH_ZEROES,
	NETFS_DOWNLOAD_FROM_SERVER,
	NETFS_READ_FROM_CACHE,
	NETFS_INVALID_READ,
	NETFS_UPLOAD_TO_SERVER,
	NETFS_WRITE_TO_CACHE,
	NETFS_INVALID_WRITE,
} __mode(byte);

typedef void (*netfs_io_terminated_t)(void *priv, ssize_t transferred_or_error,
				      bool was_async);

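/*
 * Sketch of a netfs_io_terminated_t callback (hypothetical "myfs" names): it
 * is passed either the number of bytes transferred or a negative errno, plus
 * a flag saying whether it is running in an asynchronous context.
 *
 *	static void myfs_cache_write_done(void *priv,
 *					  ssize_t transferred_or_error,
 *					  bool was_async)
 *	{
 *		struct myfs_op *op = priv;
 *
 *		if (transferred_or_error < 0)
 *			op->error = transferred_or_error;
 *		else
 *			op->transferred += transferred_or_error;
 *	}
 */
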
/*
 * Per-inode context.  This wraps the VFS inode.
 */
struct netfs_inode {
	struct inode inode;		/* The VFS inode */
	const struct netfs_request_ops *ops;
#if IS_ENABLED(CONFIG_FSCACHE)
	struct fscache_cookie *cache;
#endif
	loff_t remote_i_size;		/* Size of the remote file */
	unsigned long flags;
#define NETFS_ICTX_ODIRECT	0	/* The file has DIO in progress */
#define NETFS_ICTX_UNBUFFERED	1	/* I/O should not use the pagecache */
};

/*
 * A netfs group - for instance a ceph snap.  This is marked on dirty pages,
 * and pages marked with a group must be flushed before they can be written
 * under the domain of another group.
 */
struct netfs_group {
	refcount_t ref;
	void (*free)(struct netfs_group *netfs_group);
};

/*
 * Information about a dirty page (attached only if necessary).
 * folio->private
 */
struct netfs_folio {
	struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */
	unsigned int dirty_offset;	/* Write-streaming dirty data offset */
	unsigned int dirty_len;		/* Write-streaming dirty data length */
};
#define NETFS_FOLIO_INFO	0x1UL	/* OR'd with folio->private. */

static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
{
	void *priv = folio_get_private(folio);

	if ((unsigned long)priv & NETFS_FOLIO_INFO)
		return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
	return NULL;
}

static inline struct netfs_group *netfs_folio_group(struct folio *folio)
{
	struct netfs_folio *finfo;
	void *priv = folio_get_private(folio);

	finfo = netfs_folio_info(folio);
	if (finfo)
		return finfo->netfs_group;
	return priv;
}

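/*
 * Note on the folio->private encoding used above (a summary, not additional
 * API): folio->private either points straight at a netfs_group, or, if the
 * bottom bit (NETFS_FOLIO_INFO) is set, at a netfs_folio carrying the group
 * pointer plus write-streaming state.  A sketch of attaching such state
 * (illustrative; ref handling assumed to be the caller's responsibility):
 *
 *	struct netfs_folio *finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
 *
 *	finfo->netfs_group = group;	// caller's ref is transferred
 *	finfo->dirty_offset = offset;
 *	finfo->dirty_len = len;
 *	folio_attach_private(folio, (void *)((unsigned long)finfo |
 *					     NETFS_FOLIO_INFO));
 */
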
/*
 * Resources required to do operations on a cache.
 */
struct netfs_cache_resources {
	const struct netfs_cache_ops *ops;
	void *cache_priv;
	void *cache_priv2;
	unsigned int debug_id;		/* Cookie debug ID */
	unsigned int inval_counter;	/* object->inval_counter at begin_op */
};

/*
 * Descriptor for a single component subrequest.  Each operation represents
 * an individual read/write from/to a server, a cache, a journal, etc.
 *
 * The buffer iterator is persistent for the life of the subrequest struct and
 * the pages it points to can be relied on to exist for the duration.
 */
struct netfs_io_subrequest {
	struct work_struct work;
	struct netfs_io_request *rreq;	/* Supervising I/O request */
	struct list_head rreq_link;	/* Link in rreq->subrequests */
	struct iov_iter io_iter;	/* Iterator for this subrequest */
	loff_t start;			/* Where to start the I/O */
	size_t len;			/* Size of the I/O */
	size_t transferred;		/* Amount of data transferred */
	refcount_t ref;
	short error;			/* 0 or error that occurred */
	unsigned short debug_index;	/* Index in list (for debugging output) */
	unsigned int max_nr_segs;	/* 0 or max number of segments in an iterator */
	enum netfs_io_source source;	/* Where to read from/write to */
	unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL		1	/* Set if the rest of the read should be cleared */
#define NETFS_SREQ_SHORT_IO		2	/* Set if the I/O was short */
#define NETFS_SREQ_SEEK_DATA_READ	3	/* Set if ->read() should SEEK_DATA first */
#define NETFS_SREQ_NO_PROGRESS		4	/* Set if we didn't manage to read any data */
#define NETFS_SREQ_ONDEMAND		5	/* Set if it's from on-demand read mode */
};

enum netfs_io_origin {
	NETFS_READAHEAD,		/* This read was triggered by readahead */
	NETFS_READPAGE,			/* This read is a synchronous read */
	NETFS_READ_FOR_WRITE,		/* This read is to prepare a write */
	NETFS_WRITEBACK,		/* This write was triggered by writepages */
	NETFS_UNBUFFERED_WRITE,		/* This is an unbuffered write */
	NETFS_DIO_READ,			/* This is a direct I/O read */
	NETFS_DIO_WRITE,		/* This is a direct I/O write */
	nr__netfs_io_origin
} __mode(byte);

/*
 * Descriptor for an I/O helper request.  This is used to make multiple I/O
 * operations to a variety of data stores and then stitch the result together.
 */
struct netfs_io_request {
	union {
		struct work_struct work;
		struct rcu_head rcu;
	};
	struct inode *inode;		/* The file being accessed */
	struct address_space *mapping;	/* The mapping being accessed */
	struct kiocb *iocb;		/* AIO completion vector */
	struct netfs_cache_resources cache_resources;
	struct list_head proc_link;	/* Link in netfs_iorequests */
	struct list_head subrequests;	/* Contributory I/O operations */
	struct iov_iter iter;		/* Unencrypted-side iterator */
	struct iov_iter io_iter;	/* I/O (Encrypted-side) iterator */
	void *netfs_priv;		/* Private data for the netfs */
	struct bio_vec *direct_bv;	/* DIO buffer list (when handling iovec-iter) */
	unsigned int direct_bv_count;	/* Number of elements in direct_bv[] */
	unsigned int debug_id;
	unsigned int rsize;		/* Maximum read size (0 for none) */
	unsigned int wsize;		/* Maximum write size (0 for none) */
	unsigned int subreq_counter;	/* Next subreq->debug_index */
	atomic_t nr_outstanding;	/* Number of ops in progress */
	atomic_t nr_copy_ops;		/* Number of copy-to-cache ops in progress */
	size_t submitted;		/* Amount submitted for I/O so far */
	size_t len;			/* Length of the request */
	size_t transferred;		/* Amount to be indicated as transferred */
	short error;			/* 0 or error that occurred */
	enum netfs_io_origin origin;	/* Origin of the request */
	bool direct_bv_unpin;		/* T if direct_bv[] must be unpinned */
	loff_t i_size;			/* Size of the file */
	loff_t start;			/* Start position */
	pgoff_t no_unlock_folio;	/* Don't unlock this folio after read */
	refcount_t ref;
	unsigned long flags;
#define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
#define NETFS_RREQ_COPY_TO_CACHE	1	/* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED		4	/* The request failed */
#define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
#define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
#define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
#define NETFS_RREQ_BLOCKED		10	/* We blocked */
	const struct netfs_request_ops *netfs_ops;
	void (*cleanup)(struct netfs_io_request *req);
};

/*
 * Operations the network filesystem can/must provide to the helpers.
 */
struct netfs_request_ops {
	unsigned int io_request_size;	/* Alloc size for netfs_io_request struct */
	unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */
	int (*init_request)(struct netfs_io_request *rreq, struct file *file);
	void (*free_request)(struct netfs_io_request *rreq);
	void (*free_subrequest)(struct netfs_io_subrequest *rreq);

	/* Read request handling */
	void (*expand_readahead)(struct netfs_io_request *rreq);
	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
	void (*issue_read)(struct netfs_io_subrequest *subreq);
	bool (*is_still_valid)(struct netfs_io_request *rreq);
	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
				 struct folio **foliop, void **_fsdata);
	void (*done)(struct netfs_io_request *rreq);

	/* Modification handling */
	void (*update_i_size)(struct inode *inode, loff_t i_size);

	/* Write request handling */
	void (*create_write_requests)(struct netfs_io_request *wreq,
				      loff_t start, size_t len);
	void (*invalidate_cache)(struct netfs_io_request *wreq);
};

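/*
 * A minimal sketch of an ops table (hypothetical filesystem "myfs"; only the
 * hooks needed for buffered reads are shown, the rest are optional):
 *
 *	static const struct netfs_request_ops myfs_req_ops = {
 *		.io_request_size	= sizeof(struct myfs_io_request),
 *		.init_request		= myfs_init_request,
 *		.issue_read		= myfs_issue_read,
 *	};
 *
 * Here struct myfs_io_request would embed struct netfs_io_request as its
 * first member, and myfs_issue_read() would start a server read for the
 * region described by subreq->start, subreq->transferred and subreq->len,
 * finishing with a call to netfs_subreq_terminated().
 */
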
/*
 * How to handle reading from a hole.
 */
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_CLEAR,
	NETFS_READ_HOLE_FAIL,
};

/*
 * Table of operations for access to a cache.
 */
struct netfs_cache_ops {
	/* End an operation */
	void (*end_operation)(struct netfs_cache_resources *cres);

	/* Read data from the cache */
	int (*read)(struct netfs_cache_resources *cres,
		    loff_t start_pos,
		    struct iov_iter *iter,
		    enum netfs_read_from_hole read_hole,
		    netfs_io_terminated_t term_func,
		    void *term_func_priv);

	/* Write data to the cache */
	int (*write)(struct netfs_cache_resources *cres,
		     loff_t start_pos,
		     struct iov_iter *iter,
		     netfs_io_terminated_t term_func,
		     void *term_func_priv);

	/* Expand readahead request */
	void (*expand_readahead)(struct netfs_cache_resources *cres,
				 loff_t *_start, size_t *_len, loff_t i_size);

	/* Prepare a read operation, shortening it to a cached/uncached
	 * boundary as appropriate.
	 */
	enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
					     loff_t i_size);

	/* Prepare a write operation, working out what part of the write we can
	 * actually do.
	 */
	int (*prepare_write)(struct netfs_cache_resources *cres,
			     loff_t *_start, size_t *_len, loff_t i_size,
			     bool no_space_allocated_yet);

	/* Prepare an on-demand read operation, shortening it to a cached/uncached
	 * boundary as appropriate.
	 */
	enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres,
						      loff_t start, size_t *_len,
						      loff_t i_size,
						      unsigned long *_flags, ino_t ino);

	/* Query the occupancy of the cache in a region, returning where the
	 * next chunk of data starts and how long it is.
	 */
	int (*query_occupancy)(struct netfs_cache_resources *cres,
			       loff_t start, size_t len, size_t granularity,
			       loff_t *_data_start, size_t *_data_len);
};

/* High-level read API. */
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);

/* High-level write API */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
			    struct netfs_group *netfs_group);
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);

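/*
 * Sketch of how a filesystem's ->write_iter() might route I/O through these
 * entry points (hypothetical "myfs" wrapper; locking and sync-write handling
 * elided):
 *
 *	static ssize_t myfs_file_write_iter(struct kiocb *iocb,
 *					    struct iov_iter *from)
 *	{
 *		struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));
 *
 *		if ((iocb->ki_flags & IOCB_DIRECT) ||
 *		    test_bit(NETFS_ICTX_UNBUFFERED, &ictx->flags))
 *			return netfs_unbuffered_write_iter(iocb, from);
 *		return netfs_perform_write(iocb, from, NULL);
 *	}
 */
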
/* Address operations API */
struct readahead_control;
void netfs_readahead(struct readahead_control *);
int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
		      struct address_space *, loff_t pos, unsigned int len,
		      struct folio **, void **fsdata);
bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
bool netfs_release_folio(struct folio *folio, gfp_t gfp);

void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
			  enum netfs_sreq_ref_trace what);
void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
			  bool was_async, enum netfs_sreq_ref_trace what);
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
				struct iov_iter *new,
				iov_iter_extraction_t extraction_flags);
size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
			size_t max_size, size_t max_segs);
struct netfs_io_subrequest *netfs_create_write_request(
	struct netfs_io_request *wreq, enum netfs_io_source dest,
	loff_t start, size_t len, work_func_t worker);
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async);
void netfs_queue_write_request(struct netfs_io_subrequest *subreq);

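/*
 * Sketch of a ->create_write_requests() implementation built on the helpers
 * above (hypothetical "myfs" names): create a subrequest targeting the
 * server and queue it; the worker performs the upload and finishes by
 * calling netfs_write_subrequest_terminated() on the subrequest with the
 * byte count transferred or a negative error.
 *
 *	static void myfs_create_write_requests(struct netfs_io_request *wreq,
 *					       loff_t start, size_t len)
 *	{
 *		struct netfs_io_subrequest *subreq;
 *
 *		subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
 *						    start, len,
 *						    myfs_upload_worker);
 *		if (subreq)
 *			netfs_queue_write_request(subreq);
 *	}
 */
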
int netfs_start_io_read(struct inode *inode);
void netfs_end_io_read(struct inode *inode);
int netfs_start_io_write(struct inode *inode);
void netfs_end_io_write(struct inode *inode);
int netfs_start_io_direct(struct inode *inode);
void netfs_end_io_direct(struct inode *inode);

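/*
 * These helpers serialise buffered and direct I/O on an inode.  A sketch of
 * the expected bracketing around an unbuffered read (illustrative only):
 *
 *	ret = netfs_start_io_direct(inode);
 *	if (ret < 0)
 *		return ret;
 *	ret = netfs_unbuffered_read_iter(iocb, iter);
 *	netfs_end_io_direct(inode);
 */
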
/**
 * netfs_inode - Get the netfs inode context from the inode
 * @inode: The inode to query
 *
 * Get the netfs lib inode context from the network filesystem's inode.  The
 * context struct is expected to directly follow on from the VFS inode struct.
 */
static inline struct netfs_inode *netfs_inode(struct inode *inode)
{
	return container_of(inode, struct netfs_inode, inode);
}

/**
 * netfs_inode_init - Initialise a netfslib inode context
 * @ctx: The netfs inode to initialise
 * @ops: The netfs's operations list
 *
 * Initialise the netfs library context struct.  This is expected to follow on
 * directly from the VFS inode struct.
 */
static inline void netfs_inode_init(struct netfs_inode *ctx,
				    const struct netfs_request_ops *ops)
{
	ctx->ops = ops;
	ctx->remote_i_size = i_size_read(&ctx->inode);
	ctx->flags = 0;
#if IS_ENABLED(CONFIG_FSCACHE)
	ctx->cache = NULL;
#endif
}

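/*
 * Sketch of the expected embedding (hypothetical "myfs"): struct netfs_inode
 * must be the first member of the filesystem's own inode struct so that
 * netfs_inode() can convert from the VFS inode, and netfs_inode_init() is
 * called when the inode is set up:
 *
 *	struct myfs_inode {
 *		struct netfs_inode netfs;	// must come first
 *		// ...filesystem-private fields...
 *	};
 *
 *	netfs_inode_init(&myfs_i->netfs, &myfs_req_ops);
 */
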
/**
 * netfs_resize_file - Note that a file got resized
 * @ctx: The netfs inode being resized
 * @new_i_size: The new file size
 *
 * Inform the netfs lib that a file got resized so that it can adjust its state.
 */
static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
{
	ctx->remote_i_size = new_i_size;
}

/**
 * netfs_i_cookie - Get the cache cookie from the inode
 * @ctx: The netfs inode to query
 *
 * Get the caching cookie (if enabled) from the network filesystem's inode.
 */
static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
{
#if IS_ENABLED(CONFIG_FSCACHE)
	return ctx->cache;
#else
	return NULL;
#endif
}

#endif /* _LINUX_NETFS_H */