Commit | Line | Data |
---|---|---|
16211268 DH |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* Network filesystem high-level buffered read support. | |
3 | * | |
4 | * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. | |
5 | * Written by David Howells (dhowells@redhat.com) | |
6 | */ | |
7 | ||
8 | #include <linux/export.h> | |
9 | #include <linux/task_io_accounting_ops.h> | |
10 | #include "internal.h" | |
11 | ||
12 | /* | |
13 | * Unlock the folios in a read operation. We need to set PG_fscache on any | |
14 | * folios we're going to write back before we unlock them. | |
15 | */ | |
16 | void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) | |
17 | { | |
18 | struct netfs_io_subrequest *subreq; | |
19 | struct folio *folio; | |
16211268 DH |
20 | pgoff_t start_page = rreq->start / PAGE_SIZE; |
21 | pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; | |
5e51c627 | 22 | size_t account = 0; |
16211268 DH |
23 | bool subreq_failed = false; |
24 | ||
25 | XA_STATE(xas, &rreq->mapping->i_pages, start_page); | |
26 | ||
27 | if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { | |
28 | __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags); | |
29 | list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { | |
30 | __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); | |
31 | } | |
32 | } | |
33 | ||
34 | /* Walk through the pagecache and the I/O request lists simultaneously. | |
35 | * We may have a mixture of cached and uncached sections and we only | |
36 | * really want to write out the uncached sections. This is slightly | |
37 | * complicated by the possibility that we might have huge pages with a | |
38 | * mixture inside. | |
39 | */ | |
40 | subreq = list_first_entry(&rreq->subrequests, | |
41 | struct netfs_io_subrequest, rreq_link); | |
16211268 DH |
42 | subreq_failed = (subreq->error < 0); |
43 | ||
44 | trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); | |
45 | ||
46 | rcu_read_lock(); | |
47 | xas_for_each(&xas, folio, last_page) { | |
5e51c627 | 48 | loff_t pg_end; |
16211268 | 49 | bool pg_failed = false; |
df1c357f | 50 | bool folio_started; |
16211268 | 51 | |
7e043a80 DH |
52 | if (xas_retry(&xas, folio)) |
53 | continue; | |
54 | ||
5e51c627 | 55 | pg_end = folio_pos(folio) + folio_size(folio) - 1; |
7e043a80 | 56 | |
df1c357f | 57 | folio_started = false; |
16211268 | 58 | for (;;) { |
5e51c627 DH |
59 | loff_t sreq_end; |
60 | ||
16211268 DH |
61 | if (!subreq) { |
62 | pg_failed = true; | |
63 | break; | |
64 | } | |
df1c357f | 65 | if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { |
16211268 | 66 | folio_start_fscache(folio); |
df1c357f DW |
67 | folio_started = true; |
68 | } | |
16211268 | 69 | pg_failed |= subreq_failed; |
5e51c627 DH |
70 | sreq_end = subreq->start + subreq->len - 1; |
71 | if (pg_end < sreq_end) | |
16211268 DH |
72 | break; |
73 | ||
74 | account += subreq->transferred; | |
16211268 DH |
75 | if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { |
76 | subreq = list_next_entry(subreq, rreq_link); | |
77 | subreq_failed = (subreq->error < 0); | |
78 | } else { | |
79 | subreq = NULL; | |
80 | subreq_failed = false; | |
81 | } | |
5e51c627 DH |
82 | |
83 | if (pg_end == sreq_end) | |
16211268 DH |
84 | break; |
85 | } | |
86 | ||
87 | if (!pg_failed) { | |
88 | flush_dcache_folio(folio); | |
89 | folio_mark_uptodate(folio); | |
90 | } | |
91 | ||
92 | if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { | |
93 | if (folio_index(folio) == rreq->no_unlock_folio && | |
94 | test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) | |
95 | _debug("no unlock"); | |
96 | else | |
97 | folio_unlock(folio); | |
98 | } | |
99 | } | |
100 | rcu_read_unlock(); | |
101 | ||
102 | task_io_account_read(account); | |
103 | if (rreq->netfs_ops->done) | |
104 | rreq->netfs_ops->done(rreq); | |
105 | } | |
106 | ||
107 | static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, | |
108 | loff_t *_start, size_t *_len, loff_t i_size) | |
109 | { | |
110 | struct netfs_cache_resources *cres = &rreq->cache_resources; | |
111 | ||
112 | if (cres->ops && cres->ops->expand_readahead) | |
113 | cres->ops->expand_readahead(cres, _start, _len, i_size); | |
114 | } | |
115 | ||
116 | static void netfs_rreq_expand(struct netfs_io_request *rreq, | |
117 | struct readahead_control *ractl) | |
118 | { | |
119 | /* Give the cache a chance to change the request parameters. The | |
120 | * resultant request must contain the original region. | |
121 | */ | |
122 | netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size); | |
123 | ||
124 | /* Give the netfs a chance to change the request parameters. The | |
125 | * resultant request must contain the original region. | |
126 | */ | |
127 | if (rreq->netfs_ops->expand_readahead) | |
128 | rreq->netfs_ops->expand_readahead(rreq); | |
129 | ||
130 | /* Expand the request if the cache wants it to start earlier. Note | |
131 | * that the expansion may get further extended if the VM wishes to | |
132 | * insert THPs and the preferred start and/or end wind up in the middle | |
133 | * of THPs. | |
134 | * | |
135 | * If this is the case, however, the THP size should be an integer | |
136 | * multiple of the cache granule size, so we get a whole number of | |
137 | * granules to deal with. | |
138 | */ | |
139 | if (rreq->start != readahead_pos(ractl) || | |
140 | rreq->len != readahead_length(ractl)) { | |
141 | readahead_expand(ractl, rreq->start, rreq->len); | |
142 | rreq->start = readahead_pos(ractl); | |
143 | rreq->len = readahead_length(ractl); | |
144 | ||
145 | trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), | |
146 | netfs_read_trace_expanded); | |
147 | } | |
148 | } | |
149 | ||
150 | /** | |
151 | * netfs_readahead - Helper to manage a read request | |
152 | * @ractl: The description of the readahead request | |
153 | * | |
154 | * Fulfil a readahead request by drawing data from the cache if possible, or | |
155 | * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O | |
156 | * requests from different sources will get munged together. If necessary, the | |
157 | * readahead window can be expanded in either direction to a more convenient | |
158 | * alighment for RPC efficiency or to make storage in the cache feasible. | |
159 | * | |
160 | * The calling netfs must initialise a netfs context contiguous to the vfs | |
161 | * inode before calling this. | |
162 | * | |
163 | * This is usable whether or not caching is enabled. | |
164 | */ | |
165 | void netfs_readahead(struct readahead_control *ractl) | |
166 | { | |
167 | struct netfs_io_request *rreq; | |
874c8ca1 | 168 | struct netfs_inode *ctx = netfs_inode(ractl->mapping->host); |
16211268 DH |
169 | int ret; |
170 | ||
171 | _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); | |
172 | ||
173 | if (readahead_count(ractl) == 0) | |
174 | return; | |
175 | ||
176 | rreq = netfs_alloc_request(ractl->mapping, ractl->file, | |
177 | readahead_pos(ractl), | |
178 | readahead_length(ractl), | |
179 | NETFS_READAHEAD); | |
180 | if (IS_ERR(rreq)) | |
181 | return; | |
182 | ||
183 | if (ctx->ops->begin_cache_operation) { | |
184 | ret = ctx->ops->begin_cache_operation(rreq); | |
185 | if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) | |
186 | goto cleanup_free; | |
187 | } | |
188 | ||
189 | netfs_stat(&netfs_n_rh_readahead); | |
190 | trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), | |
191 | netfs_read_trace_readahead); | |
192 | ||
193 | netfs_rreq_expand(rreq, ractl); | |
194 | ||
195 | /* Drop the refs on the folios here rather than in the cache or | |
196 | * filesystem. The locks will be dropped in netfs_rreq_unlock(). | |
197 | */ | |
198 | while (readahead_folio(ractl)) | |
199 | ; | |
200 | ||
201 | netfs_begin_read(rreq, false); | |
202 | return; | |
203 | ||
204 | cleanup_free: | |
205 | netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); | |
206 | return; | |
207 | } | |
208 | EXPORT_SYMBOL(netfs_readahead); | |
209 | ||
210 | /** | |
6c62371b | 211 | * netfs_read_folio - Helper to manage a read_folio request |
16211268 | 212 | * @file: The file to read from |
6c62371b | 213 | * @folio: The folio to read |
16211268 | 214 | * |
6c62371b MWO |
215 | * Fulfil a read_folio request by drawing data from the cache if |
216 | * possible, or the netfs if not. Space beyond the EOF is zero-filled. | |
217 | * Multiple I/O requests from different sources will get munged together. | |
16211268 DH |
218 | * |
219 | * The calling netfs must initialise a netfs context contiguous to the vfs | |
220 | * inode before calling this. | |
221 | * | |
222 | * This is usable whether or not caching is enabled. | |
223 | */ | |
6c62371b | 224 | int netfs_read_folio(struct file *file, struct folio *folio) |
16211268 | 225 | { |
16211268 DH |
226 | struct address_space *mapping = folio_file_mapping(folio); |
227 | struct netfs_io_request *rreq; | |
874c8ca1 | 228 | struct netfs_inode *ctx = netfs_inode(mapping->host); |
16211268 DH |
229 | int ret; |
230 | ||
231 | _enter("%lx", folio_index(folio)); | |
232 | ||
233 | rreq = netfs_alloc_request(mapping, file, | |
234 | folio_file_pos(folio), folio_size(folio), | |
235 | NETFS_READPAGE); | |
236 | if (IS_ERR(rreq)) { | |
237 | ret = PTR_ERR(rreq); | |
238 | goto alloc_error; | |
239 | } | |
240 | ||
241 | if (ctx->ops->begin_cache_operation) { | |
242 | ret = ctx->ops->begin_cache_operation(rreq); | |
243 | if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) | |
244 | goto discard; | |
245 | } | |
246 | ||
247 | netfs_stat(&netfs_n_rh_readpage); | |
248 | trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); | |
249 | return netfs_begin_read(rreq, true); | |
250 | ||
251 | discard: | |
252 | netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); | |
253 | alloc_error: | |
254 | folio_unlock(folio); | |
255 | return ret; | |
256 | } | |
6c62371b | 257 | EXPORT_SYMBOL(netfs_read_folio); |
16211268 DH |
258 | |
259 | /* | |
260 | * Prepare a folio for writing without reading first | |
261 | * @folio: The folio being prepared | |
262 | * @pos: starting position for the write | |
263 | * @len: length of write | |
264 | * @always_fill: T if the folio should always be completely filled/cleared | |
265 | * | |
266 | * In some cases, write_begin doesn't need to read at all: | |
267 | * - full folio write | |
268 | * - write that lies in a folio that is completely beyond EOF | |
269 | * - write that covers the folio from start to EOF or beyond it | |
270 | * | |
271 | * If any of these criteria are met, then zero out the unwritten parts | |
272 | * of the folio and return true. Otherwise, return false. | |
273 | */ | |
274 | static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, | |
275 | bool always_fill) | |
276 | { | |
277 | struct inode *inode = folio_inode(folio); | |
278 | loff_t i_size = i_size_read(inode); | |
279 | size_t offset = offset_in_folio(folio, pos); | |
280 | size_t plen = folio_size(folio); | |
281 | ||
282 | if (unlikely(always_fill)) { | |
283 | if (pos - offset + len <= i_size) | |
284 | return false; /* Page entirely before EOF */ | |
285 | zero_user_segment(&folio->page, 0, plen); | |
286 | folio_mark_uptodate(folio); | |
287 | return true; | |
288 | } | |
289 | ||
290 | /* Full folio write */ | |
291 | if (offset == 0 && len >= plen) | |
292 | return true; | |
293 | ||
294 | /* Page entirely beyond the end of the file */ | |
295 | if (pos - offset >= i_size) | |
296 | goto zero_out; | |
297 | ||
298 | /* Write that covers from the start of the folio to EOF or beyond */ | |
299 | if (offset == 0 && (pos + len) >= i_size) | |
300 | goto zero_out; | |
301 | ||
302 | return false; | |
303 | zero_out: | |
304 | zero_user_segments(&folio->page, 0, offset, offset + len, plen); | |
305 | return true; | |
306 | } | |
307 | ||
308 | /** | |
309 | * netfs_write_begin - Helper to prepare for writing | |
e81fb419 | 310 | * @ctx: The netfs context |
16211268 DH |
311 | * @file: The file to read from |
312 | * @mapping: The mapping to read from | |
313 | * @pos: File position at which the write will begin | |
314 | * @len: The length of the write (may extend beyond the end of the folio chosen) | |
16211268 DH |
315 | * @_folio: Where to put the resultant folio |
316 | * @_fsdata: Place for the netfs to store a cookie | |
317 | * | |
318 | * Pre-read data for a write-begin request by drawing data from the cache if | |
319 | * possible, or the netfs if not. Space beyond the EOF is zero-filled. | |
320 | * Multiple I/O requests from different sources will get munged together. If | |
321 | * necessary, the readahead window can be expanded in either direction to a | |
322 | * more convenient alighment for RPC efficiency or to make storage in the cache | |
323 | * feasible. | |
324 | * | |
325 | * The calling netfs must provide a table of operations, only one of which, | |
326 | * issue_op, is mandatory. | |
327 | * | |
328 | * The check_write_begin() operation can be provided to check for and flush | |
329 | * conflicting writes once the folio is grabbed and locked. It is passed a | |
330 | * pointer to the fsdata cookie that gets returned to the VM to be passed to | |
331 | * write_end. It is permitted to sleep. It should return 0 if the request | |
fac47b43 XL |
332 | * should go ahead or it may return an error. It may also unlock and put the |
333 | * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0 | |
334 | * will cause the folio to be re-got and the process to be retried. | |
16211268 DH |
335 | * |
336 | * The calling netfs must initialise a netfs context contiguous to the vfs | |
337 | * inode before calling this. | |
338 | * | |
339 | * This is usable whether or not caching is enabled. | |
340 | */ | |
e81fb419 LT |
341 | int netfs_write_begin(struct netfs_inode *ctx, |
342 | struct file *file, struct address_space *mapping, | |
de2a9311 MWO |
343 | loff_t pos, unsigned int len, struct folio **_folio, |
344 | void **_fsdata) | |
16211268 DH |
345 | { |
346 | struct netfs_io_request *rreq; | |
16211268 | 347 | struct folio *folio; |
16211268 DH |
348 | pgoff_t index = pos >> PAGE_SHIFT; |
349 | int ret; | |
350 | ||
351 | DEFINE_READAHEAD(ractl, file, NULL, mapping, index); | |
352 | ||
353 | retry: | |
e999a5c5 | 354 | folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, |
16211268 | 355 | mapping_gfp_mask(mapping)); |
66dabbb6 CH |
356 | if (IS_ERR(folio)) |
357 | return PTR_ERR(folio); | |
16211268 DH |
358 | |
359 | if (ctx->ops->check_write_begin) { | |
360 | /* Allow the netfs (eg. ceph) to flush conflicts. */ | |
fac47b43 | 361 | ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata); |
16211268 DH |
362 | if (ret < 0) { |
363 | trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); | |
16211268 DH |
364 | goto error; |
365 | } | |
fac47b43 XL |
366 | if (!folio) |
367 | goto retry; | |
16211268 DH |
368 | } |
369 | ||
370 | if (folio_test_uptodate(folio)) | |
371 | goto have_folio; | |
372 | ||
373 | /* If the page is beyond the EOF, we want to clear it - unless it's | |
374 | * within the cache granule containing the EOF, in which case we need | |
375 | * to preload the granule. | |
376 | */ | |
377 | if (!netfs_is_cache_enabled(ctx) && | |
378 | netfs_skip_folio_read(folio, pos, len, false)) { | |
379 | netfs_stat(&netfs_n_rh_write_zskip); | |
380 | goto have_folio_no_wait; | |
381 | } | |
382 | ||
383 | rreq = netfs_alloc_request(mapping, file, | |
384 | folio_file_pos(folio), folio_size(folio), | |
385 | NETFS_READ_FOR_WRITE); | |
386 | if (IS_ERR(rreq)) { | |
387 | ret = PTR_ERR(rreq); | |
388 | goto error; | |
389 | } | |
390 | rreq->no_unlock_folio = folio_index(folio); | |
391 | __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); | |
392 | ||
393 | if (ctx->ops->begin_cache_operation) { | |
394 | ret = ctx->ops->begin_cache_operation(rreq); | |
395 | if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) | |
396 | goto error_put; | |
397 | } | |
398 | ||
399 | netfs_stat(&netfs_n_rh_write_begin); | |
400 | trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); | |
401 | ||
402 | /* Expand the request to meet caching requirements and download | |
403 | * preferences. | |
404 | */ | |
405 | ractl._nr_pages = folio_nr_pages(folio); | |
406 | netfs_rreq_expand(rreq, &ractl); | |
407 | ||
408 | /* We hold the folio locks, so we can drop the references */ | |
409 | folio_get(folio); | |
410 | while (readahead_folio(&ractl)) | |
411 | ; | |
412 | ||
413 | ret = netfs_begin_read(rreq, true); | |
414 | if (ret < 0) | |
415 | goto error; | |
416 | ||
417 | have_folio: | |
418 | ret = folio_wait_fscache_killable(folio); | |
419 | if (ret < 0) | |
420 | goto error; | |
421 | have_folio_no_wait: | |
422 | *_folio = folio; | |
423 | _leave(" = 0"); | |
424 | return 0; | |
425 | ||
426 | error_put: | |
427 | netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); | |
428 | error: | |
fac47b43 XL |
429 | if (folio) { |
430 | folio_unlock(folio); | |
431 | folio_put(folio); | |
432 | } | |
16211268 DH |
433 | _leave(" = %d", ret); |
434 | return ret; | |
435 | } | |
436 | EXPORT_SYMBOL(netfs_write_begin); |