// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level buffered read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/task_io_accounting_ops.h>
#include "internal.h"

/*
 * Unlock the folios in a read operation. We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	size_t account = 0;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
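
	/* If the read failed as a whole, no folio may be written back to the
	 * cache, so clear all the copy-to-cache flags before walking the
	 * folios.
	 */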
	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections. This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		loff_t pg_end;
		bool pg_failed = false;

		if (xas_retry(&xas, folio))
			continue;

		pg_end = folio_pos(folio) + folio_size(folio) - 1;
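
		/* Consume the subrequests that overlap this folio. The folio
		 * can only be marked uptodate if every overlapping subrequest
		 * succeeded, and it gets PG_fscache if any overlapping
		 * subrequest is due to be copied to the cache.
		 */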
		for (;;) {
			loff_t sreq_end;

			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			sreq_end = subreq->start + subreq->len - 1;
			if (pg_end < sreq_end)
				break;

			account += subreq->transferred;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}

			if (pg_end == sreq_end)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();
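
	/* Credit the task's I/O accounting with the bytes actually read. */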
	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

static void netfs_rreq_expand(struct netfs_io_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters. The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier. Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O
 * requests from different sources will get munged together. If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl)
{
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		return;

	rreq = netfs_alloc_request(ractl->mapping, ractl->file,
				   readahead_pos(ractl),
				   readahead_length(ractl),
				   NETFS_READAHEAD);
	if (IS_ERR(rreq))
		return;
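
	/* Tell the cache we intend to do I/O. Only the fatal errors checked
	 * below abort the read; any other failure just means the data will be
	 * fetched from the netfs without involving the cache.
	 */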
	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem. The locks will be dropped in netfs_rreq_unlock_folios().
	 */
	while (readahead_folio(ractl))
		;
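
	/* Issue the read. Unlike netfs_read_folio() below, this doesn't wait
	 * for completion; the folios are unlocked as the I/O finishes.
	 */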
	netfs_begin_read(rreq, false);
	return;

cleanup_free:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
 * netfs_read_folio - Helper to manage a read_folio request
 * @file: The file to read from
 * @folio: The folio to read
 *
 * Fulfil a read_folio request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio_file_mapping(folio);
	struct netfs_io_request *rreq;
	struct netfs_inode *ctx = netfs_inode(mapping->host);
	int ret;

	_enter("%lx", folio_index(folio));

	rreq = netfs_alloc_request(mapping, file,
				   folio_file_pos(folio), folio_size(folio),
				   NETFS_READPAGE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto alloc_error;
	}

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto discard;
	}

	netfs_stat(&netfs_n_rh_readpage);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
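
	/* Read synchronously, returning the result of the I/O to the caller. */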
	return netfs_begin_read(rreq, true);

discard:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
alloc_error:
	folio_unlock(folio);
	return ret;
}
EXPORT_SYMBOL(netfs_read_folio);

/*
 * Prepare a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 * @always_fill: T if the folio should always be completely filled/cleared
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len,
				  bool always_fill)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);
	size_t plen = folio_size(folio);

	if (unlikely(always_fill)) {
		if (pos - offset + len <= i_size)
			return false; /* Page entirely before EOF */
		zero_user_segment(&folio->page, 0, plen);
		folio_mark_uptodate(folio);
		return true;
	}

	/* Full folio write */
	if (offset == 0 && len >= plen)
		return true;

	/* Page entirely beyond the end of the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
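	/* The write will fill [offset, offset + len); zero the regions of the
	 * folio either side of it.
	 */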
	zero_user_segments(&folio->page, 0, offset, offset + len, plen);
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @ctx: The netfs context
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not. Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together. If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_read, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked. It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end. It is permitted to sleep. It should return 0 if the request
 * should go ahead or it may return an error. It may also unlock and put the
 * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
 * will cause the folio to be re-got and the process to be retried.
 *
 * The calling netfs must initialise a netfs context contiguous to the vfs
 * inode before calling this.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct netfs_inode *ctx,
		      struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, struct folio **_folio,
		      void **_fsdata)
{
	struct netfs_io_request *rreq;
	struct folio *folio;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
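	/* Get a locked pagecache folio, creating one if necessary.
	 * FGP_WRITEBEGIN bundles the get-folio flags normally used on the
	 * write_begin path.
	 */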
	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	if (ctx->ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			goto error;
		}
		if (!folio)
			goto retry;
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!netfs_is_cache_enabled(ctx) &&
	    netfs_skip_folio_read(folio, pos, len, false)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	rreq = netfs_alloc_request(mapping, file,
				   folio_file_pos(folio), folio_size(folio),
				   NETFS_READ_FOR_WRITE);
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto error;
	}
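
	/* The folio must go back to the caller still locked, so tell
	 * netfs_rreq_unlock_folios() not to unlock this one when the read
	 * completes.
	 */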
	rreq->no_unlock_folio = folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);

	if (ctx->ops->begin_cache_operation) {
		ret = ctx->ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	ret = netfs_begin_read(rreq, true);
	if (ret < 0)
		goto error;

have_folio:
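	/* If the folio is still being written out to the cache, wait killably
	 * for that to finish before handing it back.
	 */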
	ret = folio_wait_fscache_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
432 | EXPORT_SYMBOL(netfs_write_begin); |