Commit | Line | Data |
---|---|---|
3d3c9504 DH |
1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* Network filesystem high-level read support. | |
3 | * | |
4 | * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. | |
5 | * Written by David Howells (dhowells@redhat.com) | |
6 | */ | |
7 | ||
8 | #include <linux/module.h> | |
9 | #include <linux/export.h> | |
10 | #include <linux/fs.h> | |
11 | #include <linux/mm.h> | |
12 | #include <linux/pagemap.h> | |
13 | #include <linux/slab.h> | |
14 | #include <linux/uio.h> | |
15 | #include <linux/sched/mm.h> | |
16 | #include <linux/task_io_accounting_ops.h> | |
17 | #include <linux/netfs.h> | |
18 | #include "internal.h" | |
77b4d2c6 DH |
19 | #define CREATE_TRACE_POINTS |
20 | #include <trace/events/netfs.h> | |
3d3c9504 DH |
21 | |
MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

/* Debugging mask; exposed as module parameter "debug" (writable by root). */
unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

/* Forward declarations for routines that are referenced before definition. */
static void netfs_rreq_work(struct work_struct *);
static void __netfs_put_subrequest(struct netfs_read_subrequest *, bool);
32 | ||
/*
 * Drop a ref on a subrequest, freeing it if that was the last ref.
 */
static void netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				 bool was_async)
{
	if (refcount_dec_and_test(&subreq->usage))
		__netfs_put_subrequest(subreq, was_async);
}
39 | ||
/*
 * Allocate and partially initialise a read request.  The caller completes
 * the setup (start, len, mapping) before submitting any I/O.
 */
static struct netfs_read_request *netfs_alloc_read_request(
	const struct netfs_read_request_ops *ops, void *netfs_priv,
	struct file *file)
{
	static atomic_t debug_ids;
	struct netfs_read_request *rreq;

	rreq = kzalloc(sizeof(struct netfs_read_request), GFP_KERNEL);
	if (rreq) {
		rreq->netfs_ops	= ops;
		rreq->netfs_priv = netfs_priv;
		rreq->inode	= file_inode(file);
		rreq->i_size	= i_size_read(rreq->inode);
		/* Per-request ID used only for tracing/debug output. */
		rreq->debug_id	= atomic_inc_return(&debug_ids);
		INIT_LIST_HEAD(&rreq->subrequests);
		INIT_WORK(&rreq->work, netfs_rreq_work);
		refcount_set(&rreq->usage, 1);
		__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
		/* Give the filesystem a chance to attach its own state. */
		ops->init_rreq(rreq, file);
		netfs_stat(&netfs_n_rh_rreq);
	}

	return rreq;
}
64 | ||
/*
 * Take a ref on a read request.
 */
static void netfs_get_read_request(struct netfs_read_request *rreq)
{
	refcount_inc(&rreq->usage);
}
69 | ||
/*
 * Detach all subrequests from a read request and drop the list's refs on
 * them.
 */
static void netfs_rreq_clear_subreqs(struct netfs_read_request *rreq,
				     bool was_async)
{
	struct netfs_read_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
				struct netfs_read_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		netfs_put_subrequest(subreq, was_async);
	}
}
82 | ||
/*
 * Really free a read request.  Shaped as a work function so that freeing can
 * be deferred to process context when the last ref is dropped in softirq
 * context (see netfs_put_read_request()).
 */
static void netfs_free_read_request(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_clear_subreqs(rreq, false);
	if (rreq->netfs_priv)
		rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
	/* Release any cache resources attached to the request. */
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
	kfree(rreq);
	netfs_stat_d(&netfs_n_rh_rreq);
}
96 | ||
/*
 * Drop a ref on a read request, freeing it if that was the last one.  If
 * @was_async, we may be in softirq/IRQ context and must not sleep, so the
 * actual freeing is punted to a workqueue.
 */
static void netfs_put_read_request(struct netfs_read_request *rreq, bool was_async)
{
	if (refcount_dec_and_test(&rreq->usage)) {
		if (was_async) {
			rreq->work.func = netfs_free_read_request;
			if (!queue_work(system_unbound_wq, &rreq->work))
				BUG();
		} else {
			netfs_free_read_request(&rreq->work);
		}
	}
}
109 | ||
/*
 * Allocate and partially initialise an I/O subrequest structure.
 *
 * The subrequest starts with a usage count of 2: one ref for the caller and
 * one that is consumed when the I/O terminates (netfs_subreq_terminated()
 * drops it).  It also pins the parent read request with a ref of its own.
 */
static struct netfs_read_subrequest *netfs_alloc_subrequest(
	struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	subreq = kzalloc(sizeof(struct netfs_read_subrequest), GFP_KERNEL);
	if (subreq) {
		INIT_LIST_HEAD(&subreq->rreq_link);
		refcount_set(&subreq->usage, 2);
		subreq->rreq = rreq;
		netfs_get_read_request(rreq);
		netfs_stat(&netfs_n_rh_sreq);
	}

	return subreq;
}
129 | ||
/*
 * Take a ref on a subrequest.
 */
static void netfs_get_read_subrequest(struct netfs_read_subrequest *subreq)
{
	refcount_inc(&subreq->usage);
}
134 | ||
/*
 * Really free a subrequest, then drop the ref it held on its parent read
 * request.
 */
static void __netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				   bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
	kfree(subreq);
	netfs_stat_d(&netfs_n_rh_sreq);
	netfs_put_read_request(rreq, was_async);
}
145 | ||
/*
 * Clear the unread part of an I/O request: zero the pagecache region from
 * the point the transfer reached up to the end of the subrequest.
 */
static void netfs_clear_unread(struct netfs_read_subrequest *subreq)
{
	struct iov_iter iter;

	iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len - subreq->transferred);
	iov_iter_zero(iov_iter_count(&iter), &iter);
}
158 | ||
726218fd DH |
159 | static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error, |
160 | bool was_async) | |
161 | { | |
162 | struct netfs_read_subrequest *subreq = priv; | |
163 | ||
164 | netfs_subreq_terminated(subreq, transferred_or_error, was_async); | |
165 | } | |
166 | ||
/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 *
 * The destination is the pagecache region covered by the yet-untransferred
 * part of the subrequest.  Completion is reported asynchronously through
 * netfs_cache_read_terminated().
 */
static void netfs_read_from_cache(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq,
				  bool seek_data)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;

	netfs_stat(&netfs_n_rh_read);
	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len - subreq->transferred);

	cres->ops->read(cres, subreq->start, &iter, seek_data,
			netfs_cache_read_terminated, subreq);
}
186 | ||
/*
 * Fill a subrequest region with zeroes.  Used for reads beyond EOF: no I/O
 * is issued; the clear-tail flag makes the termination path zero the buffer.
 */
static void netfs_fill_with_zeroes(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_zero);
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}
197 | ||
/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->pos + subreq->transferred to
 * subreq->pos + subreq->len - 1.  It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);
	rreq->netfs_ops->issue_op(subreq);
}
220 | ||
/*
 * Release those waiting.  Tears down the subrequest list and drops the
 * caller's ref on the read request.
 */
static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_rreq_clear_subreqs(rreq, was_async);
	netfs_put_read_request(rreq, was_async);
}
230 | ||
/*
 * Deal with the completion of writing the data to the cache.  We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
					  bool was_async)
{
	struct netfs_read_subrequest *subreq;
	struct folio *folio;
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
			/* We might have multiple writes from the same huge
			 * folio, but we mustn't unlock a folio more than once.
			 */
			if (have_unlocked && folio_index(folio) <= unlocked)
				continue;
			unlocked = folio_index(folio);
			folio_end_fscache(folio);
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}
265 | ||
/*
 * Completion callback for one copy-to-cache write.  Accounts the result and,
 * if this was the last outstanding write, finishes post-write cleanup.
 */
static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;
	struct netfs_read_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_wr_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async);
}
288 | ||
/*
 * Perform any outstanding writes to the cache.  We inherit a ref from the
 * caller.
 *
 * Subrequests not marked for caching are discarded first; adjacent cacheable
 * subrequests are merged to issue fewer, larger writes.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_read_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_write);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
	 */
	atomic_inc(&rreq->nr_wr_ops);

	/* Drop the subrequests that don't need writing to the cache. */
	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
		if (!test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags)) {
			list_del_init(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false);
		}
	}

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		/* Amalgamate adjacent writes */
		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
			next = list_next_entry(subreq, rreq_link);
			if (next->start != subreq->start + subreq->len)
				break;
			subreq->len += next->len;
			list_del_init(&next->rreq_link);
			netfs_put_subrequest(next, false);
		}

		/* Let the cache adjust (or reject) the proposed write. */
		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
					       rreq->i_size);
		if (ret < 0) {
			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
			continue;
		}

		iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
				subreq->start, subreq->len);

		atomic_inc(&rreq->nr_wr_ops);
		netfs_stat(&netfs_n_rh_write);
		netfs_get_read_subrequest(subreq);
		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
		cres->ops->write(cres, subreq->start, &iter,
				 netfs_rreq_copy_terminated, subreq);
	}

	/* If we decrement nr_wr_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, false);
}
348 | ||
349 | static void netfs_rreq_write_to_cache_work(struct work_struct *work) | |
350 | { | |
351 | struct netfs_read_request *rreq = | |
352 | container_of(work, struct netfs_read_request, work); | |
353 | ||
354 | netfs_rreq_do_write_to_cache(rreq); | |
355 | } | |
356 | ||
/*
 * Kick off the copy of downloaded data to the cache.  If @was_async, we may
 * not be able to sleep here, so the work is punted to a workqueue instead.
 */
static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
				      bool was_async)
{
	if (was_async) {
		rreq->work.func = netfs_rreq_write_to_cache_work;
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_do_write_to_cache(rreq);
	}
}
368 | ||
/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
static void netfs_rreq_unlock(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos, account = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		/* The request failed: nothing will be copied to the cache,
		 * so strip all the write-to-cache marks first.
		 */
		__clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections.  This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_read_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		/* Advance through the subrequests that overlap this folio,
		 * accumulating error state; a folio is good only if every
		 * overlapping subrequest succeeded.
		 */
		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			/* The folio the caller is reading into stays locked. */
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}
453 | ||
/*
 * Handle a short read.  Reissue the tail of the subrequest from the same
 * source, seeking to the data if it comes from the cache.
 */
static void netfs_rreq_short_read(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

	netfs_stat(&netfs_n_rh_short_read);
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

	netfs_get_read_subrequest(subreq);
	atomic_inc(&rreq->nr_rd_ops);
	if (subreq->source == NETFS_READ_FROM_CACHE)
		netfs_read_from_cache(rreq, subreq, true);
	else
		netfs_read_from_server(rreq, subreq);
}
473 | ||
/*
 * Resubmit any short or failed operations.  Returns true if we got the rreq
 * ref back.
 *
 * Failed cache reads are retried against the server; short reads are reissued
 * via netfs_rreq_short_read().
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	WARN_ON(in_interrupt());

	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_rd_ops);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			/* Only cache-read failures are retried; a server
			 * failure has nowhere else to go.
			 */
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
			netfs_stat(&netfs_n_rh_download_instead);
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
			netfs_get_read_subrequest(subreq);
			atomic_inc(&rreq->nr_rd_ops);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_READ, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_rd_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		return true;

	wake_up_var(&rreq->nr_rd_ops);
	return false;
}
515 | ||
/*
 * Check to see if the data read is still valid.  If the filesystem says it
 * is not, mark every cache-sourced subrequest stale so that the assessment
 * pass resubmits them.
 */
static void netfs_rreq_is_still_valid(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}
534 | ||
/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point.  We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_read_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
	netfs_rreq_is_still_valid(rreq);

	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		/* Resubmission may complete synchronously, in which case we
		 * got the ref back and must reassess; otherwise the
		 * outstanding I/O will call back in here later.
		 */
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	netfs_rreq_unlock(rreq);

	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
		return netfs_rreq_write_to_cache(rreq, was_async);

	netfs_rreq_completed(rreq, was_async);
}
565 | ||
/*
 * Workqueue shim: assess the read request in process context.
 */
static void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_assess(rreq, false);
}
572 | ||
/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 *
 * If resubmission may be needed and we're in async (possibly atomic)
 * context, punt the assessment to a workqueue since it may need to sleep.
 */
static void netfs_rreq_terminated(struct netfs_read_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}
588 | ||
/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code.  The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_read_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;
	int u;

	_enter("[%u]{%llx,%lx},%zd",
	       subreq->debug_index, subreq->start, subreq->flags,
	       transferred_or_error);

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
		goto failed;
	}

	/* Clamp an overread rather than trusting the reported count. */
	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);

out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_rd_ops);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_rd_ops);

	netfs_put_subrequest(subreq, was_async);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	/* A zero-byte transfer is only retried once; a second one in a row
	 * means no progress is being made, so give up.
	 */
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
		/* Cache failure: retriable by falling back to the server. */
		netfs_stat(&netfs_n_rh_read_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
		netfs_stat(&netfs_n_rh_download_failed);
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);
697 | ||
/*
 * Decide where a subrequest's data should come from.  If a cache is attached,
 * it makes the choice; otherwise read from the server, or just zero-fill for
 * regions entirely beyond EOF.
 */
static enum netfs_read_source netfs_cache_prepare_read(struct netfs_read_subrequest *subreq,
						       loff_t i_size)
{
	struct netfs_read_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}
710 | ||
/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_read_source
netfs_rreq_prepare_read(struct netfs_read_request *rreq,
			struct netfs_read_subrequest *subreq)
{
	enum netfs_read_source source;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	source = netfs_cache_prepare_read(subreq, rreq->i_size);
	if (source == NETFS_INVALID_READ)
		goto out;

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries.  If it shrinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}
	}

	/* Clamping must never leave an empty subrequest. */
	if (WARN_ON(subreq->len == 0))
		source = NETFS_INVALID_READ;

out:
	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}
751 | ||
/*
 * Slice off a piece of a read request and submit an I/O request for it.
 * Returns false on allocation failure or an invalid read, with rreq->error
 * set in the latter case.
 */
static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq,
				    unsigned int *_debug_index)
{
	struct netfs_read_subrequest *subreq;
	enum netfs_read_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index	= (*_debug_index)++;
	subreq->start		= rreq->start + rreq->submitted;
	subreq->len		= rreq->len   - rreq->submitted;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset.  It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_rd_ops);

	rreq->submitted += subreq->len;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_from_cache(rreq, subreq, false);
		break;
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false);
	return false;
}
810 | ||
726218fd DH |
811 | static void netfs_cache_expand_readahead(struct netfs_read_request *rreq, |
812 | loff_t *_start, size_t *_len, loff_t i_size) | |
813 | { | |
814 | struct netfs_cache_resources *cres = &rreq->cache_resources; | |
815 | ||
816 | if (cres->ops && cres->ops->expand_readahead) | |
817 | cres->ops->expand_readahead(cres, _start, _len, i_size); | |
818 | } | |
819 | ||
/*
 * Expand the readahead window to cover what the cache and the filesystem
 * want, then sync the VM's window with the final request bounds.
 */
static void netfs_rreq_expand(struct netfs_read_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start  != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		/* The VM may not have honoured the request exactly. */
		rreq->start  = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}
853 | ||
/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl,
		     const struct netfs_read_request_ops *ops,
		     void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		goto cleanup;

	rreq = netfs_alloc_read_request(ops, netfs_priv, ractl->file);
	if (!rreq)
		goto cleanup;
	rreq->mapping = ractl->mapping;
	rreq->start = readahead_pos(ractl);
	rreq->len = readahead_length(ractl);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		/* Only fatal setup errors abort the whole read; any other
		 * failure just means we read from the server uncached.
		 */
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	/* Start with one "in flight" op so the request can't complete and be
	 * assessed underneath us while slices are still being submitted.
	 */
	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
	 */
	while (readahead_folio(ractl))
		;

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		netfs_rreq_assess(rreq, false);
	return;

cleanup_free:
	/* Dropping the request also cleans up netfs_priv via the request. */
	netfs_put_read_request(rreq, false);
	return;
cleanup:
	if (netfs_priv)
		ops->cleanup(ractl->mapping, netfs_priv);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
931 | ||
932 | /** | |
53b776c7 | 933 | * netfs_readpage - Helper to manage a readpage request |
3d3c9504 | 934 | * @file: The file to read from |
78525c74 | 935 | * @folio: The folio to read |
3d3c9504 DH |
936 | * @ops: The network filesystem's operations for the helper to use |
937 | * @netfs_priv: Private netfs data to be retained in the request | |
938 | * | |
939 | * Fulfil a readpage request by drawing data from the cache if possible, or the | |
940 | * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests | |
941 | * from different sources will get munged together. | |
942 | * | |
943 | * The calling netfs must provide a table of operations, only one of which, | |
944 | * issue_op, is mandatory. It may also be passed a private token, which will | |
945 | * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup(). | |
946 | * | |
947 | * This is usable whether or not caching is enabled. | |
948 | */ | |
949 | int netfs_readpage(struct file *file, | |
78525c74 | 950 | struct folio *folio, |
3d3c9504 DH |
951 | const struct netfs_read_request_ops *ops, |
952 | void *netfs_priv) | |
953 | { | |
954 | struct netfs_read_request *rreq; | |
955 | unsigned int debug_index = 0; | |
956 | int ret; | |
957 | ||
78525c74 | 958 | _enter("%lx", folio_index(folio)); |
3d3c9504 DH |
959 | |
960 | rreq = netfs_alloc_read_request(ops, netfs_priv, file); | |
961 | if (!rreq) { | |
962 | if (netfs_priv) | |
78525c74 DH |
963 | ops->cleanup(netfs_priv, folio_file_mapping(folio)); |
964 | folio_unlock(folio); | |
3d3c9504 DH |
965 | return -ENOMEM; |
966 | } | |
78525c74 DH |
967 | rreq->mapping = folio_file_mapping(folio); |
968 | rreq->start = folio_file_pos(folio); | |
969 | rreq->len = folio_size(folio); | |
3d3c9504 | 970 | |
726218fd DH |
971 | if (ops->begin_cache_operation) { |
972 | ret = ops->begin_cache_operation(rreq); | |
973 | if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) { | |
78525c74 | 974 | folio_unlock(folio); |
726218fd DH |
975 | goto out; |
976 | } | |
977 | } | |
978 | ||
289af54c | 979 | netfs_stat(&netfs_n_rh_readpage); |
77b4d2c6 DH |
980 | trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); |
981 | ||
3d3c9504 DH |
982 | netfs_get_read_request(rreq); |
983 | ||
984 | atomic_set(&rreq->nr_rd_ops, 1); | |
985 | do { | |
986 | if (!netfs_rreq_submit_slice(rreq, &debug_index)) | |
987 | break; | |
988 | ||
989 | } while (rreq->submitted < rreq->len); | |
990 | ||
991 | /* Keep nr_rd_ops incremented so that the ref always belongs to us, and | |
992 | * the service code isn't punted off to a random thread pool to | |
993 | * process. | |
994 | */ | |
995 | do { | |
996 | wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1); | |
997 | netfs_rreq_assess(rreq, false); | |
998 | } while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)); | |
999 | ||
1000 | ret = rreq->error; | |
0246f3e5 DH |
1001 | if (ret == 0 && rreq->submitted < rreq->len) { |
1002 | trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage); | |
3d3c9504 | 1003 | ret = -EIO; |
0246f3e5 | 1004 | } |
726218fd | 1005 | out: |
3d3c9504 DH |
1006 | netfs_put_read_request(rreq, false); |
1007 | return ret; | |
1008 | } | |
1009 | EXPORT_SYMBOL(netfs_readpage); | |
e1b1240c | 1010 | |
827a746f | 1011 | /** |
78525c74 DH |
1012 | * netfs_skip_folio_read - prep a folio for writing without reading first |
1013 | * @folio: The folio being prepared | |
827a746f JL |
1014 | * @pos: starting position for the write |
1015 | * @len: length of write | |
1016 | * | |
1017 | * In some cases, write_begin doesn't need to read at all: | |
78525c74 DH |
1018 | * - full folio write |
1019 | * - write that lies in a folio that is completely beyond EOF | |
1020 | * - write that covers the folio from start to EOF or beyond it | |
827a746f JL |
1021 | * |
1022 | * If any of these criteria are met, then zero out the unwritten parts | |
78525c74 | 1023 | * of the folio and return true. Otherwise, return false. |
827a746f | 1024 | */ |
78525c74 | 1025 | static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len) |
e1b1240c | 1026 | { |
78525c74 | 1027 | struct inode *inode = folio_inode(folio); |
827a746f | 1028 | loff_t i_size = i_size_read(inode); |
78525c74 | 1029 | size_t offset = offset_in_folio(folio, pos); |
827a746f | 1030 | |
78525c74 DH |
1031 | /* Full folio write */ |
1032 | if (offset == 0 && len >= folio_size(folio)) | |
827a746f JL |
1033 | return true; |
1034 | ||
78525c74 | 1035 | /* pos beyond last folio in the file */ |
827a746f JL |
1036 | if (pos - offset >= i_size) |
1037 | goto zero_out; | |
1038 | ||
78525c74 | 1039 | /* Write that covers from the start of the folio to EOF or beyond */ |
827a746f JL |
1040 | if (offset == 0 && (pos + len) >= i_size) |
1041 | goto zero_out; | |
e1b1240c | 1042 | |
827a746f JL |
1043 | return false; |
1044 | zero_out: | |
78525c74 | 1045 | zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio)); |
827a746f | 1046 | return true; |
e1b1240c DH |
1047 | } |
1048 | ||
1049 | /** | |
1050 | * netfs_write_begin - Helper to prepare for writing | |
1051 | * @file: The file to read from | |
1052 | * @mapping: The mapping to read from | |
1053 | * @pos: File position at which the write will begin | |
78525c74 DH |
1054 | * @len: The length of the write (may extend beyond the end of the folio chosen) |
1055 | * @aop_flags: AOP_* flags | |
1056 | * @_folio: Where to put the resultant folio | |
e1b1240c DH |
1057 | * @_fsdata: Place for the netfs to store a cookie |
1058 | * @ops: The network filesystem's operations for the helper to use | |
1059 | * @netfs_priv: Private netfs data to be retained in the request | |
1060 | * | |
1061 | * Pre-read data for a write-begin request by drawing data from the cache if | |
1062 | * possible, or the netfs if not. Space beyond the EOF is zero-filled. | |
1063 | * Multiple I/O requests from different sources will get munged together. If | |
1064 | * necessary, the readahead window can be expanded in either direction to a | |
1065 | * more convenient alighment for RPC efficiency or to make storage in the cache | |
1066 | * feasible. | |
1067 | * | |
1068 | * The calling netfs must provide a table of operations, only one of which, | |
1069 | * issue_op, is mandatory. | |
1070 | * | |
1071 | * The check_write_begin() operation can be provided to check for and flush | |
78525c74 | 1072 | * conflicting writes once the folio is grabbed and locked. It is passed a |
e1b1240c DH |
1073 | * pointer to the fsdata cookie that gets returned to the VM to be passed to |
1074 | * write_end. It is permitted to sleep. It should return 0 if the request | |
78525c74 DH |
1075 | * should go ahead; unlock the folio and return -EAGAIN to cause the folio to |
1076 | * be regot; or return an error. | |
e1b1240c DH |
1077 | * |
1078 | * This is usable whether or not caching is enabled. | |
1079 | */ | |
1080 | int netfs_write_begin(struct file *file, struct address_space *mapping, | |
78525c74 DH |
1081 | loff_t pos, unsigned int len, unsigned int aop_flags, |
1082 | struct folio **_folio, void **_fsdata, | |
e1b1240c DH |
1083 | const struct netfs_read_request_ops *ops, |
1084 | void *netfs_priv) | |
1085 | { | |
1086 | struct netfs_read_request *rreq; | |
78525c74 | 1087 | struct folio *folio; |
e1b1240c | 1088 | struct inode *inode = file_inode(file); |
78525c74 | 1089 | unsigned int debug_index = 0, fgp_flags; |
e1b1240c | 1090 | pgoff_t index = pos >> PAGE_SHIFT; |
e1b1240c DH |
1091 | int ret; |
1092 | ||
1093 | DEFINE_READAHEAD(ractl, file, NULL, mapping, index); | |
1094 | ||
1095 | retry: | |
78525c74 DH |
1096 | fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; |
1097 | if (aop_flags & AOP_FLAG_NOFS) | |
1098 | fgp_flags |= FGP_NOFS; | |
1099 | folio = __filemap_get_folio(mapping, index, fgp_flags, | |
1100 | mapping_gfp_mask(mapping)); | |
1101 | if (!folio) | |
e1b1240c DH |
1102 | return -ENOMEM; |
1103 | ||
1104 | if (ops->check_write_begin) { | |
1105 | /* Allow the netfs (eg. ceph) to flush conflicts. */ | |
78525c74 | 1106 | ret = ops->check_write_begin(file, pos, len, folio, _fsdata); |
e1b1240c | 1107 | if (ret < 0) { |
0246f3e5 | 1108 | trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); |
e1b1240c DH |
1109 | if (ret == -EAGAIN) |
1110 | goto retry; | |
1111 | goto error; | |
1112 | } | |
1113 | } | |
1114 | ||
78525c74 DH |
1115 | if (folio_test_uptodate(folio)) |
1116 | goto have_folio; | |
e1b1240c DH |
1117 | |
1118 | /* If the page is beyond the EOF, we want to clear it - unless it's | |
1119 | * within the cache granule containing the EOF, in which case we need | |
1120 | * to preload the granule. | |
1121 | */ | |
e1b1240c | 1122 | if (!ops->is_cache_enabled(inode) && |
78525c74 | 1123 | netfs_skip_folio_read(folio, pos, len)) { |
e1b1240c | 1124 | netfs_stat(&netfs_n_rh_write_zskip); |
78525c74 | 1125 | goto have_folio_no_wait; |
e1b1240c DH |
1126 | } |
1127 | ||
1128 | ret = -ENOMEM; | |
1129 | rreq = netfs_alloc_read_request(ops, netfs_priv, file); | |
1130 | if (!rreq) | |
1131 | goto error; | |
78525c74 DH |
1132 | rreq->mapping = folio_file_mapping(folio); |
1133 | rreq->start = folio_file_pos(folio); | |
1134 | rreq->len = folio_size(folio); | |
1135 | rreq->no_unlock_folio = folio_index(folio); | |
1136 | __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); | |
e1b1240c DH |
1137 | netfs_priv = NULL; |
1138 | ||
726218fd DH |
1139 | if (ops->begin_cache_operation) { |
1140 | ret = ops->begin_cache_operation(rreq); | |
1141 | if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) | |
1142 | goto error_put; | |
1143 | } | |
1144 | ||
e1b1240c DH |
1145 | netfs_stat(&netfs_n_rh_write_begin); |
1146 | trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); | |
1147 | ||
1148 | /* Expand the request to meet caching requirements and download | |
1149 | * preferences. | |
1150 | */ | |
78525c74 | 1151 | ractl._nr_pages = folio_nr_pages(folio); |
e1b1240c DH |
1152 | netfs_rreq_expand(rreq, &ractl); |
1153 | netfs_get_read_request(rreq); | |
1154 | ||
78525c74 DH |
1155 | /* We hold the folio locks, so we can drop the references */ |
1156 | folio_get(folio); | |
1157 | while (readahead_folio(&ractl)) | |
1158 | ; | |
e1b1240c DH |
1159 | |
1160 | atomic_set(&rreq->nr_rd_ops, 1); | |
1161 | do { | |
1162 | if (!netfs_rreq_submit_slice(rreq, &debug_index)) | |
1163 | break; | |
1164 | ||
1165 | } while (rreq->submitted < rreq->len); | |
1166 | ||
1167 | /* Keep nr_rd_ops incremented so that the ref always belongs to us, and | |
1168 | * the service code isn't punted off to a random thread pool to | |
1169 | * process. | |
1170 | */ | |
1171 | for (;;) { | |
1172 | wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1); | |
1173 | netfs_rreq_assess(rreq, false); | |
1174 | if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) | |
1175 | break; | |
1176 | cond_resched(); | |
1177 | } | |
1178 | ||
1179 | ret = rreq->error; | |
0246f3e5 DH |
1180 | if (ret == 0 && rreq->submitted < rreq->len) { |
1181 | trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin); | |
e1b1240c | 1182 | ret = -EIO; |
0246f3e5 | 1183 | } |
e1b1240c DH |
1184 | netfs_put_read_request(rreq, false); |
1185 | if (ret < 0) | |
1186 | goto error; | |
1187 | ||
78525c74 DH |
1188 | have_folio: |
1189 | ret = folio_wait_fscache_killable(folio); | |
e1b1240c DH |
1190 | if (ret < 0) |
1191 | goto error; | |
78525c74 | 1192 | have_folio_no_wait: |
e1b1240c DH |
1193 | if (netfs_priv) |
1194 | ops->cleanup(netfs_priv, mapping); | |
78525c74 | 1195 | *_folio = folio; |
e1b1240c DH |
1196 | _leave(" = 0"); |
1197 | return 0; | |
1198 | ||
1199 | error_put: | |
1200 | netfs_put_read_request(rreq, false); | |
1201 | error: | |
78525c74 DH |
1202 | folio_unlock(folio); |
1203 | folio_put(folio); | |
e1b1240c DH |
1204 | if (netfs_priv) |
1205 | ops->cleanup(netfs_priv, mapping); | |
1206 | _leave(" = %d", ret); | |
1207 | return ret; | |
1208 | } | |
1209 | EXPORT_SYMBOL(netfs_write_begin); |