fscache, cachefiles: Add alternate API to use kiocb for read/write to cache
[linux-block.git] / lib / iov_iter.c
// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

18#define iterate_iovec(i, n, __v, __p, skip, STEP) { \
19 size_t left; \
20 size_t wanted = n; \
21 __p = i->iov; \
22 __v.iov_len = min(n, __p->iov_len - skip); \
23 if (likely(__v.iov_len)) { \
24 __v.iov_base = __p->iov_base + skip; \
25 left = (STEP); \
26 __v.iov_len -= left; \
27 skip += __v.iov_len; \
28 n -= __v.iov_len; \
29 } else { \
30 left = 0; \
31 } \
32 while (unlikely(!left && n)) { \
33 __p++; \
34 __v.iov_len = min(n, __p->iov_len); \
35 if (unlikely(!__v.iov_len)) \
36 continue; \
37 __v.iov_base = __p->iov_base; \
38 left = (STEP); \
39 __v.iov_len -= left; \
40 skip = __v.iov_len; \
41 n -= __v.iov_len; \
42 } \
43 n = wanted - n; \
44}
45
46#define iterate_kvec(i, n, __v, __p, skip, STEP) { \
47 size_t wanted = n; \
48 __p = i->kvec; \
49 __v.iov_len = min(n, __p->iov_len - skip); \
50 if (likely(__v.iov_len)) { \
51 __v.iov_base = __p->iov_base + skip; \
52 (void)(STEP); \
53 skip += __v.iov_len; \
54 n -= __v.iov_len; \
55 } \
56 while (unlikely(n)) { \
57 __p++; \
58 __v.iov_len = min(n, __p->iov_len); \
59 if (unlikely(!__v.iov_len)) \
60 continue; \
61 __v.iov_base = __p->iov_base; \
62 (void)(STEP); \
63 skip = __v.iov_len; \
64 n -= __v.iov_len; \
65 } \
66 n = wanted; \
67}
68
69#define iterate_bvec(i, n, __v, __bi, skip, STEP) { \
70 struct bvec_iter __start; \
71 __start.bi_size = n; \
72 __start.bi_bvec_done = skip; \
73 __start.bi_idx = 0; \
74 for_each_bvec(__v, i->bvec, __bi, __start) { \
04a31165 75 (void)(STEP); \
04a31165 76 } \
77}
78
79#define iterate_xarray(i, n, __v, skip, STEP) { \
80 struct page *head = NULL; \
81 size_t wanted = n, seg, offset; \
82 loff_t start = i->xarray_start + skip; \
83 pgoff_t index = start >> PAGE_SHIFT; \
84 int j; \
85 \
86 XA_STATE(xas, i->xarray, index); \
87 \
88 rcu_read_lock(); \
89 xas_for_each(&xas, head, ULONG_MAX) { \
90 if (xas_retry(&xas, head)) \
91 continue; \
92 if (WARN_ON(xa_is_value(head))) \
93 break; \
94 if (WARN_ON(PageHuge(head))) \
95 break; \
96 for (j = (head->index < index) ? index - head->index : 0; \
97 j < thp_nr_pages(head); j++) { \
98 __v.bv_page = head + j; \
99 offset = (i->xarray_start + skip) & ~PAGE_MASK; \
100 seg = PAGE_SIZE - offset; \
101 __v.bv_offset = offset; \
102 __v.bv_len = min(n, seg); \
103 (void)(STEP); \
104 n -= __v.bv_len; \
105 skip += __v.bv_len; \
106 if (n == 0) \
107 break; \
108 } \
109 if (n == 0) \
110 break; \
111 } \
112 rcu_read_unlock(); \
113 n = wanted - n; \
114}
115
116#define iterate_all_kinds(i, n, v, I, B, K, X) { \
117 if (likely(n)) { \
118 size_t skip = i->iov_offset; \
119 if (unlikely(i->type & ITER_BVEC)) { \
120 struct bio_vec v; \
121 struct bvec_iter __bi; \
122 iterate_bvec(i, n, v, __bi, skip, (B)) \
123 } else if (unlikely(i->type & ITER_KVEC)) { \
124 const struct kvec *kvec; \
125 struct kvec v; \
126 iterate_kvec(i, n, v, kvec, skip, (K)) \
9ea9ce04 127 } else if (unlikely(i->type & ITER_DISCARD)) { \
128 } else if (unlikely(i->type & ITER_XARRAY)) { \
129 struct bio_vec v; \
130 iterate_xarray(i, n, v, skip, (X)); \
131 } else { \
132 const struct iovec *iov; \
133 struct iovec v; \
134 iterate_iovec(i, n, v, iov, skip, (I)) \
135 } \
136 } \
137}
138
7ff50620 139#define iterate_and_advance(i, n, v, I, B, K, X) { \
140 if (unlikely(i->count < n)) \
141 n = i->count; \
19f18459 142 if (i->count) { \
143 size_t skip = i->iov_offset; \
144 if (unlikely(i->type & ITER_BVEC)) { \
1bdc76ae 145 const struct bio_vec *bvec = i->bvec; \
dd254f5a 146 struct bio_vec v; \
147 struct bvec_iter __bi; \
148 iterate_bvec(i, n, v, __bi, skip, (B)) \
149 i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
150 i->nr_segs -= i->bvec - bvec; \
151 skip = __bi.bi_bvec_done; \
152 } else if (unlikely(i->type & ITER_KVEC)) { \
153 const struct kvec *kvec; \
154 struct kvec v; \
155 iterate_kvec(i, n, v, kvec, skip, (K)) \
156 if (skip == kvec->iov_len) { \
157 kvec++; \
158 skip = 0; \
159 } \
160 i->nr_segs -= kvec - i->kvec; \
161 i->kvec = kvec; \
162 } else if (unlikely(i->type & ITER_DISCARD)) { \
163 skip += n; \
164 } else if (unlikely(i->type & ITER_XARRAY)) { \
165 struct bio_vec v; \
166 iterate_xarray(i, n, v, skip, (X)) \
167 } else { \
168 const struct iovec *iov; \
169 struct iovec v; \
170 iterate_iovec(i, n, v, iov, skip, (I)) \
171 if (skip == iov->iov_len) { \
172 iov++; \
173 skip = 0; \
174 } \
175 i->nr_segs -= iov - i->iov; \
176 i->iov = iov; \
7ce2a91e 177 } \
178 i->count -= n; \
179 i->iov_offset = skip; \
7ce2a91e 180 } \
181}
182
static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

62a8067a 205static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
206 struct iov_iter *i)
207{
208 size_t skip, copy, left, wanted;
209 const struct iovec *iov;
210 char __user *buf;
211 void *kaddr, *from;
212
213 if (unlikely(bytes > i->count))
214 bytes = i->count;
215
216 if (unlikely(!bytes))
217 return 0;
218
09fc68dc 219 might_fault();
220 wanted = bytes;
221 iov = i->iov;
222 skip = i->iov_offset;
223 buf = iov->iov_base + skip;
224 copy = min(bytes, iov->iov_len - skip);
225
3fa6c507 226 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
227 kaddr = kmap_atomic(page);
228 from = kaddr + offset;
229
230 /* first chunk, usually the only one */
09fc68dc 231 left = copyout(buf, from, copy);
232 copy -= left;
233 skip += copy;
234 from += copy;
235 bytes -= copy;
236
237 while (unlikely(!left && bytes)) {
238 iov++;
239 buf = iov->iov_base;
240 copy = min(bytes, iov->iov_len);
09fc68dc 241 left = copyout(buf, from, copy);
242 copy -= left;
243 skip = copy;
244 from += copy;
245 bytes -= copy;
246 }
247 if (likely(!bytes)) {
248 kunmap_atomic(kaddr);
249 goto done;
250 }
251 offset = from - kaddr;
252 buf += copy;
253 kunmap_atomic(kaddr);
254 copy = min(bytes, iov->iov_len - skip);
255 }
256 /* Too bad - revert to non-atomic kmap */
3fa6c507 257
258 kaddr = kmap(page);
259 from = kaddr + offset;
09fc68dc 260 left = copyout(buf, from, copy);
261 copy -= left;
262 skip += copy;
263 from += copy;
264 bytes -= copy;
265 while (unlikely(!left && bytes)) {
266 iov++;
267 buf = iov->iov_base;
268 copy = min(bytes, iov->iov_len);
09fc68dc 269 left = copyout(buf, from, copy);
270 copy -= left;
271 skip = copy;
272 from += copy;
273 bytes -= copy;
274 }
275 kunmap(page);
3fa6c507 276
4f18cd31 277done:
278 if (skip == iov->iov_len) {
279 iov++;
280 skip = 0;
281 }
282 i->count -= wanted - bytes;
283 i->nr_segs -= iov - i->iov;
284 i->iov = iov;
285 i->iov_offset = skip;
286 return wanted - bytes;
287}
4f18cd31 288
62a8067a 289static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
290 struct iov_iter *i)
291{
292 size_t skip, copy, left, wanted;
293 const struct iovec *iov;
294 char __user *buf;
295 void *kaddr, *to;
296
297 if (unlikely(bytes > i->count))
298 bytes = i->count;
299
300 if (unlikely(!bytes))
301 return 0;
302
09fc68dc 303 might_fault();
304 wanted = bytes;
305 iov = i->iov;
306 skip = i->iov_offset;
307 buf = iov->iov_base + skip;
308 copy = min(bytes, iov->iov_len - skip);
309
3fa6c507 310 if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
311 kaddr = kmap_atomic(page);
312 to = kaddr + offset;
313
314 /* first chunk, usually the only one */
09fc68dc 315 left = copyin(to, buf, copy);
316 copy -= left;
317 skip += copy;
318 to += copy;
319 bytes -= copy;
320
321 while (unlikely(!left && bytes)) {
322 iov++;
323 buf = iov->iov_base;
324 copy = min(bytes, iov->iov_len);
09fc68dc 325 left = copyin(to, buf, copy);
326 copy -= left;
327 skip = copy;
328 to += copy;
329 bytes -= copy;
330 }
331 if (likely(!bytes)) {
332 kunmap_atomic(kaddr);
333 goto done;
334 }
335 offset = to - kaddr;
336 buf += copy;
337 kunmap_atomic(kaddr);
338 copy = min(bytes, iov->iov_len - skip);
339 }
340 /* Too bad - revert to non-atomic kmap */
3fa6c507 341
342 kaddr = kmap(page);
343 to = kaddr + offset;
09fc68dc 344 left = copyin(to, buf, copy);
345 copy -= left;
346 skip += copy;
347 to += copy;
348 bytes -= copy;
349 while (unlikely(!left && bytes)) {
350 iov++;
351 buf = iov->iov_base;
352 copy = min(bytes, iov->iov_len);
09fc68dc 353 left = copyin(to, buf, copy);
354 copy -= left;
355 skip = copy;
356 to += copy;
357 bytes -= copy;
358 }
359 kunmap(page);
3fa6c507 360
f0d1bec9 361done:
362 if (skip == iov->iov_len) {
363 iov++;
364 skip = 0;
365 }
366 i->count -= wanted - bytes;
367 i->nr_segs -= iov - i->iov;
368 i->iov = iov;
369 i->iov_offset = skip;
370 return wanted - bytes;
371}
f0d1bec9 372
#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

416static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
417 struct iov_iter *i)
418{
419 struct pipe_inode_info *pipe = i->pipe;
420 struct pipe_buffer *buf;
421 unsigned int p_tail = pipe->tail;
422 unsigned int p_mask = pipe->ring_size - 1;
423 unsigned int i_head = i->head;
241699cd 424 size_t off;
425
426 if (unlikely(bytes > i->count))
427 bytes = i->count;
428
429 if (unlikely(!bytes))
430 return 0;
431
432 if (!sanity(i))
433 return 0;
434
435 off = i->iov_offset;
8cefc107 436 buf = &pipe->bufs[i_head & p_mask];
437 if (off) {
438 if (offset == off && buf->page == page) {
439 /* merge with the last one */
440 buf->len += bytes;
441 i->iov_offset += bytes;
442 goto out;
443 }
444 i_head++;
445 buf = &pipe->bufs[i_head & p_mask];
241699cd 446 }
6718b6f8 447 if (pipe_full(i_head, p_tail, pipe->max_usage))
241699cd 448 return 0;
8cefc107 449
241699cd 450 buf->ops = &page_cache_pipe_buf_ops;
451 get_page(page);
452 buf->page = page;
453 buf->offset = offset;
454 buf->len = bytes;
455
456 pipe->head = i_head + 1;
241699cd 457 i->iov_offset = offset + bytes;
8cefc107 458 i->head = i_head;
459out:
460 i->count -= bytes;
461 return bytes;
462}
463
/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
			return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

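/*
 * Illustrative sketch (editor's note, not part of this file): the usual
 * caller pattern pairs iov_iter_fault_in_readable() with an atomic copy and
 * retries when the atomic copy comes up short, roughly:
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
 *		return -EFAULT;
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	iov_iter_advance(i, copied);
 *
 * Locking and the retry-on-short-copy loop depend on the caller; the names
 * page/offset/bytes above are placeholders.
 */
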
aa563d7b 489void iov_iter_init(struct iov_iter *i, unsigned int direction,
490 const struct iovec *iov, unsigned long nr_segs,
491 size_t count)
492{
493 WARN_ON(direction & ~(READ | WRITE));
494 direction &= READ | WRITE;
495
71d8e532 496 /* It will get better. Eventually... */
db68ce10 497 if (uaccess_kernel()) {
aa563d7b 498 i->type = ITER_KVEC | direction;
499 i->kvec = (struct kvec *)iov;
500 } else {
aa563d7b 501 i->type = ITER_IOVEC | direction;
502 i->iov = iov;
503 }
504 i->nr_segs = nr_segs;
505 i->iov_offset = 0;
506 i->count = count;
507}
508EXPORT_SYMBOL(iov_iter_init);
7b2c99d1 509
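/*
 * Illustrative sketch (editor's note, not part of this file): a typical
 * read-style user copy builds an ITER_IOVEC iterator over the caller's
 * iovec array and then copies kernel data into it:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, iov, nr_segs, total_len);
 *	copied = copy_to_iter(kbuf, len, &iter);
 *
 * READ means the iterator describes the destination of a read(2)-style
 * transfer; kbuf/len/total_len are placeholders.
 */
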
static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

538static size_t push_pipe(struct iov_iter *i, size_t size,
8cefc107 539 int *iter_headp, size_t *offp)
540{
541 struct pipe_inode_info *pipe = i->pipe;
542 unsigned int p_tail = pipe->tail;
543 unsigned int p_mask = pipe->ring_size - 1;
544 unsigned int iter_head;
241699cd 545 size_t off;
546 ssize_t left;
547
548 if (unlikely(size > i->count))
549 size = i->count;
550 if (unlikely(!size))
551 return 0;
552
553 left = size;
554 data_start(i, &iter_head, &off);
555 *iter_headp = iter_head;
556 *offp = off;
557 if (off) {
558 left -= PAGE_SIZE - off;
559 if (left <= 0) {
8cefc107 560 pipe->bufs[iter_head & p_mask].len += size;
561 return size;
562 }
563 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
564 iter_head++;
241699cd 565 }
6718b6f8 566 while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
8cefc107 567 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
568 struct page *page = alloc_page(GFP_USER);
569 if (!page)
570 break;
571
572 buf->ops = &default_pipe_buf_ops;
573 buf->page = page;
574 buf->offset = 0;
575 buf->len = min_t(ssize_t, left, PAGE_SIZE);
576 left -= buf->len;
577 iter_head++;
578 pipe->head = iter_head;
579
580 if (left == 0)
241699cd 581 return size;
582 }
583 return size - left;
584}
585
586static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
587 struct iov_iter *i)
588{
589 struct pipe_inode_info *pipe = i->pipe;
590 unsigned int p_mask = pipe->ring_size - 1;
591 unsigned int i_head;
241699cd 592 size_t n, off;
593
594 if (!sanity(i))
595 return 0;
596
8cefc107 597 bytes = n = push_pipe(i, bytes, &i_head, &off);
598 if (unlikely(!n))
599 return 0;
8cefc107 600 do {
241699cd 601 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
602 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
603 i->head = i_head;
604 i->iov_offset = off + chunk;
605 n -= chunk;
606 addr += chunk;
607 off = 0;
608 i_head++;
609 } while (n);
610 i->count -= bytes;
611 return bytes;
612}
613
614static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
615 __wsum sum, size_t off)
616{
cc44c17b 617 __wsum next = csum_partial_copy_nocheck(from, to, len);
618 return csum_block_add(sum, next, off);
619}
620
78e1f386 621static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
622 struct csum_state *csstate,
623 struct iov_iter *i)
624{
625 struct pipe_inode_info *pipe = i->pipe;
8cefc107 626 unsigned int p_mask = pipe->ring_size - 1;
627 __wsum sum = csstate->csum;
628 size_t off = csstate->off;
8cefc107 629 unsigned int i_head;
78e1f386 630 size_t n, r;
631
632 if (!sanity(i))
633 return 0;
634
8cefc107 635 bytes = n = push_pipe(i, bytes, &i_head, &r);
636 if (unlikely(!n))
637 return 0;
8cefc107 638 do {
78e1f386 639 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
8cefc107 640 char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
f9152895 641 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
78e1f386 642 kunmap_atomic(p);
8cefc107 643 i->head = i_head;
644 i->iov_offset = r + chunk;
645 n -= chunk;
646 off += chunk;
647 addr += chunk;
648 r = 0;
649 i_head++;
650 } while (n);
78e1f386 651 i->count -= bytes;
652 csstate->csum = sum;
653 csstate->off = off;
654 return bytes;
655}
656
aa28de27 657size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
62a8067a 658{
36f7a8a4 659 const char *from = addr;
00e23707 660 if (unlikely(iov_iter_is_pipe(i)))
241699cd 661 return copy_pipe_to_iter(addr, bytes, i);
662 if (iter_is_iovec(i))
663 might_fault();
3d4d3e48 664 iterate_and_advance(i, bytes, v,
09fc68dc 665 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
3d4d3e48 666 memcpy_to_page(v.bv_page, v.bv_offset,
a280455f 667 (from += v.bv_len) - v.bv_len, v.bv_len),
668 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
669 memcpy_to_page(v.bv_page, v.bv_offset,
670 (from += v.bv_len) - v.bv_len, v.bv_len)
3d4d3e48 671 )
62a8067a 672
3d4d3e48 673 return bytes;
c35e0248 674}
aa28de27 675EXPORT_SYMBOL(_copy_to_iter);
c35e0248 676
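/*
 * Illustrative sketch (editor's note, not part of this file): _copy_to_iter()
 * is normally reached through the copy_to_iter() wrapper in <linux/uio.h>,
 * which may add hardening checks before calling it:
 *
 *	size_t copied = copy_to_iter(src, len, &iter);
 *	if (copied != len)
 *		return -EFAULT;		// short copy: destination faulted
 *
 * src/len are placeholders for the caller's buffer.
 */
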
677#ifdef CONFIG_ARCH_HAS_COPY_MC
678static int copyout_mc(void __user *to, const void *from, size_t n)
8780356e 679{
96d4f267 680 if (access_ok(to, n)) {
d0ef4c36 681 instrument_copy_to_user(to, from, n);
ec6347bb 682 n = copy_mc_to_user((__force void *) to, from, n);
683 }
684 return n;
685}
686
ec6347bb 687static unsigned long copy_mc_to_page(struct page *page, size_t offset,
688 const char *from, size_t len)
689{
690 unsigned long ret;
691 char *to;
692
693 to = kmap_atomic(page);
ec6347bb 694 ret = copy_mc_to_kernel(to + offset, from, len);
695 kunmap_atomic(to);
696
697 return ret;
698}
699
ec6347bb 700static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
701 struct iov_iter *i)
702{
703 struct pipe_inode_info *pipe = i->pipe;
704 unsigned int p_mask = pipe->ring_size - 1;
705 unsigned int i_head;
ca146f6f 706 size_t n, off, xfer = 0;
707
708 if (!sanity(i))
709 return 0;
710
8cefc107 711 bytes = n = push_pipe(i, bytes, &i_head, &off);
712 if (unlikely(!n))
713 return 0;
8cefc107 714 do {
715 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
716 unsigned long rem;
717
ec6347bb 718 rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
719 off, addr, chunk);
720 i->head = i_head;
721 i->iov_offset = off + chunk - rem;
722 xfer += chunk - rem;
723 if (rem)
724 break;
725 n -= chunk;
726 addr += chunk;
727 off = 0;
728 i_head++;
729 } while (n);
730 i->count -= xfer;
731 return xfer;
732}
733
/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
ec6347bb 757size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
758{
759 const char *from = addr;
760 unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
761
00e23707 762 if (unlikely(iov_iter_is_pipe(i)))
ec6347bb 763 return copy_mc_pipe_to_iter(addr, bytes, i);
8780356e
DW
764 if (iter_is_iovec(i))
765 might_fault();
766 iterate_and_advance(i, bytes, v,
ec6347bb
DW
767 copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
768 v.iov_len),
8780356e 769 ({
ec6347bb
DW
770 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
771 (from += v.bv_len) - v.bv_len, v.bv_len);
8780356e
DW
772 if (rem) {
773 curr_addr = (unsigned long) from;
774 bytes = curr_addr - s_addr - rem;
775 return bytes;
776 }
777 }),
778 ({
ec6347bb
DW
779 rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
780 - v.iov_len, v.iov_len);
8780356e
DW
781 if (rem) {
782 curr_addr = (unsigned long) from;
783 bytes = curr_addr - s_addr - rem;
784 return bytes;
785 }
7ff50620
DH
786 }),
787 ({
788 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
789 (from += v.bv_len) - v.bv_len, v.bv_len);
790 if (rem) {
791 curr_addr = (unsigned long) from;
792 bytes = curr_addr - s_addr - rem;
793 rcu_read_unlock();
794 return bytes;
795 }
8780356e
DW
796 })
797 )
798
799 return bytes;
800}
ec6347bb
DW
801EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
802#endif /* CONFIG_ARCH_HAS_COPY_MC */
8780356e 803
aa28de27 804size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
c35e0248 805{
0dbca9a4 806 char *to = addr;
00e23707 807 if (unlikely(iov_iter_is_pipe(i))) {
241699cd
AV
808 WARN_ON(1);
809 return 0;
810 }
09fc68dc
AV
811 if (iter_is_iovec(i))
812 might_fault();
0dbca9a4 813 iterate_and_advance(i, bytes, v,
09fc68dc 814 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
0dbca9a4 815 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
a280455f 816 v.bv_offset, v.bv_len),
7ff50620
DH
817 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
818 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
819 v.bv_offset, v.bv_len)
0dbca9a4
AV
820 )
821
822 return bytes;
c35e0248 823}
aa28de27 824EXPORT_SYMBOL(_copy_from_iter);
c35e0248 825
aa28de27 826bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
cbbd26b8
AV
827{
828 char *to = addr;
00e23707 829 if (unlikely(iov_iter_is_pipe(i))) {
cbbd26b8
AV
830 WARN_ON(1);
831 return false;
832 }
33844e66 833 if (unlikely(i->count < bytes))
cbbd26b8
AV
834 return false;
835
09fc68dc
AV
836 if (iter_is_iovec(i))
837 might_fault();
cbbd26b8 838 iterate_all_kinds(i, bytes, v, ({
09fc68dc 839 if (copyin((to += v.iov_len) - v.iov_len,
cbbd26b8
AV
840 v.iov_base, v.iov_len))
841 return false;
842 0;}),
843 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
844 v.bv_offset, v.bv_len),
7ff50620
DH
845 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
846 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
847 v.bv_offset, v.bv_len)
cbbd26b8
AV
848 )
849
850 iov_iter_advance(i, bytes);
851 return true;
852}
aa28de27 853EXPORT_SYMBOL(_copy_from_iter_full);
cbbd26b8 854
aa28de27 855size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
aa583096
AV
856{
857 char *to = addr;
00e23707 858 if (unlikely(iov_iter_is_pipe(i))) {
241699cd
AV
859 WARN_ON(1);
860 return 0;
861 }
aa583096 862 iterate_and_advance(i, bytes, v,
3f763453 863 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
aa583096
AV
864 v.iov_base, v.iov_len),
865 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
866 v.bv_offset, v.bv_len),
7ff50620
DH
867 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
868 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
869 v.bv_offset, v.bv_len)
aa583096
AV
870 )
871
872 return bytes;
873}
aa28de27 874EXPORT_SYMBOL(_copy_from_iter_nocache);
aa583096 875
0aed55af 876#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty data in the cache.
 */
6a37e940 891size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
0aed55af
DW
892{
893 char *to = addr;
00e23707 894 if (unlikely(iov_iter_is_pipe(i))) {
0aed55af
DW
895 WARN_ON(1);
896 return 0;
897 }
898 iterate_and_advance(i, bytes, v,
899 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
900 v.iov_base, v.iov_len),
901 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
902 v.bv_offset, v.bv_len),
903 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
7ff50620
DH
904 v.iov_len),
905 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
906 v.bv_offset, v.bv_len)
0aed55af
DW
907 )
908
909 return bytes;
910}
6a37e940 911EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
0aed55af
DW
912#endif
913
aa28de27 914bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
cbbd26b8
AV
915{
916 char *to = addr;
00e23707 917 if (unlikely(iov_iter_is_pipe(i))) {
cbbd26b8
AV
918 WARN_ON(1);
919 return false;
920 }
33844e66 921 if (unlikely(i->count < bytes))
cbbd26b8
AV
922 return false;
923 iterate_all_kinds(i, bytes, v, ({
3f763453 924 if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
cbbd26b8
AV
925 v.iov_base, v.iov_len))
926 return false;
927 0;}),
928 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
929 v.bv_offset, v.bv_len),
7ff50620
DH
930 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
931 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
932 v.bv_offset, v.bv_len)
cbbd26b8
AV
933 )
934
935 iov_iter_advance(i, bytes);
936 return true;
937}
aa28de27 938EXPORT_SYMBOL(_copy_from_iter_full_nocache);
cbbd26b8 939
72e809ed
AV
940static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
941{
6daef95b
ED
942 struct page *head;
943 size_t v = n + offset;
944
945 /*
946 * The general case needs to access the page order in order
947 * to compute the page size.
948 * However, we mostly deal with order-0 pages and thus can
949 * avoid a possible cache line miss for requests that fit all
950 * page orders.
951 */
952 if (n <= v && v <= PAGE_SIZE)
953 return true;
954
955 head = compound_head(page);
956 v += (page - head) << PAGE_SHIFT;
a90bcb86 957
a50b854e 958 if (likely(n <= v && v <= (page_size(head))))
72e809ed
AV
959 return true;
960 WARN_ON(1);
961 return false;
962}
cbbd26b8 963
62a8067a
AV
964size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
965 struct iov_iter *i)
966{
72e809ed
AV
967 if (unlikely(!page_copy_sane(page, offset, bytes)))
968 return 0;
7ff50620 969 if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
d271524a
AV
970 void *kaddr = kmap_atomic(page);
971 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
972 kunmap_atomic(kaddr);
973 return wanted;
9ea9ce04
DH
974 } else if (unlikely(iov_iter_is_discard(i)))
975 return bytes;
976 else if (likely(!iov_iter_is_pipe(i)))
62a8067a 977 return copy_page_to_iter_iovec(page, offset, bytes, i);
241699cd
AV
978 else
979 return copy_page_to_iter_pipe(page, offset, bytes, i);
62a8067a
AV
980}
981EXPORT_SYMBOL(copy_page_to_iter);
982
983size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
984 struct iov_iter *i)
985{
72e809ed
AV
986 if (unlikely(!page_copy_sane(page, offset, bytes)))
987 return 0;
9ea9ce04 988 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
241699cd
AV
989 WARN_ON(1);
990 return 0;
991 }
7ff50620 992 if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
d271524a 993 void *kaddr = kmap_atomic(page);
aa28de27 994 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
d271524a
AV
995 kunmap_atomic(kaddr);
996 return wanted;
997 } else
62a8067a
AV
998 return copy_page_from_iter_iovec(page, offset, bytes, i);
999}
1000EXPORT_SYMBOL(copy_page_from_iter);
1001
241699cd
AV
1002static size_t pipe_zero(size_t bytes, struct iov_iter *i)
1003{
1004 struct pipe_inode_info *pipe = i->pipe;
8cefc107
DH
1005 unsigned int p_mask = pipe->ring_size - 1;
1006 unsigned int i_head;
241699cd 1007 size_t n, off;
241699cd
AV
1008
1009 if (!sanity(i))
1010 return 0;
1011
8cefc107 1012 bytes = n = push_pipe(i, bytes, &i_head, &off);
241699cd
AV
1013 if (unlikely(!n))
1014 return 0;
1015
8cefc107 1016 do {
241699cd 1017 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
8cefc107
DH
1018 memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
1019 i->head = i_head;
241699cd
AV
1020 i->iov_offset = off + chunk;
1021 n -= chunk;
8cefc107
DH
1022 off = 0;
1023 i_head++;
1024 } while (n);
241699cd
AV
1025 i->count -= bytes;
1026 return bytes;
1027}
1028
c35e0248
MW
1029size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
1030{
00e23707 1031 if (unlikely(iov_iter_is_pipe(i)))
241699cd 1032 return pipe_zero(bytes, i);
8442fa46 1033 iterate_and_advance(i, bytes, v,
09fc68dc 1034 clear_user(v.iov_base, v.iov_len),
a280455f 1035 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
7ff50620
DH
1036 memset(v.iov_base, 0, v.iov_len),
1037 memzero_page(v.bv_page, v.bv_offset, v.bv_len)
8442fa46
AV
1038 )
1039
1040 return bytes;
c35e0248
MW
1041}
1042EXPORT_SYMBOL(iov_iter_zero);
1043
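/*
 * Illustrative sketch (editor's note, not part of this file): callers use
 * iov_iter_zero() to fill the destination where there is no backing data,
 * e.g. when a read hits a hole or runs past EOF:
 *
 *	if (block_is_hole)
 *		copied = iov_iter_zero(len, &iter);
 *	else
 *		copied = copy_to_iter(data, len, &iter);
 *
 * block_is_hole/data/len are placeholders for the caller's state.
 */
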
62a8067a
AV
1044size_t iov_iter_copy_from_user_atomic(struct page *page,
1045 struct iov_iter *i, unsigned long offset, size_t bytes)
1046{
04a31165 1047 char *kaddr = kmap_atomic(page), *p = kaddr + offset;
72e809ed
AV
1048 if (unlikely(!page_copy_sane(page, offset, bytes))) {
1049 kunmap_atomic(kaddr);
1050 return 0;
1051 }
9ea9ce04 1052 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
241699cd
AV
1053 kunmap_atomic(kaddr);
1054 WARN_ON(1);
1055 return 0;
1056 }
04a31165 1057 iterate_all_kinds(i, bytes, v,
09fc68dc 1058 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
04a31165 1059 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
a280455f 1060 v.bv_offset, v.bv_len),
7ff50620
DH
1061 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1062 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1063 v.bv_offset, v.bv_len)
04a31165
AV
1064 )
1065 kunmap_atomic(kaddr);
1066 return bytes;
62a8067a
AV
1067}
1068EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1069
b9dc6f65
AV
1070static inline void pipe_truncate(struct iov_iter *i)
1071{
1072 struct pipe_inode_info *pipe = i->pipe;
8cefc107
DH
1073 unsigned int p_tail = pipe->tail;
1074 unsigned int p_head = pipe->head;
1075 unsigned int p_mask = pipe->ring_size - 1;
1076
1077 if (!pipe_empty(p_head, p_tail)) {
1078 struct pipe_buffer *buf;
1079 unsigned int i_head = i->head;
b9dc6f65 1080 size_t off = i->iov_offset;
8cefc107 1081
b9dc6f65 1082 if (off) {
8cefc107
DH
1083 buf = &pipe->bufs[i_head & p_mask];
1084 buf->len = off - buf->offset;
1085 i_head++;
b9dc6f65 1086 }
8cefc107
DH
1087 while (p_head != i_head) {
1088 p_head--;
1089 pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
b9dc6f65 1090 }
8cefc107
DH
1091
1092 pipe->head = p_head;
b9dc6f65
AV
1093 }
1094}
1095
241699cd
AV
1096static void pipe_advance(struct iov_iter *i, size_t size)
1097{
1098 struct pipe_inode_info *pipe = i->pipe;
241699cd
AV
1099 if (unlikely(i->count < size))
1100 size = i->count;
241699cd 1101 if (size) {
b9dc6f65 1102 struct pipe_buffer *buf;
8cefc107
DH
1103 unsigned int p_mask = pipe->ring_size - 1;
1104 unsigned int i_head = i->head;
b9dc6f65 1105 size_t off = i->iov_offset, left = size;
8cefc107 1106
241699cd 1107 if (off) /* make it relative to the beginning of buffer */
8cefc107 1108 left += off - pipe->bufs[i_head & p_mask].offset;
241699cd 1109 while (1) {
8cefc107 1110 buf = &pipe->bufs[i_head & p_mask];
b9dc6f65 1111 if (left <= buf->len)
241699cd 1112 break;
b9dc6f65 1113 left -= buf->len;
8cefc107 1114 i_head++;
241699cd 1115 }
8cefc107 1116 i->head = i_head;
b9dc6f65 1117 i->iov_offset = buf->offset + left;
241699cd 1118 }
b9dc6f65
AV
1119 i->count -= size;
1120 /* ... and discard everything past that point */
1121 pipe_truncate(i);
241699cd
AV
1122}
1123
54c8195b
PB
1124static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1125{
1126 struct bvec_iter bi;
1127
1128 bi.bi_size = i->count;
1129 bi.bi_bvec_done = i->iov_offset;
1130 bi.bi_idx = 0;
1131 bvec_iter_advance(i->bvec, &bi, size);
1132
1133 i->bvec += bi.bi_idx;
1134 i->nr_segs -= bi.bi_idx;
1135 i->count = bi.bi_size;
1136 i->iov_offset = bi.bi_bvec_done;
1137}
1138
62a8067a
AV
1139void iov_iter_advance(struct iov_iter *i, size_t size)
1140{
00e23707 1141 if (unlikely(iov_iter_is_pipe(i))) {
241699cd
AV
1142 pipe_advance(i, size);
1143 return;
1144 }
9ea9ce04
DH
1145 if (unlikely(iov_iter_is_discard(i))) {
1146 i->count -= size;
1147 return;
1148 }
7ff50620
DH
1149 if (unlikely(iov_iter_is_xarray(i))) {
1150 i->iov_offset += size;
1151 i->count -= size;
1152 return;
1153 }
54c8195b
PB
1154 if (iov_iter_is_bvec(i)) {
1155 iov_iter_bvec_advance(i, size);
1156 return;
1157 }
7ff50620 1158 iterate_and_advance(i, size, v, 0, 0, 0, 0)
62a8067a
AV
1159}
1160EXPORT_SYMBOL(iov_iter_advance);
1161
27c0e374
AV
1162void iov_iter_revert(struct iov_iter *i, size_t unroll)
1163{
1164 if (!unroll)
1165 return;
5b47d59a
AV
1166 if (WARN_ON(unroll > MAX_RW_COUNT))
1167 return;
27c0e374 1168 i->count += unroll;
00e23707 1169 if (unlikely(iov_iter_is_pipe(i))) {
27c0e374 1170 struct pipe_inode_info *pipe = i->pipe;
8cefc107
DH
1171 unsigned int p_mask = pipe->ring_size - 1;
1172 unsigned int i_head = i->head;
27c0e374
AV
1173 size_t off = i->iov_offset;
1174 while (1) {
8cefc107
DH
1175 struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1176 size_t n = off - b->offset;
27c0e374 1177 if (unroll < n) {
4fa55cef 1178 off -= unroll;
27c0e374
AV
1179 break;
1180 }
1181 unroll -= n;
8cefc107 1182 if (!unroll && i_head == i->start_head) {
27c0e374
AV
1183 off = 0;
1184 break;
1185 }
8cefc107
DH
1186 i_head--;
1187 b = &pipe->bufs[i_head & p_mask];
1188 off = b->offset + b->len;
27c0e374
AV
1189 }
1190 i->iov_offset = off;
8cefc107 1191 i->head = i_head;
27c0e374
AV
1192 pipe_truncate(i);
1193 return;
1194 }
9ea9ce04
DH
1195 if (unlikely(iov_iter_is_discard(i)))
1196 return;
27c0e374
AV
1197 if (unroll <= i->iov_offset) {
1198 i->iov_offset -= unroll;
1199 return;
1200 }
1201 unroll -= i->iov_offset;
7ff50620
DH
1202 if (iov_iter_is_xarray(i)) {
1203 BUG(); /* We should never go beyond the start of the specified
1204 * range since we might then be straying into pages that
1205 * aren't pinned.
1206 */
1207 } else if (iov_iter_is_bvec(i)) {
27c0e374
AV
1208 const struct bio_vec *bvec = i->bvec;
1209 while (1) {
1210 size_t n = (--bvec)->bv_len;
1211 i->nr_segs++;
1212 if (unroll <= n) {
1213 i->bvec = bvec;
1214 i->iov_offset = n - unroll;
1215 return;
1216 }
1217 unroll -= n;
1218 }
1219 } else { /* same logics for iovec and kvec */
1220 const struct iovec *iov = i->iov;
1221 while (1) {
1222 size_t n = (--iov)->iov_len;
1223 i->nr_segs++;
1224 if (unroll <= n) {
1225 i->iov = iov;
1226 i->iov_offset = n - unroll;
1227 return;
1228 }
1229 unroll -= n;
1230 }
1231 }
1232}
1233EXPORT_SYMBOL(iov_iter_revert);
1234
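/*
 * Illustrative sketch (editor's note, not part of this file): advance and
 * revert bracket a speculative transfer; if the operation ultimately fails,
 * the caller winds the iterator back by however much was consumed:
 *
 *	size_t consumed = bytes - iov_iter_count(&iter);
 *	if (error)
 *		iov_iter_revert(&iter, consumed);
 *
 * bytes/error are placeholders for the caller's bookkeeping.
 */
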
62a8067a
AV
1235/*
1236 * Return the count of just the current iov_iter segment.
1237 */
1238size_t iov_iter_single_seg_count(const struct iov_iter *i)
1239{
00e23707 1240 if (unlikely(iov_iter_is_pipe(i)))
241699cd 1241 return i->count; // it is a silly place, anyway
62a8067a
AV
1242 if (i->nr_segs == 1)
1243 return i->count;
7ff50620 1244 if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
9ea9ce04 1245 return i->count;
7ff50620 1246 if (iov_iter_is_bvec(i))
62a8067a 1247 return min(i->count, i->bvec->bv_len - i->iov_offset);
ad0eab92
PM
1248 else
1249 return min(i->count, i->iov->iov_len - i->iov_offset);
62a8067a
AV
1250}
1251EXPORT_SYMBOL(iov_iter_single_seg_count);
1252
aa563d7b 1253void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
05afcb77 1254 const struct kvec *kvec, unsigned long nr_segs,
abb78f87
AV
1255 size_t count)
1256{
aa563d7b
DH
1257 WARN_ON(direction & ~(READ | WRITE));
1258 i->type = ITER_KVEC | (direction & (READ | WRITE));
05afcb77 1259 i->kvec = kvec;
abb78f87
AV
1260 i->nr_segs = nr_segs;
1261 i->iov_offset = 0;
1262 i->count = count;
1263}
1264EXPORT_SYMBOL(iov_iter_kvec);
1265
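/*
 * Illustrative sketch (editor's note, not part of this file): ITER_KVEC is
 * the usual way to feed a kernel buffer into an iter-based API such as
 * kernel_read()/kernel_write(), roughly:
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 *
 * kbuf/len are placeholders; the direction follows the same READ/WRITE
 * convention as iov_iter_init().
 */
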
aa563d7b 1266void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
05afcb77
AV
1267 const struct bio_vec *bvec, unsigned long nr_segs,
1268 size_t count)
1269{
aa563d7b
DH
1270 WARN_ON(direction & ~(READ | WRITE));
1271 i->type = ITER_BVEC | (direction & (READ | WRITE));
05afcb77
AV
1272 i->bvec = bvec;
1273 i->nr_segs = nr_segs;
1274 i->iov_offset = 0;
1275 i->count = count;
1276}
1277EXPORT_SYMBOL(iov_iter_bvec);
1278
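/*
 * Illustrative sketch (editor's note, not part of this file): ITER_BVEC
 * describes data that already sits in pages, e.g. a transfer assembled from
 * bio_vecs:
 *
 *	struct bio_vec bv = { .bv_page = page, .bv_len = len, .bv_offset = off };
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, WRITE, &bv, 1, len);
 *
 * page/len/off are placeholders; WRITE marks the pages as the data source.
 */
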
aa563d7b 1279void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
241699cd
AV
1280 struct pipe_inode_info *pipe,
1281 size_t count)
1282{
aa563d7b 1283 BUG_ON(direction != READ);
8cefc107 1284 WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
aa563d7b 1285 i->type = ITER_PIPE | READ;
241699cd 1286 i->pipe = pipe;
8cefc107 1287 i->head = pipe->head;
241699cd
AV
1288 i->iov_offset = 0;
1289 i->count = count;
8cefc107 1290 i->start_head = i->head;
241699cd
AV
1291}
1292EXPORT_SYMBOL(iov_iter_pipe);
1293
/**
 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @xarray: The xarray to access.
 * @start: The start file position.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator to either draw data out of the pages attached to an
 * inode or to inject data into those pages. The caller must prevent the
 * pages from being evicted for the lifetime of the iterator, either by
 * taking a ref on them or by locking them.
 */
1307void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1308 struct xarray *xarray, loff_t start, size_t count)
1309{
1310 BUG_ON(direction & ~1);
1311 i->type = ITER_XARRAY | (direction & (READ | WRITE));
1312 i->xarray = xarray;
1313 i->xarray_start = start;
1314 i->count = count;
1315 i->iov_offset = 0;
1316}
1317EXPORT_SYMBOL(iov_iter_xarray);
1318
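/*
 * Illustrative sketch (editor's note, not part of this file): ITER_XARRAY is
 * aimed at users such as netfs/fscache that want to transfer directly
 * to/from an inode's pagecache, roughly:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, len);
 *
 * The caller must already hold references on (or locks over) the pages
 * covering [pos, pos + len); mapping/pos/len are placeholders.
 */
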
1319/**
1320 * iov_iter_discard - Initialise an I/O iterator that discards data
1321 * @i: The iterator to initialise.
1322 * @direction: The direction of the transfer.
1323 * @count: The size of the I/O buffer in bytes.
1324 *
1325 * Set up an I/O iterator that just discards everything that's written to it.
1326 * It's only available as a READ iterator.
1327 */
1328void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1329{
1330 BUG_ON(direction != READ);
1331 i->type = ITER_DISCARD | READ;
1332 i->count = count;
1333 i->iov_offset = 0;
1334}
1335EXPORT_SYMBOL(iov_iter_discard);
1336
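/*
 * Illustrative sketch (editor's note, not part of this file): an ITER_DISCARD
 * iterator lets a caller consume and throw away source data, e.g. skipping
 * over part of a message:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, bytes_to_skip);
 *	ret = some_read_helper(file, &iter);
 *
 * some_read_helper/bytes_to_skip are placeholders for the caller's code.
 */
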
62a8067a
AV
1337unsigned long iov_iter_alignment(const struct iov_iter *i)
1338{
04a31165
AV
1339 unsigned long res = 0;
1340 size_t size = i->count;
1341
00e23707 1342 if (unlikely(iov_iter_is_pipe(i))) {
e0ff126e
JK
1343 unsigned int p_mask = i->pipe->ring_size - 1;
1344
8cefc107 1345 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
241699cd
AV
1346 return size | i->iov_offset;
1347 return size;
1348 }
04a31165
AV
1349 iterate_all_kinds(i, size, v,
1350 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
a280455f 1351 res |= v.bv_offset | v.bv_len,
7ff50620
DH
1352 res |= (unsigned long)v.iov_base | v.iov_len,
1353 res |= v.bv_offset | v.bv_len
04a31165
AV
1354 )
1355 return res;
62a8067a
AV
1356}
1357EXPORT_SYMBOL(iov_iter_alignment);
1358
357f435d
AV
1359unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1360{
33844e66 1361 unsigned long res = 0;
357f435d 1362 size_t size = i->count;
357f435d 1363
9ea9ce04 1364 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
241699cd
AV
1365 WARN_ON(1);
1366 return ~0U;
1367 }
1368
357f435d
AV
1369 iterate_all_kinds(i, size, v,
1370 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1371 (size != v.iov_len ? size : 0), 0),
1372 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1373 (size != v.bv_len ? size : 0)),
1374 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
7ff50620
DH
1375 (size != v.iov_len ? size : 0)),
1376 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1377 (size != v.bv_len ? size : 0))
357f435d 1378 );
33844e66 1379 return res;
357f435d
AV
1380}
1381EXPORT_SYMBOL(iov_iter_gap_alignment);
1382
e76b6312 1383static inline ssize_t __pipe_get_pages(struct iov_iter *i,
241699cd
AV
1384 size_t maxsize,
1385 struct page **pages,
8cefc107 1386 int iter_head,
241699cd
AV
1387 size_t *start)
1388{
1389 struct pipe_inode_info *pipe = i->pipe;
8cefc107
DH
1390 unsigned int p_mask = pipe->ring_size - 1;
1391 ssize_t n = push_pipe(i, maxsize, &iter_head, start);
241699cd
AV
1392 if (!n)
1393 return -EFAULT;
1394
1395 maxsize = n;
1396 n += *start;
1689c73a 1397 while (n > 0) {
8cefc107
DH
1398 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1399 iter_head++;
241699cd
AV
1400 n -= PAGE_SIZE;
1401 }
1402
1403 return maxsize;
1404}
1405
1406static ssize_t pipe_get_pages(struct iov_iter *i,
1407 struct page **pages, size_t maxsize, unsigned maxpages,
1408 size_t *start)
1409{
8cefc107 1410 unsigned int iter_head, npages;
241699cd 1411 size_t capacity;
241699cd 1412
33844e66
AV
1413 if (!maxsize)
1414 return 0;
1415
241699cd
AV
1416 if (!sanity(i))
1417 return -EFAULT;
1418
8cefc107
DH
1419 data_start(i, &iter_head, start);
1420 /* Amount of free space: some of this one + all after this one */
1421 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1422 capacity = min(npages, maxpages) * PAGE_SIZE - *start;
241699cd 1423
8cefc107 1424 return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
241699cd
AV
1425}
1426
7ff50620
DH
1427static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1428 pgoff_t index, unsigned int nr_pages)
1429{
1430 XA_STATE(xas, xa, index);
1431 struct page *page;
1432 unsigned int ret = 0;
1433
1434 rcu_read_lock();
1435 for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1436 if (xas_retry(&xas, page))
1437 continue;
1438
1439 /* Has the page moved or been split? */
1440 if (unlikely(page != xas_reload(&xas))) {
1441 xas_reset(&xas);
1442 continue;
1443 }
1444
1445 pages[ret] = find_subpage(page, xas.xa_index);
1446 get_page(pages[ret]);
1447 if (++ret == nr_pages)
1448 break;
1449 }
1450 rcu_read_unlock();
1451 return ret;
1452}
1453
1454static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1455 struct page **pages, size_t maxsize,
1456 unsigned maxpages, size_t *_start_offset)
1457{
1458 unsigned nr, offset;
1459 pgoff_t index, count;
1460 size_t size = maxsize, actual;
1461 loff_t pos;
1462
1463 if (!size || !maxpages)
1464 return 0;
1465
1466 pos = i->xarray_start + i->iov_offset;
1467 index = pos >> PAGE_SHIFT;
1468 offset = pos & ~PAGE_MASK;
1469 *_start_offset = offset;
1470
1471 count = 1;
1472 if (size > PAGE_SIZE - offset) {
1473 size -= PAGE_SIZE - offset;
1474 count += size >> PAGE_SHIFT;
1475 size &= ~PAGE_MASK;
1476 if (size)
1477 count++;
1478 }
1479
1480 if (count > maxpages)
1481 count = maxpages;
1482
1483 nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1484 if (nr == 0)
1485 return 0;
1486
1487 actual = PAGE_SIZE * nr;
1488 actual -= offset;
1489 if (nr == count && size > 0) {
1490 unsigned last_offset = (nr > 1) ? 0 : offset;
1491 actual -= PAGE_SIZE - (last_offset + size);
1492 }
1493 return actual;
1494}
1495
62a8067a 1496ssize_t iov_iter_get_pages(struct iov_iter *i,
2c80929c 1497 struct page **pages, size_t maxsize, unsigned maxpages,
62a8067a
AV
1498 size_t *start)
1499{
e5393fae
AV
1500 if (maxsize > i->count)
1501 maxsize = i->count;
1502
00e23707 1503 if (unlikely(iov_iter_is_pipe(i)))
241699cd 1504 return pipe_get_pages(i, pages, maxsize, maxpages, start);
7ff50620
DH
1505 if (unlikely(iov_iter_is_xarray(i)))
1506 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
9ea9ce04
DH
1507 if (unlikely(iov_iter_is_discard(i)))
1508 return -EFAULT;
1509
e5393fae
AV
1510 iterate_all_kinds(i, maxsize, v, ({
1511 unsigned long addr = (unsigned long)v.iov_base;
1512 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1513 int n;
1514 int res;
1515
1516 if (len > maxpages * PAGE_SIZE)
1517 len = maxpages * PAGE_SIZE;
1518 addr &= ~(PAGE_SIZE - 1);
1519 n = DIV_ROUND_UP(len, PAGE_SIZE);
73b0140b
IW
1520 res = get_user_pages_fast(addr, n,
1521 iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
1522 pages);
e5393fae
AV
1523 if (unlikely(res < 0))
1524 return res;
1525 return (res == n ? len : res * PAGE_SIZE) - *start;
1526 0;}),({
1527 /* can't be more than PAGE_SIZE */
1528 *start = v.bv_offset;
1529 get_page(*pages = v.bv_page);
1530 return v.bv_len;
a280455f
AV
1531 }),({
1532 return -EFAULT;
7ff50620
DH
1533 }),
1534 0
e5393fae
AV
1535 )
1536 return 0;
62a8067a
AV
1537}
1538EXPORT_SYMBOL(iov_iter_get_pages);
1539
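/*
 * Illustrative sketch (editor's note, not part of this file): direct-I/O
 * style users pin the pages backing the next chunk of the iterator and then
 * advance past whatever was actually pinned:
 *
 *	ssize_t n = iov_iter_get_pages(&iter, pages, maxsize, maxpages, &off);
 *	if (n > 0)
 *		iov_iter_advance(&iter, n);
 *
 * pages/maxsize/maxpages/off are placeholders; the pinned pages must be
 * released with put_page() when the caller is done with them.
 */
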
1b17f1f2
AV
1540static struct page **get_pages_array(size_t n)
1541{
752ade68 1542 return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1b17f1f2
AV
1543}
1544
241699cd
AV
1545static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1546 struct page ***pages, size_t maxsize,
1547 size_t *start)
1548{
1549 struct page **p;
8cefc107 1550 unsigned int iter_head, npages;
d7760d63 1551 ssize_t n;
241699cd 1552
33844e66
AV
1553 if (!maxsize)
1554 return 0;
1555
241699cd
AV
1556 if (!sanity(i))
1557 return -EFAULT;
1558
8cefc107
DH
1559 data_start(i, &iter_head, start);
1560 /* Amount of free space: some of this one + all after this one */
1561 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
241699cd
AV
1562 n = npages * PAGE_SIZE - *start;
1563 if (maxsize > n)
1564 maxsize = n;
1565 else
1566 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1567 p = get_pages_array(npages);
1568 if (!p)
1569 return -ENOMEM;
8cefc107 1570 n = __pipe_get_pages(i, maxsize, p, iter_head, start);
241699cd
AV
1571 if (n > 0)
1572 *pages = p;
1573 else
1574 kvfree(p);
1575 return n;
1576}
1577
7ff50620
DH
1578static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1579 struct page ***pages, size_t maxsize,
1580 size_t *_start_offset)
1581{
1582 struct page **p;
1583 unsigned nr, offset;
1584 pgoff_t index, count;
1585 size_t size = maxsize, actual;
1586 loff_t pos;
1587
1588 if (!size)
1589 return 0;
1590
1591 pos = i->xarray_start + i->iov_offset;
1592 index = pos >> PAGE_SHIFT;
1593 offset = pos & ~PAGE_MASK;
1594 *_start_offset = offset;
1595
1596 count = 1;
1597 if (size > PAGE_SIZE - offset) {
1598 size -= PAGE_SIZE - offset;
1599 count += size >> PAGE_SHIFT;
1600 size &= ~PAGE_MASK;
1601 if (size)
1602 count++;
1603 }
1604
1605 p = get_pages_array(count);
1606 if (!p)
1607 return -ENOMEM;
1608 *pages = p;
1609
1610 nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1611 if (nr == 0)
1612 return 0;
1613
1614 actual = PAGE_SIZE * nr;
1615 actual -= offset;
1616 if (nr == count && size > 0) {
1617 unsigned last_offset = (nr > 1) ? 0 : offset;
1618 actual -= PAGE_SIZE - (last_offset + size);
1619 }
1620 return actual;
1621}
1622
62a8067a
AV
1623ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1624 struct page ***pages, size_t maxsize,
1625 size_t *start)
1626{
1b17f1f2
AV
1627 struct page **p;
1628
1629 if (maxsize > i->count)
1630 maxsize = i->count;
1631
00e23707 1632 if (unlikely(iov_iter_is_pipe(i)))
241699cd 1633 return pipe_get_pages_alloc(i, pages, maxsize, start);
7ff50620
DH
1634 if (unlikely(iov_iter_is_xarray(i)))
1635 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
9ea9ce04
DH
1636 if (unlikely(iov_iter_is_discard(i)))
1637 return -EFAULT;
1638
1b17f1f2
AV
1639 iterate_all_kinds(i, maxsize, v, ({
1640 unsigned long addr = (unsigned long)v.iov_base;
1641 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1642 int n;
1643 int res;
1644
1645 addr &= ~(PAGE_SIZE - 1);
1646 n = DIV_ROUND_UP(len, PAGE_SIZE);
1647 p = get_pages_array(n);
1648 if (!p)
1649 return -ENOMEM;
73b0140b
IW
1650 res = get_user_pages_fast(addr, n,
1651 iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
1b17f1f2
AV
1652 if (unlikely(res < 0)) {
1653 kvfree(p);
1654 return res;
1655 }
1656 *pages = p;
1657 return (res == n ? len : res * PAGE_SIZE) - *start;
1658 0;}),({
1659 /* can't be more than PAGE_SIZE */
1660 *start = v.bv_offset;
1661 *pages = p = get_pages_array(1);
1662 if (!p)
1663 return -ENOMEM;
1664 get_page(*p = v.bv_page);
1665 return v.bv_len;
a280455f
AV
1666 }),({
1667 return -EFAULT;
7ff50620 1668 }), 0
1b17f1f2
AV
1669 )
1670 return 0;
62a8067a
AV
1671}
1672EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1673
a604ec7e
AV
1674size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1675 struct iov_iter *i)
1676{
1677 char *to = addr;
1678 __wsum sum, next;
1679 size_t off = 0;
a604ec7e 1680 sum = *csum;
9ea9ce04 1681 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
241699cd
AV
1682 WARN_ON(1);
1683 return 0;
1684 }
a604ec7e 1685 iterate_and_advance(i, bytes, v, ({
cbbd26b8 1686 next = csum_and_copy_from_user(v.iov_base,
a604ec7e 1687 (to += v.iov_len) - v.iov_len,
c693cc46
AV
1688 v.iov_len);
1689 if (next) {
a604ec7e
AV
1690 sum = csum_block_add(sum, next, off);
1691 off += v.iov_len;
1692 }
c693cc46 1693 next ? 0 : v.iov_len;
a604ec7e
AV
1694 }), ({
1695 char *p = kmap_atomic(v.bv_page);
f9152895
AV
1696 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1697 p + v.bv_offset, v.bv_len,
1698 sum, off);
a604ec7e 1699 kunmap_atomic(p);
a604ec7e
AV
1700 off += v.bv_len;
1701 }),({
f9152895
AV
1702 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1703 v.iov_base, v.iov_len,
1704 sum, off);
a604ec7e 1705 off += v.iov_len;
7ff50620
DH
1706 }), ({
1707 char *p = kmap_atomic(v.bv_page);
1708 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1709 p + v.bv_offset, v.bv_len,
1710 sum, off);
1711 kunmap_atomic(p);
1712 off += v.bv_len;
a604ec7e
AV
1713 })
1714 )
1715 *csum = sum;
1716 return bytes;
1717}
1718EXPORT_SYMBOL(csum_and_copy_from_iter);
1719
cbbd26b8
AV
1720bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1721 struct iov_iter *i)
1722{
1723 char *to = addr;
1724 __wsum sum, next;
1725 size_t off = 0;
1726 sum = *csum;
9ea9ce04 1727 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
cbbd26b8
AV
1728 WARN_ON(1);
1729 return false;
1730 }
1731 if (unlikely(i->count < bytes))
1732 return false;
1733 iterate_all_kinds(i, bytes, v, ({
cbbd26b8
AV
1734 next = csum_and_copy_from_user(v.iov_base,
1735 (to += v.iov_len) - v.iov_len,
c693cc46
AV
1736 v.iov_len);
1737 if (!next)
cbbd26b8
AV
1738 return false;
1739 sum = csum_block_add(sum, next, off);
1740 off += v.iov_len;
1741 0;
1742 }), ({
1743 char *p = kmap_atomic(v.bv_page);
f9152895
AV
1744 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1745 p + v.bv_offset, v.bv_len,
1746 sum, off);
cbbd26b8 1747 kunmap_atomic(p);
cbbd26b8
AV
1748 off += v.bv_len;
1749 }),({
f9152895
AV
1750 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1751 v.iov_base, v.iov_len,
1752 sum, off);
cbbd26b8 1753 off += v.iov_len;
7ff50620
DH
1754 }), ({
1755 char *p = kmap_atomic(v.bv_page);
1756 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1757 p + v.bv_offset, v.bv_len,
1758 sum, off);
1759 kunmap_atomic(p);
1760 off += v.bv_len;
cbbd26b8
AV
1761 })
1762 )
1763 *csum = sum;
1764 iov_iter_advance(i, bytes);
1765 return true;
1766}
1767EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1768
52cbd23a 1769size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
a604ec7e
AV
1770 struct iov_iter *i)
1771{
52cbd23a 1772 struct csum_state *csstate = _csstate;
36f7a8a4 1773 const char *from = addr;
a604ec7e 1774 __wsum sum, next;
52cbd23a 1775 size_t off;
78e1f386
AV
1776
1777 if (unlikely(iov_iter_is_pipe(i)))
52cbd23a 1778 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
78e1f386 1779
52cbd23a
WB
1780 sum = csstate->csum;
1781 off = csstate->off;
78e1f386 1782 if (unlikely(iov_iter_is_discard(i))) {
241699cd
AV
1783 WARN_ON(1); /* for now */
1784 return 0;
1785 }
a604ec7e 1786 iterate_and_advance(i, bytes, v, ({
a604ec7e 1787 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
cbbd26b8 1788 v.iov_base,
c693cc46
AV
1789 v.iov_len);
1790 if (next) {
a604ec7e
AV
1791 sum = csum_block_add(sum, next, off);
1792 off += v.iov_len;
1793 }
c693cc46 1794 next ? 0 : v.iov_len;
a604ec7e
AV
1795 }), ({
1796 char *p = kmap_atomic(v.bv_page);
f9152895
AV
1797 sum = csum_and_memcpy(p + v.bv_offset,
1798 (from += v.bv_len) - v.bv_len,
1799 v.bv_len, sum, off);
a604ec7e 1800 kunmap_atomic(p);
a604ec7e
AV
1801 off += v.bv_len;
1802 }),({
f9152895
AV
1803 sum = csum_and_memcpy(v.iov_base,
1804 (from += v.iov_len) - v.iov_len,
1805 v.iov_len, sum, off);
a604ec7e 1806 off += v.iov_len;
7ff50620
DH
1807 }), ({
1808 char *p = kmap_atomic(v.bv_page);
1809 sum = csum_and_memcpy(p + v.bv_offset,
1810 (from += v.bv_len) - v.bv_len,
1811 v.bv_len, sum, off);
1812 kunmap_atomic(p);
1813 off += v.bv_len;
a604ec7e
AV
1814 })
1815 )
52cbd23a
WB
1816 csstate->csum = sum;
1817 csstate->off = off;
a604ec7e
AV
1818 return bytes;
1819}
1820EXPORT_SYMBOL(csum_and_copy_to_iter);
1821
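/*
 * Illustrative sketch (editor's note, not part of this file): the networking
 * core drives csum_and_copy_to_iter() through struct csum_state so the
 * running checksum survives partial copies:
 *
 *	struct csum_state csstate = { .csum = 0, .off = 0 };
 *
 *	copied = csum_and_copy_to_iter(data, len, &csstate, &iter);
 *
 * The checksum seed and the data/len names are placeholders; real callers
 * seed csstate from their protocol state.
 */
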
d05f4435
SG
1822size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1823 struct iov_iter *i)
1824{
7999096f 1825#ifdef CONFIG_CRYPTO_HASH
d05f4435
SG
1826 struct ahash_request *hash = hashp;
1827 struct scatterlist sg;
1828 size_t copied;
1829
1830 copied = copy_to_iter(addr, bytes, i);
1831 sg_init_one(&sg, addr, copied);
1832 ahash_request_set_crypt(hash, &sg, NULL, copied);
1833 crypto_ahash_update(hash);
1834 return copied;
27fad74a
Y
1835#else
1836 return 0;
1837#endif
d05f4435
SG
1838}
1839EXPORT_SYMBOL(hash_and_copy_to_iter);
1840
62a8067a
AV
1841int iov_iter_npages(const struct iov_iter *i, int maxpages)
1842{
e0f2dc40
AV
1843 size_t size = i->count;
1844 int npages = 0;
1845
1846 if (!size)
1847 return 0;
9ea9ce04
DH
1848 if (unlikely(iov_iter_is_discard(i)))
1849 return 0;
e0f2dc40 1850
00e23707 1851 if (unlikely(iov_iter_is_pipe(i))) {
241699cd 1852 struct pipe_inode_info *pipe = i->pipe;
8cefc107 1853 unsigned int iter_head;
241699cd 1854 size_t off;
1855
1856 if (!sanity(i))
1857 return 0;
1858
8cefc107 1859 data_start(i, &iter_head, &off);
241699cd 1860 /* some of this one + all after this one */
8cefc107 1861 npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1862 if (npages >= maxpages)
1863 return maxpages;
1864 } else if (unlikely(iov_iter_is_xarray(i))) {
1865 unsigned offset;
1866
1867 offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
1868
1869 npages = 1;
1870 if (size > PAGE_SIZE - offset) {
1871 size -= PAGE_SIZE - offset;
1872 npages += size >> PAGE_SHIFT;
1873 size &= ~PAGE_MASK;
1874 if (size)
1875 npages++;
1876 }
1877 if (npages >= maxpages)
1878 return maxpages;
241699cd 1879 } else iterate_all_kinds(i, size, v, ({
1880 unsigned long p = (unsigned long)v.iov_base;
1881 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1882 - p / PAGE_SIZE;
1883 if (npages >= maxpages)
1884 return maxpages;
1885 0;}),({
1886 npages++;
1887 if (npages >= maxpages)
1888 return maxpages;
1889 }),({
1890 unsigned long p = (unsigned long)v.iov_base;
1891 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1892 - p / PAGE_SIZE;
1893 if (npages >= maxpages)
1894 return maxpages;
1895 }),
1896 0
1897 )
1898 return npages;
62a8067a 1899}
f67da30c 1900EXPORT_SYMBOL(iov_iter_npages);
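/*
 * Editorial sketch, not part of the original file: using iov_iter_npages()
 * to size a page array before pinning the pages with iov_iter_get_pages().
 * The cap of 256 pages and example_pin_pages() are arbitrary/hypothetical.
 */
static ssize_t __maybe_unused example_pin_pages(struct iov_iter *i,
						struct page ***pagesp,
						size_t *offset)
{
	int npages = iov_iter_npages(i, 256);
	struct page **pages;
	ssize_t bytes;

	if (!npages)
		return 0;
	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/* returns the number of bytes covered by the pinned pages, or -errno */
	bytes = iov_iter_get_pages(i, pages, SIZE_MAX, npages, offset);
	if (bytes < 0) {
		kfree(pages);
		return bytes;
	}
	*pagesp = pages;
	return bytes;
}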
1901
1902const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1903{
1904 *new = *old;
00e23707 1905 if (unlikely(iov_iter_is_pipe(new))) {
1906 WARN_ON(1);
1907 return NULL;
1908 }
7ff50620 1909 if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
9ea9ce04 1910 return NULL;
00e23707 1911 if (iov_iter_is_bvec(new))
1912 return new->bvec = kmemdup(new->bvec,
1913 new->nr_segs * sizeof(struct bio_vec),
1914 flags);
1915 else
1916 /* iovec and kvec have identical layout */
1917 return new->iov = kmemdup(new->iov,
1918 new->nr_segs * sizeof(struct iovec),
1919 flags);
1920}
1921EXPORT_SYMBOL(dup_iter);
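/*
 * Editorial sketch, not part of the original file: stashing an iterator for
 * asynchronous completion.  struct example_async_op and the helpers are
 * hypothetical; the point is that the pointer dup_iter() returns (the
 * duplicated segment array) is what must eventually be kfree()d.
 */
struct example_async_op {
	struct iov_iter iter;
	const void *iter_segs;
};

static int __maybe_unused example_defer_io(struct example_async_op *op,
					   struct iov_iter *src)
{
	op->iter_segs = dup_iter(&op->iter, src, GFP_KERNEL);
	if (!op->iter_segs)
		return -ENOMEM;	/* allocation failed, or nothing to duplicate */
	return 0;
}

static void __maybe_unused example_complete_io(struct example_async_op *op)
{
	kfree(op->iter_segs);	/* NULL-safe */
	op->iter_segs = NULL;
}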
bc917be8 1922
1923static int copy_compat_iovec_from_user(struct iovec *iov,
1924 const struct iovec __user *uvec, unsigned long nr_segs)
1925{
1926 const struct compat_iovec __user *uiov =
1927 (const struct compat_iovec __user *)uvec;
1928 int ret = -EFAULT, i;
1929
a959a978 1930 if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1931 return -EFAULT;
1932
1933 for (i = 0; i < nr_segs; i++) {
1934 compat_uptr_t buf;
1935 compat_ssize_t len;
1936
1937 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1938 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1939
1940 /* check for compat_size_t not fitting in compat_ssize_t .. */
1941 if (len < 0) {
1942 ret = -EINVAL;
1943 goto uaccess_end;
1944 }
1945 iov[i].iov_base = compat_ptr(buf);
1946 iov[i].iov_len = len;
1947 }
1948
1949 ret = 0;
1950uaccess_end:
1951 user_access_end();
1952 return ret;
1953}
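/*
 * Editorial note, not part of the original file: the compat path above works
 * because a 32-bit iovec is laid out with narrow fields, roughly
 *
 *	struct compat_iovec {
 *		compat_uptr_t	iov_base;
 *		compat_ssize_t	iov_len;
 *	};
 *
 * so each entry is read with unsafe_get_user() and widened into a native
 * struct iovec via compat_ptr().
 */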
1954
1955static int copy_iovec_from_user(struct iovec *iov,
1956 const struct iovec __user *uvec, unsigned long nr_segs)
1957{
1958 unsigned long seg;
fb041b59 1959
1960 if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1961 return -EFAULT;
1962 for (seg = 0; seg < nr_segs; seg++) {
1963 if ((ssize_t)iov[seg].iov_len < 0)
1964 return -EINVAL;
1965 }
1966
1967 return 0;
1968}
1969
1970struct iovec *iovec_from_user(const struct iovec __user *uvec,
1971 unsigned long nr_segs, unsigned long fast_segs,
1972 struct iovec *fast_iov, bool compat)
1973{
1974 struct iovec *iov = fast_iov;
1975 int ret;
1976
fb041b59 1977 /*
1978	 * SuS says "The readv() function *may* fail if the iovcnt argument was
1979	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1980	 * traditionally returned zero for zero segments, so...
fb041b59 1981	 */
1982 if (nr_segs == 0)
1983 return iov;
1984 if (nr_segs > UIO_MAXIOV)
1985 return ERR_PTR(-EINVAL);
1986 if (nr_segs > fast_segs) {
1987 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1988 if (!iov)
1989 return ERR_PTR(-ENOMEM);
fb041b59 1990 }
1991
1992 if (compat)
1993 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1994 else
1995 ret = copy_iovec_from_user(iov, uvec, nr_segs);
1996 if (ret) {
1997 if (iov != fast_iov)
1998 kfree(iov);
1999 return ERR_PTR(ret);
2000 }
2001
2002 return iov;
2003}
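/*
 * Editorial sketch, not part of the original file: the usual calling pattern
 * for iovec_from_user() with a small on-stack fast array.  Only the helper
 * name is hypothetical.
 */
static int __maybe_unused example_fetch_iovec(const struct iovec __user *uvec,
					      unsigned long nr_segs)
{
	struct iovec fast[UIO_FASTIOV];
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, UIO_FASTIOV, fast, false);
	if (IS_ERR(iov))
		return PTR_ERR(iov);

	/* ... use iov[0 .. nr_segs - 1] here ... */

	if (iov != fast)
		kfree(iov);
	return 0;
}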
2004
2005ssize_t __import_iovec(int type, const struct iovec __user *uvec,
2006 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
2007 struct iov_iter *i, bool compat)
2008{
2009 ssize_t total_len = 0;
2010 unsigned long seg;
2011 struct iovec *iov;
2012
2013 iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
2014 if (IS_ERR(iov)) {
2015 *iovp = NULL;
2016 return PTR_ERR(iov);
2017 }
2018
2019 /*
2020 * According to the Single Unix Specification we should return EINVAL if
2021 * an element length is < 0 when cast to ssize_t or if the total length
2022 * would overflow the ssize_t return value of the system call.
2023 *
2024 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
2025 * overflow case.
2026 */
fb041b59 2027 for (seg = 0; seg < nr_segs; seg++) {
2028 ssize_t len = (ssize_t)iov[seg].iov_len;
2029
2030 if (!access_ok(iov[seg].iov_base, len)) {
2031 if (iov != *iovp)
2032 kfree(iov);
2033 *iovp = NULL;
2034 return -EFAULT;
fb041b59 2035 }
2036
2037 if (len > MAX_RW_COUNT - total_len) {
2038 len = MAX_RW_COUNT - total_len;
2039 iov[seg].iov_len = len;
2040 }
bfdc5970 2041 total_len += len;
fb041b59 2042 }
2043
2044 iov_iter_init(i, type, iov, nr_segs, total_len);
2045 if (iov == *iovp)
2046 *iovp = NULL;
2047 else
2048 *iovp = iov;
2049 return total_len;
2050}
2051
2052/**
2053 * import_iovec() - Copy an array of &struct iovec from userspace
2054 * into the kernel, check that it is valid, and initialize a new
2055 * &struct iov_iter iterator to access it.
2056 *
2057 * @type: One of %READ or %WRITE.
bfdc5970 2058 * @uvec: Pointer to the userspace array.
2059 * @nr_segs: Number of elements in userspace array.
2060 * @fast_segs: Number of elements in *@iovp.
bfdc5970 2061 * @iovp: (input and output parameter) Pointer to pointer to (usually small
2062 * on-stack) kernel array.
2063 * @i: Pointer to iterator that will be initialized on success.
2064 *
2065 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
2066 * then this function places %NULL in *@iovp on return. Otherwise, a new
2067 * array will be allocated and the result placed in *@iovp. This means that
2068 * the caller may call kfree() on *@iovp regardless of whether the small
2069 * on-stack array was used or not (and regardless of whether this function
2070 * returns an error or not).
2071 *
87e5e6da 2072 * Return: Negative error code on error, bytes imported on success
ffecee4f 2073 */
bfdc5970 2074ssize_t import_iovec(int type, const struct iovec __user *uvec,
bc917be8 2075 unsigned nr_segs, unsigned fast_segs,
bfdc5970 2076 struct iovec **iovp, struct iov_iter *i)
bc917be8 2077{
2078 return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2079 in_compat_syscall());
2080}
2081EXPORT_SYMBOL(import_iovec);
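/*
 * Editorial sketch, not part of the original file: the common readv()-style
 * call site for import_iovec().  The unconditional kfree(iov) is by design,
 * as the comment above explains; example_readv_setup() is hypothetical.
 */
static ssize_t __maybe_unused example_readv_setup(const struct iovec __user *uvec,
						  unsigned nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	/* ret is the total byte count; hand &iter to ->read_iter() etc. here */

	kfree(iov);	/* safe whether or not the stack array was used */
	return ret;
}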
2082
2083int import_single_range(int rw, void __user *buf, size_t len,
2084 struct iovec *iov, struct iov_iter *i)
2085{
2086 if (len > MAX_RW_COUNT)
2087 len = MAX_RW_COUNT;
96d4f267 2088 if (unlikely(!access_ok(buf, len)))
2089 return -EFAULT;
2090
2091 iov->iov_base = buf;
2092 iov->iov_len = len;
2093 iov_iter_init(i, rw, iov, 1, len);
2094 return 0;
2095}
e1267585 2096EXPORT_SYMBOL(import_single_range);
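/*
 * Editorial sketch, not part of the original file: the single-buffer
 * analogue of the pattern above, as used by plain read()/write() paths.
 */
static ssize_t __maybe_unused example_read_setup(void __user *buf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(READ, buf, len, &iov, &iter);
	if (ret)
		return ret;

	/* ... pass &iter to the actual I/O routine ... */
	return iov_iter_count(&iter);	/* len, possibly capped to MAX_RW_COUNT */
}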
2097
2098int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
2099 int (*f)(struct kvec *vec, void *context),
2100 void *context)
2101{
2102 struct kvec w;
2103 int err = -EINVAL;
2104 if (!bytes)
2105 return 0;
2106
2107 iterate_all_kinds(i, bytes, v, -EINVAL, ({
2108 w.iov_base = kmap(v.bv_page) + v.bv_offset;
2109 w.iov_len = v.bv_len;
2110 err = f(&w, context);
2111 kunmap(v.bv_page);
2112 err;}), ({
2113 w = v;
2114 err = f(&w, context);}), ({
2115 w.iov_base = kmap(v.bv_page) + v.bv_offset;
2116 w.iov_len = v.bv_len;
2117 err = f(&w, context);
2118 kunmap(v.bv_page);
2119 err;})
2120 )
2121 return err;
2122}
2123EXPORT_SYMBOL(iov_iter_for_each_range);
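/*
 * Editorial sketch, not part of the original file: a callback for
 * iov_iter_for_each_range().  Each mapped range arrives as a temporary kvec;
 * note that iovec-backed (user-space) iterators are rejected with -EINVAL by
 * the function above, so this only makes sense for kvec/bvec/xarray iterators.
 */
static int __maybe_unused example_sum_range(struct kvec *vec, void *context)
{
	u32 *sum = context;
	size_t n;

	for (n = 0; n < vec->iov_len; n++)
		*sum += ((const u8 *)vec->iov_base)[n];
	return 0;
}

/*
 * Typical use (hypothetical):
 *
 *	u32 sum = 0;
 *	err = iov_iter_for_each_range(iter, bytes, example_sum_range, &sum);
 */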