// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;	\
	size_t wanted = n;	\
	__p = i->iov;	\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {	\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);	\
		__v.iov_len -= left;	\
		skip += __v.iov_len;	\
		n -= __v.iov_len;	\
	} else {	\
		left = 0;	\
	}	\
	while (unlikely(!left && n)) {	\
		__p++;	\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))	\
			continue;	\
		__v.iov_base = __p->iov_base;	\
		left = (STEP);	\
		__v.iov_len -= left;	\
		skip = __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	n = wanted - n;	\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;	\
	__p = i->kvec;	\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {	\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);	\
		skip += __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	while (unlikely(n)) {	\
		__p++;	\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))	\
			continue;	\
		__v.iov_base = __p->iov_base;	\
		(void)(STEP);	\
		skip = __v.iov_len;	\
		n -= __v.iov_len;	\
	}	\
	n = wanted;	\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;	\
	__start.bi_size = n;	\
	__start.bi_bvec_done = skip;	\
	__start.bi_idx = 0;	\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		(void)(STEP);	\
	}	\
}

#define iterate_xarray(i, n, __v, skip, STEP) {	\
	struct page *head = NULL;	\
	size_t wanted = n, seg, offset;	\
	loff_t start = i->xarray_start + skip;	\
	pgoff_t index = start >> PAGE_SHIFT;	\
	int j;	\
	\
	XA_STATE(xas, i->xarray, index);	\
	\
	rcu_read_lock();	\
	xas_for_each(&xas, head, ULONG_MAX) {	\
		if (xas_retry(&xas, head))	\
			continue;	\
		if (WARN_ON(xa_is_value(head)))	\
			break;	\
		if (WARN_ON(PageHuge(head)))	\
			break;	\
		for (j = (head->index < index) ? index - head->index : 0;	\
		     j < thp_nr_pages(head); j++) {	\
			__v.bv_page = head + j;	\
			offset = (i->xarray_start + skip) & ~PAGE_MASK;	\
			seg = PAGE_SIZE - offset;	\
			__v.bv_offset = offset;	\
			__v.bv_len = min(n, seg);	\
			(void)(STEP);	\
			n -= __v.bv_len;	\
			skip += __v.bv_len;	\
			if (n == 0)	\
				break;	\
		}	\
		if (n == 0)	\
			break;	\
	}	\
	rcu_read_unlock();	\
	n = wanted - n;	\
}

#define iterate_all_kinds(i, n, v, I, B, K, X) {	\
	if (likely(n)) {	\
		size_t skip = i->iov_offset;	\
		if (unlikely(i->type & ITER_BVEC)) {	\
			struct bio_vec v;	\
			struct bvec_iter __bi;	\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;	\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else if (unlikely(i->type & ITER_XARRAY)) {	\
			struct bio_vec v;	\
			iterate_xarray(i, n, v, skip, (X));	\
		} else {	\
			const struct iovec *iov;	\
			struct iovec v;	\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}	\
	}	\
}

#define iterate_and_advance(i, n, v, I, B, K, X) {	\
	if (unlikely(i->count < n))	\
		n = i->count;	\
	if (i->count) {	\
		size_t skip = i->iov_offset;	\
		if (unlikely(i->type & ITER_BVEC)) {	\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;	\
			struct bvec_iter __bi;	\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;	\
			skip = __bi.bi_bvec_done;	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;	\
			struct kvec v;	\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {	\
				kvec++;	\
				skip = 0;	\
			}	\
			i->nr_segs -= kvec - i->kvec;	\
			i->kvec = kvec;	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;	\
		} else if (unlikely(i->type & ITER_XARRAY)) {	\
			struct bio_vec v;	\
			iterate_xarray(i, n, v, skip, (X))	\
		} else {	\
			const struct iovec *iov;	\
			struct iovec v;	\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {	\
				iov++;	\
				skip = 0;	\
			}	\
			i->nr_segs -= iov - i->iov;	\
			i->iov = iov;	\
		}	\
		i->count -= n;	\
		i->iov_offset = skip;	\
	}	\
}

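/*
 * Illustration only (not part of the original file): how iterate_and_advance()
 * above is typically instantiated, one expression per iterator flavour, each
 * seeing a per-segment view in "v".  The instance sketched below mirrors
 * _copy_from_iter() further down in this file.
 *
 *	size_t copy_something(void *addr, size_t bytes, struct iov_iter *i)
 *	{
 *		char *to = addr;
 *		iterate_and_advance(i, bytes, v,
 *			copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 *			memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 *					 v.bv_offset, v.bv_len),
 *			memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 *			memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 *					 v.bv_offset, v.bv_len)
 *		)
 *		return bytes;
 *	}
 */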
static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
			0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

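/*
 * Sketch of the usual caller pattern for the helper above, as seen in
 * buffered-write paths: pre-fault the user pages, copy with page faults
 * disabled, then advance and retry on a short copy.  This is roughly what
 * generic_perform_write() does; the snippet is illustrative and details
 * vary by kernel version.
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 *		status = -EFAULT;
 *		break;
 *	}
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	...
 *	iov_iter_advance(i, copied);
 */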
void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better. Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

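/*
 * Minimal usage sketch (illustrative, not from this file): wrap a single
 * user buffer in an iterator for a READ transfer, i.e. data flows *into*
 * the user buffer.  "ubuf", "len" and "kbuf" are hypothetical names.
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 *	copied = copy_to_iter(kbuf, len, &iter);
 */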
static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	unsigned int i_head;
	size_t n, r;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

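/*
 * Usage note (illustrative): copy_to_iter()/_copy_to_iter() return the
 * number of bytes actually copied, which may be short on a user fault, so
 * callers conventionally check the result.  "kbuf" and "len" below are
 * hypothetical.
 *
 *	if (copy_to_iter(kbuf, len, iter) != len)
 *		return -EFAULT;
 */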
#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = copy_mc_to_kernel(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
				      off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @iter: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
			   v.iov_len),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
					- v.iov_len, v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			rcu_read_unlock();
			i->iov_offset += bytes;
			i->count -= bytes;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */

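/*
 * Hedged sketch (not part of this file) of the consumer described in the
 * kernel-doc above: the pmem driver's dax_operations wire ->copy_to_iter()
 * to the machine-check aware helper so a poisoned source line results in a
 * short copy rather than a repeated #MC.  The exact function name and
 * signature in drivers/nvdimm/pmem.c may differ by kernel version.
 *
 *	static size_t pmem_copy_to_iter(struct dax_device *dax_dev,
 *			pgoff_t pgoff, void *addr, size_t bytes,
 *			struct iov_iter *i)
 *	{
 *		return _copy_mc_to_iter(addr, bytes, i);
 *	}
 */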
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
			   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @iter: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	struct bvec_iter bi;

	bi.bi_size = i->count;
	bi.bi_bvec_done = i->iov_offset;
	bi.bi_idx = 0;
	bvec_iter_advance(i->bvec, &bi, size);

	i->bvec += bi.bi_idx;
	i->nr_segs -= bi.bi_idx;
	i->count = bi.bi_size;
	i->iov_offset = bi.bi_bvec_done;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	if (unlikely(iov_iter_is_xarray(i))) {
		size = min(size, i->count);
		i->iov_offset += size;
		i->count -= size;
		return;
	}
	if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

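/*
 * Illustrative pairing (not from this file): a caller that consumed part of
 * an iterator and then hit an error can wind the iterator back by the amount
 * it consumed, e.g. in a write path.  "buf", "len" and the error condition
 * are hypothetical.
 *
 *	size_t copied = copy_from_iter(buf, len, iter);
 *	if (something_went_wrong)
 *		iov_iter_revert(iter, copied);
 */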
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_xarray(i)) {
		BUG(); /* We should never go beyond the start of the specified
			* range since we might then be straying into pages that
			* aren't pinned.
			*/
	} else if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
		return i->count;
	if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

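/*
 * Minimal construction sketch (illustrative, not from this file): point an
 * ITER_BVEC iterator at a single page fragment for a kernel-internal WRITE,
 * i.e. the bvec is the data source.  "page" and "len" are hypothetical.
 *
 *	struct bio_vec bvec = {
 *		.bv_page = page, .bv_len = len, .bv_offset = 0,
 *	};
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, WRITE, &bvec, 1, len);
 */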
void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @xarray: The xarray to access.
 * @start: The start file position.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator to either draw data out of the pages attached to an
 * inode or to inject data into those pages.  The pages *must* be prevented
 * from evaporation, either by taking a ref on them or locking them by the
 * caller.
 */
void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
		     struct xarray *xarray, loff_t start, size_t count)
{
	BUG_ON(direction & ~1);
	i->type = ITER_XARRAY | (direction & (READ | WRITE));
	i->xarray = xarray;
	i->xarray_start = start;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_xarray);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	if (unlikely(iov_iter_is_xarray(i)))
		return (i->xarray_start + i->iov_offset) | i->count;
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len,
		res |= v.bv_offset | v.bv_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
					  pgoff_t index, unsigned int nr_pages)
{
	XA_STATE(xas, xa, index);
	struct page *page;
	unsigned int ret = 0;

	rcu_read_lock();
	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
		if (xas_retry(&xas, page))
			continue;

		/* Has the page moved or been split? */
		if (unlikely(page != xas_reload(&xas))) {
			xas_reset(&xas);
			continue;
		}

		pages[ret] = find_subpage(page, xas.xa_index);
		get_page(pages[ret]);
		if (++ret == nr_pages)
			break;
	}
	rcu_read_unlock();
	return ret;
}

static ssize_t iter_xarray_get_pages(struct iov_iter *i,
				     struct page **pages, size_t maxsize,
				     unsigned maxpages, size_t *_start_offset)
{
	unsigned nr, offset;
	pgoff_t index, count;
	size_t size = maxsize, actual;
	loff_t pos;

	if (!size || !maxpages)
		return 0;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	count = 1;
	if (size > PAGE_SIZE - offset) {
		size -= PAGE_SIZE - offset;
		count += size >> PAGE_SHIFT;
		size &= ~PAGE_MASK;
		if (size)
			count++;
	}

	if (count > maxpages)
		count = maxpages;

	nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
	if (nr == 0)
		return 0;

	actual = PAGE_SIZE * nr;
	actual -= offset;
	if (nr == count && size > 0) {
		unsigned last_offset = (nr > 1) ? 0 : offset;
		actual -= PAGE_SIZE - (last_offset + size);
	}
	return actual;
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_xarray(i)))
		return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	}),
	0
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
					   struct page ***pages, size_t maxsize,
					   size_t *_start_offset)
{
	struct page **p;
	unsigned nr, offset;
	pgoff_t index, count;
	size_t size = maxsize, actual;
	loff_t pos;

	if (!size)
		return 0;

	pos = i->xarray_start + i->iov_offset;
	index = pos >> PAGE_SHIFT;
	offset = pos & ~PAGE_MASK;
	*_start_offset = offset;

	count = 1;
	if (size > PAGE_SIZE - offset) {
		size -= PAGE_SIZE - offset;
		count += size >> PAGE_SHIFT;
		size &= ~PAGE_MASK;
		if (size)
			count++;
	}

	p = get_pages_array(count);
	if (!p)
		return -ENOMEM;
	*pages = p;

	nr = iter_xarray_populate_pages(p, i->xarray, index, count);
	if (nr == 0)
		return 0;

	actual = PAGE_SIZE * nr;
	actual -= offset;
	if (nr == count && size > 0) {
		unsigned last_offset = (nr > 1) ? 0 : offset;
		actual -= PAGE_SIZE - (last_offset + size);
	}
	return actual;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_xarray(i)))
		return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	}), 0
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (!next)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
			     struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	const char *from = addr;
	__wsum sum, next;
	size_t off;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);

	sum = csstate->csum;
	off = csstate->off;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	})
	)
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else if (unlikely(iov_iter_is_xarray(i))) {
		unsigned offset;

		offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;

		npages = 1;
		if (size > PAGE_SIZE - offset) {
			size -= PAGE_SIZE - offset;
			npages += size >> PAGE_SHIFT;
			size &= ~PAGE_MASK;
			if (size)
				npages++;
		}
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	}),
	0
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);
4b8164b9
AV
1906
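/*
 * Illustrative usage sketch, not part of the original file: the usual pairing
 * of iov_iter_npages() with iov_iter_get_pages() -- size a page array first,
 * then pin the pages into it. The caller is expected to put_page() each entry
 * and kfree() the array when the I/O completes. Names are hypothetical.
 */
static ssize_t example_pin_pages(struct iov_iter *iter, struct page ***pagesp,
				 size_t *offsetp)
{
	int npages = iov_iter_npages(iter, INT_MAX);
	struct page **pages;
	ssize_t bytes;

	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	bytes = iov_iter_get_pages(iter, pages, LONG_MAX, npages, offsetp);
	if (bytes < 0) {
		kfree(pages);
		return bytes;
	}
	*pagesp = pages;
	return bytes;
}
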
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

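/*
 * Illustrative usage sketch, not part of the original file: stashing a copy
 * of an iterator (together with its duplicated segment array) so a request
 * can be completed asynchronously after the caller's stack frame is gone.
 * The context structure and helper are hypothetical; for the iovec/bvec
 * iterators expected here a NULL return simply means allocation failure.
 */
struct example_async_ctx {
	struct iov_iter iter;
	const void *iter_data;	/* kfree() this when the request completes */
};

static int example_save_iter(struct example_async_ctx *ctx,
			     struct iov_iter *src, gfp_t gfp)
{
	ctx->iter_data = dup_iter(&ctx->iter, src, gfp);
	return ctx->iter_data ? 0 : -ENOMEM;
}
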
static int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}

static int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	unsigned long seg;

	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;
	for (seg = 0; seg < nr_segs; seg++) {
		if ((ssize_t)iov[seg].iov_len < 0)
			return -EINVAL;
	}

	return 0;
}

struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
	else
		ret = copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}

ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iovp.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);

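/*
 * Illustrative usage sketch, not part of the original file: the canonical
 * import_iovec() calling pattern in a readv()-style path. kfree(iov) is
 * always safe afterwards, whether or not the on-stack array was used.
 * vfs_iter_read() would need <linux/fs.h>; the function name is hypothetical.
 */
static ssize_t example_readv(struct file *file, const struct iovec __user *uvec,
			     unsigned long nr_segs, loff_t *pos)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	ret = vfs_iter_read(file, &iter, pos, 0);
	kfree(iov);
	return ret;
}
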
int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

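/*
 * Illustrative usage sketch, not part of the original file: the single-buffer
 * counterpart used by write(2)-style paths -- one user pointer plus length
 * becomes a one-segment iterator backed by a caller-provided struct iovec.
 * vfs_iter_write() would need <linux/fs.h>; the function name is hypothetical.
 */
static ssize_t example_write_buf(struct file *file, const char __user *buf,
				 size_t len, loff_t *pos)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(WRITE, (void __user *)buf, len, &iov, &iter);
	if (ret)
		return ret;

	return vfs_iter_write(file, &iter, pos, 0);
}
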
int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);}), ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);
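
/*
 * Illustrative usage sketch, not part of the original file: walking the first
 * @bytes of a kernel-backed (kvec/bvec/xarray) iterator chunk by chunk
 * without copying. The callback sees each contiguous, kernel-mapped range as
 * a struct kvec; the iterator position itself is not advanced. Names are
 * hypothetical.
 */
static int example_sum_range(struct kvec *vec, void *context)
{
	size_t *total = context;

	*total += vec->iov_len;
	return 0;
}

static size_t example_count_bytes(struct iov_iter *i, size_t bytes)
{
	size_t total = 0;

	iov_iter_for_each_range(i, bytes, example_sum_range, &total);
	return total;
}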