iov_iter: make iterator callbacks use base and len instead of iovec
[linux-block.git] / lib / iov_iter.c
CommitLineData
457c8996 1// SPDX-License-Identifier: GPL-2.0-only
7999096f 2#include <crypto/hash.h>
4f18cd31 3#include <linux/export.h>
2f8b5444 4#include <linux/bvec.h>
4d0e9df5 5#include <linux/fault-inject-usercopy.h>
4f18cd31
AV
6#include <linux/uio.h>
7#include <linux/pagemap.h>
28961998 8#include <linux/highmem.h>
91f79c43
AV
9#include <linux/slab.h>
10#include <linux/vmalloc.h>
241699cd 11#include <linux/splice.h>
bfdc5970 12#include <linux/compat.h>
a604ec7e 13#include <net/checksum.h>
d05f4435 14#include <linux/scatterlist.h>
d0ef4c36 15#include <linux/instrumented.h>
4f18cd31 16
241699cd
AV
17#define PIPE_PARANOIA /* for now */
18
/* covers iovec and kvec alike */
/*
 * Walk the segments of an iovec/kvec array, handing each (base, len)
 * chunk to STEP.  STEP evaluates to the number of bytes it did NOT
 * process; a non-zero remainder (e.g. a user-copy fault) terminates
 * the walk early.  On exit, n holds the number of bytes processed,
 * __p points at the current segment and skip is the offset within it.
 */
#define iterate_iovec(i, n, base, len, off, __p, skip, STEP) {	\
	size_t off = 0;						\
	do {							\
		len = min(n, __p->iov_len - skip);		\
		if (likely(len)) {				\
			base = __p->iov_base + skip;		\
			len -= (STEP);				\
			off += len;				\
			skip += len;				\
			n -= len;				\
			/* stopped mid-segment: partial STEP or done */	\
			if (skip < __p->iov_len)		\
				break;				\
		}						\
		__p++;						\
		skip = 0;					\
	} while (n);						\
	n = off;						\
}

38
7baa5099
AV
/*
 * Walk the segments of a bio_vec array.  Each chunk is clamped to a
 * single page so the page can be temporarily mapped with
 * kmap_local_page() around STEP.  STEP returns the number of bytes
 * left unprocessed; any non-zero remainder stops the walk.  On exit,
 * n holds the number of bytes processed.
 */
#define iterate_bvec(i, n, base, len, off, p, skip, STEP) {	\
	size_t off = 0;						\
	while (n) {						\
		unsigned offset = p->bv_offset + skip;		\
		unsigned left;					\
		void *kaddr = kmap_local_page(p->bv_page +	\
					offset / PAGE_SIZE);	\
		base = kaddr + offset % PAGE_SIZE;		\
		/* never cross a page boundary in one STEP */	\
		len = min(min(n, p->bv_len - skip),		\
		     (size_t)(PAGE_SIZE - offset % PAGE_SIZE));	\
		left = (STEP);					\
		kunmap_local(kaddr);				\
		len -= left;					\
		off += len;					\
		skip += len;					\
		if (skip == p->bv_len) {			\
			skip = 0;				\
			p++;					\
		}						\
		n -= len;					\
		if (left)					\
			break;					\
	}							\
	n = off;						\
}

64
/*
 * Walk the pages of an xarray-backed iterator (starting at
 * i->xarray_start + skip), mapping each page with kmap_local_page()
 * and feeding page-sized (base, len) chunks to STEP under the RCU
 * read lock.  STEP returns the number of bytes left unprocessed;
 * a non-zero remainder terminates the walk.  On exit, n and skip
 * reflect the number of bytes processed.
 */
#define iterate_xarray(i, n, base, len, __off, skip, STEP) {	\
	__label__ __out;					\
	size_t __off = 0;					\
	struct page *head = NULL;				\
	size_t offset;						\
	loff_t start = i->xarray_start + skip;			\
	pgoff_t index = start >> PAGE_SHIFT;			\
	int j;							\
								\
	XA_STATE(xas, i->xarray, index);			\
								\
	rcu_read_lock();					\
	xas_for_each(&xas, head, ULONG_MAX) {			\
		unsigned left;					\
		if (xas_retry(&xas, head))			\
			continue;				\
		if (WARN_ON(xa_is_value(head)))			\
			break;					\
		if (WARN_ON(PageHuge(head)))			\
			break;					\
		/* THP: iterate the subpages covered by the range */	\
		for (j = (head->index < index) ? index - head->index : 0; \
		     j < thp_nr_pages(head); j++) {		\
			void *kaddr = kmap_local_page(head + j);	\
			offset = (start + __off) % PAGE_SIZE;	\
			base = kaddr + offset;			\
			len = PAGE_SIZE - offset;		\
			len = min(n, len);			\
			left = (STEP);				\
			kunmap_local(kaddr);			\
			len -= left;				\
			__off += len;				\
			n -= len;				\
			if (left || n == 0)			\
				goto __out;			\
		}						\
	}							\
__out:								\
	rcu_read_unlock();					\
	skip += __off;						\
	n = __off;						\
}

106
/*
 * Dispatch on the iterator type and run the matching iterate_* walker:
 * I is the step used for user-space segments (iovec), K for kernel
 * memory (kvec/bvec/xarray).  The iterator is advanced past the bytes
 * actually processed and n is clamped to i->count on entry.
 */
#define __iterate_and_advance(i, n, base, len, off, I, K) {	\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (likely(iter_is_iovec(i))) {			\
			const struct iovec *iov = i->iov;	\
			void __user *base;			\
			size_t len;				\
			iterate_iovec(i, n, base, len, off,	\
						iov, skip, (I))	\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		} else if (iov_iter_is_bvec(i)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			void *base;				\
			size_t len;				\
			iterate_bvec(i, n, base, len, off,	\
						bvec, skip, (K))	\
			i->nr_segs -= bvec - i->bvec;		\
			i->bvec = bvec;				\
		} else if (iov_iter_is_kvec(i)) {		\
			const struct kvec *kvec = i->kvec;	\
			void *base;				\
			size_t len;				\
			/* kvec reuses the iovec walker (same layout) */	\
			iterate_iovec(i, n, base, len, off,	\
						kvec, skip, (K))	\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (iov_iter_is_xarray(i)) {		\
			void *base;				\
			size_t len;				\
			iterate_xarray(i, n, base, len, off,	\
							skip, (K))	\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
/* Common case: the kernel-side step K cannot fail, so it "returns" 0. */
#define iterate_and_advance(i, n, base, len, off, I, K) \
	__iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))

7ce2a91e 148
09fc68dc
AV
149static int copyout(void __user *to, const void *from, size_t n)
150{
4d0e9df5
AL
151 if (should_fail_usercopy())
152 return n;
96d4f267 153 if (access_ok(to, n)) {
d0ef4c36 154 instrument_copy_to_user(to, from, n);
09fc68dc
AV
155 n = raw_copy_to_user(to, from, n);
156 }
157 return n;
158}
159
160static int copyin(void *to, const void __user *from, size_t n)
161{
4d0e9df5
AL
162 if (should_fail_usercopy())
163 return n;
96d4f267 164 if (access_ok(from, n)) {
d0ef4c36 165 instrument_copy_from_user(to, from, n);
09fc68dc
AV
166 n = raw_copy_from_user(to, from, n);
167 }
168 return n;
169}
170
/*
 * Copy up to @bytes from @page (starting at @offset) into the user
 * iovecs of @i, advancing the iterator past the bytes copied.
 * Returns the number of bytes actually copied (may be short on fault).
 *
 * Fast path: on HIGHMEM configs, pre-fault the first destination chunk
 * and copy under kmap_atomic(); if a fault occurs anyway, fall back to
 * the sleepable kmap() path below.
 */
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		/* keep going into following iovecs while copies complete */
		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* faulted mid-copy: remember position, retry non-atomically */
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* step past a fully-consumed segment */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

4f18cd31 254
/*
 * Copy up to @bytes from the user iovecs of @i into @page (starting at
 * @offset), advancing the iterator past the bytes copied.  Returns the
 * number of bytes actually copied (may be short on fault).
 *
 * Mirror image of copy_page_to_iter_iovec(): kmap_atomic() fast path
 * with pre-faulted source, kmap() fallback if a fault occurs anyway.
 */
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		/* keep going into following iovecs while copies complete */
		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* faulted mid-copy: remember position, retry non-atomically */
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* step past a fully-consumed segment */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

f0d1bec9 338
241699cd
AV
#ifdef PIPE_PARANOIA
/*
 * Debug check that a pipe iterator's (head, iov_offset) position is
 * consistent with the pipe ring it points into.  Dumps the ring and
 * WARNs on inconsistency; returns false so callers can bail out.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

381
241699cd
AV
/*
 * "Copy" @bytes of @page into a pipe iterator by attaching a reference
 * to the page as a pipe buffer (zero-copy).  If the page and offset
 * continue the last buffer, just extend it; otherwise claim a new ring
 * slot.  Returns the number of bytes accounted, or 0 if the pipe is
 * full or the iterator fails the sanity check.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);		/* the pipe buffer now holds a reference */
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

429
171a0203
AA
/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
{
	/* only user-space iterators can fault; others are a no-op */
	if (iter_is_iovec(i)) {
		const struct iovec *p;
		size_t skip;

		if (bytes > i->count)
			bytes = i->count;
		/* skip applies only to the first segment */
		for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
			size_t len = min(bytes, p->iov_len - skip);
			int err;

			if (unlikely(!len))
				continue;
			err = fault_in_pages_readable(p->iov_base + skip, len);
			if (unlikely(err))
				return err;
			bytes -= len;
		}
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

171a0203 460
/*
 * Initialize @i as an ITER_IOVEC iterator over @nr_segs user-space
 * iovecs totalling @count bytes.  @direction is READ or WRITE (the
 * data_source).  Must not be called with KERNEL_DS (WARNs).
 */
void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	WARN_ON_ONCE(uaccess_kernel());
	*i = (struct iov_iter) {
		.iter_type = ITER_IOVEC,
		.data_source = direction,
		.iov = iov,
		.nr_segs = nr_segs,
		.iov_offset = 0,
		.count = count
	};
}
EXPORT_SYMBOL(iov_iter_init);

7b2c99d1 477
241699cd
AV
/* True if @buf is a page allocated by push_pipe() (default ops). */
static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}
482
8cefc107
DH
/*
 * Compute where new data would start in the pipe: the current iterator
 * position, advanced to the next ring slot if the current buffer is
 * either full or not one push_pipe() can append to.
 */
static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

498
/*
 * Make room in the pipe for up to @size bytes: reuse the tail of the
 * current buffer if possible, then allocate fresh pages until either
 * @size bytes are covered or the pipe is full.  Stores the starting
 * ring position in *iter_headp/*offp and returns the number of bytes
 * of buffer space actually obtained (may be short on allocation
 * failure or a full pipe).
 */
static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		/* top up the partially-filled last buffer first */
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

546
/*
 * Copy @bytes from kernel buffer @addr into a pipe iterator, growing
 * the pipe with push_pipe() as needed and advancing the iterator.
 * Returns the number of bytes copied (short if the pipe fills up).
 */
static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		/* fill the remainder of the current page, then advance */
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

574
f9152895
AV
575static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
576 __wsum sum, size_t off)
577{
cc44c17b 578 __wsum next = csum_partial_copy_nocheck(from, to, len);
f9152895
AV
579 return csum_block_add(sum, next, off);
580}
581
/*
 * Like copy_pipe_to_iter(), but also accumulates an internet checksum
 * of the copied data into @csstate (csum and running offset are
 * updated in place).  Returns the number of bytes copied.
 */
static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	unsigned int i_head;
	size_t n, r;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}

617
/*
 * Copy @bytes from kernel buffer @addr to wherever @i points (user
 * iovec, kvec, bvec, xarray or pipe), advancing the iterator.
 * Returns the number of bytes copied (may be short on fault).
 */
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, base, len, off,
		copyout(base, addr + off, len),
		memcpy(base, addr + off, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

c35e0248 632
ec6347bb
DW
633#ifdef CONFIG_ARCH_HAS_COPY_MC
634static int copyout_mc(void __user *to, const void *from, size_t n)
8780356e 635{
96d4f267 636 if (access_ok(to, n)) {
d0ef4c36 637 instrument_copy_to_user(to, from, n);
ec6347bb 638 n = copy_mc_to_user((__force void *) to, from, n);
8780356e
DW
639 }
640 return n;
641}
642
ec6347bb 643static unsigned long copy_mc_to_page(struct page *page, size_t offset,
8780356e
DW
644 const char *from, size_t len)
645{
646 unsigned long ret;
647 char *to;
648
649 to = kmap_atomic(page);
ec6347bb 650 ret = copy_mc_to_kernel(to + offset, from, len);
8780356e
DW
651 kunmap_atomic(to);
652
653 return ret;
654}
655
/*
 * Machine-check-aware variant of copy_pipe_to_iter(): stops at the
 * first poisoned source chunk and returns the number of bytes
 * transferred up to that point.
 */
static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
					    off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)	/* hit poisoned memory: stop short */
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

689
/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @iter: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter().
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * Return: number of bytes copied (may be short if poison is hit).
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	/* __iterate_and_advance: the kernel-side step (K) can fail here */
	__iterate_and_advance(i, bytes, base, len, off,
		copyout_mc(base, addr + off, len),
		copy_mc_to_kernel(base, addr + off, len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);

727#endif /* CONFIG_ARCH_HAS_COPY_MC */
8780356e 728
/*
 * Copy @bytes from wherever @i points into kernel buffer @addr,
 * advancing the iterator.  Pipes are a data source only, so a pipe
 * iterator here is a bug (WARNs, returns 0).
 */
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, base, len, off,
		copyin(addr + off, base, len),
		memcpy(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

c35e0248 745
/*
 * Like _copy_from_iter(), but uses a cache-bypassing copy for the
 * user-space (iovec) case.  Kernel-memory segments still use memcpy.
 */
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		__copy_from_user_inatomic_nocache(addr + off, base, len),
		memcpy(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

aa583096 760
0aed55af 761#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @iter: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 *
 * Return: number of bytes copied.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		__copy_from_user_flushcache(addr + off, base, len),
		memcpy_flushcache(addr + off, base, len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
0aed55af
DW
790#endif
791
72e809ed
AV
/*
 * Check that the range [@offset, @offset + @n) stays within @page
 * (accounting for compound pages).  The n <= v check also rejects
 * overflow of n + offset.  WARNs and returns false on violation.
 */
static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

cbbd26b8 815
/*
 * Copy up to @bytes of a single (sub)page to @i, dispatching on the
 * iterator type: user iovec, kernel-mapped copy, zero-copy pipe
 * attach, or plain accounting for a discard iterator.  Any other
 * type WARNs and returns 0.
 */
static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (likely(iter_is_iovec(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	}
	if (iov_iter_is_pipe(i))
		return copy_page_to_iter_pipe(page, offset, bytes, i);
	if (unlikely(iov_iter_is_discard(i))) {
		/* discard: consume the bytes without copying anything */
		if (unlikely(i->count < bytes))
			bytes = i->count;
		i->count -= bytes;
		return bytes;
	}
	WARN_ON(1);
	return 0;
}

08aa6479
AV
838
/*
 * Copy @bytes from @page (possibly a compound page; @offset may exceed
 * PAGE_SIZE) to @i, one subpage at a time.  Returns the number of
 * bytes copied; stops early if a subpage copy comes up short.
 */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t res = 0;
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	page += offset / PAGE_SIZE; // first subpage
	offset %= PAGE_SIZE;
	while (1) {
		size_t n = __copy_page_to_iter(page, offset,
				min(bytes, (size_t)PAGE_SIZE - offset), i);
		res += n;
		bytes -= n;
		if (!bytes || !n)
			break;
		offset += n;
		if (offset == PAGE_SIZE) {
			page++;
			offset = 0;
		}
	}
	return res;
}
EXPORT_SYMBOL(copy_page_to_iter);

863
/*
 * Copy @bytes from @i into @page at @offset.  Dispatches on iterator
 * type; pipes and discard are invalid sources here (WARN, return 0).
 * Returns the number of bytes copied.
 */
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (likely(iter_is_iovec(i)))
		return copy_page_from_iter_iovec(page, offset, bytes, i);
	if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	}
	WARN_ON(1);
	return 0;
}
EXPORT_SYMBOL(copy_page_from_iter);

881
241699cd
AV
/*
 * Write @bytes of zeroes into a pipe iterator, growing the pipe with
 * push_pipe() as needed.  Returns the number of bytes zeroed (short
 * if the pipe fills up).
 */
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

908
c35e0248
MW
/*
 * Fill the next @bytes of @i with zeroes (clear_user for user memory,
 * memset for kernel memory), advancing the iterator.  Returns the
 * number of bytes zeroed (may be short on fault).
 */
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, base, len, count,
		clear_user(base, len),
		memset(base, 0, len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

921
f0b65f39
AV
/*
 * Atomic-context variant of copy_page_from_iter(): the page is mapped
 * with kmap_atomic() and user copies must not sleep.  Pipe/discard
 * iterators are invalid sources (WARN, return 0).  Returns the number
 * of bytes copied.
 */
size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
				  struct iov_iter *i)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, base, len, off,
		copyin(p + off, base, len),
		memcpy(p + off, base, len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);

62a8067a 943
b9dc6f65
AV
/*
 * Release every pipe buffer past the iterator's current position and
 * trim the current buffer to end exactly at i->iov_offset, so that
 * the pipe's head matches where the iterator stopped.
 */
static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			/* current buffer is partially used: shrink it */
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		/* drop everything the iterator never reached */
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

969
241699cd
AV
/*
 * Advance a pipe iterator by @size bytes, walking the ring buffers to
 * find the new position, then discard everything past that point via
 * pipe_truncate().
 */
static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

995
54c8195b
PB
996static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
997{
998 struct bvec_iter bi;
999
1000 bi.bi_size = i->count;
1001 bi.bi_bvec_done = i->iov_offset;
1002 bi.bi_idx = 0;
1003 bvec_iter_advance(i->bvec, &bi, size);
1004
1005 i->bvec += bi.bi_idx;
1006 i->nr_segs -= bi.bi_idx;
1007 i->count = bi.bi_size;
1008 i->iov_offset = bi.bi_bvec_done;
1009}
1010
185ac4d4
AV
/*
 * Advance an iovec/kvec-backed iterator by @size bytes: skip whole
 * segments that are fully consumed and leave iov_offset pointing into
 * the first partially-consumed one.
 */
static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
{
	const struct iovec *iov, *end;

	if (!i->count)
		return;
	i->count -= size;

	size += i->iov_offset; // from beginning of current segment
	for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
		if (likely(size < iov->iov_len))
			break;
		size -= iov->iov_len;
	}
	i->iov_offset = size;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
}

1029
62a8067a
AV
/*
 * Advance @i by @size bytes (clamped to the remaining count),
 * dispatching to the per-type advance helper.
 */
void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->count < size))
		size = i->count;
	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
		/* iovec and kvec have identical layouts */
		iov_iter_iovec_advance(i, size);
	} else if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
	} else if (iov_iter_is_pipe(i)) {
		pipe_advance(i, size);
	} else if (unlikely(iov_iter_is_xarray(i))) {
		/* xarray has a single contiguous range: just move offset */
		i->iov_offset += size;
		i->count -= size;
	} else if (iov_iter_is_discard(i)) {
		i->count -= size;
	}
}
EXPORT_SYMBOL(iov_iter_advance);

1049
27c0e374
AV
/*
 * Undo the last @unroll bytes of advancement of @i (bounded by
 * MAX_RW_COUNT), walking segments backwards as needed.  For pipes,
 * never rewinds past i->start_head; for xarray iterators, rewinding
 * past the current segment is a bug (BUG()).
 */
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		/* stays within the current segment */
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_xarray(i)) {
		BUG(); /* We should never go beyond the start of the specified
			* range since we might then be straying into pages that
			* aren't pinned.
			*/
	} else if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

1122
62a8067a
AV
1123/*
1124 * Return the count of just the current iov_iter segment.
1125 */
1126size_t iov_iter_single_seg_count(const struct iov_iter *i)
1127{
28f38db7
AV
1128 if (i->nr_segs > 1) {
1129 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1130 return min(i->count, i->iov->iov_len - i->iov_offset);
1131 if (iov_iter_is_bvec(i))
1132 return min(i->count, i->bvec->bv_len - i->iov_offset);
1133 }
1134 return i->count;
62a8067a
AV
1135}
1136EXPORT_SYMBOL(iov_iter_single_seg_count);
1137
aa563d7b 1138void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
05afcb77 1139 const struct kvec *kvec, unsigned long nr_segs,
abb78f87
AV
1140 size_t count)
1141{
aa563d7b 1142 WARN_ON(direction & ~(READ | WRITE));
8cd54c1c
AV
1143 *i = (struct iov_iter){
1144 .iter_type = ITER_KVEC,
1145 .data_source = direction,
1146 .kvec = kvec,
1147 .nr_segs = nr_segs,
1148 .iov_offset = 0,
1149 .count = count
1150 };
abb78f87
AV
1151}
1152EXPORT_SYMBOL(iov_iter_kvec);
1153
aa563d7b 1154void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
05afcb77
AV
1155 const struct bio_vec *bvec, unsigned long nr_segs,
1156 size_t count)
1157{
aa563d7b 1158 WARN_ON(direction & ~(READ | WRITE));
8cd54c1c
AV
1159 *i = (struct iov_iter){
1160 .iter_type = ITER_BVEC,
1161 .data_source = direction,
1162 .bvec = bvec,
1163 .nr_segs = nr_segs,
1164 .iov_offset = 0,
1165 .count = count
1166 };
05afcb77
AV
1167}
1168EXPORT_SYMBOL(iov_iter_bvec);
1169
aa563d7b 1170void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
241699cd
AV
1171 struct pipe_inode_info *pipe,
1172 size_t count)
1173{
aa563d7b 1174 BUG_ON(direction != READ);
8cefc107 1175 WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
8cd54c1c
AV
1176 *i = (struct iov_iter){
1177 .iter_type = ITER_PIPE,
1178 .data_source = false,
1179 .pipe = pipe,
1180 .head = pipe->head,
1181 .start_head = pipe->head,
1182 .iov_offset = 0,
1183 .count = count
1184 };
241699cd
AV
1185}
1186EXPORT_SYMBOL(iov_iter_pipe);
1187
7ff50620
DH
1188/**
1189 * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1190 * @i: The iterator to initialise.
1191 * @direction: The direction of the transfer.
1192 * @xarray: The xarray to access.
1193 * @start: The start file position.
1194 * @count: The size of the I/O buffer in bytes.
1195 *
1196 * Set up an I/O iterator to either draw data out of the pages attached to an
1197 * inode or to inject data into those pages. The pages *must* be prevented
1198 * from evaporation, either by taking a ref on them or locking them by the
1199 * caller.
1200 */
1201void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1202 struct xarray *xarray, loff_t start, size_t count)
1203{
1204 BUG_ON(direction & ~1);
8cd54c1c
AV
1205 *i = (struct iov_iter) {
1206 .iter_type = ITER_XARRAY,
1207 .data_source = direction,
1208 .xarray = xarray,
1209 .xarray_start = start,
1210 .count = count,
1211 .iov_offset = 0
1212 };
7ff50620
DH
1213}
1214EXPORT_SYMBOL(iov_iter_xarray);
1215
9ea9ce04
DH
1216/**
1217 * iov_iter_discard - Initialise an I/O iterator that discards data
1218 * @i: The iterator to initialise.
1219 * @direction: The direction of the transfer.
1220 * @count: The size of the I/O buffer in bytes.
1221 *
1222 * Set up an I/O iterator that just discards everything that's written to it.
1223 * It's only available as a READ iterator.
1224 */
1225void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1226{
1227 BUG_ON(direction != READ);
8cd54c1c
AV
1228 *i = (struct iov_iter){
1229 .iter_type = ITER_DISCARD,
1230 .data_source = false,
1231 .count = count,
1232 .iov_offset = 0
1233 };
9ea9ce04
DH
1234}
1235EXPORT_SYMBOL(iov_iter_discard);
1236
9221d2e3 1237static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
62a8067a 1238{
04a31165
AV
1239 unsigned long res = 0;
1240 size_t size = i->count;
9221d2e3
AV
1241 size_t skip = i->iov_offset;
1242 unsigned k;
1243
1244 for (k = 0; k < i->nr_segs; k++, skip = 0) {
1245 size_t len = i->iov[k].iov_len - skip;
1246 if (len) {
1247 res |= (unsigned long)i->iov[k].iov_base + skip;
1248 if (len > size)
1249 len = size;
1250 res |= len;
1251 size -= len;
1252 if (!size)
1253 break;
1254 }
1255 }
1256 return res;
1257}
04a31165 1258
9221d2e3
AV
1259static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
1260{
1261 unsigned res = 0;
1262 size_t size = i->count;
1263 unsigned skip = i->iov_offset;
1264 unsigned k;
1265
1266 for (k = 0; k < i->nr_segs; k++, skip = 0) {
1267 size_t len = i->bvec[k].bv_len - skip;
1268 res |= (unsigned long)i->bvec[k].bv_offset + skip;
1269 if (len > size)
1270 len = size;
1271 res |= len;
1272 size -= len;
1273 if (!size)
1274 break;
1275 }
1276 return res;
1277}
1278
1279unsigned long iov_iter_alignment(const struct iov_iter *i)
1280{
1281 /* iovec and kvec have identical layouts */
1282 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1283 return iov_iter_alignment_iovec(i);
1284
1285 if (iov_iter_is_bvec(i))
1286 return iov_iter_alignment_bvec(i);
1287
1288 if (iov_iter_is_pipe(i)) {
e0ff126e 1289 unsigned int p_mask = i->pipe->ring_size - 1;
9221d2e3 1290 size_t size = i->count;
e0ff126e 1291
8cefc107 1292 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
241699cd
AV
1293 return size | i->iov_offset;
1294 return size;
1295 }
9221d2e3
AV
1296
1297 if (iov_iter_is_xarray(i))
3d14ec1f 1298 return (i->xarray_start + i->iov_offset) | i->count;
9221d2e3
AV
1299
1300 return 0;
62a8067a
AV
1301}
1302EXPORT_SYMBOL(iov_iter_alignment);
1303
357f435d
AV
1304unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1305{
33844e66 1306 unsigned long res = 0;
610c7a71 1307 unsigned long v = 0;
357f435d 1308 size_t size = i->count;
610c7a71 1309 unsigned k;
357f435d 1310
610c7a71 1311 if (WARN_ON(!iter_is_iovec(i)))
241699cd 1312 return ~0U;
241699cd 1313
610c7a71
AV
1314 for (k = 0; k < i->nr_segs; k++) {
1315 if (i->iov[k].iov_len) {
1316 unsigned long base = (unsigned long)i->iov[k].iov_base;
1317 if (v) // if not the first one
1318 res |= base | v; // this start | previous end
1319 v = base + i->iov[k].iov_len;
1320 if (size <= i->iov[k].iov_len)
1321 break;
1322 size -= i->iov[k].iov_len;
1323 }
1324 }
33844e66 1325 return res;
357f435d
AV
1326}
1327EXPORT_SYMBOL(iov_iter_gap_alignment);
1328
e76b6312 1329static inline ssize_t __pipe_get_pages(struct iov_iter *i,
241699cd
AV
1330 size_t maxsize,
1331 struct page **pages,
8cefc107 1332 int iter_head,
241699cd
AV
1333 size_t *start)
1334{
1335 struct pipe_inode_info *pipe = i->pipe;
8cefc107
DH
1336 unsigned int p_mask = pipe->ring_size - 1;
1337 ssize_t n = push_pipe(i, maxsize, &iter_head, start);
241699cd
AV
1338 if (!n)
1339 return -EFAULT;
1340
1341 maxsize = n;
1342 n += *start;
1689c73a 1343 while (n > 0) {
8cefc107
DH
1344 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1345 iter_head++;
241699cd
AV
1346 n -= PAGE_SIZE;
1347 }
1348
1349 return maxsize;
1350}
1351
1352static ssize_t pipe_get_pages(struct iov_iter *i,
1353 struct page **pages, size_t maxsize, unsigned maxpages,
1354 size_t *start)
1355{
8cefc107 1356 unsigned int iter_head, npages;
241699cd 1357 size_t capacity;
241699cd
AV
1358
1359 if (!sanity(i))
1360 return -EFAULT;
1361
8cefc107
DH
1362 data_start(i, &iter_head, start);
1363 /* Amount of free space: some of this one + all after this one */
1364 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1365 capacity = min(npages, maxpages) * PAGE_SIZE - *start;
241699cd 1366
8cefc107 1367 return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
241699cd
AV
1368}
1369
7ff50620
DH
1370static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1371 pgoff_t index, unsigned int nr_pages)
1372{
1373 XA_STATE(xas, xa, index);
1374 struct page *page;
1375 unsigned int ret = 0;
1376
1377 rcu_read_lock();
1378 for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1379 if (xas_retry(&xas, page))
1380 continue;
1381
1382 /* Has the page moved or been split? */
1383 if (unlikely(page != xas_reload(&xas))) {
1384 xas_reset(&xas);
1385 continue;
1386 }
1387
1388 pages[ret] = find_subpage(page, xas.xa_index);
1389 get_page(pages[ret]);
1390 if (++ret == nr_pages)
1391 break;
1392 }
1393 rcu_read_unlock();
1394 return ret;
1395}
1396
1397static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1398 struct page **pages, size_t maxsize,
1399 unsigned maxpages, size_t *_start_offset)
1400{
1401 unsigned nr, offset;
1402 pgoff_t index, count;
1403 size_t size = maxsize, actual;
1404 loff_t pos;
1405
1406 if (!size || !maxpages)
1407 return 0;
1408
1409 pos = i->xarray_start + i->iov_offset;
1410 index = pos >> PAGE_SHIFT;
1411 offset = pos & ~PAGE_MASK;
1412 *_start_offset = offset;
1413
1414 count = 1;
1415 if (size > PAGE_SIZE - offset) {
1416 size -= PAGE_SIZE - offset;
1417 count += size >> PAGE_SHIFT;
1418 size &= ~PAGE_MASK;
1419 if (size)
1420 count++;
1421 }
1422
1423 if (count > maxpages)
1424 count = maxpages;
1425
1426 nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1427 if (nr == 0)
1428 return 0;
1429
1430 actual = PAGE_SIZE * nr;
1431 actual -= offset;
1432 if (nr == count && size > 0) {
1433 unsigned last_offset = (nr > 1) ? 0 : offset;
1434 actual -= PAGE_SIZE - (last_offset + size);
1435 }
1436 return actual;
1437}
1438
3d671ca6
AV
1439/* must be done on non-empty ITER_IOVEC one */
1440static unsigned long first_iovec_segment(const struct iov_iter *i,
1441 size_t *size, size_t *start,
1442 size_t maxsize, unsigned maxpages)
1443{
1444 size_t skip;
1445 long k;
1446
1447 for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
1448 unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
1449 size_t len = i->iov[k].iov_len - skip;
1450
1451 if (unlikely(!len))
1452 continue;
1453 if (len > maxsize)
1454 len = maxsize;
1455 len += (*start = addr % PAGE_SIZE);
1456 if (len > maxpages * PAGE_SIZE)
1457 len = maxpages * PAGE_SIZE;
1458 *size = len;
1459 return addr & PAGE_MASK;
1460 }
1461 BUG(); // if it had been empty, we wouldn't get called
1462}
1463
1464/* must be done on non-empty ITER_BVEC one */
1465static struct page *first_bvec_segment(const struct iov_iter *i,
1466 size_t *size, size_t *start,
1467 size_t maxsize, unsigned maxpages)
1468{
1469 struct page *page;
1470 size_t skip = i->iov_offset, len;
1471
1472 len = i->bvec->bv_len - skip;
1473 if (len > maxsize)
1474 len = maxsize;
1475 skip += i->bvec->bv_offset;
1476 page = i->bvec->bv_page + skip / PAGE_SIZE;
1477 len += (*start = skip % PAGE_SIZE);
1478 if (len > maxpages * PAGE_SIZE)
1479 len = maxpages * PAGE_SIZE;
1480 *size = len;
1481 return page;
1482}
1483
62a8067a 1484ssize_t iov_iter_get_pages(struct iov_iter *i,
2c80929c 1485 struct page **pages, size_t maxsize, unsigned maxpages,
62a8067a
AV
1486 size_t *start)
1487{
3d671ca6
AV
1488 size_t len;
1489 int n, res;
1490
e5393fae
AV
1491 if (maxsize > i->count)
1492 maxsize = i->count;
3d671ca6
AV
1493 if (!maxsize)
1494 return 0;
e5393fae 1495
3d671ca6
AV
1496 if (likely(iter_is_iovec(i))) {
1497 unsigned long addr;
e5393fae 1498
3d671ca6 1499 addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
e5393fae 1500 n = DIV_ROUND_UP(len, PAGE_SIZE);
73b0140b
IW
1501 res = get_user_pages_fast(addr, n,
1502 iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
1503 pages);
e5393fae
AV
1504 if (unlikely(res < 0))
1505 return res;
1506 return (res == n ? len : res * PAGE_SIZE) - *start;
3d671ca6
AV
1507 }
1508 if (iov_iter_is_bvec(i)) {
1509 struct page *page;
1510
1511 page = first_bvec_segment(i, &len, start, maxsize, maxpages);
1512 n = DIV_ROUND_UP(len, PAGE_SIZE);
1513 while (n--)
1514 get_page(*pages++ = page++);
1515 return len - *start;
1516 }
1517 if (iov_iter_is_pipe(i))
1518 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1519 if (iov_iter_is_xarray(i))
1520 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1521 return -EFAULT;
62a8067a
AV
1522}
1523EXPORT_SYMBOL(iov_iter_get_pages);
1524
1b17f1f2
AV
1525static struct page **get_pages_array(size_t n)
1526{
752ade68 1527 return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1b17f1f2
AV
1528}
1529
241699cd
AV
1530static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1531 struct page ***pages, size_t maxsize,
1532 size_t *start)
1533{
1534 struct page **p;
8cefc107 1535 unsigned int iter_head, npages;
d7760d63 1536 ssize_t n;
241699cd
AV
1537
1538 if (!sanity(i))
1539 return -EFAULT;
1540
8cefc107
DH
1541 data_start(i, &iter_head, start);
1542 /* Amount of free space: some of this one + all after this one */
1543 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
241699cd
AV
1544 n = npages * PAGE_SIZE - *start;
1545 if (maxsize > n)
1546 maxsize = n;
1547 else
1548 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1549 p = get_pages_array(npages);
1550 if (!p)
1551 return -ENOMEM;
8cefc107 1552 n = __pipe_get_pages(i, maxsize, p, iter_head, start);
241699cd
AV
1553 if (n > 0)
1554 *pages = p;
1555 else
1556 kvfree(p);
1557 return n;
1558}
1559
7ff50620
DH
1560static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1561 struct page ***pages, size_t maxsize,
1562 size_t *_start_offset)
1563{
1564 struct page **p;
1565 unsigned nr, offset;
1566 pgoff_t index, count;
1567 size_t size = maxsize, actual;
1568 loff_t pos;
1569
1570 if (!size)
1571 return 0;
1572
1573 pos = i->xarray_start + i->iov_offset;
1574 index = pos >> PAGE_SHIFT;
1575 offset = pos & ~PAGE_MASK;
1576 *_start_offset = offset;
1577
1578 count = 1;
1579 if (size > PAGE_SIZE - offset) {
1580 size -= PAGE_SIZE - offset;
1581 count += size >> PAGE_SHIFT;
1582 size &= ~PAGE_MASK;
1583 if (size)
1584 count++;
1585 }
1586
1587 p = get_pages_array(count);
1588 if (!p)
1589 return -ENOMEM;
1590 *pages = p;
1591
1592 nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1593 if (nr == 0)
1594 return 0;
1595
1596 actual = PAGE_SIZE * nr;
1597 actual -= offset;
1598 if (nr == count && size > 0) {
1599 unsigned last_offset = (nr > 1) ? 0 : offset;
1600 actual -= PAGE_SIZE - (last_offset + size);
1601 }
1602 return actual;
1603}
1604
62a8067a
AV
1605ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1606 struct page ***pages, size_t maxsize,
1607 size_t *start)
1608{
1b17f1f2 1609 struct page **p;
3d671ca6
AV
1610 size_t len;
1611 int n, res;
1b17f1f2
AV
1612
1613 if (maxsize > i->count)
1614 maxsize = i->count;
3d671ca6
AV
1615 if (!maxsize)
1616 return 0;
1b17f1f2 1617
3d671ca6
AV
1618 if (likely(iter_is_iovec(i))) {
1619 unsigned long addr;
1b17f1f2 1620
3d671ca6 1621 addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
1b17f1f2
AV
1622 n = DIV_ROUND_UP(len, PAGE_SIZE);
1623 p = get_pages_array(n);
1624 if (!p)
1625 return -ENOMEM;
73b0140b
IW
1626 res = get_user_pages_fast(addr, n,
1627 iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
1b17f1f2
AV
1628 if (unlikely(res < 0)) {
1629 kvfree(p);
1630 return res;
1631 }
1632 *pages = p;
1633 return (res == n ? len : res * PAGE_SIZE) - *start;
3d671ca6
AV
1634 }
1635 if (iov_iter_is_bvec(i)) {
1636 struct page *page;
1637
1638 page = first_bvec_segment(i, &len, start, maxsize, ~0U);
1639 n = DIV_ROUND_UP(len, PAGE_SIZE);
1640 *pages = p = get_pages_array(n);
1b17f1f2
AV
1641 if (!p)
1642 return -ENOMEM;
3d671ca6
AV
1643 while (n--)
1644 get_page(*p++ = page++);
1645 return len - *start;
1646 }
1647 if (iov_iter_is_pipe(i))
1648 return pipe_get_pages_alloc(i, pages, maxsize, start);
1649 if (iov_iter_is_xarray(i))
1650 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1651 return -EFAULT;
62a8067a
AV
1652}
1653EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1654
a604ec7e
AV
1655size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1656 struct iov_iter *i)
1657{
a604ec7e 1658 __wsum sum, next;
a604ec7e 1659 sum = *csum;
9ea9ce04 1660 if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
241699cd
AV
1661 WARN_ON(1);
1662 return 0;
1663 }
7baa5099
AV
1664 iterate_and_advance(i, bytes, base, len, off, ({
1665 next = csum_and_copy_from_user(base, addr + off, len);
622838f3 1666 if (next)
a604ec7e 1667 sum = csum_block_add(sum, next, off);
7baa5099 1668 next ? 0 : len;
a604ec7e 1669 }), ({
7baa5099 1670 sum = csum_and_memcpy(addr + off, base, len, sum, off);
a604ec7e
AV
1671 })
1672 )
1673 *csum = sum;
1674 return bytes;
1675}
1676EXPORT_SYMBOL(csum_and_copy_from_iter);
1677
52cbd23a 1678size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
a604ec7e
AV
1679 struct iov_iter *i)
1680{
52cbd23a 1681 struct csum_state *csstate = _csstate;
a604ec7e 1682 __wsum sum, next;
78e1f386
AV
1683
1684 if (unlikely(iov_iter_is_pipe(i)))
52cbd23a 1685 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
78e1f386 1686
594e450b 1687 sum = csum_shift(csstate->csum, csstate->off);
78e1f386 1688 if (unlikely(iov_iter_is_discard(i))) {
241699cd
AV
1689 WARN_ON(1); /* for now */
1690 return 0;
1691 }
7baa5099
AV
1692 iterate_and_advance(i, bytes, base, len, off, ({
1693 next = csum_and_copy_to_user(addr + off, base, len);
622838f3 1694 if (next)
a604ec7e 1695 sum = csum_block_add(sum, next, off);
7baa5099 1696 next ? 0 : len;
a604ec7e 1697 }), ({
7baa5099 1698 sum = csum_and_memcpy(base, addr + off, len, sum, off);
a604ec7e
AV
1699 })
1700 )
594e450b
AV
1701 csstate->csum = csum_shift(sum, csstate->off);
1702 csstate->off += bytes;
a604ec7e
AV
1703 return bytes;
1704}
1705EXPORT_SYMBOL(csum_and_copy_to_iter);
1706
d05f4435
SG
1707size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1708 struct iov_iter *i)
1709{
7999096f 1710#ifdef CONFIG_CRYPTO_HASH
d05f4435
SG
1711 struct ahash_request *hash = hashp;
1712 struct scatterlist sg;
1713 size_t copied;
1714
1715 copied = copy_to_iter(addr, bytes, i);
1716 sg_init_one(&sg, addr, copied);
1717 ahash_request_set_crypt(hash, &sg, NULL, copied);
1718 crypto_ahash_update(hash);
1719 return copied;
27fad74a
Y
1720#else
1721 return 0;
1722#endif
d05f4435
SG
1723}
1724EXPORT_SYMBOL(hash_and_copy_to_iter);
1725
66531c65 1726static int iov_npages(const struct iov_iter *i, int maxpages)
62a8067a 1727{
66531c65
AV
1728 size_t skip = i->iov_offset, size = i->count;
1729 const struct iovec *p;
e0f2dc40
AV
1730 int npages = 0;
1731
66531c65
AV
1732 for (p = i->iov; size; skip = 0, p++) {
1733 unsigned offs = offset_in_page(p->iov_base + skip);
1734 size_t len = min(p->iov_len - skip, size);
e0f2dc40 1735
66531c65
AV
1736 if (len) {
1737 size -= len;
1738 npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1739 if (unlikely(npages > maxpages))
1740 return maxpages;
1741 }
1742 }
1743 return npages;
1744}
1745
1746static int bvec_npages(const struct iov_iter *i, int maxpages)
1747{
1748 size_t skip = i->iov_offset, size = i->count;
1749 const struct bio_vec *p;
1750 int npages = 0;
1751
1752 for (p = i->bvec; size; skip = 0, p++) {
1753 unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
1754 size_t len = min(p->bv_len - skip, size);
1755
1756 size -= len;
1757 npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1758 if (unlikely(npages > maxpages))
1759 return maxpages;
1760 }
1761 return npages;
1762}
1763
1764int iov_iter_npages(const struct iov_iter *i, int maxpages)
1765{
1766 if (unlikely(!i->count))
1767 return 0;
1768 /* iovec and kvec have identical layouts */
1769 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1770 return iov_npages(i, maxpages);
1771 if (iov_iter_is_bvec(i))
1772 return bvec_npages(i, maxpages);
1773 if (iov_iter_is_pipe(i)) {
8cefc107 1774 unsigned int iter_head;
66531c65 1775 int npages;
241699cd 1776 size_t off;
241699cd
AV
1777
1778 if (!sanity(i))
1779 return 0;
1780
8cefc107 1781 data_start(i, &iter_head, &off);
241699cd 1782 /* some of this one + all after this one */
66531c65
AV
1783 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1784 return min(npages, maxpages);
1785 }
1786 if (iov_iter_is_xarray(i)) {
e4f8df86
AV
1787 unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
1788 int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
66531c65
AV
1789 return min(npages, maxpages);
1790 }
1791 return 0;
62a8067a 1792}
f67da30c 1793EXPORT_SYMBOL(iov_iter_npages);
4b8164b9
AV
1794
1795const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1796{
1797 *new = *old;
00e23707 1798 if (unlikely(iov_iter_is_pipe(new))) {
241699cd
AV
1799 WARN_ON(1);
1800 return NULL;
1801 }
7ff50620 1802 if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
9ea9ce04 1803 return NULL;
00e23707 1804 if (iov_iter_is_bvec(new))
4b8164b9
AV
1805 return new->bvec = kmemdup(new->bvec,
1806 new->nr_segs * sizeof(struct bio_vec),
1807 flags);
1808 else
1809 /* iovec and kvec have identical layout */
1810 return new->iov = kmemdup(new->iov,
1811 new->nr_segs * sizeof(struct iovec),
1812 flags);
1813}
1814EXPORT_SYMBOL(dup_iter);
bc917be8 1815
bfdc5970
CH
1816static int copy_compat_iovec_from_user(struct iovec *iov,
1817 const struct iovec __user *uvec, unsigned long nr_segs)
1818{
1819 const struct compat_iovec __user *uiov =
1820 (const struct compat_iovec __user *)uvec;
1821 int ret = -EFAULT, i;
1822
a959a978 1823 if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
bfdc5970
CH
1824 return -EFAULT;
1825
1826 for (i = 0; i < nr_segs; i++) {
1827 compat_uptr_t buf;
1828 compat_ssize_t len;
1829
1830 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1831 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1832
1833 /* check for compat_size_t not fitting in compat_ssize_t .. */
1834 if (len < 0) {
1835 ret = -EINVAL;
1836 goto uaccess_end;
1837 }
1838 iov[i].iov_base = compat_ptr(buf);
1839 iov[i].iov_len = len;
1840 }
1841
1842 ret = 0;
1843uaccess_end:
1844 user_access_end();
1845 return ret;
1846}
1847
1848static int copy_iovec_from_user(struct iovec *iov,
1849 const struct iovec __user *uvec, unsigned long nr_segs)
fb041b59
DL
1850{
1851 unsigned long seg;
fb041b59 1852
bfdc5970
CH
1853 if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1854 return -EFAULT;
1855 for (seg = 0; seg < nr_segs; seg++) {
1856 if ((ssize_t)iov[seg].iov_len < 0)
1857 return -EINVAL;
fb041b59
DL
1858 }
1859
bfdc5970
CH
1860 return 0;
1861}
1862
1863struct iovec *iovec_from_user(const struct iovec __user *uvec,
1864 unsigned long nr_segs, unsigned long fast_segs,
1865 struct iovec *fast_iov, bool compat)
1866{
1867 struct iovec *iov = fast_iov;
1868 int ret;
1869
fb041b59 1870 /*
bfdc5970
CH
1871 * SuS says "The readv() function *may* fail if the iovcnt argument was
1872 * less than or equal to 0, or greater than {IOV_MAX}. Linux has
1873 * traditionally returned zero for zero segments, so...
fb041b59 1874 */
bfdc5970
CH
1875 if (nr_segs == 0)
1876 return iov;
1877 if (nr_segs > UIO_MAXIOV)
1878 return ERR_PTR(-EINVAL);
fb041b59
DL
1879 if (nr_segs > fast_segs) {
1880 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
bfdc5970
CH
1881 if (!iov)
1882 return ERR_PTR(-ENOMEM);
fb041b59 1883 }
bfdc5970
CH
1884
1885 if (compat)
1886 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1887 else
1888 ret = copy_iovec_from_user(iov, uvec, nr_segs);
1889 if (ret) {
1890 if (iov != fast_iov)
1891 kfree(iov);
1892 return ERR_PTR(ret);
1893 }
1894
1895 return iov;
1896}
1897
1898ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1899 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1900 struct iov_iter *i, bool compat)
1901{
1902 ssize_t total_len = 0;
1903 unsigned long seg;
1904 struct iovec *iov;
1905
1906 iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1907 if (IS_ERR(iov)) {
1908 *iovp = NULL;
1909 return PTR_ERR(iov);
fb041b59
DL
1910 }
1911
1912 /*
bfdc5970
CH
1913 * According to the Single Unix Specification we should return EINVAL if
1914 * an element length is < 0 when cast to ssize_t or if the total length
1915 * would overflow the ssize_t return value of the system call.
fb041b59
DL
1916 *
1917 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1918 * overflow case.
1919 */
fb041b59 1920 for (seg = 0; seg < nr_segs; seg++) {
fb041b59
DL
1921 ssize_t len = (ssize_t)iov[seg].iov_len;
1922
bfdc5970
CH
1923 if (!access_ok(iov[seg].iov_base, len)) {
1924 if (iov != *iovp)
1925 kfree(iov);
1926 *iovp = NULL;
1927 return -EFAULT;
fb041b59 1928 }
bfdc5970
CH
1929
1930 if (len > MAX_RW_COUNT - total_len) {
1931 len = MAX_RW_COUNT - total_len;
fb041b59
DL
1932 iov[seg].iov_len = len;
1933 }
bfdc5970 1934 total_len += len;
fb041b59 1935 }
bfdc5970
CH
1936
1937 iov_iter_init(i, type, iov, nr_segs, total_len);
1938 if (iov == *iovp)
1939 *iovp = NULL;
1940 else
1941 *iovp = iov;
1942 return total_len;
fb041b59
DL
1943}
1944
ffecee4f
VN
1945/**
1946 * import_iovec() - Copy an array of &struct iovec from userspace
1947 * into the kernel, check that it is valid, and initialize a new
1948 * &struct iov_iter iterator to access it.
1949 *
1950 * @type: One of %READ or %WRITE.
bfdc5970 1951 * @uvec: Pointer to the userspace array.
ffecee4f
VN
1952 * @nr_segs: Number of elements in userspace array.
1953 * @fast_segs: Number of elements in @iov.
bfdc5970 1954 * @iovp: (input and output parameter) Pointer to pointer to (usually small
ffecee4f
VN
1955 * on-stack) kernel array.
1956 * @i: Pointer to iterator that will be initialized on success.
1957 *
1958 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
1959 * then this function places %NULL in *@iov on return. Otherwise, a new
1960 * array will be allocated and the result placed in *@iov. This means that
1961 * the caller may call kfree() on *@iov regardless of whether the small
1962 * on-stack array was used or not (and regardless of whether this function
1963 * returns an error or not).
1964 *
87e5e6da 1965 * Return: Negative error code on error, bytes imported on success
ffecee4f 1966 */
bfdc5970 1967ssize_t import_iovec(int type, const struct iovec __user *uvec,
bc917be8 1968 unsigned nr_segs, unsigned fast_segs,
bfdc5970 1969 struct iovec **iovp, struct iov_iter *i)
bc917be8 1970{
89cd35c5
CH
1971 return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
1972 in_compat_syscall());
bc917be8
AV
1973}
1974EXPORT_SYMBOL(import_iovec);
1975
bc917be8
AV
1976int import_single_range(int rw, void __user *buf, size_t len,
1977 struct iovec *iov, struct iov_iter *i)
1978{
1979 if (len > MAX_RW_COUNT)
1980 len = MAX_RW_COUNT;
96d4f267 1981 if (unlikely(!access_ok(buf, len)))
bc917be8
AV
1982 return -EFAULT;
1983
1984 iov->iov_base = buf;
1985 iov->iov_len = len;
1986 iov_iter_init(i, rw, iov, 1, len);
1987 return 0;
1988}
e1267585 1989EXPORT_SYMBOL(import_single_range);