drivers/misc/mic/scif/scif_dma.c
1 /*
2  * Intel MIC Platform Software Stack (MPSS)
3  *
4  * Copyright(c) 2015 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License, version 2, as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * General Public License for more details.
14  *
15  * Intel SCIF driver.
16  *
17  */
18 #include "scif_main.h"
19 #include "scif_map.h"
20
21 /*
22  * struct scif_dma_comp_cb - SCIF DMA completion callback
23  *
24  * @dma_completion_func: DMA completion callback
25  * @cb_cookie: DMA completion callback cookie
26  * @temp_buf: Temporary buffer
27  * @temp_buf_to_free: Temporary buffer to be freed
28  * @is_cache: Is a kmem_cache allocated buffer
29  * @dst_offset: Destination registration offset
30  * @dst_window: Destination registration window
31  * @len: Length of the temp buffer
32  * @temp_phys: DMA address of the temp buffer
33  * @sdev: The SCIF device
34  * @header_padding: padding for cache line alignment
35  */
36 struct scif_dma_comp_cb {
37         void (*dma_completion_func)(void *cookie);
38         void *cb_cookie;
39         u8 *temp_buf;
40         u8 *temp_buf_to_free;
41         bool is_cache;
42         s64 dst_offset;
43         struct scif_window *dst_window;
44         size_t len;
45         dma_addr_t temp_phys;
46         struct scif_dev *sdev;
47         int header_padding;
48 };
49
50 /**
51  * struct scif_copy_work - Work for DMA copy
52  *
53  * @src_offset: Starting source offset
54  * @dst_offset: Starting destination offset
55  * @src_window: Starting src registered window
56  * @dst_window: Starting dst registered window
57  * @loopback: true if this is a loopback DMA transfer
58  * @len: Length of the transfer
59  * @comp_cb: DMA copy completion callback
60  * @remote_dev: The remote SCIF peer device
61  * @fence_type: polling or interrupt based
62  * @ordered: is this a tail byte ordered DMA transfer
63  */
64 struct scif_copy_work {
65         s64 src_offset;
66         s64 dst_offset;
67         struct scif_window *src_window;
68         struct scif_window *dst_window;
69         int loopback;
70         size_t len;
71         struct scif_dma_comp_cb   *comp_cb;
72         struct scif_dev *remote_dev;
73         int fence_type;
74         bool ordered;
75 };
76
77 /**
78  * scif_reserve_dma_chan:
79  * @ep: Endpoint Descriptor.
80  *
81  * This routine reserves a DMA channel for a particular
82  * endpoint. All DMA transfers for an endpoint are always
83  * programmed on the same DMA channel.
84  */
85 int scif_reserve_dma_chan(struct scif_endpt *ep)
86 {
87         int err = 0;
88         struct scif_dev *scifdev;
89         struct scif_hw_dev *sdev;
90         struct dma_chan *chan;
91
92         /* Loopback DMAs are not supported on the management node */
93         if (!scif_info.nodeid && scifdev_self(ep->remote_dev))
94                 return 0;
95         if (scif_info.nodeid)
96                 scifdev = &scif_dev[0];
97         else
98                 scifdev = ep->remote_dev;
99         sdev = scifdev->sdev;
100         if (!sdev->num_dma_ch)
101                 return -ENODEV;
102         chan = sdev->dma_ch[scifdev->dma_ch_idx];
103         scifdev->dma_ch_idx = (scifdev->dma_ch_idx + 1) % sdev->num_dma_ch;
104         mutex_lock(&ep->rma_info.rma_lock);
105         ep->rma_info.dma_chan = chan;
106         mutex_unlock(&ep->rma_info.rma_lock);
107         return err;
108 }
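/*
 * Illustrative sketch (not part of the driver): a caller reserves the
 * channel once per endpoint and then reuses it for every RMA transfer,
 * roughly:
 *
 *	err = scif_reserve_dma_chan(ep);
 *	if (err)
 *		return err;
 *	chan = ep->rma_info.dma_chan;
 *
 * The round-robin increment of dma_ch_idx above spreads endpoints across
 * the available DMA channels while keeping all transfers for a given
 * endpoint on the same channel. For management-node loopback the channel
 * is not assigned, since loopback DMA is not supported there.
 */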
109
110 #ifdef CONFIG_MMU_NOTIFIER
111 /**
112  * __scif_rma_destroy_tcw:
113  *
114  * This routine destroys temporary cached windows
115  */
116 static
117 void __scif_rma_destroy_tcw(struct scif_mmu_notif *mmn,
118                             struct scif_endpt *ep,
119                             u64 start, u64 len)
120 {
121         struct list_head *item, *tmp;
122         struct scif_window *window;
123         u64 start_va, end_va;
124         u64 end = start + len;
125
126         if (end <= start)
127                 return;
128
129         list_for_each_safe(item, tmp, &mmn->tc_reg_list) {
130                 window = list_entry(item, struct scif_window, list);
131                 ep = (struct scif_endpt *)window->ep;
132                 if (!len)
133                         break;
134                 start_va = window->va_for_temp;
135                 end_va = start_va + (window->nr_pages << PAGE_SHIFT);
136                 if (start < start_va && end <= start_va)
137                         break;
138                 if (start >= end_va)
139                         continue;
140                 __scif_rma_destroy_tcw_helper(window);
141         }
142 }
143
144 static void scif_rma_destroy_tcw(struct scif_mmu_notif *mmn, u64 start, u64 len)
145 {
146         struct scif_endpt *ep = mmn->ep;
147
148         spin_lock(&ep->rma_info.tc_lock);
149         __scif_rma_destroy_tcw(mmn, ep, start, len);
150         spin_unlock(&ep->rma_info.tc_lock);
151 }
152
153 static void scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
154 {
155         struct list_head *item, *tmp;
156         struct scif_mmu_notif *mmn;
157
158         list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
159                 mmn = list_entry(item, struct scif_mmu_notif, list);
160                 scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
161         }
162 }
163
164 static void __scif_rma_destroy_tcw_ep(struct scif_endpt *ep)
165 {
166         struct list_head *item, *tmp;
167         struct scif_mmu_notif *mmn;
168
169         spin_lock(&ep->rma_info.tc_lock);
170         list_for_each_safe(item, tmp, &ep->rma_info.mmn_list) {
171                 mmn = list_entry(item, struct scif_mmu_notif, list);
172                 __scif_rma_destroy_tcw(mmn, ep, 0, ULONG_MAX);
173         }
174         spin_unlock(&ep->rma_info.tc_lock);
175 }
176
177 static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
178 {
179         if ((cur_bytes >> PAGE_SHIFT) > scif_info.rma_tc_limit)
180                 return false;
181         if ((atomic_read(&ep->rma_info.tcw_total_pages)
182                         + (cur_bytes >> PAGE_SHIFT)) >
183                         scif_info.rma_tc_limit) {
184                 dev_info(scif_info.mdev.this_device,
185                          "%s %d total=%d, current=%zu reached max\n",
186                          __func__, __LINE__,
187                          atomic_read(&ep->rma_info.tcw_total_pages),
188                          (1 + (cur_bytes >> PAGE_SHIFT)));
189                 scif_rma_destroy_tcw_invalid();
190                 __scif_rma_destroy_tcw_ep(ep);
191         }
192         return true;
193 }
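/*
 * Worked example (illustrative numbers, assuming 4 KiB pages): with
 * scif_info.rma_tc_limit set to 1024 pages, a request of cur_bytes =
 * 64 KiB adds 16 pages. If tcw_total_pages is already 1020, then
 * 1020 + 16 > 1024, so the endpoint's existing temporary cached windows
 * are torn down first, but the function still returns true and the new
 * request is cached. Only a single request larger than the limit itself
 * is refused outright.
 */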
194
195 static void scif_mmu_notifier_release(struct mmu_notifier *mn,
196                                       struct mm_struct *mm)
197 {
198         struct scif_mmu_notif   *mmn;
199
200         mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
201         scif_rma_destroy_tcw(mmn, 0, ULONG_MAX);
202         schedule_work(&scif_info.misc_work);
203 }
204
205 static void scif_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
206                                               struct mm_struct *mm,
207                                               unsigned long address)
208 {
209         struct scif_mmu_notif   *mmn;
210
211         mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
212         scif_rma_destroy_tcw(mmn, address, PAGE_SIZE);
213 }
214
215 static void scif_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
216                                                      struct mm_struct *mm,
217                                                      unsigned long start,
218                                                      unsigned long end)
219 {
220         struct scif_mmu_notif   *mmn;
221
222         mmn = container_of(mn, struct scif_mmu_notif, ep_mmu_notifier);
223         scif_rma_destroy_tcw(mmn, start, end - start);
224 }
225
226 static void scif_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
227                                                    struct mm_struct *mm,
228                                                    unsigned long start,
229                                                    unsigned long end)
230 {
231         /*
232          * Nothing to do here, everything needed was done in
233          * invalidate_range_start.
234          */
235 }
236
237 static const struct mmu_notifier_ops scif_mmu_notifier_ops = {
238         .release = scif_mmu_notifier_release,
239         .clear_flush_young = NULL,
240         .invalidate_page = scif_mmu_notifier_invalidate_page,
241         .invalidate_range_start = scif_mmu_notifier_invalidate_range_start,
242         .invalidate_range_end = scif_mmu_notifier_invalidate_range_end};
243
244 static void scif_ep_unregister_mmu_notifier(struct scif_endpt *ep)
245 {
246         struct scif_endpt_rma_info *rma = &ep->rma_info;
247         struct scif_mmu_notif *mmn = NULL;
248         struct list_head *item, *tmp;
249
250         mutex_lock(&ep->rma_info.mmn_lock);
251         list_for_each_safe(item, tmp, &rma->mmn_list) {
252                 mmn = list_entry(item, struct scif_mmu_notif, list);
253                 mmu_notifier_unregister(&mmn->ep_mmu_notifier, mmn->mm);
254                 list_del(item);
255                 kfree(mmn);
256         }
257         mutex_unlock(&ep->rma_info.mmn_lock);
258 }
259
260 static void scif_init_mmu_notifier(struct scif_mmu_notif *mmn,
261                                    struct mm_struct *mm, struct scif_endpt *ep)
262 {
263         mmn->ep = ep;
264         mmn->mm = mm;
265         mmn->ep_mmu_notifier.ops = &scif_mmu_notifier_ops;
266         INIT_LIST_HEAD(&mmn->list);
267         INIT_LIST_HEAD(&mmn->tc_reg_list);
268 }
269
270 static struct scif_mmu_notif *
271 scif_find_mmu_notifier(struct mm_struct *mm, struct scif_endpt_rma_info *rma)
272 {
273         struct scif_mmu_notif *mmn;
274
275         list_for_each_entry(mmn, &rma->mmn_list, list)
276                 if (mmn->mm == mm)
277                         return mmn;
278         return NULL;
279 }
280
281 static struct scif_mmu_notif *
282 scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
283 {
284         struct scif_mmu_notif *mmn
285                  = kzalloc(sizeof(*mmn), GFP_KERNEL);
286
287         if (!mmn)
288                 return ERR_PTR(-ENOMEM);
289
290         scif_init_mmu_notifier(mmn, current->mm, ep);
291         if (mmu_notifier_register(&mmn->ep_mmu_notifier, current->mm)) {
292                 kfree(mmn);
293                 return ERR_PTR(-EBUSY);
294         }
295         list_add(&mmn->list, &ep->rma_info.mmn_list);
296         return mmn;
297 }
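/*
 * Illustrative usage (sketch only): registration paths typically look up
 * an existing notifier for current->mm and fall back to adding one:
 *
 *	mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
 *	if (!mmn)
 *		mmn = scif_add_mmu_notifier(current->mm, ep);
 *	if (IS_ERR(mmn))
 *		return PTR_ERR(mmn);
 *
 * Note that scif_add_mmu_notifier() registers against current->mm
 * regardless of the mm argument passed in.
 */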
298
299 /*
300  * Called from the misc thread to destroy temporary cached windows and
301  * unregister the MMU notifier for the SCIF endpoint.
302  */
303 void scif_mmu_notif_handler(struct work_struct *work)
304 {
305         struct list_head *pos, *tmpq;
306         struct scif_endpt *ep;
307 restart:
308         scif_rma_destroy_tcw_invalid();
309         spin_lock(&scif_info.rmalock);
310         list_for_each_safe(pos, tmpq, &scif_info.mmu_notif_cleanup) {
311                 ep = list_entry(pos, struct scif_endpt, mmu_list);
312                 list_del(&ep->mmu_list);
313                 spin_unlock(&scif_info.rmalock);
314                 scif_rma_destroy_tcw_ep(ep);
315                 scif_ep_unregister_mmu_notifier(ep);
316                 goto restart;
317         }
318         spin_unlock(&scif_info.rmalock);
319 }
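/*
 * Note on the handler above: the endpoint is removed from the list and
 * scif_info.rmalock is dropped before the sleeping drain/unregister calls,
 * so the walk restarts from the head of the list each time instead of
 * trusting an iterator that may have gone stale while the lock was
 * released.
 */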
320
321 static bool scif_is_set_reg_cache(int flags)
322 {
323         return !!(flags & SCIF_RMA_USECACHE);
324 }
325 #else
326 static struct scif_mmu_notif *
327 scif_find_mmu_notifier(struct mm_struct *mm,
328                        struct scif_endpt_rma_info *rma)
329 {
330         return NULL;
331 }
332
333 static struct scif_mmu_notif *
334 scif_add_mmu_notifier(struct mm_struct *mm, struct scif_endpt *ep)
335 {
336         return NULL;
337 }
338
339 void scif_mmu_notif_handler(struct work_struct *work)
340 {
341 }
342
343 static bool scif_is_set_reg_cache(int flags)
344 {
345         return false;
346 }
347
348 static bool scif_rma_tc_can_cache(struct scif_endpt *ep, size_t cur_bytes)
349 {
350         return false;
351 }
352 #endif
353
354 /**
355  * scif_register_temp:
356  * @epd: End Point Descriptor.
357  * @addr: virtual address to/from which to copy
358  * @len: length of range to copy
359  * @out_offset: computed offset returned by reference.
360  * @out_window: allocated registered window returned by reference.
361  *
362  * Create a temporary registered window. The peer will not know about this
363  * window. This API is used for the scif_vreadfrom()/scif_vwriteto() APIs.
364  */
365 static int
366 scif_register_temp(scif_epd_t epd, unsigned long addr, size_t len, int prot,
367                    off_t *out_offset, struct scif_window **out_window)
368 {
369         struct scif_endpt *ep = (struct scif_endpt *)epd;
370         int err;
371         scif_pinned_pages_t pinned_pages;
372         size_t aligned_len;
373
374         aligned_len = ALIGN(len, PAGE_SIZE);
375
376         err = __scif_pin_pages((void *)(addr & PAGE_MASK),
377                                aligned_len, &prot, 0, &pinned_pages);
378         if (err)
379                 return err;
380
381         pinned_pages->prot = prot;
382
383         /* Compute the offset for this registration */
384         err = scif_get_window_offset(ep, 0, 0,
385                                      aligned_len >> PAGE_SHIFT,
386                                      (s64 *)out_offset);
387         if (err)
388                 goto error_unpin;
389
390         /* Allocate and prepare self registration window */
391         *out_window = scif_create_window(ep, aligned_len >> PAGE_SHIFT,
392                                         *out_offset, true);
393         if (!*out_window) {
394                 scif_free_window_offset(ep, NULL, *out_offset);
395                 err = -ENOMEM;
396                 goto error_unpin;
397         }
398
399         (*out_window)->pinned_pages = pinned_pages;
400         (*out_window)->nr_pages = pinned_pages->nr_pages;
401         (*out_window)->prot = pinned_pages->prot;
402
403         (*out_window)->va_for_temp = addr & PAGE_MASK;
404         err = scif_map_window(ep->remote_dev, *out_window);
405         if (err) {
406                 /* Something went wrong! Rollback */
407                 scif_destroy_window(ep, *out_window);
408                 *out_window = NULL;
409         } else {
410                 *out_offset |= (addr - (*out_window)->va_for_temp);
411         }
412         return err;
413 error_unpin:
414         if (err)
415                 dev_err(&ep->remote_dev->sdev->dev,
416                         "%s %d err %d\n", __func__, __LINE__, err);
417         scif_unpin_pages(pinned_pages);
418         return err;
419 }
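/*
 * Illustrative sketch (assumed caller, not part of this file): a
 * scif_vreadfrom()/scif_vwriteto() style path pairs the temporary
 * registration with window teardown, roughly:
 *
 *	err = scif_register_temp(epd, addr, len, prot, &offset, &window);
 *	if (err)
 *		return err;
 *	... program DMA between this window and the remote window ...
 *	scif_destroy_window(ep, window);
 *
 * The low bits of the returned offset carry the sub-page offset of addr,
 * so the DMA can start at the exact byte requested.
 */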
420
421 #define SCIF_DMA_TO (3 * HZ)
422
423 /*
424  * scif_sync_dma - Program a DMA without an interrupt descriptor
425  *
426  * @sdev - The SCIF hardware device instance used
427  * for DMA registration.
428  * @chan - DMA channel to be used.
429  * @sync_wait: Wait for DMA to complete?
430  *
431  * Return 0 on success and -errno on error.
432  */
433 static int scif_sync_dma(struct scif_hw_dev *sdev, struct dma_chan *chan,
434                          bool sync_wait)
435 {
436         int err = 0;
437         struct dma_async_tx_descriptor *tx = NULL;
438         enum dma_ctrl_flags flags = DMA_PREP_FENCE;
439         dma_cookie_t cookie;
440         struct dma_device *ddev;
441
442         if (!chan) {
443                 err = -EIO;
444                 dev_err(&sdev->dev, "%s %d err %d\n",
445                         __func__, __LINE__, err);
446                 return err;
447         }
448         ddev = chan->device;
449
450         tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
451         if (!tx) {
452                 err = -ENOMEM;
453                 dev_err(&sdev->dev, "%s %d err %d\n",
454                         __func__, __LINE__, err);
455                 goto release;
456         }
457         cookie = tx->tx_submit(tx);
458
459         if (dma_submit_error(cookie)) {
460                 err = -ENOMEM;
461                 dev_err(&sdev->dev, "%s %d err %d\n",
462                         __func__, __LINE__, err);
463                 goto release;
464         }
465         if (!sync_wait) {
466                 dma_async_issue_pending(chan);
467         } else {
468                 if (dma_sync_wait(chan, cookie) == DMA_COMPLETE) {
469                         err = 0;
470                 } else {
471                         err = -EIO;
472                         dev_err(&sdev->dev, "%s %d err %d\n",
473                                 __func__, __LINE__, err);
474                 }
475         }
476 release:
477         return err;
478 }
479
480 static void scif_dma_callback(void *arg)
481 {
482         struct completion *done = (struct completion *)arg;
483
484         complete(done);
485 }
486
487 #define SCIF_DMA_SYNC_WAIT true
488 #define SCIF_DMA_POLL BIT(0)
489 #define SCIF_DMA_INTR BIT(1)
490
491 /*
492  * scif_async_dma - Program a DMA with an interrupt descriptor
493  *
494  * @sdev - The SCIF hardware device instance used
495  * for DMA registration.
496  * @chan - DMA channel to be used.
497  * Return 0 on success and -errno on error.
498  */
499 static int scif_async_dma(struct scif_hw_dev *sdev, struct dma_chan *chan)
500 {
501         int err = 0;
502         struct dma_device *ddev;
503         struct dma_async_tx_descriptor *tx = NULL;
504         enum dma_ctrl_flags flags = DMA_PREP_INTERRUPT | DMA_PREP_FENCE;
505         DECLARE_COMPLETION_ONSTACK(done_wait);
506         dma_cookie_t cookie;
507         enum dma_status status;
508
509         if (!chan) {
510                 err = -EIO;
511                 dev_err(&sdev->dev, "%s %d err %d\n",
512                         __func__, __LINE__, err);
513                 return err;
514         }
515         ddev = chan->device;
516
517         tx = ddev->device_prep_dma_memcpy(chan, 0, 0, 0, flags);
518         if (!tx) {
519                 err = -ENOMEM;
520                 dev_err(&sdev->dev, "%s %d err %d\n",
521                         __func__, __LINE__, err);
522                 goto release;
523         }
524         reinit_completion(&done_wait);
525         tx->callback = scif_dma_callback;
526         tx->callback_param = &done_wait;
527         cookie = tx->tx_submit(tx);
528
529         if (dma_submit_error(cookie)) {
530                 err = -ENOMEM;
531                 dev_err(&sdev->dev, "%s %d err %d\n",
532                         __func__, __LINE__, err);
533                 goto release;
534         }
535         dma_async_issue_pending(chan);
536
537         err = wait_for_completion_timeout(&done_wait, SCIF_DMA_TO);
538         if (!err) {
539                 err = -EIO;
540                 dev_err(&sdev->dev, "%s %d err %d\n",
541                         __func__, __LINE__, err);
542                 goto release;
543         }
544         err = 0;
545         status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
546         if (status != DMA_COMPLETE) {
547                 err = -EIO;
548                 dev_err(&sdev->dev, "%s %d err %d\n",
549                         __func__, __LINE__, err);
550                 goto release;
551         }
552 release:
553         return err;
554 }
555
556 /*
557  * scif_drain_dma_poll - Drain all outstanding DMA operations for a particular
558  * DMA channel via polling.
559  *
560  * @sdev - The SCIF device
561  * @chan - DMA channel
562  * Return 0 on success and -errno on error.
563  */
564 static int scif_drain_dma_poll(struct scif_hw_dev *sdev, struct dma_chan *chan)
565 {
566         if (!chan)
567                 return -EINVAL;
568         return scif_sync_dma(sdev, chan, SCIF_DMA_SYNC_WAIT);
569 }
570
571 /*
572  * scif_drain_dma_intr - Drain all outstanding DMA operations for a particular
573  * DMA channel via interrupt based blocking wait.
574  *
575  * @sdev - The SCIF device
576  * @chan - DMA channel
577  * Return 0 on success and -errno on error.
578  */
579 int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan)
580 {
581         if (!chan)
582                 return -EINVAL;
583         return scif_async_dma(sdev, chan);
584 }
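/*
 * Example (taken from the teardown paths below): before a registered
 * window with possibly in-flight DMA is destroyed, the channel is drained
 * so completions cannot touch freed memory:
 *
 *	err = scif_drain_dma_intr(ep->remote_dev->sdev,
 *				  ep->rma_info.dma_chan);
 */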
585
586 /**
587  * scif_rma_destroy_windows:
588  *
589  * This routine destroys all windows queued for cleanup
590  */
591 void scif_rma_destroy_windows(void)
592 {
593         struct list_head *item, *tmp;
594         struct scif_window *window;
595         struct scif_endpt *ep;
596         struct dma_chan *chan;
597
598         might_sleep();
599 restart:
600         spin_lock(&scif_info.rmalock);
601         list_for_each_safe(item, tmp, &scif_info.rma) {
602                 window = list_entry(item, struct scif_window,
603                                     list);
604                 ep = (struct scif_endpt *)window->ep;
605                 chan = ep->rma_info.dma_chan;
606
607                 list_del_init(&window->list);
608                 spin_unlock(&scif_info.rmalock);
609                 if (!chan || !scifdev_alive(ep) ||
610                     !scif_drain_dma_intr(ep->remote_dev->sdev,
611                                          ep->rma_info.dma_chan))
612                         /* Remove window from global list */
613                         window->unreg_state = OP_COMPLETED;
614                 else
615                         dev_warn(&ep->remote_dev->sdev->dev,
616                                  "DMA engine hung?\n");
617                 if (window->unreg_state == OP_COMPLETED) {
618                         if (window->type == SCIF_WINDOW_SELF)
619                                 scif_destroy_window(ep, window);
620                         else
621                                 scif_destroy_remote_window(window);
622                         atomic_dec(&ep->rma_info.tw_refcount);
623                 }
624                 goto restart;
625         }
626         spin_unlock(&scif_info.rmalock);
627 }
628
629 /**
630  * scif_rma_destroy_tcw_invalid:
631  *
632  * This routine destroys temporary cached registered windows
633  * which have been queued for cleanup.
634  */
635 void scif_rma_destroy_tcw_invalid(void)
636 {
637         struct list_head *item, *tmp;
638         struct scif_window *window;
639         struct scif_endpt *ep;
640         struct dma_chan *chan;
641
642         might_sleep();
643 restart:
644         spin_lock(&scif_info.rmalock);
645         list_for_each_safe(item, tmp, &scif_info.rma_tc) {
646                 window = list_entry(item, struct scif_window, list);
647                 ep = (struct scif_endpt *)window->ep;
648                 chan = ep->rma_info.dma_chan;
649                 list_del_init(&window->list);
650                 spin_unlock(&scif_info.rmalock);
651                 mutex_lock(&ep->rma_info.rma_lock);
652                 if (!chan || !scifdev_alive(ep) ||
653                     !scif_drain_dma_intr(ep->remote_dev->sdev,
654                                          ep->rma_info.dma_chan)) {
655                         atomic_sub(window->nr_pages,
656                                    &ep->rma_info.tcw_total_pages);
657                         scif_destroy_window(ep, window);
658                         atomic_dec(&ep->rma_info.tcw_refcount);
659                 } else {
660                         dev_warn(&ep->remote_dev->sdev->dev,
661                                  "DMA engine hung?\n");
662                 }
663                 mutex_unlock(&ep->rma_info.rma_lock);
664                 goto restart;
665         }
666         spin_unlock(&scif_info.rmalock);
667 }
668
669 static inline
670 void *_get_local_va(off_t off, struct scif_window *window, size_t len)
671 {
672         int page_nr = (off - window->offset) >> PAGE_SHIFT;
673         off_t page_off = off & ~PAGE_MASK;
674         void *va = NULL;
675
676         if (window->type == SCIF_WINDOW_SELF) {
677                 struct page **pages = window->pinned_pages->pages;
678
679                 va = page_address(pages[page_nr]) + page_off;
680         }
681         return va;
682 }
683
684 static inline
685 void *ioremap_remote(off_t off, struct scif_window *window,
686                      size_t len, struct scif_dev *dev,
687                      struct scif_window_iter *iter)
688 {
689         dma_addr_t phys = scif_off_to_dma_addr(window, off, NULL, iter);
690
691         /*
692          * If the DMA address is not card relative then we need the DMA
693          * addresses to be an offset into the BAR. The aperture base was already
694          * added, so subtract it here since scif_ioremap() is going to add it again.
695          */
696         if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
697             dev->sdev->aper && !dev->sdev->card_rel_da)
698                 phys = phys - dev->sdev->aper->pa;
699         return scif_ioremap(phys, len, dev);
700 }
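/*
 * Hypothetical example of the adjustment above (numbers made up): if the
 * peer window stores phys = aper->pa + 0x10000 and the DMA addresses are
 * not card relative, the value passed to scif_ioremap() must be just the
 * 0x10000 BAR offset, because scif_ioremap() adds the aperture base back
 * again.
 */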
701
702 static inline void
703 iounmap_remote(void *virt, size_t size, struct scif_copy_work *work)
704 {
705         scif_iounmap(virt, size, work->remote_dev);
706 }
707
708 /*
709  * Takes care of ordering issues caused by
710  * 1. Hardware: only in the case of a CPU copy from the mgmt node to
711  * the card, because of WC memory.
712  * 2. Software: if memcpy reorders copy instructions for optimization.
713  * This could happen on both the mgmt node and the card.
714  */
715 static inline void
716 scif_ordered_memcpy_toio(char *dst, const char *src, size_t count)
717 {
718         if (!count)
719                 return;
720
721         memcpy_toio((void __iomem __force *)dst, src, --count);
722         /* Order the last byte with the previous stores */
723         wmb();
724         *(dst + count) = *(src + count);
725 }
726
727 static inline void scif_unaligned_cpy_toio(char *dst, const char *src,
728                                            size_t count, bool ordered)
729 {
730         if (ordered)
731                 scif_ordered_memcpy_toio(dst, src, count);
732         else
733                 memcpy_toio((void __iomem __force *)dst, src, count);
734 }
735
736 static inline
737 void scif_ordered_memcpy_fromio(char *dst, const char *src, size_t count)
738 {
739         if (!count)
740                 return;
741
742         memcpy_fromio(dst, (void __iomem __force *)src, --count);
743         /* Order the last byte with the previous loads */
744         rmb();
745         *(dst + count) = *(src + count);
746 }
747
748 static inline void scif_unaligned_cpy_fromio(char *dst, const char *src,
749                                              size_t count, bool ordered)
750 {
751         if (ordered)
752                 scif_ordered_memcpy_fromio(dst, src, count);
753         else
754                 memcpy_fromio(dst, (void __iomem __force *)src, count);
755 }
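/*
 * Usage note (illustrative): the ordered flag only makes sense for the
 * final chunk of a transfer, e.g. as used later in this file:
 *
 *	scif_unaligned_cpy_toio(window_virt_addr, temp, tail_len,
 *				work->ordered);
 *
 * so that the very last byte is stored only after all preceding bytes,
 * which is what a tail byte ordered transfer requires.
 */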
756
757 #define SCIF_RMA_ERROR_CODE (~(dma_addr_t)0x0)
758
759 /*
760  * scif_off_to_dma_addr:
761  * Obtain the dma_addr given the window and the offset.
762  * @window: Registered window.
763  * @off: Window offset.
764  * @nr_bytes: Return the number of contiguous bytes till next DMA addr index.
765  * @iter: Window iterator caching the index and start offset of the
766  * last contiguous chunk found, for faster lookups on the next call.
767  * The nr_bytes value gives the caller an estimate of the maximum
768  * possible contiguous DMA transfer, while the iterator speeds up
769  * the lookup on the next iteration.
770  */
771 dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
772                                 size_t *nr_bytes, struct scif_window_iter *iter)
773 {
774         int i, page_nr;
775         s64 start, end;
776         off_t page_off;
777
778         if (window->nr_pages == window->nr_contig_chunks) {
779                 page_nr = (off - window->offset) >> PAGE_SHIFT;
780                 page_off = off & ~PAGE_MASK;
781
782                 if (nr_bytes)
783                         *nr_bytes = PAGE_SIZE - page_off;
784                 return window->dma_addr[page_nr] | page_off;
785         }
786         if (iter) {
787                 i = iter->index;
788                 start = iter->offset;
789         } else {
790                 i =  0;
791                 start =  window->offset;
792         }
793         for (; i < window->nr_contig_chunks; i++) {
794                 end = start + (window->num_pages[i] << PAGE_SHIFT);
795                 if (off >= start && off < end) {
796                         if (iter) {
797                                 iter->index = i;
798                                 iter->offset = start;
799                         }
800                         if (nr_bytes)
801                                 *nr_bytes = end - off;
802                         return (window->dma_addr[i] + (off - start));
803                 }
804                 start += (window->num_pages[i] << PAGE_SHIFT);
805         }
806         dev_err(scif_info.mdev.this_device,
807                 "%s %d BUG. Addr not found? window %p off 0x%llx\n",
808                 __func__, __LINE__, window, off);
809         return SCIF_RMA_ERROR_CODE;
810 }
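/*
 * Illustrative loop (sketch only, local names are hypothetical): walking a
 * window in contiguous chunks with an iterator avoids rescanning
 * num_pages[] from the start on every lookup:
 *
 *	struct scif_window_iter iter;
 *	size_t nr_bytes, chunk;
 *	dma_addr_t addr;
 *
 *	scif_init_window_iter(window, &iter);
 *	while (remaining) {
 *		addr = scif_off_to_dma_addr(window, off, &nr_bytes, &iter);
 *		chunk = min(nr_bytes, remaining);
 *		... program a DMA of chunk bytes at addr ...
 *		off += chunk;
 *		remaining -= chunk;
 *	}
 *
 * This is the pattern used by the list DMA copy routines below.
 */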
811
812 /*
813  * Copy between rma window and temporary buffer
814  */
815 static void scif_rma_local_cpu_copy(s64 offset, struct scif_window *window,
816                                     u8 *temp, size_t rem_len, bool to_temp)
817 {
818         void *window_virt;
819         size_t loop_len;
820         int offset_in_page;
821         s64 end_offset;
822
823         offset_in_page = offset & ~PAGE_MASK;
824         loop_len = PAGE_SIZE - offset_in_page;
825
826         if (rem_len < loop_len)
827                 loop_len = rem_len;
828
829         window_virt = _get_local_va(offset, window, loop_len);
830         if (!window_virt)
831                 return;
832         if (to_temp)
833                 memcpy(temp, window_virt, loop_len);
834         else
835                 memcpy(window_virt, temp, loop_len);
836
837         offset += loop_len;
838         temp += loop_len;
839         rem_len -= loop_len;
840
841         end_offset = window->offset +
842                 (window->nr_pages << PAGE_SHIFT);
843         while (rem_len) {
844                 if (offset == end_offset) {
845                         window = list_next_entry(window, list);
846                         end_offset = window->offset +
847                                 (window->nr_pages << PAGE_SHIFT);
848                 }
849                 loop_len = min(PAGE_SIZE, rem_len);
850                 window_virt = _get_local_va(offset, window, loop_len);
851                 if (!window_virt)
852                         return;
853                 if (to_temp)
854                         memcpy(temp, window_virt, loop_len);
855                 else
856                         memcpy(window_virt, temp, loop_len);
857                 offset  += loop_len;
858                 temp    += loop_len;
859                 rem_len -= loop_len;
860         }
861 }
862
863 /**
864  * scif_rma_completion_cb:
865  * @data: RMA cookie
866  *
867  * RMA interrupt completion callback.
868  */
869 static void scif_rma_completion_cb(void *data)
870 {
871         struct scif_dma_comp_cb *comp_cb = data;
872
873         /* Copy back to the dst window, if any, then free the DMA completion CB resources */
874         if (comp_cb->dst_window)
875                 scif_rma_local_cpu_copy(comp_cb->dst_offset,
876                                         comp_cb->dst_window,
877                                         comp_cb->temp_buf +
878                                         comp_cb->header_padding,
879                                         comp_cb->len, false);
880         scif_unmap_single(comp_cb->temp_phys, comp_cb->sdev,
881                           SCIF_KMEM_UNALIGNED_BUF_SIZE);
882         if (comp_cb->is_cache)
883                 kmem_cache_free(unaligned_cache,
884                                 comp_cb->temp_buf_to_free);
885         else
886                 kfree(comp_cb->temp_buf_to_free);
887 }
888
889 /* Copies between temporary buffer and offsets provided in work */
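/*
 * The copy proceeds in three phases: a CPU copy of any cache-line
 * unaligned head, DMA of the cache-line aligned body (using the ordered
 * two-step trick for the last chunk when required), and a CPU copy of the
 * unaligned tail once prior DMA has been drained. A final zero-length
 * descriptor with DMA_PREP_INTERRUPT fires scif_rma_completion_cb() to
 * copy back and release the temporary buffer.
 */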
890 static int
891 scif_rma_list_dma_copy_unaligned(struct scif_copy_work *work,
892                                  u8 *temp, struct dma_chan *chan,
893                                  bool src_local)
894 {
895         struct scif_dma_comp_cb *comp_cb = work->comp_cb;
896         dma_addr_t window_dma_addr, temp_dma_addr;
897         dma_addr_t temp_phys = comp_cb->temp_phys;
898         size_t loop_len, nr_contig_bytes = 0, remaining_len = work->len;
899         int offset_in_ca, ret = 0;
900         s64 end_offset, offset;
901         struct scif_window *window;
902         void *window_virt_addr;
903         size_t tail_len;
904         struct dma_async_tx_descriptor *tx;
905         struct dma_device *dev = chan->device;
906         dma_cookie_t cookie;
907
908         if (src_local) {
909                 offset = work->dst_offset;
910                 window = work->dst_window;
911         } else {
912                 offset = work->src_offset;
913                 window = work->src_window;
914         }
915
916         offset_in_ca = offset & (L1_CACHE_BYTES - 1);
917         if (offset_in_ca) {
918                 loop_len = L1_CACHE_BYTES - offset_in_ca;
919                 loop_len = min(loop_len, remaining_len);
920                 window_virt_addr = ioremap_remote(offset, window,
921                                                   loop_len,
922                                                   work->remote_dev,
923                                                   NULL);
924                 if (!window_virt_addr)
925                         return -ENOMEM;
926                 if (src_local)
927                         scif_unaligned_cpy_toio(window_virt_addr, temp,
928                                                 loop_len,
929                                                 work->ordered &&
930                                                 !(remaining_len - loop_len));
931                 else
932                         scif_unaligned_cpy_fromio(temp, window_virt_addr,
933                                                   loop_len, work->ordered &&
934                                                   !(remaining_len - loop_len));
935                 iounmap_remote(window_virt_addr, loop_len, work);
936
937                 offset += loop_len;
938                 temp += loop_len;
939                 temp_phys += loop_len;
940                 remaining_len -= loop_len;
941         }
942
943         offset_in_ca = offset & ~PAGE_MASK;
944         end_offset = window->offset +
945                 (window->nr_pages << PAGE_SHIFT);
946
947         tail_len = remaining_len & (L1_CACHE_BYTES - 1);
948         remaining_len -= tail_len;
949         while (remaining_len) {
950                 if (offset == end_offset) {
951                         window = list_next_entry(window, list);
952                         end_offset = window->offset +
953                                 (window->nr_pages << PAGE_SHIFT);
954                 }
955                 if (scif_is_mgmt_node())
956                         temp_dma_addr = temp_phys;
957                 else
958                         /* Fix if we ever enable IOMMU on the card */
959                         temp_dma_addr = (dma_addr_t)virt_to_phys(temp);
960                 window_dma_addr = scif_off_to_dma_addr(window, offset,
961                                                        &nr_contig_bytes,
962                                                        NULL);
963                 loop_len = min(nr_contig_bytes, remaining_len);
964                 if (src_local) {
965                         if (work->ordered && !tail_len &&
966                             !(remaining_len - loop_len) &&
967                             loop_len != L1_CACHE_BYTES) {
968                                 /*
969                                  * Break up the last chunk of the transfer into
970                                  * two steps if there is no tail, to guarantee
971                                  * DMA ordering. SCIF_DMA_POLLING inserts
972                                  * a status update descriptor in step 1 which
973                                  * acts as a double sided synchronization fence
974                                  * for the DMA engine to ensure that the last
975                                  * cache line in step 2 is updated last.
976                                  */
977                                 /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
978                                 tx =
979                                 dev->device_prep_dma_memcpy(chan,
980                                                             window_dma_addr,
981                                                             temp_dma_addr,
982                                                             loop_len -
983                                                             L1_CACHE_BYTES,
984                                                             DMA_PREP_FENCE);
985                                 if (!tx) {
986                                         ret = -ENOMEM;
987                                         goto err;
988                                 }
989                                 cookie = tx->tx_submit(tx);
990                                 if (dma_submit_error(cookie)) {
991                                         ret = -ENOMEM;
992                                         goto err;
993                                 }
994                                 dma_async_issue_pending(chan);
995                                 offset += (loop_len - L1_CACHE_BYTES);
996                                 temp_dma_addr += (loop_len - L1_CACHE_BYTES);
997                                 window_dma_addr += (loop_len - L1_CACHE_BYTES);
998                                 remaining_len -= (loop_len - L1_CACHE_BYTES);
999                                 loop_len = remaining_len;
1000
1001                                 /* Step 2) DMA: L1_CACHE_BYTES */
1002                                 tx =
1003                                 dev->device_prep_dma_memcpy(chan,
1004                                                             window_dma_addr,
1005                                                             temp_dma_addr,
1006                                                             loop_len, 0);
1007                                 if (!tx) {
1008                                         ret = -ENOMEM;
1009                                         goto err;
1010                                 }
1011                                 cookie = tx->tx_submit(tx);
1012                                 if (dma_submit_error(cookie)) {
1013                                         ret = -ENOMEM;
1014                                         goto err;
1015                                 }
1016                                 dma_async_issue_pending(chan);
1017                         } else {
1018                                 tx =
1019                                 dev->device_prep_dma_memcpy(chan,
1020                                                             window_dma_addr,
1021                                                             temp_dma_addr,
1022                                                             loop_len, 0);
1023                                 if (!tx) {
1024                                         ret = -ENOMEM;
1025                                         goto err;
1026                                 }
1027                                 cookie = tx->tx_submit(tx);
1028                                 if (dma_submit_error(cookie)) {
1029                                         ret = -ENOMEM;
1030                                         goto err;
1031                                 }
1032                                 dma_async_issue_pending(chan);
1033                         }
1034                 } else {
1035                         tx = dev->device_prep_dma_memcpy(chan, temp_dma_addr,
1036                                         window_dma_addr, loop_len, 0);
1037                         if (!tx) {
1038                                 ret = -ENOMEM;
1039                                 goto err;
1040                         }
1041                         cookie = tx->tx_submit(tx);
1042                         if (dma_submit_error(cookie)) {
1043                                 ret = -ENOMEM;
1044                                 goto err;
1045                         }
1046                         dma_async_issue_pending(chan);
1047                 }
1048                 if (ret < 0)
1049                         goto err;
1050                 offset += loop_len;
1051                 temp += loop_len;
1052                 temp_phys += loop_len;
1053                 remaining_len -= loop_len;
1054                 offset_in_ca = 0;
1055         }
1056         if (tail_len) {
1057                 if (offset == end_offset) {
1058                         window = list_next_entry(window, list);
1059                         end_offset = window->offset +
1060                                 (window->nr_pages << PAGE_SHIFT);
1061                 }
1062                 window_virt_addr = ioremap_remote(offset, window, tail_len,
1063                                                   work->remote_dev,
1064                                                   NULL);
1065                 if (!window_virt_addr)
1066                         return -ENOMEM;
1067                 /*
1068                  * The CPU copy for the tail bytes must be initiated only once
1069                  * previous DMA transfers for this endpoint have completed
1070                  * to guarantee ordering.
1071                  */
1072                 if (work->ordered) {
1073                         struct scif_dev *rdev = work->remote_dev;
1074
1075                         ret = scif_drain_dma_intr(rdev->sdev, chan);
1076                         if (ret)
1077                                 return ret;
1078                 }
1079                 if (src_local)
1080                         scif_unaligned_cpy_toio(window_virt_addr, temp,
1081                                                 tail_len, work->ordered);
1082                 else
1083                         scif_unaligned_cpy_fromio(temp, window_virt_addr,
1084                                                   tail_len, work->ordered);
1085                 iounmap_remote(window_virt_addr, tail_len, work);
1086         }
1087         tx = dev->device_prep_dma_memcpy(chan, 0, 0, 0, DMA_PREP_INTERRUPT);
1088         if (!tx) {
1089                 ret = -ENOMEM;
1090                 return ret;
1091         }
1092         tx->callback = &scif_rma_completion_cb;
1093         tx->callback_param = comp_cb;
1094         cookie = tx->tx_submit(tx);
1095
1096         if (dma_submit_error(cookie)) {
1097                 ret = -ENOMEM;
1098                 return ret;
1099         }
1100         dma_async_issue_pending(chan);
1101         return 0;
1102 err:
1103         dev_err(scif_info.mdev.this_device,
1104                 "%s %d Desc Prog Failed ret %d\n",
1105                 __func__, __LINE__, ret);
1106         return ret;
1107 }
1108
1109 /*
1110  * _scif_rma_list_dma_copy_aligned:
1111  *
1112  * Traverse all the windows and perform DMA copy; when ordered, the last byte is sent as a separate descriptor so it is written last.
1113  */
1114 static int _scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
1115                                            struct dma_chan *chan)
1116 {
1117         dma_addr_t src_dma_addr, dst_dma_addr;
1118         size_t loop_len, remaining_len, src_contig_bytes = 0;
1119         size_t dst_contig_bytes = 0;
1120         struct scif_window_iter src_win_iter;
1121         struct scif_window_iter dst_win_iter;
1122         s64 end_src_offset, end_dst_offset;
1123         struct scif_window *src_window = work->src_window;
1124         struct scif_window *dst_window = work->dst_window;
1125         s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1126         int ret = 0;
1127         struct dma_async_tx_descriptor *tx;
1128         struct dma_device *dev = chan->device;
1129         dma_cookie_t cookie;
1130
1131         remaining_len = work->len;
1132
1133         scif_init_window_iter(src_window, &src_win_iter);
1134         scif_init_window_iter(dst_window, &dst_win_iter);
1135         end_src_offset = src_window->offset +
1136                 (src_window->nr_pages << PAGE_SHIFT);
1137         end_dst_offset = dst_window->offset +
1138                 (dst_window->nr_pages << PAGE_SHIFT);
1139         while (remaining_len) {
1140                 if (src_offset == end_src_offset) {
1141                         src_window = list_next_entry(src_window, list);
1142                         end_src_offset = src_window->offset +
1143                                 (src_window->nr_pages << PAGE_SHIFT);
1144                         scif_init_window_iter(src_window, &src_win_iter);
1145                 }
1146                 if (dst_offset == end_dst_offset) {
1147                         dst_window = list_next_entry(dst_window, list);
1148                         end_dst_offset = dst_window->offset +
1149                                 (dst_window->nr_pages << PAGE_SHIFT);
1150                         scif_init_window_iter(dst_window, &dst_win_iter);
1151                 }
1152
1153                 /* compute dma addresses for transfer */
1154                 src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
1155                                                     &src_contig_bytes,
1156                                                     &src_win_iter);
1157                 dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
1158                                                     &dst_contig_bytes,
1159                                                     &dst_win_iter);
1160                 loop_len = min(src_contig_bytes, dst_contig_bytes);
1161                 loop_len = min(loop_len, remaining_len);
1162                 if (work->ordered && !(remaining_len - loop_len)) {
1163                         /*
1164                          * Break up the last chunk of the transfer into two
1165                          * steps to ensure that the last byte in step 2 is
1166                          * updated last.
1167                          */
1168                         /* Step 1) DMA: Body Length - 1 */
1169                         tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1170                                                          src_dma_addr,
1171                                                          loop_len - 1,
1172                                                          DMA_PREP_FENCE);
1173                         if (!tx) {
1174                                 ret = -ENOMEM;
1175                                 goto err;
1176                         }
1177                         cookie = tx->tx_submit(tx);
1178                         if (dma_submit_error(cookie)) {
1179                                 ret = -ENOMEM;
1180                                 goto err;
1181                         }
1182                         src_offset += (loop_len - 1);
1183                         dst_offset += (loop_len - 1);
1184                         src_dma_addr += (loop_len - 1);
1185                         dst_dma_addr += (loop_len - 1);
1186                         remaining_len -= (loop_len - 1);
1187                         loop_len = remaining_len;
1188
1189                         /* Step 2) DMA: 1 BYTE */
1190                         tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1191                                         src_dma_addr, loop_len, 0);
1192                         if (!tx) {
1193                                 ret = -ENOMEM;
1194                                 goto err;
1195                         }
1196                         cookie = tx->tx_submit(tx);
1197                         if (dma_submit_error(cookie)) {
1198                                 ret = -ENOMEM;
1199                                 goto err;
1200                         }
1201                         dma_async_issue_pending(chan);
1202                 } else {
1203                         tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1204                                         src_dma_addr, loop_len, 0);
1205                         if (!tx) {
1206                                 ret = -ENOMEM;
1207                                 goto err;
1208                         }
1209                         cookie = tx->tx_submit(tx);
1210                         if (dma_submit_error(cookie)) {
1211                                 ret = -ENOMEM;
1212                                 goto err;
1213                         }
1214                 }
1215                 src_offset += loop_len;
1216                 dst_offset += loop_len;
1217                 remaining_len -= loop_len;
1218         }
1219         return ret;
1220 err:
1221         dev_err(scif_info.mdev.this_device,
1222                 "%s %d Desc Prog Failed ret %d\n",
1223                 __func__, __LINE__, ret);
1224         return ret;
1225 }
1226
1227 /*
1228  * scif_rma_list_dma_copy_aligned:
1229  *
1230  * Traverse all the windows and perform DMA copy, using CPU copies for any cache-line unaligned head and tail bytes.
1231  */
1232 static int scif_rma_list_dma_copy_aligned(struct scif_copy_work *work,
1233                                           struct dma_chan *chan)
1234 {
1235         dma_addr_t src_dma_addr, dst_dma_addr;
1236         size_t loop_len, remaining_len, tail_len, src_contig_bytes = 0;
1237         size_t dst_contig_bytes = 0;
1238         int src_cache_off;
1239         s64 end_src_offset, end_dst_offset;
1240         struct scif_window_iter src_win_iter;
1241         struct scif_window_iter dst_win_iter;
1242         void *src_virt, *dst_virt;
1243         struct scif_window *src_window = work->src_window;
1244         struct scif_window *dst_window = work->dst_window;
1245         s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1246         int ret = 0;
1247         struct dma_async_tx_descriptor *tx;
1248         struct dma_device *dev = chan->device;
1249         dma_cookie_t cookie;
1250
1251         remaining_len = work->len;
1252         scif_init_window_iter(src_window, &src_win_iter);
1253         scif_init_window_iter(dst_window, &dst_win_iter);
1254
1255         src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
1256         if (src_cache_off != 0) {
1257                 /* Head */
1258                 loop_len = L1_CACHE_BYTES - src_cache_off;
1259                 loop_len = min(loop_len, remaining_len);
1260                 src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
1261                 dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
1262                 if (src_window->type == SCIF_WINDOW_SELF)
1263                         src_virt = _get_local_va(src_offset, src_window,
1264                                                  loop_len);
1265                 else
1266                         src_virt = ioremap_remote(src_offset, src_window,
1267                                                   loop_len,
1268                                                   work->remote_dev, NULL);
1269                 if (!src_virt)
1270                         return -ENOMEM;
1271                 if (dst_window->type == SCIF_WINDOW_SELF)
1272                         dst_virt = _get_local_va(dst_offset, dst_window,
1273                                                  loop_len);
1274                 else
1275                         dst_virt = ioremap_remote(dst_offset, dst_window,
1276                                                   loop_len,
1277                                                   work->remote_dev, NULL);
1278                 if (!dst_virt) {
1279                         if (src_window->type != SCIF_WINDOW_SELF)
1280                                 iounmap_remote(src_virt, loop_len, work);
1281                         return -ENOMEM;
1282                 }
1283                 if (src_window->type == SCIF_WINDOW_SELF)
1284                         scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
1285                                                 remaining_len == loop_len ?
1286                                                 work->ordered : false);
1287                 else
1288                         scif_unaligned_cpy_fromio(dst_virt, src_virt, loop_len,
1289                                                   remaining_len == loop_len ?
1290                                                   work->ordered : false);
1291                 if (src_window->type != SCIF_WINDOW_SELF)
1292                         iounmap_remote(src_virt, loop_len, work);
1293                 if (dst_window->type != SCIF_WINDOW_SELF)
1294                         iounmap_remote(dst_virt, loop_len, work);
1295                 src_offset += loop_len;
1296                 dst_offset += loop_len;
1297                 remaining_len -= loop_len;
1298         }
1299
1300         end_src_offset = src_window->offset +
1301                 (src_window->nr_pages << PAGE_SHIFT);
1302         end_dst_offset = dst_window->offset +
1303                 (dst_window->nr_pages << PAGE_SHIFT);
1304         tail_len = remaining_len & (L1_CACHE_BYTES - 1);
1305         remaining_len -= tail_len;
1306         while (remaining_len) {
1307                 if (src_offset == end_src_offset) {
1308                         src_window = list_next_entry(src_window, list);
1309                         end_src_offset = src_window->offset +
1310                                 (src_window->nr_pages << PAGE_SHIFT);
1311                         scif_init_window_iter(src_window, &src_win_iter);
1312                 }
1313                 if (dst_offset == end_dst_offset) {
1314                         dst_window = list_next_entry(dst_window, list);
1315                         end_dst_offset = dst_window->offset +
1316                                 (dst_window->nr_pages << PAGE_SHIFT);
1317                         scif_init_window_iter(dst_window, &dst_win_iter);
1318                 }
1319
1320                 /* compute dma addresses for transfer */
1321                 src_dma_addr = scif_off_to_dma_addr(src_window, src_offset,
1322                                                     &src_contig_bytes,
1323                                                     &src_win_iter);
1324                 dst_dma_addr = scif_off_to_dma_addr(dst_window, dst_offset,
1325                                                     &dst_contig_bytes,
1326                                                     &dst_win_iter);
1327                 loop_len = min(src_contig_bytes, dst_contig_bytes);
1328                 loop_len = min(loop_len, remaining_len);
1329                 if (work->ordered && !tail_len &&
1330                     !(remaining_len - loop_len)) {
1331                         /*
1332                          * Break up the last chunk of the transfer into two
1333                          * steps if there is no tail, to guarantee DMA ordering.
1334                          * Passing SCIF_DMA_POLLING inserts a status update
1335                          * descriptor in step 1 which acts as a double sided
1336                          * synchronization fence for the DMA engine to ensure
1337                          * that the last cache line in step 2 is updated last.
1338                          */
1339                         /* Step 1) DMA: Body Length - L1_CACHE_BYTES. */
1340                         tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1341                                                          src_dma_addr,
1342                                                          loop_len -
1343                                                          L1_CACHE_BYTES,
1344                                                          DMA_PREP_FENCE);
1345                         if (!tx) {
1346                                 ret = -ENOMEM;
1347                                 goto err;
1348                         }
1349                         cookie = tx->tx_submit(tx);
1350                         if (dma_submit_error(cookie)) {
1351                                 ret = -ENOMEM;
1352                                 goto err;
1353                         }
1354                         dma_async_issue_pending(chan);
1355                         src_offset += (loop_len - L1_CACHE_BYTES);
1356                         dst_offset += (loop_len - L1_CACHE_BYTES);
1357                         src_dma_addr += (loop_len - L1_CACHE_BYTES);
1358                         dst_dma_addr += (loop_len - L1_CACHE_BYTES);
1359                         remaining_len -= (loop_len - L1_CACHE_BYTES);
1360                         loop_len = remaining_len;
1361
1362                         /* Step 2) DMA: L1_CACHE_BYTES */
1363                         tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1364                                                          src_dma_addr,
1365                                                          loop_len, 0);
1366                         if (!tx) {
1367                                 ret = -ENOMEM;
1368                                 goto err;
1369                         }
1370                         cookie = tx->tx_submit(tx);
1371                         if (dma_submit_error(cookie)) {
1372                                 ret = -ENOMEM;
1373                                 goto err;
1374                         }
1375                         dma_async_issue_pending(chan);
1376                 } else {
1377                         tx = dev->device_prep_dma_memcpy(chan, dst_dma_addr,
1378                                                          src_dma_addr,
1379                                                          loop_len, 0);
1380                         if (!tx) {
1381                                 ret = -ENOMEM;
1382                                 goto err;
1383                         }
1384                         cookie = tx->tx_submit(tx);
1385                         if (dma_submit_error(cookie)) {
1386                                 ret = -ENOMEM;
1387                                 goto err;
1388                         }
1389                         dma_async_issue_pending(chan);
1390                 }
1391                 src_offset += loop_len;
1392                 dst_offset += loop_len;
1393                 remaining_len -= loop_len;
1394         }
1395         remaining_len = tail_len;
1396         if (remaining_len) {
1397                 loop_len = remaining_len;
1398                 if (src_offset == end_src_offset)
1399                         src_window = list_next_entry(src_window, list);
1400                 if (dst_offset == end_dst_offset)
1401                         dst_window = list_next_entry(dst_window, list);
1402
1403                 src_dma_addr = __scif_off_to_dma_addr(src_window, src_offset);
1404                 dst_dma_addr = __scif_off_to_dma_addr(dst_window, dst_offset);
1405                 /*
1406                  * The CPU copy for the tail bytes must be initiated only once
1407                  * previous DMA transfers for this endpoint have completed to
1408                  * guarantee ordering.
1409                  */
1410                 if (work->ordered) {
1411                         struct scif_dev *rdev = work->remote_dev;
1412
1413                         ret = scif_drain_dma_poll(rdev->sdev, chan);
1414                         if (ret)
1415                                 return ret;
1416                 }
1417                 if (src_window->type == SCIF_WINDOW_SELF)
1418                         src_virt = _get_local_va(src_offset, src_window,
1419                                                  loop_len);
1420                 else
1421                         src_virt = ioremap_remote(src_offset, src_window,
1422                                                   loop_len,
1423                                                   work->remote_dev, NULL);
1424                 if (!src_virt)
1425                         return -ENOMEM;
1426
1427                 if (dst_window->type == SCIF_WINDOW_SELF)
1428                         dst_virt = _get_local_va(dst_offset, dst_window,
1429                                                  loop_len);
1430                 else
1431                         dst_virt = ioremap_remote(dst_offset, dst_window,
1432                                                   loop_len,
1433                                                   work->remote_dev, NULL);
1434                 if (!dst_virt) {
1435                         if (src_window->type != SCIF_WINDOW_SELF)
1436                                 iounmap_remote(src_virt, loop_len, work);
1437                         return -ENOMEM;
1438                 }
1439
1440                 if (src_window->type == SCIF_WINDOW_SELF)
1441                         scif_unaligned_cpy_toio(dst_virt, src_virt, loop_len,
1442                                                 work->ordered);
1443                 else
1444                         scif_unaligned_cpy_fromio(dst_virt, src_virt,
1445                                                   loop_len, work->ordered);
1446                 if (src_window->type != SCIF_WINDOW_SELF)
1447                         iounmap_remote(src_virt, loop_len, work);
1448
1449                 if (dst_window->type != SCIF_WINDOW_SELF)
1450                         iounmap_remote(dst_virt, loop_len, work);
1451                 remaining_len -= loop_len;
1452         }
1453         return ret;
1454 err:
1455         dev_err(scif_info.mdev.this_device,
1456                 "%s %d Desc Prog Failed ret %d\n",
1457                 __func__, __LINE__, ret);
1458         return ret;
1459 }
1460
1461 /*
1462  * scif_rma_list_cpu_copy:
1463  *
1464  * Traverse all the windows and perform a CPU copy.
1465  */
1466 static int scif_rma_list_cpu_copy(struct scif_copy_work *work)
1467 {
1468         void *src_virt, *dst_virt;
1469         size_t loop_len, remaining_len;
1470         int src_page_off, dst_page_off;
1471         s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1472         struct scif_window *src_window = work->src_window;
1473         struct scif_window *dst_window = work->dst_window;
1474         s64 end_src_offset, end_dst_offset;
1475         int ret = 0;
1476         struct scif_window_iter src_win_iter;
1477         struct scif_window_iter dst_win_iter;
1478
1479         remaining_len = work->len;
1480
1481         scif_init_window_iter(src_window, &src_win_iter);
1482         scif_init_window_iter(dst_window, &dst_win_iter);
1483         while (remaining_len) {
1484                 src_page_off = src_offset & ~PAGE_MASK;
1485                 dst_page_off = dst_offset & ~PAGE_MASK;
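                     /* Copy only up to the nearer of the two page boundaries per pass */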
1486                 loop_len = min(PAGE_SIZE -
1487                                max(src_page_off, dst_page_off),
1488                                remaining_len);
1489
1490                 if (src_window->type == SCIF_WINDOW_SELF)
1491                         src_virt = _get_local_va(src_offset, src_window,
1492                                                  loop_len);
1493                 else
1494                         src_virt = ioremap_remote(src_offset, src_window,
1495                                                   loop_len,
1496                                                   work->remote_dev,
1497                                                   &src_win_iter);
1498                 if (!src_virt) {
1499                         ret = -ENOMEM;
1500                         goto error;
1501                 }
1502
1503                 if (dst_window->type == SCIF_WINDOW_SELF)
1504                         dst_virt = _get_local_va(dst_offset, dst_window,
1505                                                  loop_len);
1506                 else
1507                         dst_virt = ioremap_remote(dst_offset, dst_window,
1508                                                   loop_len,
1509                                                   work->remote_dev,
1510                                                   &dst_win_iter);
1511                 if (!dst_virt) {
1512                         if (src_window->type == SCIF_WINDOW_PEER)
1513                                 iounmap_remote(src_virt, loop_len, work);
1514                         ret = -ENOMEM;
1515                         goto error;
1516                 }
1517
1518                 if (work->loopback) {
1519                         memcpy(dst_virt, src_virt, loop_len);
1520                 } else {
1521                         if (src_window->type == SCIF_WINDOW_SELF)
1522                                 memcpy_toio((void __iomem __force *)dst_virt,
1523                                             src_virt, loop_len);
1524                         else
1525                                 memcpy_fromio(dst_virt,
1526                                               (void __iomem __force *)src_virt,
1527                                               loop_len);
1528                 }
1529                 if (src_window->type == SCIF_WINDOW_PEER)
1530                         iounmap_remote(src_virt, loop_len, work);
1531
1532                 if (dst_window->type == SCIF_WINDOW_PEER)
1533                         iounmap_remote(dst_virt, loop_len, work);
1534
1535                 src_offset += loop_len;
1536                 dst_offset += loop_len;
1537                 remaining_len -= loop_len;
1538                 if (remaining_len) {
1539                         end_src_offset = src_window->offset +
1540                                 (src_window->nr_pages << PAGE_SHIFT);
1541                         end_dst_offset = dst_window->offset +
1542                                 (dst_window->nr_pages << PAGE_SHIFT);
1543                         if (src_offset == end_src_offset) {
1544                                 src_window = list_next_entry(src_window, list);
1545                                 scif_init_window_iter(src_window,
1546                                                       &src_win_iter);
1547                         }
1548                         if (dst_offset == end_dst_offset) {
1549                                 dst_window = list_next_entry(dst_window, list);
1550                                 scif_init_window_iter(dst_window,
1551                                                       &dst_win_iter);
1552                         }
1553                 }
1554         }
1555 error:
1556         return ret;
1557 }
1558
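     /*
      * scif_rma_list_dma_copy_wrapper:
      *
      * Pick a copy strategy for @work: hand the transfer straight to the
      * DMA engine when it supports byte-aligned copies or when source and
      * destination share the same cache line offset, fall back to a CPU
      * copy for loopback, and otherwise stage the data through a
      * cache-line-aligned temporary buffer.
      */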
1559 static int scif_rma_list_dma_copy_wrapper(struct scif_endpt *epd,
1560                                           struct scif_copy_work *work,
1561                                           struct dma_chan *chan, off_t loffset)
1562 {
1563         int src_cache_off, dst_cache_off;
1564         s64 src_offset = work->src_offset, dst_offset = work->dst_offset;
1565         u8 *temp = NULL;
1566         bool src_local = true, dst_local = false;
1567         struct scif_dma_comp_cb *comp_cb;
1568         dma_addr_t src_dma_addr, dst_dma_addr;
1569         int err;
1570
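             /* A DMA engine that copies at byte granularity handles any misalignment itself */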
1571         if (is_dma_copy_aligned(chan->device, 1, 1, 1))
1572                 return _scif_rma_list_dma_copy_aligned(work, chan);
1573
1574         src_cache_off = src_offset & (L1_CACHE_BYTES - 1);
1575         dst_cache_off = dst_offset & (L1_CACHE_BYTES - 1);
1576
1577         if (dst_cache_off == src_cache_off)
1578                 return scif_rma_list_dma_copy_aligned(work, chan);
1579
1580         if (work->loopback)
1581                 return scif_rma_list_cpu_copy(work);
1582         src_dma_addr = __scif_off_to_dma_addr(work->src_window, src_offset);
1583         dst_dma_addr = __scif_off_to_dma_addr(work->dst_window, dst_offset);
1584         src_local = work->src_window->type == SCIF_WINDOW_SELF;
1585         dst_local = work->dst_window->type == SCIF_WINDOW_SELF;
1586
1587         dst_local = dst_local;  /* dst_local is otherwise unused; quiet compiler warnings */
1588         /* Allocate dma_completion cb */
1589         comp_cb = kzalloc(sizeof(*comp_cb), GFP_KERNEL);
1590         if (!comp_cb)
1591                 goto error;
1592
1593         work->comp_cb = comp_cb;
1594         comp_cb->cb_cookie = comp_cb;
1595         comp_cb->dma_completion_func = &scif_rma_completion_cb;
1596
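             /*
              * Pick the bounce buffer: small transfers use a kmalloc()
              * buffer with padding for cache line alignment, larger ones
              * come from the unaligned_cache kmem_cache.
              */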
1597         if (work->len + (L1_CACHE_BYTES << 1) < SCIF_KMEM_UNALIGNED_BUF_SIZE) {
1598                 comp_cb->is_cache = false;
1599                 /* Allocate padding bytes to align to a cache line */
1600                 temp = kmalloc(work->len + (L1_CACHE_BYTES << 1),
1601                                GFP_KERNEL);
1602                 if (!temp)
1603                         goto free_comp_cb;
1604                 comp_cb->temp_buf_to_free = temp;
1605                 /* kmalloc(..) does not guarantee cache line alignment */
1606                 if (!IS_ALIGNED((u64)temp, L1_CACHE_BYTES))
1607                         temp = PTR_ALIGN(temp, L1_CACHE_BYTES);
1608         } else {
1609                 comp_cb->is_cache = true;
1610                 temp = kmem_cache_alloc(unaligned_cache, GFP_KERNEL);
1611                 if (!temp)
1612                         goto free_comp_cb;
1613                 comp_cb->temp_buf_to_free = temp;
1614         }
1615
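             /*
              * A local source is staged into the aligned bounce buffer by
              * the CPU before the DMA; for a remote source the destination
              * details are saved in comp_cb so the completion callback can
              * finish the copy out of the bounce buffer.
              */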
1616         if (src_local) {
1617                 temp += dst_cache_off;
1618                 scif_rma_local_cpu_copy(work->src_offset, work->src_window,
1619                                         temp, work->len, true);
1620         } else {
1621                 comp_cb->dst_window = work->dst_window;
1622                 comp_cb->dst_offset = work->dst_offset;
1623                 work->src_offset = work->src_offset - src_cache_off;
1624                 comp_cb->len = work->len;
1625                 work->len = ALIGN(work->len + src_cache_off, L1_CACHE_BYTES);
1626                 comp_cb->header_padding = src_cache_off;
1627         }
1628         comp_cb->temp_buf = temp;
1629
1630         err = scif_map_single(&comp_cb->temp_phys, temp,
1631                               work->remote_dev, SCIF_KMEM_UNALIGNED_BUF_SIZE);
1632         if (err)
1633                 goto free_temp_buf;
1634         comp_cb->sdev = work->remote_dev;
1635         if (scif_rma_list_dma_copy_unaligned(work, temp, chan, src_local) < 0)
1636                 goto free_temp_buf;
1637         if (!src_local)
1638                 work->fence_type = SCIF_DMA_INTR;
1639         return 0;
1640 free_temp_buf:
1641         if (comp_cb->is_cache)
1642                 kmem_cache_free(unaligned_cache, comp_cb->temp_buf_to_free);
1643         else
1644                 kfree(comp_cb->temp_buf_to_free);
1645 free_comp_cb:
1646         kfree(comp_cb);
1647 error:
1648         return -ENOMEM;
1649 }
1650
1651 /**
1652  * scif_rma_copy:
1653  * @epd: end point descriptor.
1654  * @loffset: offset in local registered address space to/from which to copy
1655  * @addr: user virtual address to/from which to copy
1656  * @len: length of range to copy
1657  * @roffset: offset in remote registered address space to/from which to copy
1658  * @flags: SCIF_RMA_USECPU, SCIF_RMA_USECACHE, SCIF_RMA_SYNC and/or SCIF_RMA_ORDERED
1659  * @dir: LOCAL->REMOTE or vice versa.
1660  * @last_chunk: true if this is the last chunk of a larger transfer
1661  *
1662  * Validate parameters, check if src/dst registered ranges requested for copy
1663  * are valid and initiate either CPU or DMA copy.
1664  */
1665 static int scif_rma_copy(scif_epd_t epd, off_t loffset, unsigned long addr,
1666                          size_t len, off_t roffset, int flags,
1667                          enum scif_rma_dir dir, bool last_chunk)
1668 {
1669         struct scif_endpt *ep = (struct scif_endpt *)epd;
1670         struct scif_rma_req remote_req;
1671         struct scif_rma_req req;
1672         struct scif_window *local_window = NULL;
1673         struct scif_window *remote_window = NULL;
1674         struct scif_copy_work copy_work;
1675         bool loopback;
1676         int err = 0;
1677         struct dma_chan *chan;
1678         struct scif_mmu_notif *mmn = NULL;
1679         bool cache = false;
1680         struct device *spdev;
1681
1682         err = scif_verify_epd(ep);
1683         if (err)
1684                 return err;
1685
1686         if (flags && !(flags & (SCIF_RMA_USECPU | SCIF_RMA_USECACHE |
1687                                 SCIF_RMA_SYNC | SCIF_RMA_ORDERED)))
1688                 return -EINVAL;
1689
1690         loopback = scifdev_self(ep->remote_dev);
1691         copy_work.fence_type = ((flags & SCIF_RMA_SYNC) && last_chunk) ?
1692                                 SCIF_DMA_POLL : 0;
1693         copy_work.ordered = !!((flags & SCIF_RMA_ORDERED) && last_chunk);
1694
1695         /* Use CPU for Mgmt node <-> Mgmt node copies */
1696         if (loopback && scif_is_mgmt_node()) {
1697                 flags |= SCIF_RMA_USECPU;
1698                 copy_work.fence_type = 0x0;
1699         }
1700
1701         cache = scif_is_set_reg_cache(flags);
1702
1703         remote_req.out_window = &remote_window;
1704         remote_req.offset = roffset;
1705         remote_req.nr_bytes = len;
1706         /*
1707          * If transfer is from local to remote then the remote window
1708          * must be writeable and vice versa.
1709          */
1710         remote_req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_WRITE : VM_READ;
1711         remote_req.type = SCIF_WINDOW_PARTIAL;
1712         remote_req.head = &ep->rma_info.remote_reg_list;
1713
1714         spdev = scif_get_peer_dev(ep->remote_dev);
1715         if (IS_ERR(spdev)) {
1716                 err = PTR_ERR(spdev);
1717                 return err;
1718         }
1719
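             /*
              * Registration caching: look up (or create) the MMU notifier
              * for this mm so that cached temporary windows on its
              * tc_reg_list can be reused below.
              */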
1720         if (addr && cache) {
1721                 mutex_lock(&ep->rma_info.mmn_lock);
1722                 mmn = scif_find_mmu_notifier(current->mm, &ep->rma_info);
1723                 if (!mmn)
1724                         mmn = scif_add_mmu_notifier(current->mm, ep);
1725                 mutex_unlock(&ep->rma_info.mmn_lock);
1726                 if (IS_ERR(mmn)) {
1727                         scif_put_peer_dev(spdev);
1728                         return PTR_ERR(mmn);
1729                 }
1730                 cache = cache && !scif_rma_tc_can_cache(ep, len);
1731         }
1732         mutex_lock(&ep->rma_info.rma_lock);
1733         if (addr) {
1734                 req.out_window = &local_window;
1735                 req.nr_bytes = ALIGN(len + (addr & ~PAGE_MASK),
1736                                      PAGE_SIZE);
1737                 req.va_for_temp = addr & PAGE_MASK;
1738                 req.prot = (dir == SCIF_LOCAL_TO_REMOTE ?
1739                             VM_READ : VM_WRITE | VM_READ);
1740                 /* Does a valid local window exist? */
1741                 if (mmn) {
1742                         spin_lock(&ep->rma_info.tc_lock);
1743                         req.head = &mmn->tc_reg_list;
1744                         err = scif_query_tcw(ep, &req);
1745                         spin_unlock(&ep->rma_info.tc_lock);
1746                 }
1747                 if (!mmn || err) {
1748                         err = scif_register_temp(epd, req.va_for_temp,
1749                                                  req.nr_bytes, req.prot,
1750                                                  &loffset, &local_window);
1751                         if (err) {
1752                                 mutex_unlock(&ep->rma_info.rma_lock);
1753                                 goto error;
1754                         }
1755                         if (!cache)
1756                                 goto skip_cache;
1757                         atomic_inc(&ep->rma_info.tcw_refcount);
1758                         atomic_add_return(local_window->nr_pages,
1759                                           &ep->rma_info.tcw_total_pages);
1760                         if (mmn) {
1761                                 spin_lock(&ep->rma_info.tc_lock);
1762                                 scif_insert_tcw(local_window,
1763                                                 &mmn->tc_reg_list);
1764                                 spin_unlock(&ep->rma_info.tc_lock);
1765                         }
1766                 }
1767 skip_cache:
1768                 loffset = local_window->offset +
1769                                 (addr - local_window->va_for_temp);
1770         } else {
1771                 req.out_window = &local_window;
1772                 req.offset = loffset;
1773                 /*
1774                  * If transfer is from local to remote then the self window
1775                  * must be readable and vice versa.
1776                  */
1777                 req.prot = dir == SCIF_LOCAL_TO_REMOTE ? VM_READ : VM_WRITE;
1778                 req.nr_bytes = len;
1779                 req.type = SCIF_WINDOW_PARTIAL;
1780                 req.head = &ep->rma_info.reg_list;
1781                 /* Does a valid local window exist? */
1782                 err = scif_query_window(&req);
1783                 if (err) {
1784                         mutex_unlock(&ep->rma_info.rma_lock);
1785                         goto error;
1786                 }
1787         }
1788
1789         /* Does a valid remote window exist? */
1790         err = scif_query_window(&remote_req);
1791         if (err) {
1792                 mutex_unlock(&ep->rma_info.rma_lock);
1793                 goto error;
1794         }
1795
1796         /*
1797          * Prepare copy_work for submitting work to the DMA kernel thread
1798          * or CPU copy routine.
1799          */
1800         copy_work.len = len;
1801         copy_work.loopback = loopback;
1802         copy_work.remote_dev = ep->remote_dev;
1803         if (dir == SCIF_LOCAL_TO_REMOTE) {
1804                 copy_work.src_offset = loffset;
1805                 copy_work.src_window = local_window;
1806                 copy_work.dst_offset = roffset;
1807                 copy_work.dst_window = remote_window;
1808         } else {
1809                 copy_work.src_offset = roffset;
1810                 copy_work.src_window = remote_window;
1811                 copy_work.dst_offset = loffset;
1812                 copy_work.dst_window = local_window;
1813         }
1814
1815         if (flags & SCIF_RMA_USECPU) {
1816                 scif_rma_list_cpu_copy(&copy_work);
1817         } else {
1818                 chan = ep->rma_info.dma_chan;
1819                 err = scif_rma_list_dma_copy_wrapper(epd, &copy_work,
1820                                                      chan, loffset);
1821         }
1822         if (addr && !cache)
1823                 atomic_inc(&ep->rma_info.tw_refcount);
1824
1825         mutex_unlock(&ep->rma_info.rma_lock);
1826
1827         if (last_chunk) {
1828                 struct scif_dev *rdev = ep->remote_dev;
1829
1830                 if (copy_work.fence_type == SCIF_DMA_POLL)
1831                         err = scif_drain_dma_poll(rdev->sdev,
1832                                                   ep->rma_info.dma_chan);
1833                 else if (copy_work.fence_type == SCIF_DMA_INTR)
1834                         err = scif_drain_dma_intr(rdev->sdev,
1835                                                   ep->rma_info.dma_chan);
1836         }
1837
1838         if (addr && !cache)
1839                 scif_queue_for_cleanup(local_window, &scif_info.rma);
1840         scif_put_peer_dev(spdev);
1841         return err;
1842 error:
1843         if (err) {
1844                 if (addr && local_window && !cache)
1845                         scif_destroy_window(ep, local_window);
1846                 dev_err(scif_info.mdev.this_device,
1847                         "%s %d err %d len 0x%lx\n",
1848                         __func__, __LINE__, err, len);
1849         }
1850         scif_put_peer_dev(spdev);
1851         return err;
1852 }
1853
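     /*
      * scif_readfrom() - Copy @len bytes starting at offset @roffset of the
      * remote registered address space into offset @loffset of the local
      * registered address space of @epd.
      *
      * When scif_unaligned(loffset, roffset), the transfer is issued in
      * SCIF_MAX_UNALIGNED_BUF_SIZE chunks and only the final chunk is marked
      * as the last one, so fencing and ordering are applied once.
      *
      * Illustrative call (assumes epd is connected and both offsets are
      * already registered; error handling omitted):
      *
      *	err = scif_readfrom(epd, loffset, len, roffset, SCIF_RMA_SYNC);
      */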
1854 int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len,
1855                   off_t roffset, int flags)
1856 {
1857         int err;
1858
1859         dev_dbg(scif_info.mdev.this_device,
1860                 "SCIFAPI readfrom: ep %p loffset 0x%lx len 0x%lx offset 0x%lx flags 0x%x\n",
1861                 epd, loffset, len, roffset, flags);
1862         if (scif_unaligned(loffset, roffset)) {
1863                 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1864                         err = scif_rma_copy(epd, loffset, 0x0,
1865                                             SCIF_MAX_UNALIGNED_BUF_SIZE,
1866                                             roffset, flags,
1867                                             SCIF_REMOTE_TO_LOCAL, false);
1868                         if (err)
1869                                 goto readfrom_err;
1870                         loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1871                         roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1872                         len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1873                 }
1874         }
1875         err = scif_rma_copy(epd, loffset, 0x0, len,
1876                             roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
1877 readfrom_err:
1878         return err;
1879 }
1880 EXPORT_SYMBOL_GPL(scif_readfrom);
1881
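     /*
      * scif_writeto() - Copy @len bytes starting at offset @loffset of the
      * local registered address space of @epd to offset @roffset of the
      * remote registered address space.  Chunked in the same way as
      * scif_readfrom() when scif_unaligned(loffset, roffset).
      */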
1882 int scif_writeto(scif_epd_t epd, off_t loffset, size_t len,
1883                  off_t roffset, int flags)
1884 {
1885         int err;
1886
1887         dev_dbg(scif_info.mdev.this_device,
1888                 "SCIFAPI writeto: ep %p loffset 0x%lx len 0x%lx roffset 0x%lx flags 0x%x\n",
1889                 epd, loffset, len, roffset, flags);
1890         if (scif_unaligned(loffset, roffset)) {
1891                 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1892                         err = scif_rma_copy(epd, loffset, 0x0,
1893                                             SCIF_MAX_UNALIGNED_BUF_SIZE,
1894                                             roffset, flags,
1895                                             SCIF_LOCAL_TO_REMOTE, false);
1896                         if (err)
1897                                 goto writeto_err;
1898                         loffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1899                         roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1900                         len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1901                 }
1902         }
1903         err = scif_rma_copy(epd, loffset, 0x0, len,
1904                             roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
1905 writeto_err:
1906         return err;
1907 }
1908 EXPORT_SYMBOL_GPL(scif_writeto);
1909
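     /*
      * scif_vreadfrom() - Copy @len bytes starting at offset @roffset of the
      * remote registered address space into the virtual address range at
      * @addr; a temporary window is registered for @addr as needed.
      * Unaligned transfers larger than SCIF_MAX_UNALIGNED_BUF_SIZE are
      * chunked and have registration caching disabled.
      *
      * Illustrative call (buf is a hypothetical, page-backed buffer;
      * error handling omitted):
      *
      *	err = scif_vreadfrom(epd, buf, len, roffset, SCIF_RMA_USECACHE);
      */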
1910 int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len,
1911                    off_t roffset, int flags)
1912 {
1913         int err;
1914
1915         dev_dbg(scif_info.mdev.this_device,
1916                 "SCIFAPI vreadfrom: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
1917                 epd, addr, len, roffset, flags);
1918         if (scif_unaligned((off_t __force)addr, roffset)) {
1919                 if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
1920                         flags &= ~SCIF_RMA_USECACHE;
1921
1922                 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1923                         err = scif_rma_copy(epd, 0, (u64)addr,
1924                                             SCIF_MAX_UNALIGNED_BUF_SIZE,
1925                                             roffset, flags,
1926                                             SCIF_REMOTE_TO_LOCAL, false);
1927                         if (err)
1928                                 goto vreadfrom_err;
1929                         addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
1930                         roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1931                         len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1932                 }
1933         }
1934         err = scif_rma_copy(epd, 0, (u64)addr, len,
1935                             roffset, flags, SCIF_REMOTE_TO_LOCAL, true);
1936 vreadfrom_err:
1937         return err;
1938 }
1939 EXPORT_SYMBOL_GPL(scif_vreadfrom);
1940
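     /*
      * scif_vwriteto() - Copy @len bytes from the virtual address range at
      * @addr to offset @roffset of the remote registered address space of
      * @epd.  Chunking and cache handling mirror scif_vreadfrom().
      */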
1941 int scif_vwriteto(scif_epd_t epd, void *addr, size_t len,
1942                   off_t roffset, int flags)
1943 {
1944         int err;
1945
1946         dev_dbg(scif_info.mdev.this_device,
1947                 "SCIFAPI vwriteto: ep %p addr %p len 0x%lx roffset 0x%lx flags 0x%x\n",
1948                 epd, addr, len, roffset, flags);
1949         if (scif_unaligned((off_t __force)addr, roffset)) {
1950                 if (len > SCIF_MAX_UNALIGNED_BUF_SIZE)
1951                         flags &= ~SCIF_RMA_USECACHE;
1952
1953                 while (len > SCIF_MAX_UNALIGNED_BUF_SIZE) {
1954                         err = scif_rma_copy(epd, 0, (u64)addr,
1955                                             SCIF_MAX_UNALIGNED_BUF_SIZE,
1956                                             roffset, flags,
1957                                             SCIF_LOCAL_TO_REMOTE, false);
1958                         if (err)
1959                                 goto vwriteto_err;
1960                         addr += SCIF_MAX_UNALIGNED_BUF_SIZE;
1961                         roffset += SCIF_MAX_UNALIGNED_BUF_SIZE;
1962                         len -= SCIF_MAX_UNALIGNED_BUF_SIZE;
1963                 }
1964         }
1965         err = scif_rma_copy(epd, 0, (u64)addr, len,
1966                             roffset, flags, SCIF_LOCAL_TO_REMOTE, true);
1967 vwriteto_err:
1968         return err;
1969 }
1970 EXPORT_SYMBOL_GPL(scif_vwriteto);