Commit | Line | Data |
---|---|---|
c3ae6175 SD |
1 | /* |
2 | * Intel MIC Platform Software Stack (MPSS) | |
3 | * | |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * Copyright(c) 2015 Intel Corporation. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of version 2 of the GNU General Public License as | |
13 | * published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, but | |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * General Public License for more details. | |
19 | * | |
20 | * BSD LICENSE | |
21 | * | |
22 | * Copyright(c) 2015 Intel Corporation. | |
23 | * | |
24 | * Redistribution and use in source and binary forms, with or without | |
25 | * modification, are permitted provided that the following conditions | |
26 | * are met: | |
27 | * | |
28 | * * Redistributions of source code must retain the above copyright | |
29 | * notice, this list of conditions and the following disclaimer. | |
30 | * * Redistributions in binary form must reproduce the above copyright | |
31 | * notice, this list of conditions and the following disclaimer in | |
32 | * the documentation and/or other materials provided with the | |
33 | * distribution. | |
34 | * * Neither the name of Intel Corporation nor the names of its | |
35 | * contributors may be used to endorse or promote products derived | |
36 | * from this software without specific prior written permission. | |
37 | * | |
38 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
39 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
40 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
41 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
42 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
43 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
44 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
45 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
46 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
47 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
48 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
49 | * | |
50 | * Intel SCIF driver. | |
51 | * | |
52 | */ | |
53 | #ifndef SCIF_RMA_H | |
54 | #define SCIF_RMA_H | |
55 | ||
daedaa33 | 56 | #include <linux/intel-iommu.h> |
c3ae6175 SD |
57 | #include <linux/mmu_notifier.h> |
58 | ||
59 | #include "../bus/scif_bus.h" | |
60 | ||
/* If this bit is set then the mark is a remote fence mark */
#define SCIF_REMOTE_FENCE_BIT 31
/* Magic value used to indicate a remote fence request */
#define SCIF_REMOTE_FENCE BIT_ULL(SCIF_REMOTE_FENCE_BIT)

/* Largest cache-line-unaligned transfer handled via an intermediate buffer */
#define SCIF_MAX_UNALIGNED_BUF_SIZE (1024 * 1024ULL)
/*
 * Object size for unaligned_cache; the two extra cache lines presumably
 * provide head/tail alignment slack — confirm against the users of
 * unaligned_cache in scif_dma.c.
 */
#define SCIF_KMEM_UNALIGNED_BUF_SIZE (SCIF_MAX_UNALIGNED_BUF_SIZE + \
				      (L1_CACHE_BYTES << 1))

/* First PFN handed out by the iova offset generator (starts at 1, not 0) */
#define SCIF_IOVA_START_PFN (1)
/* Convert a byte address/offset into a page frame number */
#define SCIF_IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
/* Largest PFNs representable under 64-bit / 63-bit DMA masks */
#define SCIF_DMA_64BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(64))
#define SCIF_DMA_63BIT_PFN SCIF_IOVA_PFN(DMA_BIT_MASK(63))
74 | ||
/*
 * struct scif_endpt_rma_info - Per Endpoint Remote Memory Access Information
 *
 * @reg_list: List of registration windows for self
 * @remote_reg_list: List of registration windows for peer
 * @iovad: Offset generator
 * @rma_lock: Synchronizes access to self/remote list and also protects the
 *	      window from being destroyed while RMAs are in progress.
 * @tc_lock: Synchronizes access to temporary cached windows list
 *	     for SCIF Registration Caching.
 * @mmn_lock: Synchronizes access to the list of MMU notifiers registered
 * @tw_refcount: Keeps track of number of outstanding temporary registered
 *		 windows created by scif_vreadfrom/scif_vwriteto which have
 *		 not been destroyed.
 * @tcw_refcount: Same as tw_refcount but for temporary cached windows
 * @tcw_total_pages: Same as tcw_refcount but in terms of pages pinned
 * @mmn_list: List of MMU notifiers so that the windows can be destroyed
 *	      when required
 * @fence_refcount: Keeps track of number of outstanding remote fence
 *		    requests which have been received by the peer.
 * @dma_chan: DMA channel used for all DMA transfers for this endpoint.
 * @async_list_del: Detect asynchronous list entry deletion
 * @vma_list: List of vmas with remote memory mappings
 * @markwq: Wait queue used for scif_fence_mark/scif_fence_wait
 */
struct scif_endpt_rma_info {
	struct list_head reg_list;
	struct list_head remote_reg_list;
	struct iova_domain iovad;
	struct mutex rma_lock;
	spinlock_t tc_lock;
	struct mutex mmn_lock;
	atomic_t tw_refcount;
	atomic_t tcw_refcount;
	atomic_t tcw_total_pages;
	struct list_head mmn_list;
	atomic_t fence_refcount;
	struct dma_chan *dma_chan;
	int async_list_del;
	struct list_head vma_list;
	wait_queue_head_t markwq;
};
116 | ||
/*
 * struct scif_fence_info - used for tracking fence requests
 *
 * @state: State of this transfer
 * @comp: Completion used to wait for the fence request to finish
 *	  (the old kerneldoc said "@wq" but the field is a struct completion)
 * @dma_mark: Used for storing the DMA mark
 */
struct scif_fence_info {
	enum scif_msg_state state;
	struct completion comp;
	int dma_mark;
};
129 | ||
/*
 * struct scif_remote_fence_info - used for tracking remote fence requests
 *
 * @msg: Copy of the SCIF node QP fence message
 * @list: Link to list of remote fence requests
 */
struct scif_remote_fence_info {
	struct scifmsg msg;
	struct list_head list;
};
140 | ||
/*
 * Specifies whether an RMA operation can span across partial windows, a single
 * window or multiple contiguous windows. Mmaps can span across partial windows.
 * Unregistration can span across complete windows. scif_get_pages() can span a
 * single window. A window can also be of type self or peer.
 */
enum scif_window_type {
	SCIF_WINDOW_PARTIAL,	/* Operation may span partial windows (mmap) */
	SCIF_WINDOW_SINGLE,	/* Operation limited to a single window */
	SCIF_WINDOW_FULL,	/* Operation spans complete windows (unreg) */
	SCIF_WINDOW_SELF,	/* Window registered locally */
	SCIF_WINDOW_PEER	/* Window registered by the peer */
};
154 | ||
/* The number of physical addresses that can be stored in a PAGE. */
#define SCIF_NR_ADDR_IN_PAGE (0x1000 >> 3)

/*
 * struct scif_rma_lookup - RMA lookup data structure for page list transfers
 *
 * Store an array of lookup offsets. Each offset in this array maps
 * one 4K page containing 512 physical addresses i.e. 2MB. 512 such
 * offsets in a 4K page will correspond to 1GB of registered address space.
 *
 * @lookup: Array of offsets
 * @offset: DMA offset of lookup array
 */
struct scif_rma_lookup {
	dma_addr_t *lookup;
	dma_addr_t offset;
};
172 | ||
/*
 * struct scif_pinned_pages - A set of pinned pages obtained with
 * scif_pin_pages() which could be part of multiple registered
 * windows across different end points.
 *
 * @nr_pages: Number of pages which is defined as a s64 instead of an int
 *	      to avoid sign extension with buffers >= 2GB
 * @prot: read/write protections
 * @map_flags: Flags specified during the pin operation
 * @ref_count: Reference count bumped in terms of number of pages
 * @magic: A magic value used to detect corruption
 * @pages: Array of pointers to struct pages populated with get_user_pages(..)
 */
struct scif_pinned_pages {
	s64 nr_pages;
	int prot;
	int map_flags;
	atomic_t ref_count;
	u64 magic;
	struct page **pages;
};
194 | ||
/*
 * struct scif_status - Stores DMA status update information
 *
 * @src_dma_addr: Source buffer DMA address
 * @val: src location for value to be written to the destination
 * @ep: SCIF endpoint
 */
struct scif_status {
	dma_addr_t src_dma_addr;
	u64 val;
	struct scif_endpt *ep;
};
207 | ||
/*
 * struct scif_window - Registration Window for Self and Remote
 *
 * @nr_pages: Number of pages which is defined as a s64 instead of an int
 *	      to avoid sign extension with buffers >= 2GB
 * @nr_contig_chunks: Number of contiguous physical chunks
 * @prot: read/write protections
 * @ref_count: reference count in terms of number of pages
 * @magic: Cookie to detect corruption
 * @offset: registered offset
 * @va_for_temp: va address that this window represents
 * @dma_mark: Used to determine if all DMAs against the window are done
 * @ep: Pointer to EP, stored as a u64 so it can travel in messages. Useful
 *	for passing EP around with messages to avoid expensive list traversals.
 * @list: link to list of windows for the endpoint
 * @type: self or peer window
 * @peer_window: Pointer to peer window (as a u64). Useful for sending
 *		 messages to peer without requiring an extra list traversal
 * @unreg_state: unregistration state
 * @offset_freed: True if the offset has been freed
 * @temp: True for temporary windows created via scif_vreadfrom/scif_vwriteto
 * @mm: memory descriptor for the task_struct which initiated the RMA
 * @st: scatter gather table for DMA mappings with IOMMU enabled
 * @pinned_pages: The set of pinned_pages backing this window
 * @alloc_handle: Handle for sending ALLOC_REQ
 * @regwq: Wait Queue for an registration (N)ACK
 * @reg_state: Registration state
 * @unregwq: Wait Queue for an unregistration (N)ACK
 * @dma_addr_lookup: Lookup for physical addresses used for DMA
 * @num_pages_lookup: Lookup for the number of pages backing each physical
 *		      address (parallels @dma_addr_lookup; was missing from
 *		      the original kerneldoc)
 * @nr_lookup: Number of entries in lookup
 * @mapped_offset: Offset used to map the window by the peer
 * @dma_addr: Array of physical addresses used for Mgmt node & MIC initiated DMA
 * @num_pages: Array specifying number of pages for each physical address
 */
struct scif_window {
	s64 nr_pages;
	int nr_contig_chunks;
	int prot;
	int ref_count;
	u64 magic;
	s64 offset;
	unsigned long va_for_temp;
	int dma_mark;
	u64 ep;
	struct list_head list;
	enum scif_window_type type;
	u64 peer_window;
	enum scif_msg_state unreg_state;
	bool offset_freed;
	bool temp;
	struct mm_struct *mm;
	struct sg_table *st;
	union {
		/* Fields used only by self (local) windows */
		struct {
			struct scif_pinned_pages *pinned_pages;
			struct scif_allocmsg alloc_handle;
			wait_queue_head_t regwq;
			enum scif_msg_state reg_state;
			wait_queue_head_t unregwq;
		};
		/* Fields used only by peer (remote) windows */
		struct {
			struct scif_rma_lookup dma_addr_lookup;
			struct scif_rma_lookup num_pages_lookup;
			int nr_lookup;
			dma_addr_t mapped_offset;
		};
	};
	dma_addr_t *dma_addr;
	u64 *num_pages;
} __packed;
278 | ||
/*
 * struct scif_mmu_notif - SCIF mmu notifier information
 *
 * @ep_mmu_notifier: MMU notifier operations (only with CONFIG_MMU_NOTIFIER)
 * @tc_reg_list: List of temp registration windows for self
 * @mm: memory descriptor for the task_struct which initiated the RMA
 * @ep: SCIF endpoint
 * @list: link to list of MMU notifier information
 */
struct scif_mmu_notif {
#ifdef CONFIG_MMU_NOTIFIER
	struct mmu_notifier ep_mmu_notifier;
#endif
	struct list_head tc_reg_list;
	struct mm_struct *mm;
	struct scif_endpt *ep;
	struct list_head list;
};
297 | ||
/* Direction of an RMA transfer relative to the local endpoint */
enum scif_rma_dir {
	SCIF_LOCAL_TO_REMOTE,
	SCIF_REMOTE_TO_LOCAL
};
302 | ||
/* kmem_cache backing bounce buffers for cache-line unaligned transfers */
extern struct kmem_cache *unaligned_cache;
/* Initialize RMA for this EP */
void scif_rma_ep_init(struct scif_endpt *ep);
/* Check if epd can be uninitialized */
int scif_rma_ep_can_uninit(struct scif_endpt *ep);
/* Obtain a new offset. Callee must grab RMA lock */
int scif_get_window_offset(struct scif_endpt *ep, int flags,
			   s64 offset, int nr_pages, s64 *out_offset);
/* Free offset. Callee must grab RMA lock */
void scif_free_window_offset(struct scif_endpt *ep,
			     struct scif_window *window, s64 offset);
/* Create self registration window */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
				       s64 offset, bool temp);
/* Destroy self registration window.*/
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window);
/* Tear down the DMA mappings of a self window */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window);
/* Map pages of self window to Aperture/PCI */
int scif_map_window(struct scif_dev *remote_dev,
		    struct scif_window *window);
/* Unregister a self window */
int scif_unregister_window(struct scif_window *window);
/* Destroy remote registration window */
void
scif_destroy_remote_window(struct scif_window *window);
/* remove valid remote memory mappings from process address space */
void scif_zap_mmaps(int node);
/* Query if any applications have remote memory mappings */
bool scif_rma_do_apps_have_mmaps(int node);
/* Cleanup remote registration lists for zombie endpoints */
void scif_cleanup_rma_for_zombies(int node);
/* Reserve a DMA channel for a particular endpoint */
int scif_reserve_dma_chan(struct scif_endpt *ep);
/* Setup a DMA mark for an endpoint */
int _scif_fence_mark(scif_epd_t epd, int *mark);
/* Program a fence signal (presumably writes @val at @offset — see scif_fence.c) */
int scif_prog_signal(scif_epd_t epd, off_t offset, u64 val,
		     enum scif_window_type type);
/* SCIF node QP message handlers for RMA registration, mmap and fences */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_munmap(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_mark(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_mark_resp(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_wait(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_wait_resp(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_sig_local(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_sig_remote(struct scif_dev *scifdev, struct scifmsg *msg);
void scif_recv_sig_resp(struct scif_dev *scifdev, struct scifmsg *msg);
/* Work item handler for deferred MMU-notifier driven teardown */
void scif_mmu_notif_handler(struct work_struct *work);
/* Process queued remote fence requests */
void scif_rma_handle_remote_fences(void);
/* Drain the deferred-destroy window lists */
void scif_rma_destroy_windows(void);
void scif_rma_destroy_tcw_invalid(void);
/* Wait (interruptibly) for all DMA on @chan to complete */
int scif_drain_dma_intr(struct scif_hw_dev *sdev, struct dma_chan *chan);
362 | ||
/*
 * struct scif_window_iter - Cursor state for scif_off_to_dma_addr()
 *
 * @offset: Current registered offset within the window
 * @index: Current position within the window; presumably indexes the
 *	   window's dma_addr/num_pages arrays — confirm in scif_rma.c
 */
struct scif_window_iter {
	s64 offset;
	int index;
};
367 | ||
368 | static inline void | |
369 | scif_init_window_iter(struct scif_window *window, struct scif_window_iter *iter) | |
370 | { | |
371 | iter->offset = window->offset; | |
372 | iter->index = 0; | |
373 | } | |
374 | ||
/*
 * Translate registered offset @off within @window to a DMA address.
 * @nr_bytes and @iter are optional (may be NULL, as the wrapper below shows).
 */
dma_addr_t scif_off_to_dma_addr(struct scif_window *window, s64 off,
				size_t *nr_bytes,
				struct scif_window_iter *iter);
/* Single-offset lookup without a byte count or iterator state */
static inline
dma_addr_t __scif_off_to_dma_addr(struct scif_window *window, s64 off)
{
	return scif_off_to_dma_addr(window, off, NULL, NULL);
}
383 | ||
384 | static inline bool scif_unaligned(off_t src_offset, off_t dst_offset) | |
385 | { | |
386 | src_offset = src_offset & (L1_CACHE_BYTES - 1); | |
387 | dst_offset = dst_offset & (L1_CACHE_BYTES - 1); | |
388 | return !(src_offset == dst_offset); | |
389 | } | |
390 | ||
391 | /* | |
392 | * scif_zalloc: | |
393 | * @size: Size of the allocation request. | |
394 | * | |
395 | * Helper API which attempts to allocate zeroed pages via | |
396 | * __get_free_pages(..) first and then falls back on | |
397 | * vzalloc(..) if that fails. | |
398 | */ | |
399 | static inline void *scif_zalloc(size_t size) | |
400 | { | |
401 | void *ret = NULL; | |
402 | size_t align = ALIGN(size, PAGE_SIZE); | |
403 | ||
404 | if (align && get_order(align) < MAX_ORDER) | |
405 | ret = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | |
406 | get_order(align)); | |
407 | return ret ? ret : vzalloc(align); | |
408 | } | |
409 | ||
410 | /* | |
411 | * scif_free: | |
412 | * @addr: Address to be freed. | |
413 | * @size: Size of the allocation. | |
414 | * Helper API which frees memory allocated via scif_zalloc(). | |
415 | */ | |
416 | static inline void scif_free(void *addr, size_t size) | |
417 | { | |
418 | size_t align = ALIGN(size, PAGE_SIZE); | |
419 | ||
420 | if (is_vmalloc_addr(addr)) | |
421 | vfree(addr); | |
422 | else | |
423 | free_pages((unsigned long)addr, get_order(align)); | |
424 | } | |
425 | ||
/*
 * Take @nr_pages page references on @window. ref_count is a plain int, so
 * callers presumably serialize via the endpoint's rma_lock — confirm.
 */
static inline void scif_get_window(struct scif_window *window, int nr_pages)
{
	window->ref_count += nr_pages;
}
430 | ||
/*
 * Drop @nr_pages page references on @window. Unsynchronized int update;
 * presumably serialized by the endpoint's rma_lock — confirm.
 */
static inline void scif_put_window(struct scif_window *window, int nr_pages)
{
	window->ref_count -= nr_pages;
}
435 | ||
/* Initialize @window's reference count to exactly @nr_pages */
static inline void scif_set_window_ref(struct scif_window *window, int nr_pages)
{
	window->ref_count = nr_pages;
}
440 | ||
/*
 * Append @window to @list under the global RMA lock, then kick the misc
 * work item, whose handler presumably performs the deferred destruction —
 * see scif_info.misc_work's handler for confirmation.
 */
static inline void
scif_queue_for_cleanup(struct scif_window *window, struct list_head *list)
{
	spin_lock(&scif_info.rmalock);
	list_add_tail(&window->list, list);
	spin_unlock(&scif_info.rmalock);
	/* scheduled outside the spinlock; schedule_work() may sleep-wake */
	schedule_work(&scif_info.misc_work);
}
449 | ||
/*
 * Detach a temporary cached window from whatever list it is on and queue
 * it on scif_info.rma_tc for deferred cleanup.
 */
static inline void __scif_rma_destroy_tcw_helper(struct scif_window *window)
{
	list_del_init(&window->list);
	scif_queue_for_cleanup(window, &scif_info.rma_tc);
}
455 | ||
/*
 * Report whether the Intel IOMMU is enabled. Compiles to a constant false
 * when the kernel is built without CONFIG_INTEL_IOMMU; otherwise reads
 * intel_iommu_enabled from <linux/intel-iommu.h>.
 */
static inline bool scif_is_iommu_enabled(void)
{
#ifdef CONFIG_INTEL_IOMMU
	return intel_iommu_enabled;
#else
	return false;
#endif
}
464 | #endif /* SCIF_RMA_H */ |