Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
0a19e61e DJS |
2 | /* |
3 | * channel program interfaces | |
4 | * | |
5 | * Copyright IBM Corp. 2017 | |
6 | * | |
7 | * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> | |
8 | * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com> | |
9 | */ | |
10 | ||
725b94d7 | 11 | #include <linux/ratelimit.h> |
0a19e61e DJS |
12 | #include <linux/mm.h> |
13 | #include <linux/slab.h> | |
c2863feb | 14 | #include <linux/highmem.h> |
0a19e61e DJS |
15 | #include <linux/iommu.h> |
16 | #include <linux/vfio.h> | |
17 | #include <asm/idals.h> | |
18 | ||
19 | #include "vfio_ccw_cp.h" | |
0a587956 | 20 | #include "vfio_ccw_private.h" |
0a19e61e | 21 | |
/*
 * Bookkeeping for one translated CCW's data area: the guest iovas to
 * pin and the host pages that result from pinning them.
 */
struct page_array {
	/* Array that stores pages need to pin. */
	dma_addr_t *pa_iova;
	/* Array that receives the pinned pages. */
	struct page **pa_page;
	/* Number of pages pinned from @pa_iova. */
	int pa_nr;
};
30 | ||
/* One contiguous segment of a guest channel program, copied into the host. */
struct ccwchain {
	/* Links this chain into channel_program->ccwchain_list. */
	struct list_head next;
	/* Host copy of the guest CCWs for this segment. */
	struct ccw1 *ch_ccw;
	/* Guest physical address of the current chain. */
	u64 ch_iova;
	/* Count of the valid ccws in chain. */
	int ch_len;
	/* Pinned PAGEs for the original data. */
	struct page_array *ch_pa;
};
41 | ||
42 | /* | |
13314605 NC |
43 | * page_array_alloc() - alloc memory for page array |
44 | * @pa: page_array on which to perform the operation | |
62a97a56 | 45 | * @len: number of pages that should be pinned from @iova |
0a19e61e | 46 | * |
13314605 | 47 | * Attempt to allocate memory for page array. |
0a19e61e | 48 | * |
13314605 NC |
49 | * Usage of page_array: |
50 | * We expect (pa_nr == 0) and (pa_iova == NULL), any field in | |
5c1cfb1c | 51 | * this structure will be filled in by this function. |
0a19e61e DJS |
52 | * |
53 | * Returns: | |
13314605 NC |
54 | * 0 if page array is allocated |
55 | * -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL | |
e4f3f18b | 56 | * -ENOMEM if alloc failed |
0a19e61e | 57 | */ |
61783394 | 58 | static int page_array_alloc(struct page_array *pa, unsigned int len) |
0a19e61e | 59 | { |
13314605 | 60 | if (pa->pa_nr || pa->pa_iova) |
0a19e61e DJS |
61 | return -EINVAL; |
62 | ||
62a97a56 | 63 | if (len == 0) |
0a19e61e DJS |
64 | return -EINVAL; |
65 | ||
62a97a56 EF |
66 | pa->pa_nr = len; |
67 | ||
68 | pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL); | |
69 | if (!pa->pa_iova) | |
70 | return -ENOMEM; | |
71 | ||
72 | pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL); | |
73 | if (!pa->pa_page) { | |
74 | kfree(pa->pa_iova); | |
0a19e61e | 75 | return -ENOMEM; |
c1ab6926 | 76 | } |
0a19e61e | 77 | |
e4f3f18b EF |
78 | return 0; |
79 | } | |
80 | ||
/*
 * page_array_unpin() - Unpin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform the operation
 * @pa_nr: number of user pages to unpin
 * @unaligned: were pages unaligned on the pin request
 *
 * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0,
 * otherwise only clear pa->pa_nr
 */
static void page_array_unpin(struct page_array *pa,
			     struct vfio_device *vdev, int pa_nr, bool unaligned)
{
	int unpinned = 0, npage = 1;

	while (unpinned < pa_nr) {
		dma_addr_t *first = &pa->pa_iova[unpinned];
		dma_addr_t *last = &first[npage];

		/*
		 * Grow the run while the next iova is exactly PAGE_SIZE past
		 * the previous one, so contiguous iovas are unpinned with a
		 * single vfio_unpin_pages() call.  Unaligned (2K) requests
		 * were pinned one at a time, so never coalesce those.
		 */
		if (unpinned + npage < pa_nr &&
		    *first + npage * PAGE_SIZE == *last &&
		    !unaligned) {
			npage++;
			continue;
		}

		vfio_unpin_pages(vdev, *first, npage);
		unpinned += npage;
		npage = 1;
	}

	pa->pa_nr = 0;
}
114 | ||
/*
 * page_array_pin() - Pin user pages in memory
 * @pa: page_array on which to perform the operation
 * @vdev: the vfio device to perform pin operations
 * @unaligned: are pages aligned to 4K boundary?
 *
 * Returns number of pages pinned upon success.
 * If the pin request partially succeeds, or fails completely,
 * all pages are left unpinned and a negative error value is returned.
 *
 * Requests to pin "aligned" pages can be coalesced into a single
 * vfio_pin_pages request for the sake of efficiency, based on the
 * expectation of 4K page requests. Unaligned requests are probably
 * dealing with 2K "pages", and cannot be coalesced without
 * reworking this logic to incorporate that math.
 */
static int page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
{
	int pinned = 0, npage = 1;
	int ret = 0;

	while (pinned < pa->pa_nr) {
		dma_addr_t *first = &pa->pa_iova[pinned];
		dma_addr_t *last = &first[npage];

		/* Extend the run while the next iova is contiguous (4K case). */
		if (pinned + npage < pa->pa_nr &&
		    *first + npage * PAGE_SIZE == *last &&
		    !unaligned) {
			npage++;
			continue;
		}

		ret = vfio_pin_pages(vdev, *first, npage,
				     IOMMU_READ | IOMMU_WRITE,
				     &pa->pa_page[pinned]);
		if (ret < 0) {
			goto err_out;
		} else if (ret > 0 && ret != npage) {
			/* Partial pin: record what stuck, then unwind it all. */
			pinned += ret;
			ret = -EINVAL;
			goto err_out;
		}
		pinned += npage;
		npage = 1;
	}

	return ret;

err_out:
	page_array_unpin(pa, vdev, pinned, unaligned);
	return ret;
}
167 | ||
5c1cfb1c | 168 | /* Unpin the pages before releasing the memory. */ |
b5a73e8e | 169 | static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev, bool unaligned) |
5c1cfb1c | 170 | { |
b5a73e8e | 171 | page_array_unpin(pa, vdev, pa->pa_nr, unaligned); |
62a97a56 | 172 | kfree(pa->pa_page); |
13314605 | 173 | kfree(pa->pa_iova); |
5c1cfb1c DJS |
174 | } |
175 | ||
5a4fe7c4 | 176 | static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length) |
0a19e61e | 177 | { |
5a4fe7c4 EF |
178 | u64 iova_pfn_start = iova >> PAGE_SHIFT; |
179 | u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT; | |
180 | u64 pfn; | |
e7eaf91b | 181 | int i; |
0a19e61e | 182 | |
5a4fe7c4 EF |
183 | for (i = 0; i < pa->pa_nr; i++) { |
184 | pfn = pa->pa_iova[i] >> PAGE_SHIFT; | |
185 | if (pfn >= iova_pfn_start && pfn <= iova_pfn_end) | |
e7eaf91b | 186 | return true; |
5a4fe7c4 | 187 | } |
0a19e61e DJS |
188 | |
189 | return false; | |
190 | } | |
/* Create the list of IDAL words for a page_array. */
static inline void page_array_idal_create_words(struct page_array *pa,
						unsigned long *idaws)
{
	int i;

	/*
	 * Idal words (except the first one) rely on the memory being 4k
	 * aligned. If a user virtual address is 4K aligned, then it's
	 * corresponding kernel physical address will also be 4K aligned. Thus
	 * there will be no problem here to simply use the phys to create an
	 * idaw.
	 */

	for (i = 0; i < pa->pa_nr; i++) {
		idaws[i] = page_to_phys(pa->pa_page[i]);

		/* Incorporate any offset from each starting address */
		idaws[i] += pa->pa_iova[i] & (PAGE_SIZE - 1);
	}
}
212 | ||
/*
 * Convert @len Format-0 CCWs to Format-1, in place.
 *
 * The two formats occupy the same 8 bytes with different field layouts,
 * so each CCW is snapshotted into a local struct ccw0 before its storage
 * is rewritten as a ccw1.  TICs get their flags/count zeroed, since only
 * the command code and data address are meaningful for them.
 */
static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
{
	struct ccw0 ccw0;
	struct ccw1 *pccw1 = source;
	int i;

	for (i = 0; i < len; i++) {
		/* Snapshot first: the write below clobbers the same bytes. */
		ccw0 = *(struct ccw0 *)pccw1;
		if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
			pccw1->cmd_code = CCW_CMD_TIC;
			pccw1->flags = 0;
			pccw1->count = 0;
		} else {
			pccw1->cmd_code = ccw0.cmd_code;
			pccw1->flags = ccw0.flags;
			pccw1->count = ccw0.count;
		}
		pccw1->cda = ccw0.cda;
		pccw1++;
	}
}
0a19e61e | 234 | |
/* An IDAL uses 2K blocks when the ORB requests Format-1 IDAWs (no c64)
 * or the 2K variant of Format-2 IDAWs (i2k). */
#define idal_is_2k(_cp) (!(_cp)->orb.cmd.c64 || (_cp)->orb.cmd.i2k)

/*
 * Helpers to operate ccwchain.
 */
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)

#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)

#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)

#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)

/* Command chaining or data chaining: the chain continues past this CCW. */
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
252 | ||
5d87fbf7 EF |
253 | /* |
254 | * ccw_does_data_transfer() | |
255 | * | |
256 | * Determine whether a CCW will move any data, such that the guest pages | |
257 | * would need to be pinned before performing the I/O. | |
258 | * | |
259 | * Returns 1 if yes, 0 if no. | |
260 | */ | |
261 | static inline int ccw_does_data_transfer(struct ccw1 *ccw) | |
262 | { | |
453eac31 EF |
263 | /* If the count field is zero, then no data will be transferred */ |
264 | if (ccw->count == 0) | |
265 | return 0; | |
266 | ||
9b6e57e5 EF |
267 | /* If the command is a NOP, then no data will be transferred */ |
268 | if (ccw_is_noop(ccw)) | |
269 | return 0; | |
270 | ||
5d87fbf7 EF |
271 | /* If the skip flag is off, then data will be transferred */ |
272 | if (!ccw_is_skip(ccw)) | |
273 | return 1; | |
274 | ||
275 | /* | |
276 | * If the skip flag is on, it is only meaningful if the command | |
277 | * code is a read, read backward, sense, or sense ID. In those | |
278 | * cases, no data will be transferred. | |
279 | */ | |
280 | if (ccw_is_read(ccw) || ccw_is_read_backward(ccw)) | |
281 | return 0; | |
282 | ||
283 | if (ccw_is_sense(ccw)) | |
284 | return 0; | |
285 | ||
286 | /* The skip flag is on, but it is ignored for this command code. */ | |
287 | return 1; | |
288 | } | |
289 | ||
48bd0eee EF |
290 | /* |
291 | * is_cpa_within_range() | |
292 | * | |
293 | * @cpa: channel program address being questioned | |
294 | * @head: address of the beginning of a CCW chain | |
295 | * @len: number of CCWs within the chain | |
296 | * | |
297 | * Determine whether the address of a CCW (whether a new chain, | |
298 | * or the target of a TIC) falls within a range (including the end points). | |
299 | * | |
300 | * Returns 1 if yes, 0 if no. | |
301 | */ | |
302 | static inline int is_cpa_within_range(u32 cpa, u32 head, int len) | |
303 | { | |
304 | u32 tail = head + (len - 1) * sizeof(struct ccw1); | |
305 | ||
306 | return (head <= cpa && cpa <= tail); | |
307 | } | |
308 | ||
309 | static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len) | |
310 | { | |
311 | if (!ccw_is_tic(ccw)) | |
312 | return 0; | |
313 | ||
314 | return is_cpa_within_range(ccw->cda, head, len); | |
315 | } | |
316 | ||
0a19e61e DJS |
317 | static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len) |
318 | { | |
319 | struct ccwchain *chain; | |
4b946d65 EF |
320 | |
321 | chain = kzalloc(sizeof(*chain), GFP_KERNEL); | |
0a19e61e DJS |
322 | if (!chain) |
323 | return NULL; | |
324 | ||
4b946d65 EF |
325 | chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL); |
326 | if (!chain->ch_ccw) | |
327 | goto out_err; | |
0a19e61e | 328 | |
4b946d65 EF |
329 | chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL); |
330 | if (!chain->ch_pa) | |
331 | goto out_err; | |
0a19e61e DJS |
332 | |
333 | list_add_tail(&chain->next, &cp->ccwchain_list); | |
334 | ||
335 | return chain; | |
4b946d65 EF |
336 | |
337 | out_err: | |
338 | kfree(chain->ch_ccw); | |
339 | kfree(chain); | |
340 | return NULL; | |
0a19e61e DJS |
341 | } |
342 | ||
343 | static void ccwchain_free(struct ccwchain *chain) | |
344 | { | |
345 | list_del(&chain->next); | |
4b946d65 EF |
346 | kfree(chain->ch_pa); |
347 | kfree(chain->ch_ccw); | |
0a19e61e DJS |
348 | kfree(chain); |
349 | } | |
350 | ||
351 | /* Free resource for a ccw that allocated memory for its cda. */ | |
352 | static void ccwchain_cda_free(struct ccwchain *chain, int idx) | |
353 | { | |
4b946d65 | 354 | struct ccw1 *ccw = &chain->ch_ccw[idx]; |
0a19e61e | 355 | |
9b6e57e5 | 356 | if (ccw_is_tic(ccw)) |
408358b5 | 357 | return; |
0a19e61e | 358 | |
5de2322d | 359 | kfree(phys_to_virt(ccw->cda)); |
0a19e61e DJS |
360 | } |
361 | ||
/**
 * ccwchain_calc_length - calculate the length of the ccw chain.
 * @iova: guest physical address of the target ccw chain
 * @cp: channel_program on which to perform the operation
 *
 * This is the chain length not considering any TICs.
 * You need to do a new round for each TIC target.
 *
 * The program is also validated for absence of not yet supported
 * indirect data addressing scenarios.
 *
 * Returns: the length of the ccw chain or -errno.
 */
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
	struct ccw1 *ccw = cp->guest_cp;
	int cnt = 0;

	do {
		cnt++;

		/*
		 * We want to keep counting if the current CCW has the
		 * command-chaining flag enabled, or if it is a TIC CCW
		 * that loops back into the current chain. The latter
		 * is used for device orientation, where the CCW PRIOR to
		 * the TIC can either jump to the TIC or a CCW immediately
		 * after the TIC, depending on the results of its operation.
		 */
		if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
			break;

		ccw++;
	} while (cnt < CCWCHAIN_LEN_MAX + 1);

	/* Hitting the limit without terminating means the chain is too long. */
	if (cnt == CCWCHAIN_LEN_MAX + 1)
		cnt = -EINVAL;

	return cnt;
}
402 | ||
403 | static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp) | |
404 | { | |
405 | struct ccwchain *chain; | |
2904337f | 406 | u32 ccw_head; |
0a19e61e DJS |
407 | |
408 | list_for_each_entry(chain, &cp->ccwchain_list, next) { | |
409 | ccw_head = chain->ch_iova; | |
2904337f | 410 | if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len)) |
0a19e61e DJS |
411 | return 1; |
412 | } | |
413 | ||
414 | return 0; | |
415 | } | |
416 | ||
417 | static int ccwchain_loop_tic(struct ccwchain *chain, | |
418 | struct channel_program *cp); | |
419 | ||
/*
 * Copy one segment of the guest channel program starting at @cda into a
 * new host ccwchain, then recurse (via ccwchain_loop_tic) into any TICs
 * that leave the chains translated so far.
 *
 * Returns 0 on success or a negative errno; the new chain is freed on error.
 */
static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain;
	int len, ret;

	/* Copy 2K (the most we support today) of possible CCWs */
	ret = vfio_dma_rw(vdev, cda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false);
	if (ret)
		return ret;

	/* Convert any Format-0 CCWs to Format-1 */
	if (!cp->orb.cmd.fmt)
		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);

	/* Count the CCWs in the current chain */
	len = ccwchain_calc_length(cda, cp);
	if (len < 0)
		return len;

	/* Need alloc a new chain for this one. */
	chain = ccwchain_alloc(cp, len);
	if (!chain)
		return -ENOMEM;

	chain->ch_len = len;
	chain->ch_iova = cda;

	/* Copy the actual CCWs into the new chain */
	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));

	/* Loop for tics on this new chain. */
	ret = ccwchain_loop_tic(chain, cp);

	if (ret)
		ccwchain_free(chain);

	return ret;
}
460 | ||
461 | /* Loop for TICs. */ | |
462 | static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp) | |
463 | { | |
464 | struct ccw1 *tic; | |
465 | int i, ret; | |
466 | ||
467 | for (i = 0; i < chain->ch_len; i++) { | |
4b946d65 | 468 | tic = &chain->ch_ccw[i]; |
0a19e61e DJS |
469 | |
470 | if (!ccw_is_tic(tic)) | |
471 | continue; | |
472 | ||
e64bd689 EF |
473 | /* May transfer to an existing chain. */ |
474 | if (tic_target_chain_exists(tic, cp)) | |
475 | continue; | |
476 | ||
363fe5f7 EF |
477 | /* Build a ccwchain for the next segment */ |
478 | ret = ccwchain_handle_ccw(tic->cda, cp); | |
0a19e61e DJS |
479 | if (ret) |
480 | return ret; | |
481 | } | |
482 | ||
483 | return 0; | |
484 | } | |
485 | ||
/*
 * Patch a TIC's guest-physical target address to point at the host copy
 * of the chain that contains it, preserving the offset within the chain.
 * Returns -EFAULT when the target falls in no translated chain.
 */
static int ccwchain_fetch_tic(struct ccw1 *ccw,
			      struct channel_program *cp)
{
	struct ccwchain *iter;
	u32 ccw_head;

	list_for_each_entry(iter, &cp->ccwchain_list, next) {
		ccw_head = iter->ch_iova;
		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
			/* Same byte offset, but into the host-side ch_ccw. */
			ccw->cda = (__u32) (addr_t) (((char *)iter->ch_ccw) +
						     (ccw->cda - ccw_head));
			return 0;
		}
	}

	return -EFAULT;
}
503 | ||
/*
 * Obtain an IDAL of @idaw_nr entries in host storage: either copied from
 * the guest's own IDAL (when the CCW has the IDA flag) or fabricated from
 * the CCW's direct data address.  Returns the kmalloc'd IDAL or an ERR_PTR.
 * Caller frees.
 */
static unsigned long *get_guest_idal(struct ccw1 *ccw,
				     struct channel_program *cp,
				     int idaw_nr)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	unsigned long *idaws;
	unsigned int *idaws_f1;
	int idal_len = idaw_nr * sizeof(*idaws);
	/* 2K IDAWs (Format-1, or Format-2 with i2k) cover half a page each. */
	int idaw_size = idal_is_2k(cp) ? PAGE_SIZE / 2 : PAGE_SIZE;
	int idaw_mask = ~(idaw_size - 1);
	int i, ret;

	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
	if (!idaws)
		return ERR_PTR(-ENOMEM);

	if (ccw_is_idal(ccw)) {
		/* Copy IDAL from guest */
		ret = vfio_dma_rw(vdev, ccw->cda, idaws, idal_len, false);
		if (ret) {
			kfree(idaws);
			return ERR_PTR(ret);
		}
	} else {
		/* Fabricate an IDAL based off CCW data address */
		if (cp->orb.cmd.c64) {
			/* First entry keeps the CCW's offset; the rest are aligned. */
			idaws[0] = ccw->cda;
			for (i = 1; i < idaw_nr; i++)
				idaws[i] = (idaws[i - 1] + idaw_size) & idaw_mask;
		} else {
			/* Format-1 IDAWs are 32-bit entries. */
			idaws_f1 = (unsigned int *)idaws;
			idaws_f1[0] = ccw->cda;
			for (i = 1; i < idaw_nr; i++)
				idaws_f1[i] = (idaws_f1[i - 1] + idaw_size) & idaw_mask;
		}
	}

	return idaws;
}
544 | ||
/*
 * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer
 * a specified amount of data
 *
 * @ccw: The Channel Command Word being translated
 * @cp: Channel Program being processed
 *
 * The ORB is examined, since it specifies what IDAWs could actually be
 * used by any CCW in the channel program, regardless of whether or not
 * the CCW actually does. An ORB that does not specify Format-2-IDAW
 * Control could still contain a CCW with an IDAL, which would be
 * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within
 * the channel program must follow the same size requirements.
 */
static int ccw_count_idaws(struct ccw1 *ccw,
			   struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	u64 iova;
	/* Format-2 IDAWs are 8 bytes; Format-1 IDAWs are 4 bytes. */
	int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32);
	int ret;
	/* A zero count still needs one IDAW's worth of bookkeeping. */
	int bytes = 1;

	if (ccw->count)
		bytes = ccw->count;

	if (ccw_is_idal(ccw)) {
		/* Read first IDAW to check its starting address. */
		/* All subsequent IDAWs will be 2K- or 4K-aligned. */
		ret = vfio_dma_rw(vdev, ccw->cda, &iova, size, false);
		if (ret)
			return ret;

		/*
		 * Format-1 IDAWs only occupy the first 32 bits,
		 * and bit 0 is always off.
		 */
		if (!cp->orb.cmd.c64)
			iova = iova >> 32;
	} else {
		iova = ccw->cda;
	}

	/* Format-1 IDAWs operate on 2K each */
	if (!cp->orb.cmd.c64)
		return idal_2k_nr_words((void *)iova, bytes);

	/* Using the 2K variant of Format-2 IDAWs? */
	if (cp->orb.cmd.i2k)
		return idal_2k_nr_words((void *)iova, bytes);

	/* The 'usual' case is 4K Format-2 IDAWs */
	return idal_nr_words((void *)iova, bytes);
}
600 | ||
/*
 * Translate one data-bearing CCW: build a host IDAL for it, pin the guest
 * pages it references (when it actually moves data), and repoint the CCW's
 * cda at the host IDAL with the IDA flag set.  On error the CCW's cda is
 * zeroed and all intermediate resources are released.
 */
static int ccwchain_fetch_ccw(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	unsigned long *idaws;
	unsigned int *idaws_f1;
	int ret;
	int idaw_nr;
	int i;

	/* Calculate size of IDAL */
	idaw_nr = ccw_count_idaws(ccw, cp);
	if (idaw_nr < 0)
		return idaw_nr;

	/* Allocate an IDAL from host storage */
	idaws = get_guest_idal(ccw, cp, idaw_nr);
	if (IS_ERR(idaws)) {
		ret = PTR_ERR(idaws);
		goto out_init;
	}

	/*
	 * Allocate an array of pages to pin/translate, one entry per
	 * IDAW required for the data transfer.  (With 2K IDAWs, two
	 * consecutive entries may land in the same 4K page.)
	 */
	ret = page_array_alloc(pa, idaw_nr);
	if (ret < 0)
		goto out_free_idaws;

	/*
	 * Copy guest IDAWs into page_array, in case the memory they
	 * occupy is not contiguous.
	 */
	idaws_f1 = (unsigned int *)idaws;
	for (i = 0; i < idaw_nr; i++) {
		if (cp->orb.cmd.c64)
			pa->pa_iova[i] = idaws[i];
		else
			pa->pa_iova[i] = idaws_f1[i];
	}

	if (ccw_does_data_transfer(ccw)) {
		ret = page_array_pin(pa, vdev, idal_is_2k(cp));
		if (ret < 0)
			goto out_unpin;
	} else {
		/* No transfer (e.g. skip): nothing is pinned for this CCW. */
		pa->pa_nr = 0;
	}

	ccw->cda = (__u32) virt_to_phys(idaws);
	ccw->flags |= CCW_FLAG_IDA;

	/* Populate the IDAL with pinned/translated addresses from page */
	page_array_idal_create_words(pa, idaws);

	return 0;

out_unpin:
	page_array_unpin_free(pa, vdev, idal_is_2k(cp));
out_free_idaws:
	kfree(idaws);
out_init:
	ccw->cda = 0;
	return ret;
}
671 | ||
/*
 * Fetch one ccw.
 * To reduce memory copy, we'll pin the cda page in memory,
 * and to get rid of the cda 2G limitation of ccw1, we'll translate
 * direct ccws to idal ccws.
 */
static int ccwchain_fetch_one(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)

{
	/* TICs are patched to target host chains; all else gets an IDAL. */
	return ccw_is_tic(ccw) ? ccwchain_fetch_tic(ccw, cp)
			       : ccwchain_fetch_ccw(ccw, pa, cp);
}
688 | ||
/**
 * cp_init() - allocate ccwchains for a channel program.
 * @cp: channel_program on which to perform the operation
 * @orb: control block for the channel program from the guest
 *
 * This creates one or more ccwchain(s), and copies the raw data of
 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
 *
 * Limitations:
 * 1. Supports idal(c64) ccw chaining.
 * 2. Supports 4k idaw.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_init(struct channel_program *cp, union orb *orb)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	/* custom ratelimit used to avoid flood during guest IPL */
	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
	int ret;

	/* this is an error in the caller */
	if (cp->initialized)
		return -EBUSY;

	/*
	 * We only support prefetching the channel program. We assume all channel
	 * programs executed by supported guests likewise support prefetching.
	 * Executing a channel program that does not specify prefetching will
	 * typically not cause an error, but a warning is issued to help identify
	 * the problem if something does break.
	 */
	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
		dev_warn(
			vdev->dev,
			"Prefetching channel program even though prefetch not specified in ORB");

	INIT_LIST_HEAD(&cp->ccwchain_list);
	memcpy(&cp->orb, orb, sizeof(*orb));

	/* Build a ccwchain for the first CCW segment */
	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);

	if (!ret)
		cp->initialized = true;

	return ret;
}
739 | ||
740 | ||
/**
 * cp_free() - free resources for channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This unpins the memory pages and frees the memory space occupied by
 * @cp, which must have been returned by a previous call to cp_init().
 * Otherwise, undefined behavior occurs.
 */
void cp_free(struct channel_program *cp)
{
	struct vfio_device *vdev =
		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
	struct ccwchain *chain, *temp;
	int i;

	if (!cp->initialized)
		return;

	/* Mark uninitialized first so this teardown is not re-entered. */
	cp->initialized = false;
	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
		for (i = 0; i < chain->ch_len; i++) {
			page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp));
			ccwchain_cda_free(chain, i);
		}
		ccwchain_free(chain);
	}
}
768 | ||
/**
 * cp_prefetch() - translate a guest physical address channel program to
 *                 a real-device runnable channel program.
 * @cp: channel_program on which to perform the operation
 *
 * This function translates the guest-physical-address channel program
 * and stores the result to ccwchain list. @cp must have been
 * initialized by a previous call with cp_init(). Otherwise, undefined
 * behavior occurs.
 * For each chain composing the channel program:
 * - On entry ch_len holds the count of CCWs to be translated.
 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
 *
 * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
 * as helpers to do ccw chain translation inside the kernel. Basically
 * they accept a channel program issued by a virtual machine, and
 * translate the channel program to a real-device runnable channel
 * program.
 *
 * These APIs will copy the ccws into kernel-space buffers, and update
 * the guest physical addresses with their corresponding host physical
 * addresses. Then channel I/O device drivers could issue the
 * translated channel program to real devices to perform an I/O
 * operation.
 *
 * These interfaces are designed to support translation only for
 * channel programs, which are generated and formatted by a
 * guest. Thus this will make it possible for things like VFIO to
 * leverage the interfaces to passthrough a channel I/O mediated
 * device in QEMU.
 *
 * We support direct ccw chaining by translating them to idal ccws.
 *
 * Returns:
 *   %0 on success and a negative error value on failure.
 */
int cp_prefetch(struct channel_program *cp)
{
	struct ccwchain *chain;
	struct ccw1 *ccw;
	struct page_array *pa;
	int len, idx, ret;

	/* this is an error in the caller */
	if (!cp->initialized)
		return -EINVAL;

	list_for_each_entry(chain, &cp->ccwchain_list, next) {
		len = chain->ch_len;
		for (idx = 0; idx < len; idx++) {
			ccw = &chain->ch_ccw[idx];
			pa = &chain->ch_pa[idx];

			ret = ccwchain_fetch_one(ccw, pa, cp);
			if (ret)
				goto out_err;
		}
	}

	return 0;
out_err:
	/* Only cleanup the chain elements that were actually translated. */
	chain->ch_len = idx;
	/* Chains after the failing one were not translated at all. */
	list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
		chain->ch_len = 0;
	}
	return ret;
}
838 | ||
839 | /** | |
840 | * cp_get_orb() - get the orb of the channel program | |
841 | * @cp: channel_program on which to perform the operation | |
9fbed59f | 842 | * @sch: subchannel the operation will be performed against |
0a19e61e DJS |
843 | * |
844 | * This function returns the address of the updated orb of the channel | |
845 | * program. Channel I/O device drivers could use this orb to issue a | |
846 | * ssch. | |
847 | */ | |
9fbed59f | 848 | union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch) |
0a19e61e DJS |
849 | { |
850 | union orb *orb; | |
851 | struct ccwchain *chain; | |
852 | struct ccw1 *cpa; | |
853 | ||
71189f26 CH |
854 | /* this is an error in the caller */ |
855 | if (!cp->initialized) | |
856 | return NULL; | |
857 | ||
0a19e61e DJS |
858 | orb = &cp->orb; |
859 | ||
9fbed59f | 860 | orb->cmd.intparm = (u32)virt_to_phys(sch); |
0a19e61e | 861 | orb->cmd.fmt = 1; |
0a19e61e | 862 | |
254cb663 EF |
863 | /* |
864 | * Everything built by vfio-ccw is a Format-2 IDAL. | |
1b676fe3 EF |
865 | * If the input was a Format-1 IDAL, indicate that |
866 | * 2K Format-2 IDAWs were created here. | |
254cb663 | 867 | */ |
1b676fe3 EF |
868 | if (!orb->cmd.c64) |
869 | orb->cmd.i2k = 1; | |
254cb663 EF |
870 | orb->cmd.c64 = 1; |
871 | ||
0a19e61e | 872 | if (orb->cmd.lpm == 0) |
9fbed59f | 873 | orb->cmd.lpm = sch->lpm; |
0a19e61e DJS |
874 | |
875 | chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next); | |
876 | cpa = chain->ch_ccw; | |
5de2322d | 877 | orb->cmd.cpa = (__u32)virt_to_phys(cpa); |
0a19e61e DJS |
878 | |
879 | return orb; | |
880 | } | |
881 | ||
882 | /** | |
883 | * cp_update_scsw() - update scsw for a channel program. | |
884 | * @cp: channel_program on which to perform the operation | |
885 | * @scsw: I/O results of the channel program and also the target to be | |
886 | * updated | |
887 | * | |
888 | * @scsw contains the I/O results of the channel program that pointed | |
889 | * to by @cp. However what @scsw->cpa stores is a host physical | |
890 | * address, which is meaningless for the guest, which is waiting for | |
891 | * the I/O results. | |
892 | * | |
 893 | * This function updates @scsw->cpa to its corresponding guest physical | |
894 | * address. | |
895 | */ | |
896 | void cp_update_scsw(struct channel_program *cp, union scsw *scsw) | |
897 | { | |
898 | struct ccwchain *chain; | |
899 | u32 cpa = scsw->cmd.cpa; | |
2904337f | 900 | u32 ccw_head; |
0a19e61e | 901 | |
71189f26 CH |
902 | if (!cp->initialized) |
903 | return; | |
904 | ||
0a19e61e DJS |
905 | /* |
906 | * LATER: | |
907 | * For now, only update the cmd.cpa part. We may need to deal with | |
908 | * other portions of the schib as well, even if we don't return them | |
909 | * in the ioctl directly. Path status changes etc. | |
910 | */ | |
911 | list_for_each_entry(chain, &cp->ccwchain_list, next) { | |
912 | ccw_head = (u32)(u64)chain->ch_ccw; | |
15f0eb3d EF |
913 | /* |
914 | * On successful execution, cpa points just beyond the end | |
915 | * of the chain. | |
916 | */ | |
917 | if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) { | |
0a19e61e DJS |
918 | /* |
919 | * (cpa - ccw_head) is the offset value of the host | |
920 | * physical ccw to its chain head. | |
921 | * Adding this value to the guest physical ccw chain | |
922 | * head gets us the guest cpa. | |
923 | */ | |
924 | cpa = chain->ch_iova + (cpa - ccw_head); | |
925 | break; | |
926 | } | |
927 | } | |
928 | ||
929 | scsw->cmd.cpa = cpa; | |
930 | } | |
931 | ||
932 | /** | |
933 | * cp_iova_pinned() - check if an iova is pinned for a ccw chain. | |
364e3f90 | 934 | * @cp: channel_program on which to perform the operation |
0a19e61e | 935 | * @iova: the iova to check |
5a4fe7c4 | 936 | * @length: the length to check from @iova |
0a19e61e DJS |
937 | * |
938 | * If the @iova is currently pinned for the ccw chain, return true; | |
939 | * else return false. | |
940 | */ | |
5a4fe7c4 | 941 | bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length) |
0a19e61e DJS |
942 | { |
943 | struct ccwchain *chain; | |
944 | int i; | |
945 | ||
71189f26 CH |
946 | if (!cp->initialized) |
947 | return false; | |
948 | ||
0a19e61e DJS |
949 | list_for_each_entry(chain, &cp->ccwchain_list, next) { |
950 | for (i = 0; i < chain->ch_len; i++) | |
4b946d65 | 951 | if (page_array_iova_pinned(&chain->ch_pa[i], iova, length)) |
0a19e61e DJS |
952 | return true; |
953 | } | |
954 | ||
955 | return false; | |
956 | } |