Commit | Line | Data |
---|---|---|
f938d2c8 RR |
1 | /*P:300 The I/O mechanism in lguest is simple yet flexible, allowing the Guest |
2 | * to talk to the Launcher or directly to another Guest. It uses familiar | |
3 | * concepts of DMA and interrupts, plus some neat code stolen from | |
4 | * futexes... :*/ | |
5 | ||
6 | /* Copyright (C) 2006 Rusty Russell IBM Corporation | |
d7e28ffe RR |
7 | * |
8 | * This program is free software; you can redistribute it and/or modify | |
9 | * it under the terms of the GNU General Public License as published by | |
10 | * the Free Software Foundation; either version 2 of the License, or | |
11 | * (at your option) any later version. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, | |
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | * GNU General Public License for more details. | |
17 | * | |
18 | * You should have received a copy of the GNU General Public License | |
19 | * along with this program; if not, write to the Free Software | |
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
21 | */ | |
22 | #include <linux/types.h> | |
23 | #include <linux/futex.h> | |
24 | #include <linux/jhash.h> | |
25 | #include <linux/mm.h> | |
26 | #include <linux/highmem.h> | |
27 | #include <linux/uaccess.h> | |
28 | #include "lg.h" | |
29 | ||
dde79789 RR |
30 | /*L:300 |
31 | * I/O | |
32 | * | |
33 | * Getting data in and out of the Guest is quite an art. There are numerous | |
34 | * ways to do it, and they all suck differently. We try to keep things fairly | |
35 | * close to "real" hardware so our Guest's drivers don't look like an alien | |
36 | * visitation in the middle of the Linux code, and yet make sure that Guests | |
37 | * can talk directly to other Guests, not just the Launcher. | |
38 | * | |
39 | * To do this, the Guest gives us a key when it binds or sends DMA buffers. | |
40 | * The key corresponds to a "physical" address inside the Guest (ie. a virtual | |
41 | * address inside the Launcher process). We don't, however, use this key | |
42 | * directly. | |
43 | * | |
44 | * We want Guests which share memory to be able to DMA to each other: two | |
45 | * Launchers can mmap memory from the same file, then the Guests can communicate. | |
46 | * Fortunately, the futex code provides us with a way to get a "union | |
47 | * futex_key" corresponding to the memory lying at a virtual address: if the | |
48 | * two processes share memory, the "union futex_key" for that memory will match | |
49 | * even if the memory is mapped at different addresses in each. So we always | |
50 | * convert the keys to "union futex_key"s to compare them. | |
51 | * | |
52 | * Before we dive into this though, we need to look at another set of helper | |
53 | * routines used throughout the Host kernel code to access Guest memory. | |
54 | :*/ | |
d7e28ffe RR |
55 | static struct list_head dma_hash[61]; |
56 | ||
dde79789 RR |
57 | /* An unfortunate side effect of the Linux double-linked list implementation is |
58 | * that there's no good way to statically initialize an array of linked | |
59 | * lists. */ | |
d7e28ffe RR |
60 | void lguest_io_init(void) |
61 | { | |
62 | unsigned int i; | |
63 | ||
64 | for (i = 0; i < ARRAY_SIZE(dma_hash); i++) | |
65 | INIT_LIST_HEAD(&dma_hash[i]); | |
66 | } | |
67 | ||
68 | /* FIXME: allow multi-page lengths. */ | |
69 | static int check_dma_list(struct lguest *lg, const struct lguest_dma *dma) | |
70 | { | |
71 | unsigned int i; | |
72 | ||
73 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { | |
74 | if (!dma->len[i]) | |
75 | return 1; | |
76 | if (!lguest_address_ok(lg, dma->addr[i], dma->len[i])) | |
77 | goto kill; | |
78 | if (dma->len[i] > PAGE_SIZE) | |
79 | goto kill; | |
80 | /* We could do over a page, but is it worth it? */ | |
81 | if ((dma->addr[i] % PAGE_SIZE) + dma->len[i] > PAGE_SIZE) | |
82 | goto kill; | |
83 | } | |
84 | return 1; | |
85 | ||
86 | kill: | |
87 | kill_guest(lg, "bad DMA entry: %u@%#lx", dma->len[i], dma->addr[i]); | |
88 | return 0; | |
89 | } | |
90 | ||
dde79789 RR |
91 | /*L:330 This is our hash function, using the wonderful Jenkins hash. |
92 | * | |
93 | * The futex key is a union with three parts: an unsigned long word, a pointer, | |
94 | * and an int "offset". We could use jhash_2words() which takes three u32s. | |
95 | * (Ok, the hash functions are great: the naming sucks though). | |
96 | * | |
97 | * It's nice to be portable to 64-bit platforms, so we use the more generic | |
98 | * jhash2(), which takes an array of u32, the number of u32s, and an initial | |
99 | * u32 to roll in. This is uglier, but breaks down to almost the same code on | |
100 | * 32-bit platforms like this one. | |
101 | * | |
102 | * We want a position in the array, so we modulo ARRAY_SIZE(dma_hash) (ie. 61). | |
103 | */ | |
d7e28ffe RR |
104 | static unsigned int hash(const union futex_key *key) |
105 | { | |
106 | return jhash2((u32*)&key->both.word, | |
107 | (sizeof(key->both.word)+sizeof(key->both.ptr))/4, | |
108 | key->both.offset) | |
109 | % ARRAY_SIZE(dma_hash); | |
110 | } | |
111 | ||
dde79789 RR |
112 | /* This is a convenience routine to compare two keys. It's a much bemoaned C |
113 | * weakness that it doesn't allow '==' on structures or unions, so we have to | |
114 | * open-code it like this. */ | |
d7e28ffe RR |
115 | static inline int key_eq(const union futex_key *a, const union futex_key *b) |
116 | { | |
117 | return (a->both.word == b->both.word | |
118 | && a->both.ptr == b->both.ptr | |
119 | && a->both.offset == b->both.offset); | |
120 | } | |
121 | ||
dde79789 RR |
122 | /*L:360 OK, when we need to actually free up a Guest's DMA array we do several |
123 | * things, so we have a convenient function to do it. | |
124 | * | |
125 | * The caller must hold a read lock on dmainfo owner's current->mm->mmap_sem | |
126 | * for the drop_futex_key_refs(). */ | |
d7e28ffe RR |
127 | static void unlink_dma(struct lguest_dma_info *dmainfo) |
128 | { | |
dde79789 | 129 | /* You locked this too, right? */ |
d7e28ffe | 130 | BUG_ON(!mutex_is_locked(&lguest_lock)); |
dde79789 | 131 | /* This is how we know that the entry is free. */ |
d7e28ffe | 132 | dmainfo->interrupt = 0; |
dde79789 | 133 | /* Remove it from the hash table. */ |
d7e28ffe | 134 | list_del(&dmainfo->list); |
dde79789 | 135 | /* Drop the references we were holding (to the inode or mm). */ |
d7e28ffe RR |
136 | drop_futex_key_refs(&dmainfo->key); |
137 | } | |
138 | ||
dde79789 RR |
139 | /*L:350 This is the routine which we call when the Guest asks to unregister a |
140 | * DMA array attached to a given key. Returns true if the array was found. */ | |
d7e28ffe RR |
141 | static int unbind_dma(struct lguest *lg, |
142 | const union futex_key *key, | |
143 | unsigned long dmas) | |
144 | { | |
145 | int i, ret = 0; | |
146 | ||
dde79789 RR |
147 | /* We don't bother with the hash table, just look through all this |
148 | * Guest's DMA arrays. */ | |
d7e28ffe | 149 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
dde79789 RR |
150 | /* In theory it could have more than one array on the same key, |
151 | * or one array on multiple keys, so we check both */ | |
d7e28ffe RR |
152 | if (key_eq(key, &lg->dma[i].key) && dmas == lg->dma[i].dmas) { |
153 | unlink_dma(&lg->dma[i]); | |
154 | ret = 1; | |
155 | break; | |
156 | } | |
157 | } | |
158 | return ret; | |
159 | } | |
160 | ||
dde79789 RR |
161 | /*L:340 BIND_DMA: this is the hypercall which sets up an array of "struct |
162 | * lguest_dma" for receiving I/O. | |
163 | * | |
164 | * The Guest wants to bind an array of "struct lguest_dma"s to a particular key | |
165 | * to receive input. This only happens when the Guest is setting up a new | |
166 | * device, so it doesn't have to be very fast. | |
167 | * | |
168 | * It returns 1 on a successful registration (it can fail if we hit the limit | |
169 | * of registrations for this Guest). | |
170 | */ | |
d7e28ffe RR |
171 | int bind_dma(struct lguest *lg, |
172 | unsigned long ukey, unsigned long dmas, u16 numdmas, u8 interrupt) | |
173 | { | |
174 | unsigned int i; | |
175 | int ret = 0; | |
176 | union futex_key key; | |
dde79789 | 177 | /* Futex code needs the mmap_sem. */ |
d7e28ffe RR |
178 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; |
179 | ||
dde79789 | 180 | /* Invalid interrupt? (We could kill the guest here). */ |
d7e28ffe RR |
181 | if (interrupt >= LGUEST_IRQS) |
182 | return 0; | |
183 | ||
dde79789 RR |
184 | /* We need to grab the Big Lguest Lock, because other Guests may be |
185 | * trying to look through this Guest's DMAs to send something while | |
186 | * we're doing this. */ | |
d7e28ffe RR |
187 | mutex_lock(&lguest_lock); |
188 | down_read(fshared); | |
189 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { | |
190 | kill_guest(lg, "bad dma key %#lx", ukey); | |
191 | goto unlock; | |
192 | } | |
dde79789 RR |
193 | |
194 | /* We want to keep this key valid once we drop mmap_sem, so we have to | |
195 | * hold a reference. */ | |
d7e28ffe RR |
196 | get_futex_key_refs(&key); |
197 | ||
dde79789 RR |
198 | /* If the Guest specified an interrupt of 0, that means they want to |
199 | * unregister this array of "struct lguest_dma"s. */ | |
d7e28ffe RR |
200 | if (interrupt == 0) |
201 | ret = unbind_dma(lg, &key, dmas); | |
202 | else { | |
dde79789 | 203 | /* Look through this Guest's dma array for an unused entry. */ |
d7e28ffe | 204 | for (i = 0; i < LGUEST_MAX_DMA; i++) { |
dde79789 RR |
205 | /* If the interrupt is non-zero, the entry is already |
206 | * used. */ | |
d7e28ffe RR |
207 | if (lg->dma[i].interrupt) |
208 | continue; | |
209 | ||
dde79789 | 210 | /* OK, a free one! Fill on our details. */ |
d7e28ffe RR |
211 | lg->dma[i].dmas = dmas; |
212 | lg->dma[i].num_dmas = numdmas; | |
213 | lg->dma[i].next_dma = 0; | |
214 | lg->dma[i].key = key; | |
215 | lg->dma[i].guestid = lg->guestid; | |
216 | lg->dma[i].interrupt = interrupt; | |
dde79789 RR |
217 | |
218 | /* Now we add it to the hash table: the position | |
219 | * depends on the futex key that we got. */ | |
d7e28ffe | 220 | list_add(&lg->dma[i].list, &dma_hash[hash(&key)]); |
dde79789 | 221 | /* Success! */ |
d7e28ffe RR |
222 | ret = 1; |
223 | goto unlock; | |
224 | } | |
225 | } | |
dde79789 RR |
226 | /* If we didn't find a slot to put the key in, drop the reference |
227 | * again. */ | |
d7e28ffe RR |
228 | drop_futex_key_refs(&key); |
229 | unlock: | |
dde79789 | 230 | /* Unlock and out. */ |
d7e28ffe RR |
231 | up_read(fshared); |
232 | mutex_unlock(&lguest_lock); | |
233 | return ret; | |
234 | } | |
235 | ||
dde79789 RR |
236 | /*L:385 Note that our routines to access a different Guest's memory are called |
237 | * lgread_other() and lgwrite_other(): these names emphasize that they are only | |
238 | * used when the Guest is *not* the current Guest. | |
239 | * | |
240 | * The interface for copying from another process's memory is called | |
241 | * access_process_vm(), with a final argument of 0 for a read, and 1 for a | |
242 | * write. | |
243 | * | |
244 | * We need lgread_other() to read the destination Guest's "struct lguest_dma" | |
245 | * array. */ | |
d7e28ffe RR |
246 | static int lgread_other(struct lguest *lg, |
247 | void *buf, u32 addr, unsigned bytes) | |
248 | { | |
249 | if (!lguest_address_ok(lg, addr, bytes) | |
250 | || access_process_vm(lg->tsk, addr, buf, bytes, 0) != bytes) { | |
251 | memset(buf, 0, bytes); | |
252 | kill_guest(lg, "bad address in registered DMA struct"); | |
253 | return 0; | |
254 | } | |
255 | return 1; | |
256 | } | |
257 | ||
dde79789 RR |
258 | /* "lgwrite()" to another Guest: used to update the destination "used_len" once |
259 | * we've transferred data into the buffer. */ | |
d7e28ffe RR |
260 | static int lgwrite_other(struct lguest *lg, u32 addr, |
261 | const void *buf, unsigned bytes) | |
262 | { | |
263 | if (!lguest_address_ok(lg, addr, bytes) | |
264 | || (access_process_vm(lg->tsk, addr, (void *)buf, bytes, 1) | |
265 | != bytes)) { | |
266 | kill_guest(lg, "bad address writing to registered DMA"); | |
267 | return 0; | |
268 | } | |
269 | return 1; | |
270 | } | |
271 | ||
dde79789 RR |
272 | /*L:400 This is the generic engine which copies from a source "struct |
273 | * lguest_dma" from this Guest into another Guest's "struct lguest_dma". The | |
274 | * destination Guest's pages have already been mapped, as contained in the | |
275 | * pages array. | |
276 | * | |
277 | * If you're wondering if there's a nice "copy from one process to another" | |
278 | * routine, so was I. But Linux isn't really set up to copy between two | |
279 | * unrelated processes, so we have to write it ourselves. | |
280 | */ | |
d7e28ffe RR |
281 | static u32 copy_data(struct lguest *srclg, |
282 | const struct lguest_dma *src, | |
283 | const struct lguest_dma *dst, | |
284 | struct page *pages[]) | |
285 | { | |
286 | unsigned int totlen, si, di, srcoff, dstoff; | |
287 | void *maddr = NULL; | |
288 | ||
dde79789 | 289 | /* We return the total length transferred. */ |
d7e28ffe | 290 | totlen = 0; |
dde79789 RR |
291 | |
292 | /* We keep indexes into the source and destination "struct lguest_dma", | |
293 | * and an offset within each region. */ | |
d7e28ffe RR |
294 | si = di = 0; |
295 | srcoff = dstoff = 0; | |
dde79789 RR |
296 | |
297 | /* We loop until the source or destination is exhausted. */ | |
d7e28ffe RR |
298 | while (si < LGUEST_MAX_DMA_SECTIONS && src->len[si] |
299 | && di < LGUEST_MAX_DMA_SECTIONS && dst->len[di]) { | |
dde79789 RR |
300 | /* We can only transfer the rest of the src buffer, or as much |
301 | * as will fit into the destination buffer. */ | |
d7e28ffe RR |
302 | u32 len = min(src->len[si] - srcoff, dst->len[di] - dstoff); |
303 | ||
dde79789 RR |
304 | /* For systems using "highmem" we need to use kmap() to access |
305 | * the page we want. We often use the same page over and over, | |
306 | * so rather than kmap() it on every loop, we set the maddr | |
307 | * pointer to NULL when we need to move to the next | |
308 | * destination page. */ | |
d7e28ffe RR |
309 | if (!maddr) |
310 | maddr = kmap(pages[di]); | |
311 | ||
dde79789 RR |
312 | /* Copy directly from (this Guest's) source address to the |
313 | * destination Guest's kmap()ed buffer. Note that maddr points | |
314 | * to the start of the page: we need to add the offset of the | |
315 | * destination address and offset within the buffer. */ | |
316 | ||
317 | /* FIXME: This is not completely portable. I looked at | |
318 | * copy_to_user_page(), and some arch's seem to need special | |
319 | * flushes. x86 is fine. */ | |
d7e28ffe | 320 | if (copy_from_user(maddr + (dst->addr[di] + dstoff)%PAGE_SIZE, |
6d14bfe7 | 321 | (void __user *)src->addr[si], len) != 0) { |
dde79789 | 322 | /* If a copy failed, it's the source's fault. */ |
d7e28ffe RR |
323 | kill_guest(srclg, "bad address in sending DMA"); |
324 | totlen = 0; | |
325 | break; | |
326 | } | |
327 | ||
dde79789 | 328 | /* Increment the total and src & dst offsets */ |
d7e28ffe RR |
329 | totlen += len; |
330 | srcoff += len; | |
331 | dstoff += len; | |
dde79789 RR |
332 | |
333 | /* Presumably we reached the end of the src or dest buffers: */ | |
d7e28ffe | 334 | if (srcoff == src->len[si]) { |
dde79789 | 335 | /* Move to the next buffer at offset 0 */ |
d7e28ffe RR |
336 | si++; |
337 | srcoff = 0; | |
338 | } | |
339 | if (dstoff == dst->len[di]) { | |
dde79789 RR |
340 | /* We need to unmap that destination page and reset |
341 | * maddr ready for the next one. */ | |
d7e28ffe RR |
342 | kunmap(pages[di]); |
343 | maddr = NULL; | |
344 | di++; | |
345 | dstoff = 0; | |
346 | } | |
347 | } | |
348 | ||
dde79789 | 349 | /* If we still had a page mapped at the end, unmap now. */ |
d7e28ffe RR |
350 | if (maddr) |
351 | kunmap(pages[di]); | |
352 | ||
353 | return totlen; | |
354 | } | |
355 | ||
dde79789 RR |
356 | /*L:390 This is how we transfer a "struct lguest_dma" from the source Guest |
357 | * (the current Guest which called SEND_DMA) to another Guest. */ | |
d7e28ffe RR |
358 | static u32 do_dma(struct lguest *srclg, const struct lguest_dma *src, |
359 | struct lguest *dstlg, const struct lguest_dma *dst) | |
360 | { | |
361 | int i; | |
362 | u32 ret; | |
363 | struct page *pages[LGUEST_MAX_DMA_SECTIONS]; | |
364 | ||
dde79789 RR |
365 | /* We check that both source and destination "struct lguest_dma"s are |
366 | * within the bounds of the source and destination Guests */ | |
d7e28ffe RR |
367 | if (!check_dma_list(dstlg, dst) || !check_dma_list(srclg, src)) |
368 | return 0; | |
369 | ||
dde79789 RR |
370 | /* We need to map the pages which correspond to each parts of |
371 | * destination buffer. */ | |
d7e28ffe RR |
372 | for (i = 0; i < LGUEST_MAX_DMA_SECTIONS; i++) { |
373 | if (dst->len[i] == 0) | |
374 | break; | |
dde79789 RR |
375 | /* get_user_pages() is a complicated function, especially since |
376 | * we only want a single page. But it works, and returns the | |
377 | * number of pages. Note that we're holding the destination's | |
378 | * mmap_sem, as get_user_pages() requires. */ | |
d7e28ffe RR |
379 | if (get_user_pages(dstlg->tsk, dstlg->mm, |
380 | dst->addr[i], 1, 1, 1, pages+i, NULL) | |
381 | != 1) { | |
dde79789 | 382 | /* This means the destination gave us a bogus buffer */ |
d7e28ffe RR |
383 | kill_guest(dstlg, "Error mapping DMA pages"); |
384 | ret = 0; | |
385 | goto drop_pages; | |
386 | } | |
387 | } | |
388 | ||
dde79789 | 389 | /* Now copy the data until we run out of src or dst. */ |
d7e28ffe RR |
390 | ret = copy_data(srclg, src, dst, pages); |
391 | ||
392 | drop_pages: | |
393 | while (--i >= 0) | |
394 | put_page(pages[i]); | |
395 | return ret; | |
396 | } | |
397 | ||
dde79789 RR |
398 | /*L:380 Transferring data from one Guest to another is not as simple as I'd |
399 | * like. We've found the "struct lguest_dma_info" bound to the same address as | |
400 | * the send, we need to copy into it. | |
401 | * | |
402 | * This function returns true if the destination array was empty. */ | |
d7e28ffe RR |
403 | static int dma_transfer(struct lguest *srclg, |
404 | unsigned long udma, | |
405 | struct lguest_dma_info *dst) | |
406 | { | |
407 | struct lguest_dma dst_dma, src_dma; | |
408 | struct lguest *dstlg; | |
409 | u32 i, dma = 0; | |
410 | ||
dde79789 RR |
411 | /* From the "struct lguest_dma_info" we found in the hash, grab the |
412 | * Guest. */ | |
d7e28ffe | 413 | dstlg = &lguests[dst->guestid]; |
dde79789 | 414 | /* Read in the source "struct lguest_dma" handed to SEND_DMA. */ |
d7e28ffe RR |
415 | lgread(srclg, &src_dma, udma, sizeof(src_dma)); |
416 | ||
dde79789 RR |
417 | /* We need the destination's mmap_sem, and we already hold the source's |
418 | * mmap_sem for the futex key lookup. Normally this would suggest that | |
419 | * we could deadlock if the destination Guest was trying to send to | |
420 | * this source Guest at the same time, which is another reason that all | |
421 | * I/O is done under the big lguest_lock. */ | |
d7e28ffe RR |
422 | down_read(&dstlg->mm->mmap_sem); |
423 | ||
dde79789 | 424 | /* Look through the destination DMA array for an available buffer. */ |
d7e28ffe | 425 | for (i = 0; i < dst->num_dmas; i++) { |
dde79789 RR |
426 | /* We keep a "next_dma" pointer which often helps us avoid |
427 | * looking at lots of previously-filled entries. */ | |
d7e28ffe RR |
428 | dma = (dst->next_dma + i) % dst->num_dmas; |
429 | if (!lgread_other(dstlg, &dst_dma, | |
430 | dst->dmas + dma * sizeof(struct lguest_dma), | |
431 | sizeof(dst_dma))) { | |
432 | goto fail; | |
433 | } | |
434 | if (!dst_dma.used_len) | |
435 | break; | |
436 | } | |
dde79789 RR |
437 | |
438 | /* If we found a buffer, we do the actual data copy. */ | |
d7e28ffe RR |
439 | if (i != dst->num_dmas) { |
440 | unsigned long used_lenp; | |
441 | unsigned int ret; | |
442 | ||
443 | ret = do_dma(srclg, &src_dma, dstlg, &dst_dma); | |
dde79789 RR |
444 | /* Put used length in the source "struct lguest_dma"'s used_len |
445 | * field. It's a little tricky to figure out where that is, | |
446 | * though. */ | |
d7e28ffe RR |
447 | lgwrite_u32(srclg, |
448 | udma+offsetof(struct lguest_dma, used_len), ret); | |
dde79789 | 449 | /* Tranferring 0 bytes is OK if the source buffer was empty. */ |
d7e28ffe RR |
450 | if (ret == 0 && src_dma.len[0] != 0) |
451 | goto fail; | |
452 | ||
dde79789 RR |
453 | /* The destination Guest might be running on a different CPU: |
454 | * we have to make sure that it will see the "used_len" field | |
455 | * change to non-zero *after* it sees the data we copied into | |
456 | * the buffer. Hence a write memory barrier. */ | |
d7e28ffe | 457 | wmb(); |
dde79789 RR |
458 | /* Figuring out where the destination's used_len field for this |
459 | * "struct lguest_dma" in the array is also a little ugly. */ | |
d7e28ffe RR |
460 | used_lenp = dst->dmas |
461 | + dma * sizeof(struct lguest_dma) | |
462 | + offsetof(struct lguest_dma, used_len); | |
463 | lgwrite_other(dstlg, used_lenp, &ret, sizeof(ret)); | |
dde79789 | 464 | /* Move the cursor for next time. */ |
d7e28ffe RR |
465 | dst->next_dma++; |
466 | } | |
467 | up_read(&dstlg->mm->mmap_sem); | |
468 | ||
dde79789 RR |
469 | /* We trigger the destination interrupt, even if the destination was |
470 | * empty and we didn't transfer anything: this gives them a chance to | |
471 | * wake up and refill. */ | |
d7e28ffe | 472 | set_bit(dst->interrupt, dstlg->irqs_pending); |
dde79789 | 473 | /* Wake up the destination process. */ |
d7e28ffe | 474 | wake_up_process(dstlg->tsk); |
dde79789 RR |
475 | /* If we passed the last "struct lguest_dma", the receive had no |
476 | * buffers left. */ | |
d7e28ffe RR |
477 | return i == dst->num_dmas; |
478 | ||
479 | fail: | |
480 | up_read(&dstlg->mm->mmap_sem); | |
481 | return 0; | |
482 | } | |
483 | ||
dde79789 RR |
484 | /*L:370 This is the counter-side to the BIND_DMA hypercall; the SEND_DMA |
485 | * hypercall. We find out who's listening, and send to them. */ | |
d7e28ffe RR |
486 | void send_dma(struct lguest *lg, unsigned long ukey, unsigned long udma) |
487 | { | |
488 | union futex_key key; | |
489 | int empty = 0; | |
490 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; | |
491 | ||
492 | again: | |
493 | mutex_lock(&lguest_lock); | |
494 | down_read(fshared); | |
dde79789 | 495 | /* Get the futex key for the key the Guest gave us */ |
d7e28ffe RR |
496 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
497 | kill_guest(lg, "bad sending DMA key"); | |
498 | goto unlock; | |
499 | } | |
dde79789 RR |
500 | /* Since the key must be a multiple of 4, the futex key uses the lower |
501 | * bit of the "offset" field (which would always be 0) to indicate a | |
502 | * mapping which is shared with other processes (ie. Guests). */ | |
d7e28ffe RR |
503 | if (key.shared.offset & 1) { |
504 | struct lguest_dma_info *i; | |
dde79789 | 505 | /* Look through the hash for other Guests. */ |
d7e28ffe | 506 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { |
dde79789 | 507 | /* Don't send to ourselves. */ |
d7e28ffe RR |
508 | if (i->guestid == lg->guestid) |
509 | continue; | |
510 | if (!key_eq(&key, &i->key)) | |
511 | continue; | |
512 | ||
dde79789 RR |
513 | /* If dma_transfer() tells us the destination has no |
514 | * available buffers, we increment "empty". */ | |
d7e28ffe RR |
515 | empty += dma_transfer(lg, udma, i); |
516 | break; | |
517 | } | |
dde79789 RR |
518 | /* If the destination is empty, we release our locks and |
519 | * give the destination Guest a brief chance to restock. */ | |
d7e28ffe RR |
520 | if (empty == 1) { |
521 | /* Give any recipients one chance to restock. */ | |
522 | up_read(¤t->mm->mmap_sem); | |
523 | mutex_unlock(&lguest_lock); | |
dde79789 | 524 | /* Next time, we won't try again. */ |
d7e28ffe RR |
525 | empty++; |
526 | goto again; | |
527 | } | |
528 | } else { | |
dde79789 RR |
529 | /* Private mapping: Guest is sending to its Launcher. We set |
530 | * the "dma_is_pending" flag so that the main loop will exit | |
531 | * and the Launcher's read() from /dev/lguest will return. */ | |
d7e28ffe RR |
532 | lg->dma_is_pending = 1; |
533 | lg->pending_dma = udma; | |
534 | lg->pending_key = ukey; | |
535 | } | |
536 | unlock: | |
537 | up_read(fshared); | |
538 | mutex_unlock(&lguest_lock); | |
539 | } | |
dde79789 | 540 | /*:*/ |
d7e28ffe RR |
541 | |
542 | void release_all_dma(struct lguest *lg) | |
543 | { | |
544 | unsigned int i; | |
545 | ||
546 | BUG_ON(!mutex_is_locked(&lguest_lock)); | |
547 | ||
548 | down_read(&lg->mm->mmap_sem); | |
549 | for (i = 0; i < LGUEST_MAX_DMA; i++) { | |
550 | if (lg->dma[i].interrupt) | |
551 | unlink_dma(&lg->dma[i]); | |
552 | } | |
553 | up_read(&lg->mm->mmap_sem); | |
554 | } | |
555 | ||
f56a384e RR |
556 | /*M:007 We only return a single DMA buffer to the Launcher, but it would be |
557 | * more efficient to return a pointer to the entire array of DMA buffers, which | |
558 | * it can cache and choose one whenever it wants. | |
559 | * | |
560 | * Currently the Launcher uses a write to /dev/lguest, and the return value is | |
561 | * the address of the DMA structure with the interrupt number placed in | |
562 | * dma->used_len. If we wanted to return the entire array, we need to return | |
563 | * the address, array size and interrupt number: this seems to require an | |
564 | * ioctl(). :*/ | |
565 | ||
dde79789 RR |
566 | /*L:320 This routine looks for a DMA buffer registered by the Guest on the |
567 | * given key (using the BIND_DMA hypercall). */ | |
d7e28ffe RR |
568 | unsigned long get_dma_buffer(struct lguest *lg, |
569 | unsigned long ukey, unsigned long *interrupt) | |
570 | { | |
571 | unsigned long ret = 0; | |
572 | union futex_key key; | |
573 | struct lguest_dma_info *i; | |
574 | struct rw_semaphore *fshared = ¤t->mm->mmap_sem; | |
575 | ||
dde79789 RR |
576 | /* Take the Big Lguest Lock to stop other Guests sending this Guest DMA |
577 | * at the same time. */ | |
d7e28ffe | 578 | mutex_lock(&lguest_lock); |
dde79789 RR |
579 | /* To match between Guests sharing the same underlying memory we steal |
580 | * code from the futex infrastructure. This requires that we hold the | |
581 | * "mmap_sem" for our process (the Launcher), and pass it to the futex | |
582 | * code. */ | |
d7e28ffe | 583 | down_read(fshared); |
dde79789 RR |
584 | |
585 | /* This can fail if it's not a valid address, or if the address is not | |
586 | * divisible by 4 (the futex code needs that, we don't really). */ | |
d7e28ffe RR |
587 | if (get_futex_key((u32 __user *)ukey, fshared, &key) != 0) { |
588 | kill_guest(lg, "bad registered DMA buffer"); | |
589 | goto unlock; | |
590 | } | |
dde79789 RR |
591 | /* Search the hash table for matching entries (the Launcher can only |
592 | * send to its own Guest for the moment, so the entry must be for this | |
593 | * Guest) */ | |
d7e28ffe RR |
594 | list_for_each_entry(i, &dma_hash[hash(&key)], list) { |
595 | if (key_eq(&key, &i->key) && i->guestid == lg->guestid) { | |
596 | unsigned int j; | |
dde79789 RR |
597 | /* Look through the registered DMA array for an |
598 | * available buffer. */ | |
d7e28ffe RR |
599 | for (j = 0; j < i->num_dmas; j++) { |
600 | struct lguest_dma dma; | |
601 | ||
602 | ret = i->dmas + j * sizeof(struct lguest_dma); | |
603 | lgread(lg, &dma, ret, sizeof(dma)); | |
604 | if (dma.used_len == 0) | |
605 | break; | |
606 | } | |
dde79789 RR |
607 | /* Store the interrupt the Guest wants when the buffer |
608 | * is used. */ | |
d7e28ffe RR |
609 | *interrupt = i->interrupt; |
610 | break; | |
611 | } | |
612 | } | |
613 | unlock: | |
614 | up_read(fshared); | |
615 | mutex_unlock(&lguest_lock); | |
616 | return ret; | |
617 | } | |
dde79789 | 618 | /*:*/ |
d7e28ffe | 619 | |
dde79789 RR |
620 | /*L:410 This really has completed the Launcher. Not only have we now finished |
621 | * the longest chapter in our journey, but this also means we are over halfway | |
622 | * through! | |
623 | * | |
624 | * Enough prevaricating around the bush: it is time for us to dive into the | |
625 | * core of the Host, in "make Host". | |
626 | */ |