Commit | Line | Data |
---|---|---|
4bac07c9 JF |
1 | /****************************************************************************** |
2 | * xenbus_comms.c | |
3 | * | |
4 | * Low level code to talks to Xen Store: ringbuffer and event channel. | |
5 | * | |
6 | * Copyright (C) 2005 Rusty Russell, IBM Corporation | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License version 2 | |
10 | * as published by the Free Software Foundation; or, when distributed | |
11 | * separately from the Linux kernel or incorporated into other | |
12 | * software packages, subject to the following license: | |
13 | * | |
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15 | * of this source file (the "Software"), to deal in the Software without | |
16 | * restriction, including without limitation the rights to use, copy, modify, | |
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
18 | * and to permit persons to whom the Software is furnished to do so, subject to | |
19 | * the following conditions: | |
20 | * | |
21 | * The above copyright notice and this permission notice shall be included in | |
22 | * all copies or substantial portions of the Software. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
30 | * IN THE SOFTWARE. | |
31 | */ | |
32 | ||
283c0972 JP |
33 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
34 | ||
4bac07c9 JF |
35 | #include <linux/wait.h> |
36 | #include <linux/interrupt.h> | |
fd8aa909 | 37 | #include <linux/kthread.h> |
4bac07c9 JF |
38 | #include <linux/sched.h> |
39 | #include <linux/err.h> | |
40 | #include <xen/xenbus.h> | |
41 | #include <asm/xen/hypervisor.h> | |
42 | #include <xen/events.h> | |
43 | #include <xen/page.h> | |
332f791d | 44 | #include "xenbus.h" |
4bac07c9 | 45 | |
/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
/* Woken by the event-channel irq (wake_waiting) and by new work. */
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
/* Protects xb_write_list and xs_reply_list (see process_msg/process_writes). */
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

/* irq bound to the xenstore event channel; 0 until xb_init_comms() binds it. */
static int xenbus_irq;
/* Task running xenbus_thread(); NULL when the thread is not running. */
static struct task_struct *xenbus_task;
4bac07c9 | 59 | |
/*
 * Event-channel interrupt handler: wake the xenbus thread sleeping on
 * xb_waitq so it can drain/refill the shared rings.
 */
static irqreturn_t wake_waiting(int irq, void *unused)
{
	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}
65 | ||
66 | static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) | |
67 | { | |
68 | return ((prod - cons) <= XENSTORE_RING_SIZE); | |
69 | } | |
70 | ||
71 | static void *get_output_chunk(XENSTORE_RING_IDX cons, | |
72 | XENSTORE_RING_IDX prod, | |
73 | char *buf, uint32_t *len) | |
74 | { | |
75 | *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); | |
76 | if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) | |
77 | *len = XENSTORE_RING_SIZE - (prod - cons); | |
78 | return buf + MASK_XENSTORE_IDX(prod); | |
79 | } | |
80 | ||
81 | static const void *get_input_chunk(XENSTORE_RING_IDX cons, | |
82 | XENSTORE_RING_IDX prod, | |
83 | const char *buf, uint32_t *len) | |
84 | { | |
85 | *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); | |
86 | if ((prod - cons) < *len) | |
87 | *len = prod - cons; | |
88 | return buf + MASK_XENSTORE_IDX(cons); | |
89 | } | |
90 | ||
fd8aa909 JG |
91 | static int xb_data_to_write(void) |
92 | { | |
93 | struct xenstore_domain_interface *intf = xen_store_interface; | |
94 | ||
95 | return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE && | |
96 | !list_empty(&xb_write_list); | |
97 | } | |
98 | ||
/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.  May return fewer than @len
 * bytes if the ring fills up or xb_data_to_write() turns false mid-copy.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			/* Bogus indexes from the other end: reset the ring. */
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;	/* no usable chunk; re-read indexes */
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;

		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			/* consumer caught up with old prod; may be idle */
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
151 | ||
fd8aa909 | 152 | static int xb_data_to_read(void) |
4bac07c9 JF |
153 | { |
154 | struct xenstore_domain_interface *intf = xen_store_interface; | |
155 | return (intf->rsp_cons != intf->rsp_prod); | |
156 | } | |
157 | ||
/*
 * xb_read - low level read from the response ring
 * @data: buffer to fill
 * @len: maximum number of bytes to read
 *
 * Returns the number of bytes actually read (possibly fewer than @len if
 * the ring empties first) or -EIO on corrupted ring indexes.
 */
static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;	/* ring empty: return what we have */

		if (!check_indexes(cons, prod)) {
			/* Bogus indexes from the other end: reset the ring. */
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;

		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			/* ring was full; the writer may be waiting */
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
204 | ||
/*
 * process_msg - read and dispatch one message from the response ring
 *
 * Progress lives in a function-local static state struct so a partially
 * read message survives across calls: xb_read() may deliver fewer bytes
 * than requested, in which case we return 0 and resume where we left
 * off on the next invocation.  xs_response_mutex is held from the first
 * byte of a message until it has been fully consumed (state.in_msg
 * mirrors "mutex held").
 *
 * Returns 0 for "made progress / nothing to do", -ENOMEM if the body
 * buffer could not be allocated (caller retries), or a negative errno
 * on protocol errors.
 */
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;		/* header being assembled */
		char *body;			/* destination for body bytes */
		union {
			void *alloc;		/* raw allocation for kfree() */
			struct xs_watch_event *watch;
		};
		bool in_msg;	/* mid-message; xs_response_mutex held */
		bool in_hdr;	/* still reading the header */
		unsigned int read;	/* bytes of current part read so far */
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;	/* header still incomplete */
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		/* +1 for the NUL terminator appended after the body. */
		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		/* Watch events carry a struct xs_watch_event header. */
		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}

	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;

	state.read += err;
	if (state.read != state.msg.len)
		return 0;	/* body still incomplete */

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		/* Find the pending request this reply belongs to. */
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			/* Restore the id the caller originally used. */
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			/* requester no longer waiting: drop the request */
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;
	return err;

out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}
329 | ||
/*
 * process_writes - push the oldest queued request onto the request ring
 *
 * Like process_msg(), progress is kept in a function-local static state
 * struct because xb_write() may accept only part of a buffer:
 * state.idx == -1 means the message header is being sent, idx >= 0
 * indexes the request's vec[] array.
 *
 * Returns 0 on success or partial progress, or a negative errno after
 * failing the current request.
 */
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;	/* request in flight */
		int idx;		/* -1: header, else vec[] index */
		unsigned int written;	/* bytes of current part sent */
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		/* Start on the oldest queued request. */
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;	/* ring full: resume later */

		state.idx++;
		state.written = 0;
	}

	/* Fully sent: move the request over to the reply-pending list. */
	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

out:
	mutex_unlock(&xb_write_mutex);

	return 0;

out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}
404 | ||
/* Work predicate for xenbus_thread(): anything to read or to write? */
static int xb_thread_work(void)
{
	if (xb_data_to_read())
		return 1;
	return xb_data_to_write();
}
409 | ||
410 | static int xenbus_thread(void *unused) | |
411 | { | |
412 | int err; | |
413 | ||
414 | while (!kthread_should_stop()) { | |
415 | if (wait_event_interruptible(xb_waitq, xb_thread_work())) | |
416 | continue; | |
417 | ||
418 | err = process_msg(); | |
419 | if (err == -ENOMEM) | |
420 | schedule(); | |
421 | else if (err) | |
422 | pr_warn_ratelimited("error %d while reading message\n", | |
423 | err); | |
424 | ||
425 | err = process_writes(); | |
426 | if (err) | |
427 | pr_warn_ratelimited("error %d while writing message\n", | |
428 | err); | |
429 | } | |
430 | ||
431 | xenbus_task = NULL; | |
4bac07c9 JF |
432 | return 0; |
433 | } | |
434 | ||
/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 *
 * Warns about (and, except under kdump, fixes up) non-quiescent shared
 * rings, binds the xenstore event channel to wake_waiting(), and starts
 * the xenbus thread if it is not already running.  When an irq is
 * already bound (resume path) it is rebound instead.
 *
 * Returns 0 on success or a negative errno.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		/* On success err holds the newly allocated irq number. */
		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}
/*
 * Tear down the event-channel irq.  Zeroing xenbus_irq lets a later
 * xb_init_comms() take the fresh-bind path.
 */
void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}