Commit | Line | Data |
---|---|---|
1107ba88 AZ |
1 | /* |
2 | * Driver giving user-space access to the kernel's xenbus connection | |
3 | * to xenstore. | |
4 | * | |
5 | * Copyright (c) 2005, Christian Limpach | |
6 | * Copyright (c) 2005, Rusty Russell, IBM Corporation | |
7 | * | |
8 | * This program is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU General Public License version 2 | |
10 | * as published by the Free Software Foundation; or, when distributed | |
11 | * separately from the Linux kernel or incorporated into other | |
12 | * software packages, subject to the following license: | |
13 | * | |
14 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
15 | * of this source file (the "Software"), to deal in the Software without | |
16 | * restriction, including without limitation the rights to use, copy, modify, | |
17 | * merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
18 | * and to permit persons to whom the Software is furnished to do so, subject to | |
19 | * the following conditions: | |
20 | * | |
21 | * The above copyright notice and this permission notice shall be included in | |
22 | * all copies or substantial portions of the Software. | |
23 | * | |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
25 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
26 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
27 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
28 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
29 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
30 | * IN THE SOFTWARE. | |
31 | * | |
32 | * Changes: | |
33 | * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem | |
34 | * and /proc/xen compatibility mount point. | |
35 | * Turned xenfs into a loadable module. | |
36 | */ | |
37 | ||
38 | #include <linux/kernel.h> | |
39 | #include <linux/errno.h> | |
40 | #include <linux/uio.h> | |
41 | #include <linux/notifier.h> | |
42 | #include <linux/wait.h> | |
43 | #include <linux/fs.h> | |
44 | #include <linux/poll.h> | |
45 | #include <linux/mutex.h> | |
46 | #include <linux/spinlock.h> | |
47 | #include <linux/mount.h> | |
48 | #include <linux/pagemap.h> | |
49 | #include <linux/uaccess.h> | |
50 | #include <linux/init.h> | |
51 | #include <linux/namei.h> | |
52 | #include <linux/string.h> | |
53 | ||
54 | #include "xenfs.h" | |
55 | #include "../xenbus/xenbus_comms.h" | |
56 | ||
57 | #include <xen/xenbus.h> | |
58 | #include <asm/xen/hypervisor.h> | |
59 | ||
60 | /* | |
61 | * An element of a list of outstanding transactions, for which we're | |
62 | * still waiting a reply. | |
63 | */ | |
64 | struct xenbus_transaction_holder { | |
65 | struct list_head list; | |
66 | struct xenbus_transaction handle; | |
67 | }; | |
68 | ||
69 | /* | |
70 | * A buffer of data on the queue. | |
71 | */ | |
72 | struct read_buffer { | |
73 | struct list_head list; | |
74 | unsigned int cons; | |
75 | unsigned int len; | |
76 | char msg[]; | |
77 | }; | |
78 | ||
79 | struct xenbus_file_priv { | |
80 | /* | |
81 | * msgbuffer_mutex is held while partial requests are built up | |
82 | * and complete requests are acted on. It therefore protects | |
83 | * the "transactions" and "watches" lists, and the partial | |
84 | * request length and buffer. | |
85 | * | |
86 | * reply_mutex protects the reply being built up to return to | |
87 | * usermode. It nests inside msgbuffer_mutex but may be held | |
88 | * alone during a watch callback. | |
89 | */ | |
90 | struct mutex msgbuffer_mutex; | |
91 | ||
92 | /* In-progress transactions */ | |
93 | struct list_head transactions; | |
94 | ||
95 | /* Active watches. */ | |
96 | struct list_head watches; | |
97 | ||
98 | /* Partial request. */ | |
99 | unsigned int len; | |
100 | union { | |
101 | struct xsd_sockmsg msg; | |
102 | char buffer[PAGE_SIZE]; | |
103 | } u; | |
104 | ||
105 | /* Response queue. */ | |
106 | struct mutex reply_mutex; | |
107 | struct list_head read_buffers; | |
108 | wait_queue_head_t read_waitq; | |
109 | ||
110 | }; | |
111 | ||
112 | /* Read out any raw xenbus messages queued up. */ | |
113 | static ssize_t xenbus_file_read(struct file *filp, | |
114 | char __user *ubuf, | |
115 | size_t len, loff_t *ppos) | |
116 | { | |
117 | struct xenbus_file_priv *u = filp->private_data; | |
118 | struct read_buffer *rb; | |
119 | unsigned i; | |
120 | int ret; | |
121 | ||
122 | mutex_lock(&u->reply_mutex); | |
7808121b | 123 | again: |
1107ba88 AZ |
124 | while (list_empty(&u->read_buffers)) { |
125 | mutex_unlock(&u->reply_mutex); | |
6280f190 PB |
126 | if (filp->f_flags & O_NONBLOCK) |
127 | return -EAGAIN; | |
128 | ||
1107ba88 AZ |
129 | ret = wait_event_interruptible(u->read_waitq, |
130 | !list_empty(&u->read_buffers)); | |
131 | if (ret) | |
132 | return ret; | |
133 | mutex_lock(&u->reply_mutex); | |
134 | } | |
135 | ||
136 | rb = list_entry(u->read_buffers.next, struct read_buffer, list); | |
137 | i = 0; | |
138 | while (i < len) { | |
139 | unsigned sz = min((unsigned)len - i, rb->len - rb->cons); | |
140 | ||
141 | ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); | |
142 | ||
143 | i += sz - ret; | |
144 | rb->cons += sz - ret; | |
145 | ||
fb27cfbc | 146 | if (ret != 0) { |
1107ba88 AZ |
147 | if (i == 0) |
148 | i = -EFAULT; | |
149 | goto out; | |
150 | } | |
151 | ||
152 | /* Clear out buffer if it has been consumed */ | |
153 | if (rb->cons == rb->len) { | |
154 | list_del(&rb->list); | |
155 | kfree(rb); | |
156 | if (list_empty(&u->read_buffers)) | |
157 | break; | |
158 | rb = list_entry(u->read_buffers.next, | |
159 | struct read_buffer, list); | |
160 | } | |
161 | } | |
7808121b DDG |
162 | if (i == 0) |
163 | goto again; | |
1107ba88 AZ |
164 | |
165 | out: | |
166 | mutex_unlock(&u->reply_mutex); | |
167 | return i; | |
168 | } | |
169 | ||
170 | /* | |
171 | * Add a buffer to the queue. Caller must hold the appropriate lock | |
172 | * if the queue is not local. (Commonly the caller will build up | |
173 | * multiple queued buffers on a temporary local list, and then add it | |
174 | * to the appropriate list under lock once all the buffers have een | |
175 | * successfully allocated.) | |
176 | */ | |
177 | static int queue_reply(struct list_head *queue, const void *data, size_t len) | |
178 | { | |
179 | struct read_buffer *rb; | |
180 | ||
181 | if (len == 0) | |
182 | return 0; | |
183 | ||
184 | rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); | |
185 | if (rb == NULL) | |
186 | return -ENOMEM; | |
187 | ||
188 | rb->cons = 0; | |
189 | rb->len = len; | |
190 | ||
191 | memcpy(rb->msg, data, len); | |
192 | ||
193 | list_add_tail(&rb->list, queue); | |
194 | return 0; | |
195 | } | |
196 | ||
197 | /* | |
198 | * Free all the read_buffer s on a list. | |
199 | * Caller must have sole reference to list. | |
200 | */ | |
201 | static void queue_cleanup(struct list_head *list) | |
202 | { | |
203 | struct read_buffer *rb; | |
204 | ||
205 | while (!list_empty(list)) { | |
206 | rb = list_entry(list->next, struct read_buffer, list); | |
207 | list_del(list->next); | |
208 | kfree(rb); | |
209 | } | |
210 | } | |
211 | ||
212 | struct watch_adapter { | |
213 | struct list_head list; | |
214 | struct xenbus_watch watch; | |
215 | struct xenbus_file_priv *dev_data; | |
216 | char *token; | |
217 | }; | |
218 | ||
219 | static void free_watch_adapter(struct watch_adapter *watch) | |
220 | { | |
221 | kfree(watch->watch.node); | |
222 | kfree(watch->token); | |
223 | kfree(watch); | |
224 | } | |
225 | ||
226 | static struct watch_adapter *alloc_watch_adapter(const char *path, | |
227 | const char *token) | |
228 | { | |
229 | struct watch_adapter *watch; | |
230 | ||
231 | watch = kzalloc(sizeof(*watch), GFP_KERNEL); | |
232 | if (watch == NULL) | |
233 | goto out_fail; | |
234 | ||
235 | watch->watch.node = kstrdup(path, GFP_KERNEL); | |
236 | if (watch->watch.node == NULL) | |
237 | goto out_free; | |
238 | ||
239 | watch->token = kstrdup(token, GFP_KERNEL); | |
240 | if (watch->token == NULL) | |
241 | goto out_free; | |
242 | ||
243 | return watch; | |
244 | ||
245 | out_free: | |
246 | free_watch_adapter(watch); | |
247 | ||
248 | out_fail: | |
249 | return NULL; | |
250 | } | |
251 | ||
252 | static void watch_fired(struct xenbus_watch *watch, | |
253 | const char **vec, | |
254 | unsigned int len) | |
255 | { | |
256 | struct watch_adapter *adap; | |
257 | struct xsd_sockmsg hdr; | |
258 | const char *path, *token; | |
259 | int path_len, tok_len, body_len, data_len = 0; | |
260 | int ret; | |
261 | LIST_HEAD(staging_q); | |
262 | ||
263 | adap = container_of(watch, struct watch_adapter, watch); | |
264 | ||
265 | path = vec[XS_WATCH_PATH]; | |
266 | token = adap->token; | |
267 | ||
268 | path_len = strlen(path) + 1; | |
269 | tok_len = strlen(token) + 1; | |
270 | if (len > 2) | |
271 | data_len = vec[len] - vec[2] + 1; | |
272 | body_len = path_len + tok_len + data_len; | |
273 | ||
274 | hdr.type = XS_WATCH_EVENT; | |
275 | hdr.len = body_len; | |
276 | ||
277 | mutex_lock(&adap->dev_data->reply_mutex); | |
278 | ||
279 | ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); | |
280 | if (!ret) | |
281 | ret = queue_reply(&staging_q, path, path_len); | |
282 | if (!ret) | |
283 | ret = queue_reply(&staging_q, token, tok_len); | |
284 | if (!ret && len > 2) | |
285 | ret = queue_reply(&staging_q, vec[2], data_len); | |
286 | ||
287 | if (!ret) { | |
288 | /* success: pass reply list onto watcher */ | |
289 | list_splice_tail(&staging_q, &adap->dev_data->read_buffers); | |
290 | wake_up(&adap->dev_data->read_waitq); | |
291 | } else | |
292 | queue_cleanup(&staging_q); | |
293 | ||
294 | mutex_unlock(&adap->dev_data->reply_mutex); | |
295 | } | |
296 | ||
297 | static int xenbus_write_transaction(unsigned msg_type, | |
298 | struct xenbus_file_priv *u) | |
299 | { | |
e88a0faa | 300 | int rc; |
1107ba88 AZ |
301 | void *reply; |
302 | struct xenbus_transaction_holder *trans = NULL; | |
303 | LIST_HEAD(staging_q); | |
304 | ||
305 | if (msg_type == XS_TRANSACTION_START) { | |
306 | trans = kmalloc(sizeof(*trans), GFP_KERNEL); | |
307 | if (!trans) { | |
308 | rc = -ENOMEM; | |
309 | goto out; | |
310 | } | |
311 | } | |
312 | ||
313 | reply = xenbus_dev_request_and_reply(&u->u.msg); | |
314 | if (IS_ERR(reply)) { | |
315 | kfree(trans); | |
316 | rc = PTR_ERR(reply); | |
317 | goto out; | |
318 | } | |
319 | ||
320 | if (msg_type == XS_TRANSACTION_START) { | |
321 | trans->handle.id = simple_strtoul(reply, NULL, 0); | |
322 | ||
323 | list_add(&trans->list, &u->transactions); | |
324 | } else if (msg_type == XS_TRANSACTION_END) { | |
325 | list_for_each_entry(trans, &u->transactions, list) | |
326 | if (trans->handle.id == u->u.msg.tx_id) | |
327 | break; | |
328 | BUG_ON(&trans->list == &u->transactions); | |
329 | list_del(&trans->list); | |
330 | ||
331 | kfree(trans); | |
332 | } | |
333 | ||
334 | mutex_lock(&u->reply_mutex); | |
e88a0faa IC |
335 | rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); |
336 | if (!rc) | |
337 | rc = queue_reply(&staging_q, reply, u->u.msg.len); | |
338 | if (!rc) { | |
1107ba88 AZ |
339 | list_splice_tail(&staging_q, &u->read_buffers); |
340 | wake_up(&u->read_waitq); | |
341 | } else { | |
342 | queue_cleanup(&staging_q); | |
1107ba88 AZ |
343 | } |
344 | mutex_unlock(&u->reply_mutex); | |
345 | ||
346 | kfree(reply); | |
347 | ||
348 | out: | |
349 | return rc; | |
350 | } | |
351 | ||
352 | static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) | |
353 | { | |
354 | struct watch_adapter *watch, *tmp_watch; | |
355 | char *path, *token; | |
356 | int err, rc; | |
357 | LIST_HEAD(staging_q); | |
358 | ||
359 | path = u->u.buffer + sizeof(u->u.msg); | |
360 | token = memchr(path, 0, u->u.msg.len); | |
361 | if (token == NULL) { | |
362 | rc = -EILSEQ; | |
363 | goto out; | |
364 | } | |
365 | token++; | |
366 | ||
367 | if (msg_type == XS_WATCH) { | |
368 | watch = alloc_watch_adapter(path, token); | |
369 | if (watch == NULL) { | |
370 | rc = -ENOMEM; | |
371 | goto out; | |
372 | } | |
373 | ||
374 | watch->watch.callback = watch_fired; | |
375 | watch->dev_data = u; | |
376 | ||
377 | err = register_xenbus_watch(&watch->watch); | |
378 | if (err) { | |
379 | free_watch_adapter(watch); | |
380 | rc = err; | |
381 | goto out; | |
382 | } | |
383 | list_add(&watch->list, &u->watches); | |
384 | } else { | |
385 | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
386 | if (!strcmp(watch->token, token) && | |
387 | !strcmp(watch->watch.node, path)) { | |
388 | unregister_xenbus_watch(&watch->watch); | |
389 | list_del(&watch->list); | |
390 | free_watch_adapter(watch); | |
391 | break; | |
392 | } | |
393 | } | |
394 | } | |
395 | ||
396 | /* Success. Synthesize a reply to say all is OK. */ | |
397 | { | |
398 | struct { | |
399 | struct xsd_sockmsg hdr; | |
400 | char body[3]; | |
401 | } __packed reply = { | |
402 | { | |
403 | .type = msg_type, | |
404 | .len = sizeof(reply.body) | |
405 | }, | |
406 | "OK" | |
407 | }; | |
408 | ||
409 | mutex_lock(&u->reply_mutex); | |
410 | rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); | |
76ce7618 | 411 | wake_up(&u->read_waitq); |
1107ba88 AZ |
412 | mutex_unlock(&u->reply_mutex); |
413 | } | |
414 | ||
415 | out: | |
416 | return rc; | |
417 | } | |
418 | ||
419 | static ssize_t xenbus_file_write(struct file *filp, | |
420 | const char __user *ubuf, | |
421 | size_t len, loff_t *ppos) | |
422 | { | |
423 | struct xenbus_file_priv *u = filp->private_data; | |
424 | uint32_t msg_type; | |
425 | int rc = len; | |
426 | int ret; | |
427 | LIST_HEAD(staging_q); | |
428 | ||
429 | /* | |
430 | * We're expecting usermode to be writing properly formed | |
431 | * xenbus messages. If they write an incomplete message we | |
432 | * buffer it up. Once it is complete, we act on it. | |
433 | */ | |
434 | ||
435 | /* | |
436 | * Make sure concurrent writers can't stomp all over each | |
437 | * other's messages and make a mess of our partial message | |
438 | * buffer. We don't make any attemppt to stop multiple | |
439 | * writers from making a mess of each other's incomplete | |
440 | * messages; we're just trying to guarantee our own internal | |
441 | * consistency and make sure that single writes are handled | |
442 | * atomically. | |
443 | */ | |
444 | mutex_lock(&u->msgbuffer_mutex); | |
445 | ||
446 | /* Get this out of the way early to avoid confusion */ | |
447 | if (len == 0) | |
448 | goto out; | |
449 | ||
450 | /* Can't write a xenbus message larger we can buffer */ | |
451 | if ((len + u->len) > sizeof(u->u.buffer)) { | |
452 | /* On error, dump existing buffer */ | |
453 | u->len = 0; | |
454 | rc = -EINVAL; | |
455 | goto out; | |
456 | } | |
457 | ||
458 | ret = copy_from_user(u->u.buffer + u->len, ubuf, len); | |
459 | ||
fb27cfbc | 460 | if (ret != 0) { |
1107ba88 AZ |
461 | rc = -EFAULT; |
462 | goto out; | |
463 | } | |
464 | ||
465 | /* Deal with a partial copy. */ | |
466 | len -= ret; | |
467 | rc = len; | |
468 | ||
469 | u->len += len; | |
470 | ||
471 | /* Return if we haven't got a full message yet */ | |
472 | if (u->len < sizeof(u->u.msg)) | |
473 | goto out; /* not even the header yet */ | |
474 | ||
475 | /* If we're expecting a message that's larger than we can | |
476 | possibly send, dump what we have and return an error. */ | |
477 | if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { | |
478 | rc = -E2BIG; | |
479 | u->len = 0; | |
480 | goto out; | |
481 | } | |
482 | ||
483 | if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) | |
484 | goto out; /* incomplete data portion */ | |
485 | ||
486 | /* | |
487 | * OK, now we have a complete message. Do something with it. | |
488 | */ | |
489 | ||
490 | msg_type = u->u.msg.type; | |
491 | ||
492 | switch (msg_type) { | |
1107ba88 AZ |
493 | case XS_WATCH: |
494 | case XS_UNWATCH: | |
495 | /* (Un)Ask for some path to be watched for changes */ | |
496 | ret = xenbus_write_watch(msg_type, u); | |
497 | break; | |
498 | ||
499 | default: | |
6d6df2e4 DO |
500 | /* Send out a transaction */ |
501 | ret = xenbus_write_transaction(msg_type, u); | |
1107ba88 AZ |
502 | break; |
503 | } | |
504 | if (ret != 0) | |
505 | rc = ret; | |
506 | ||
507 | /* Buffered message consumed */ | |
508 | u->len = 0; | |
509 | ||
510 | out: | |
511 | mutex_unlock(&u->msgbuffer_mutex); | |
512 | return rc; | |
513 | } | |
514 | ||
515 | static int xenbus_file_open(struct inode *inode, struct file *filp) | |
516 | { | |
517 | struct xenbus_file_priv *u; | |
518 | ||
519 | if (xen_store_evtchn == 0) | |
520 | return -ENOENT; | |
521 | ||
522 | nonseekable_open(inode, filp); | |
523 | ||
524 | u = kzalloc(sizeof(*u), GFP_KERNEL); | |
525 | if (u == NULL) | |
526 | return -ENOMEM; | |
527 | ||
528 | INIT_LIST_HEAD(&u->transactions); | |
529 | INIT_LIST_HEAD(&u->watches); | |
530 | INIT_LIST_HEAD(&u->read_buffers); | |
531 | init_waitqueue_head(&u->read_waitq); | |
532 | ||
533 | mutex_init(&u->reply_mutex); | |
534 | mutex_init(&u->msgbuffer_mutex); | |
535 | ||
536 | filp->private_data = u; | |
537 | ||
538 | return 0; | |
539 | } | |
540 | ||
541 | static int xenbus_file_release(struct inode *inode, struct file *filp) | |
542 | { | |
543 | struct xenbus_file_priv *u = filp->private_data; | |
544 | struct xenbus_transaction_holder *trans, *tmp; | |
545 | struct watch_adapter *watch, *tmp_watch; | |
546 | ||
547 | /* | |
548 | * No need for locking here because there are no other users, | |
549 | * by definition. | |
550 | */ | |
551 | ||
552 | list_for_each_entry_safe(trans, tmp, &u->transactions, list) { | |
553 | xenbus_transaction_end(trans->handle, 1); | |
554 | list_del(&trans->list); | |
555 | kfree(trans); | |
556 | } | |
557 | ||
558 | list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { | |
559 | unregister_xenbus_watch(&watch->watch); | |
560 | list_del(&watch->list); | |
561 | free_watch_adapter(watch); | |
562 | } | |
563 | ||
564 | kfree(u); | |
565 | ||
566 | return 0; | |
567 | } | |
568 | ||
569 | static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) | |
570 | { | |
571 | struct xenbus_file_priv *u = file->private_data; | |
572 | ||
573 | poll_wait(file, &u->read_waitq, wait); | |
574 | if (!list_empty(&u->read_buffers)) | |
575 | return POLLIN | POLLRDNORM; | |
576 | return 0; | |
577 | } | |
578 | ||
579 | const struct file_operations xenbus_file_ops = { | |
580 | .read = xenbus_file_read, | |
581 | .write = xenbus_file_write, | |
582 | .open = xenbus_file_open, | |
583 | .release = xenbus_file_release, | |
584 | .poll = xenbus_file_poll, | |
585 | }; |