Merge tag 'mm-hotfixes-stable-2025-07-11-16-16' of git://git.kernel.org/pub/scm/linux...
[linux-block.git] / arch / um / drivers / ubd_kern.c
... / ...
CommitLineData
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Cambridge Greys Ltd
4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
22#define UBD_SHIFT 4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
27#include <linux/blk-mq.h>
28#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/major.h>
31#include <linux/cdrom.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/ctype.h>
35#include <linux/slab.h>
36#include <linux/vmalloc.h>
37#include <linux/platform_device.h>
38#include <linux/scatterlist.h>
39#include <kern_util.h>
40#include "mconsole_kern.h"
41#include <init.h>
42#include <irq_kern.h>
43#include "ubd.h"
44#include <os.h>
45#include "cow.h"
46
47/* Max request size, in sectors: one bit per sector of the unsigned long
 * sector mask in struct io_desc (64 sectors, i.e. 32K, when long is 64 bits).
 */
48#define UBD_MAX_REQUEST (8 * sizeof(long))
49
/* One contiguous piece of a request as described to the I/O thread. */
struct io_desc {
	/* data buffer; NULL for discard and write-zeroes requests */
	char *buffer;
	/* length of this piece in bytes */
	unsigned long length;
	/* one bit per sector of this piece, filled in for COW devices */
	unsigned long sector_mask;
	/* byte offset of bitmap_words in the COW file, -1 when unused */
	unsigned long long cow_offset;
	/* copy of the two COW bitmap words found at cow_offset */
	unsigned long bitmap_words[2];
};
57
58struct io_thread_req {
59 struct request *req;
60 int fds[2];
61 unsigned long offsets[2];
62 unsigned long long offset;
63 int sectorsize;
64 int error;
65
66 int desc_cnt;
67 /* io_desc has to be the last element of the struct */
68 struct io_desc io_desc[];
69};
70
71
/* Completion side: array of request pointers read back in ubd_intr(), plus
 * the bytes of a request pointer that was only partially read (the pointer
 * variable itself is used as byte storage by bulk_req_safe_read()).
 */
72static struct io_thread_req * (*irq_req_buffer)[];
73static struct io_thread_req *irq_remainder;
74static int irq_remainder_size;
75
/* Second buffer/remainder triple, allocated in ubd_init(); its reader is
 * not in this part of the file.
 */
76static struct io_thread_req * (*io_req_buffer)[];
77static struct io_thread_req *io_remainder;
78static int io_remainder_size;
79
80
81
/* Return 1 if bit number @bit is set in the byte array @data, 0 otherwise. */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
92
/* Set bit number @bit in the byte array @data. */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
103/*End stuff from ubd_user.h*/
104
/* Name shared by the platform driver and the per-device platform devices */
105#define DRIVER_NAME "uml-blkdev"
106
/* Guards ubd_devs[] */
107static DEFINE_MUTEX(ubd_lock);
108
/* Defined beyond this part of the file; referenced by ubd_blops below */
109static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
110 unsigned int cmd, unsigned long arg);
111static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
112
/* Number of entries in ubd_devs[] */
113#define MAX_DEV (16)
114
/* Block device operations installed on every ubd disk in ubd_add() */
115static const struct block_device_operations ubd_blops = {
116 .owner = THIS_MODULE,
117 .ioctl = ubd_ioctl,
118 .compat_ioctl = blkdev_compat_ptr_ioctl,
119 .getgeo = ubd_getgeo,
120};
121
/* Default open flags; the two variants differ only in .s, which is set
 * when CONFIG_BLK_DEV_UBD_SYNC is enabled.
 */
122#ifdef CONFIG_BLK_DEV_UBD_SYNC
123#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124 .cl = 1 })
125#else
126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127 .cl = 1 })
128#endif
/* Starting flags for every device; "ubd=sync" updates it via of_sync() */
129static struct openflags global_openflags = OPEN_FLAGS;
130
/* Backing-file state of a device that is served through a COW file. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* in-memory copy of the COW bitmap */
	unsigned long bitmap_len;	/* size of the bitmap in bytes */
	int bitmap_offset;		/* where the bitmap starts in the COW file */
	int data_offset;		/* offset applied to data I/O on the COW file */
};
141
142#define MAX_SG 64
143
144struct ubd {
145 /* name (and fd, below) of the file opened for writing, either the
146 * backing or the cow file. */
147 char *file;
148 char *serial;
149 int fd;
150 __u64 size;
151 struct openflags boot_openflags;
152 struct openflags openflags;
153 unsigned shared:1;
154 unsigned no_cow:1;
155 unsigned no_trim:1;
156 struct cow cow;
157 struct platform_device pdev;
158 struct gendisk *disk;
159 struct blk_mq_tag_set tag_set;
160 spinlock_t lock;
161};
162
/* Initializer for a struct cow with no backing file attached */
#define DEFAULT_COW {				\
	.file		= NULL,			\
	.fd		= -1,			\
	.bitmap		= NULL,			\
	.bitmap_offset	= 0,			\
	.data_offset	= 0,			\
}

/* Initializer for an unconfigured struct ubd slot (no file, nothing open) */
#define DEFAULT_UBD {						\
	.file		= NULL,					\
	.serial		= NULL,					\
	.fd		= -1,					\
	.size		= -1,					\
	.boot_openflags	= OPEN_FLAGS,				\
	.openflags	= OPEN_FLAGS,				\
	.no_cow		= 0,					\
	.no_trim	= 0,					\
	.shared		= 0,					\
	.cow		= DEFAULT_COW,				\
	.lock		= __SPIN_LOCK_UNLOCKED(ubd_devs.lock),	\
}
184
185/* Device table, indexed by unit number; every slot starts out as
 * DEFAULT_UBD. Protected by ubd_lock */
186static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
187
/* Forward declaration: referenced by ubd_mq_ops before its definition */
188static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
189 const struct blk_mq_queue_data *bd);
190
191static int fake_ide_setup(char *str)
192{
193 pr_warn("The fake_ide option has been removed\n");
194 return 1;
195}
196__setup("fake_ide", fake_ide_setup);
197
198__uml_help(fake_ide_setup,
199"fake_ide\n"
200" Obsolete stub.\n\n"
201);
202
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'z' (a == 0).
 * On success *ptr is advanced past the designator and the unit is returned;
 * otherwise -1 is returned and *ptr is left alone.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *after;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &after, 0);
		if (after == cur)
			return -1;
		*ptr = after;
		return unit;
	}

	if (*cur < 'a' || *cur > 'z')
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
221
222/* If *index_out == -1 at exit, the passed option was a general one;
223 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
224 * should not be freed on exit.
225 */
226static int ubd_setup_common(char *str, int *index_out, char **error_out)
227{
228 struct ubd *ubd_dev;
229 struct openflags flags = global_openflags;
230 char *file, *backing_file, *serial;
231 int n, err = 0, i;
232
233 if(index_out) *index_out = -1;
234 n = *str;
235 if(n == '='){
236 str++;
237 if(!strcmp(str, "sync")){
238 global_openflags = of_sync(global_openflags);
239 return err;
240 }
241
242 pr_warn("fake major not supported any more\n");
243 return 0;
244 }
245
246 n = parse_unit(&str);
247 if(n < 0){
248 *error_out = "Couldn't parse device number";
249 return -EINVAL;
250 }
251 if(n >= MAX_DEV){
252 *error_out = "Device number out of range";
253 return 1;
254 }
255
256 err = -EBUSY;
257 mutex_lock(&ubd_lock);
258
259 ubd_dev = &ubd_devs[n];
260 if(ubd_dev->file != NULL){
261 *error_out = "Device is already configured";
262 goto out;
263 }
264
265 if (index_out)
266 *index_out = n;
267
268 err = -EINVAL;
269 for (i = 0; i < sizeof("rscdt="); i++) {
270 switch (*str) {
271 case 'r':
272 flags.w = 0;
273 break;
274 case 's':
275 flags.s = 1;
276 break;
277 case 'd':
278 ubd_dev->no_cow = 1;
279 break;
280 case 'c':
281 ubd_dev->shared = 1;
282 break;
283 case 't':
284 ubd_dev->no_trim = 1;
285 break;
286 case '=':
287 str++;
288 goto break_loop;
289 default:
290 *error_out = "Expected '=' or flag letter "
291 "(r, s, c, t or d)";
292 goto out;
293 }
294 str++;
295 }
296
297 if (*str == '=')
298 *error_out = "Too many flags specified";
299 else
300 *error_out = "Missing '='";
301 goto out;
302
303break_loop:
304 file = strsep(&str, ",:");
305 if (*file == '\0')
306 file = NULL;
307
308 backing_file = strsep(&str, ",:");
309 if (backing_file && *backing_file == '\0')
310 backing_file = NULL;
311
312 serial = strsep(&str, ",:");
313 if (serial && *serial == '\0')
314 serial = NULL;
315
316 if (backing_file && ubd_dev->no_cow) {
317 *error_out = "Can't specify both 'd' and a cow file";
318 goto out;
319 }
320
321 err = 0;
322 ubd_dev->file = file;
323 ubd_dev->cow.file = backing_file;
324 ubd_dev->serial = serial;
325 ubd_dev->boot_openflags = flags;
326out:
327 mutex_unlock(&ubd_lock);
328 return err;
329}
330
331static int ubd_setup(char *str)
332{
333 char *error;
334 int err;
335
336 err = ubd_setup_common(str, NULL, &error);
337 if(err)
338 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
339 "%s\n", str, error);
340 return 1;
341}
342
/* Hook ubd_setup() up to the "ubd" option and provide its help text.
 * NOTE(review): the text below says <n> must be in the range 0 to 7, while
 * ubd_setup_common() accepts units below MAX_DEV (16) - confirm and update.
 * NOTE(review): "specifying a label" seems to refer to the serial parameter.
 */
343__setup("ubd", ubd_setup);
344__uml_help(ubd_setup,
345"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n"
346" This is used to associate a device with a file in the underlying\n"
347" filesystem. When specifying two filenames, the first one is the\n"
348" COW name and the second is the backing file name. As separator you can\n"
349" use either a ':' or a ',': the first one allows writing things like;\n"
350" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
351" while with a ',' the shell would not expand the 2nd '~'.\n"
352" When using only one filename, UML will detect whether to treat it like\n"
353" a COW file or a backing file. To override this detection, add the 'd'\n"
354" flag:\n"
355" ubd0d=BackingFile\n"
356" Usually, there is a filesystem in the file, but \n"
357" that's not required. Swap devices containing swap files can be\n"
358" specified like this. Also, a file which doesn't contain a\n"
359" filesystem can have its contents read in the virtual \n"
360" machine by running 'dd' on the device. <n> must be in the range\n"
361" 0 to 7. Appending an 'r' to the number will cause that device\n"
362" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
363" an 's' will cause data to be written to disk on the host immediately.\n"
364" 'c' will cause the device to be treated as being shared between multiple\n"
365" UMLs and file locking will be turned off - this is appropriate for a\n"
366" cluster filesystem and inappropriate at almost all other times.\n\n"
367" 't' will disable trim/discard support on the device (enabled by default).\n\n"
368" An optional device serial number can be exposed using the serial parameter\n"
369" on the cmdline which is exposed as a sysfs entry. This is particularly\n"
370" useful when a unique number should be given to the device. Note when\n"
371" specifying a label, the filename2 must be also presented. It can be\n"
372" an empty string, in which case the backing file is not used:\n"
373" ubd0=File,,Serial\n"
374);
375
376static int udb_setup(char *str)
377{
378 printk("udb%s specified on command line is almost certainly a ubd -> "
379 "udb TYPO\n", str);
380 return 1;
381}
382
383__setup("udb", udb_setup);
384__uml_help(udb_setup,
385"udb\n"
386" This option is here solely to catch ubd -> udb typos, which can be\n"
387" to impossible to catch visually unless you specifically look for\n"
388" them. The only result of any option starting with 'udb' is an error\n"
389" in the boot output.\n\n"
390);
391
392/* File descriptor used to exchange request pointers with the I/O thread.
 * Filled in by start_io_thread(), called from ubd_driver_init(), which is
 * an initcall. */
393static int thread_fd = -1;
394
395/* Function to read several request pointers at a time
396* handling fractional reads if (and as) needed
397*/
398
399static int bulk_req_safe_read(
400 int fd,
401 struct io_thread_req * (*request_buffer)[],
402 struct io_thread_req **remainder,
403 int *remainder_size,
404 int max_recs
405 )
406{
407 int n = 0;
408 int res = 0;
409
410 if (*remainder_size > 0) {
411 memmove(
412 (char *) request_buffer,
413 (char *) remainder, *remainder_size
414 );
415 n = *remainder_size;
416 }
417
418 res = os_read_file(
419 fd,
420 ((char *) request_buffer) + *remainder_size,
421 sizeof(struct io_thread_req *)*max_recs
422 - *remainder_size
423 );
424 if (res > 0) {
425 n += res;
426 if ((n % sizeof(struct io_thread_req *)) > 0) {
427 /*
428 * Read somehow returned not a multiple of dword
429 * theoretically possible, but never observed in the
430 * wild, so read routine must be able to handle it
431 */
432 *remainder_size = n % sizeof(struct io_thread_req *);
433 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
434 memmove(
435 remainder,
436 ((char *) request_buffer) +
437 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
438 *remainder_size
439 );
440 n = n - *remainder_size;
441 }
442 } else {
443 n = res;
444 }
445 return n;
446}
447
448static void ubd_end_request(struct io_thread_req *io_req)
449{
450 if (io_req->error == BLK_STS_NOTSUPP) {
451 if (req_op(io_req->req) == REQ_OP_DISCARD)
452 blk_queue_disable_discard(io_req->req->q);
453 else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES)
454 blk_queue_disable_write_zeroes(io_req->req->q);
455 }
456 blk_mq_end_request(io_req->req, io_req->error);
457 kfree(io_req);
458}
459
460static irqreturn_t ubd_intr(int irq, void *dev)
461{
462 int len, i;
463
464 while ((len = bulk_req_safe_read(thread_fd, irq_req_buffer,
465 &irq_remainder, &irq_remainder_size,
466 UBD_REQ_BUFFER_SIZE)) >= 0) {
467 for (i = 0; i < len / sizeof(struct io_thread_req *); i++)
468 ubd_end_request((*irq_req_buffer)[i]);
469 }
470
471 if (len < 0 && len != -EAGAIN)
472 pr_err("spurious interrupt in %s, err = %d\n", __func__, len);
473 return IRQ_HANDLED;
474}
475
/* Handle of the I/O helper thread; filled in by start_io_thread(), which is
 * called from the ubd_driver_init() initcall. */
static struct os_helper_thread *io_td;

/* Exit hook: stop the I/O helper thread if one was started. */
static void kill_io_thread(void)
{
	if (!io_td)
		return;

	os_kill_helper_thread(io_td);
}

__uml_exitcall(kill_io_thread);
486
487static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
488{
489 char *file;
490 int fd;
491 int err;
492
493 __u32 version;
494 __u32 align;
495 char *backing_file;
496 time64_t mtime;
497 unsigned long long size;
498 int sector_size;
499 int bitmap_offset;
500
501 if (ubd_dev->file && ubd_dev->cow.file) {
502 file = ubd_dev->cow.file;
503
504 goto out;
505 }
506
507 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
508 if (fd < 0)
509 return fd;
510
511 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
512 &mtime, &size, &sector_size, &align, &bitmap_offset);
513 os_close_file(fd);
514
515 if(err == -EINVAL)
516 file = ubd_dev->file;
517 else
518 file = backing_file;
519
520out:
521 return os_file_size(file, size_out);
522}
523
/*
 * Read @len bytes of COW bitmap from @fd at @offset into @buf.
 * Returns 0 on success or the negative value from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int n = os_pread_file(fd, buf, len, offset);

	return (n < 0) ? n : 0;
}
534
535static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
536{
537 time64_t modtime;
538 unsigned long long actual;
539 int err;
540
541 err = os_file_modtime(file, &modtime);
542 if (err < 0) {
543 printk(KERN_ERR "Failed to get modification time of backing "
544 "file \"%s\", err = %d\n", file, -err);
545 return err;
546 }
547
548 err = os_file_size(file, &actual);
549 if (err < 0) {
550 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
551 "err = %d\n", file, -err);
552 return err;
553 }
554
555 if (actual != size) {
556 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
557 * the typecast.*/
558 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
559 "vs backing file\n", (unsigned long long) size, actual);
560 return -EINVAL;
561 }
562 if (modtime != mtime) {
563 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
564 "backing file\n", mtime, modtime);
565 return -EINVAL;
566 }
567 return 0;
568}
569
570static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
571{
572 struct uml_stat buf1, buf2;
573 int err;
574
575 if (from_cmdline == NULL)
576 return 0;
577 if (!strcmp(from_cmdline, from_cow))
578 return 0;
579
580 err = os_stat_file(from_cmdline, &buf1);
581 if (err < 0) {
582 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
583 -err);
584 return 0;
585 }
586 err = os_stat_file(from_cow, &buf2);
587 if (err < 0) {
588 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
589 -err);
590 return 1;
591 }
592 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
593 return 0;
594
595 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
596 "\"%s\" specified in COW header of \"%s\"\n",
597 from_cmdline, from_cow, cow);
598 return 1;
599}
600
601static int open_ubd_file(char *file, struct openflags *openflags, int shared,
602 char **backing_file_out, int *bitmap_offset_out,
603 unsigned long *bitmap_len_out, int *data_offset_out,
604 int *create_cow_out)
605{
606 time64_t mtime;
607 unsigned long long size;
608 __u32 version, align;
609 char *backing_file;
610 int fd, err, sectorsize, asked_switch, mode = 0644;
611
612 fd = os_open_file(file, *openflags, mode);
613 if (fd < 0) {
614 if ((fd == -ENOENT) && (create_cow_out != NULL))
615 *create_cow_out = 1;
616 if (!openflags->w ||
617 ((fd != -EROFS) && (fd != -EACCES)))
618 return fd;
619 openflags->w = 0;
620 fd = os_open_file(file, *openflags, mode);
621 if (fd < 0)
622 return fd;
623 }
624
625 if (shared)
626 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
627 else {
628 err = os_lock_file(fd, openflags->w);
629 if (err < 0) {
630 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
631 file, -err);
632 goto out_close;
633 }
634 }
635
636 /* Successful return case! */
637 if (backing_file_out == NULL)
638 return fd;
639
640 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
641 &size, &sectorsize, &align, bitmap_offset_out);
642 if (err && (*backing_file_out != NULL)) {
643 printk(KERN_ERR "Failed to read COW header from COW file "
644 "\"%s\", errno = %d\n", file, -err);
645 goto out_close;
646 }
647 if (err)
648 return fd;
649
650 asked_switch = path_requires_switch(*backing_file_out, backing_file,
651 file);
652
653 /* Allow switching only if no mismatch. */
654 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
655 mtime)) {
656 printk(KERN_ERR "Switching backing file to '%s'\n",
657 *backing_file_out);
658 err = write_cow_header(file, fd, *backing_file_out,
659 sectorsize, align, &size);
660 if (err) {
661 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
662 goto out_close;
663 }
664 } else {
665 *backing_file_out = backing_file;
666 err = backing_file_mismatch(*backing_file_out, size, mtime);
667 if (err)
668 goto out_close;
669 }
670
671 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
672 bitmap_len_out, data_offset_out);
673
674 return fd;
675 out_close:
676 os_close_file(fd);
677 return err;
678}
679
680static int create_cow_file(char *cow_file, char *backing_file,
681 struct openflags flags,
682 int sectorsize, int alignment, int *bitmap_offset_out,
683 unsigned long *bitmap_len_out, int *data_offset_out)
684{
685 int err, fd;
686
687 flags.c = 1;
688 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
689 if (fd < 0) {
690 err = fd;
691 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
692 cow_file, -err);
693 goto out;
694 }
695
696 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
697 bitmap_offset_out, bitmap_len_out,
698 data_offset_out);
699 if (!err)
700 return fd;
701 os_close_file(fd);
702 out:
703 return err;
704}
705
706static void ubd_close_dev(struct ubd *ubd_dev)
707{
708 os_close_file(ubd_dev->fd);
709 if(ubd_dev->cow.file == NULL)
710 return;
711
712 os_close_file(ubd_dev->cow.fd);
713 vfree(ubd_dev->cow.bitmap);
714 ubd_dev->cow.bitmap = NULL;
715}
716
717static int ubd_open_dev(struct ubd *ubd_dev)
718{
719 struct openflags flags;
720 char **back_ptr;
721 int err, create_cow, *create_ptr;
722 int fd;
723
724 ubd_dev->openflags = ubd_dev->boot_openflags;
725 create_cow = 0;
726 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
727 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
728
729 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
730 back_ptr, &ubd_dev->cow.bitmap_offset,
731 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
732 create_ptr);
733
734 if((fd == -ENOENT) && create_cow){
735 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
736 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
737 &ubd_dev->cow.bitmap_offset,
738 &ubd_dev->cow.bitmap_len,
739 &ubd_dev->cow.data_offset);
740 if(fd >= 0){
741 printk(KERN_INFO "Creating \"%s\" as COW file for "
742 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
743 }
744 }
745
746 if(fd < 0){
747 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
748 -fd);
749 return fd;
750 }
751 ubd_dev->fd = fd;
752
753 if(ubd_dev->cow.file != NULL){
754 err = -ENOMEM;
755 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
756 if(ubd_dev->cow.bitmap == NULL){
757 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
758 goto error;
759 }
760
761 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
762 ubd_dev->cow.bitmap_offset,
763 ubd_dev->cow.bitmap_len);
764 if(err < 0)
765 goto error;
766
767 flags = ubd_dev->openflags;
768 flags.w = 0;
769 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
770 NULL, NULL, NULL, NULL);
771 if(err < 0) goto error;
772 ubd_dev->cow.fd = err;
773 }
774 return 0;
775 error:
776 os_close_file(ubd_dev->fd);
777 return err;
778}
779
780static void ubd_device_release(struct device *dev)
781{
782 struct ubd *ubd_dev = container_of(dev, struct ubd, pdev.dev);
783
784 blk_mq_free_tag_set(&ubd_dev->tag_set);
785 *ubd_dev = ((struct ubd) DEFAULT_UBD);
786}
787
788static ssize_t serial_show(struct device *dev,
789 struct device_attribute *attr, char *buf)
790{
791 struct gendisk *disk = dev_to_disk(dev);
792 struct ubd *ubd_dev = disk->private_data;
793
794 if (!ubd_dev)
795 return 0;
796
797 return sprintf(buf, "%s", ubd_dev->serial);
798}
799
800static DEVICE_ATTR_RO(serial);
801
802static struct attribute *ubd_attrs[] = {
803 &dev_attr_serial.attr,
804 NULL,
805};
806
807static umode_t ubd_attrs_are_visible(struct kobject *kobj,
808 struct attribute *a, int n)
809{
810 return a->mode;
811}
812
813static const struct attribute_group ubd_attr_group = {
814 .attrs = ubd_attrs,
815 .is_visible = ubd_attrs_are_visible,
816};
817
818static const struct attribute_group *ubd_attr_groups[] = {
819 &ubd_attr_group,
820 NULL,
821};
822
/*
 * Round n up to the next multiple of SECTOR_SIZE (the mask trick relies on
 * SECTOR_SIZE being a power of two).  The argument is parenthesized so that
 * expressions such as "a ? b : c" expand correctly.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
824
/* blk-mq operations installed on each device's tag set in ubd_add() */
825static const struct blk_mq_ops ubd_mq_ops = {
826 .queue_rq = ubd_queue_rq,
827};
828
829static int ubd_add(int n, char **error_out)
830{
831 struct ubd *ubd_dev = &ubd_devs[n];
832 struct queue_limits lim = {
833 .max_segments = MAX_SG,
834 .seg_boundary_mask = PAGE_SIZE - 1,
835 .features = BLK_FEAT_WRITE_CACHE,
836 };
837 struct gendisk *disk;
838 int err = 0;
839
840 if(ubd_dev->file == NULL)
841 goto out;
842
843 if (ubd_dev->cow.file)
844 lim.max_hw_sectors = 8 * sizeof(long);
845 if (!ubd_dev->no_trim) {
846 lim.max_hw_discard_sectors = UBD_MAX_REQUEST;
847 lim.max_write_zeroes_sectors = UBD_MAX_REQUEST;
848 }
849
850 err = ubd_file_size(ubd_dev, &ubd_dev->size);
851 if(err < 0){
852 *error_out = "Couldn't determine size of device's file";
853 goto out;
854 }
855
856 err = ubd_open_dev(ubd_dev);
857 if (err) {
858 pr_err("ubd%c: Can't open \"%s\": errno = %d\n",
859 'a' + n, ubd_dev->file, -err);
860 goto out;
861 }
862
863 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
864
865 ubd_dev->tag_set.ops = &ubd_mq_ops;
866 ubd_dev->tag_set.queue_depth = 64;
867 ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
868 ubd_dev->tag_set.driver_data = ubd_dev;
869 ubd_dev->tag_set.nr_hw_queues = 1;
870
871 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
872 if (err)
873 goto out_close;
874
875 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, &lim, ubd_dev);
876 if (IS_ERR(disk)) {
877 err = PTR_ERR(disk);
878 goto out_cleanup_tags;
879 }
880
881 disk->major = UBD_MAJOR;
882 disk->first_minor = n << UBD_SHIFT;
883 disk->minors = 1 << UBD_SHIFT;
884 disk->fops = &ubd_blops;
885 set_capacity(disk, ubd_dev->size / 512);
886 sprintf(disk->disk_name, "ubd%c", 'a' + n);
887 disk->private_data = ubd_dev;
888 set_disk_ro(disk, !ubd_dev->openflags.w);
889
890 ubd_dev->pdev.id = n;
891 ubd_dev->pdev.name = DRIVER_NAME;
892 ubd_dev->pdev.dev.release = ubd_device_release;
893 dev_set_drvdata(&ubd_dev->pdev.dev, ubd_dev);
894 platform_device_register(&ubd_dev->pdev);
895
896 err = device_add_disk(&ubd_dev->pdev.dev, disk, ubd_attr_groups);
897 if (err)
898 goto out_cleanup_disk;
899
900 ubd_dev->disk = disk;
901
902 return 0;
903
904out_cleanup_disk:
905 put_disk(disk);
906out_cleanup_tags:
907 blk_mq_free_tag_set(&ubd_dev->tag_set);
908out_close:
909 ubd_close_dev(ubd_dev);
910out:
911 return err;
912}
913
914static int ubd_config(char *str, char **error_out)
915{
916 int n, ret;
917
918 /* This string is possibly broken up and stored, so it's only
919 * freed if ubd_setup_common fails, or if only general options
920 * were set.
921 */
922 str = kstrdup(str, GFP_KERNEL);
923 if (str == NULL) {
924 *error_out = "Failed to allocate memory";
925 return -ENOMEM;
926 }
927
928 ret = ubd_setup_common(str, &n, error_out);
929 if (ret)
930 goto err_free;
931
932 if (n == -1) {
933 ret = 0;
934 goto err_free;
935 }
936
937 mutex_lock(&ubd_lock);
938 ret = ubd_add(n, error_out);
939 if (ret)
940 ubd_devs[n].file = NULL;
941 mutex_unlock(&ubd_lock);
942
943out:
944 return ret;
945
946err_free:
947 kfree(str);
948 goto out;
949}
950
951static int ubd_get_config(char *name, char *str, int size, char **error_out)
952{
953 struct ubd *ubd_dev;
954 int n, len = 0;
955
956 n = parse_unit(&name);
957 if((n >= MAX_DEV) || (n < 0)){
958 *error_out = "ubd_get_config : device number out of range";
959 return -1;
960 }
961
962 ubd_dev = &ubd_devs[n];
963 mutex_lock(&ubd_lock);
964
965 if(ubd_dev->file == NULL){
966 CONFIG_CHUNK(str, size, len, "", 1);
967 goto out;
968 }
969
970 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
971
972 if(ubd_dev->cow.file != NULL){
973 CONFIG_CHUNK(str, size, len, ",", 0);
974 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
975 }
976 else CONFIG_CHUNK(str, size, len, "", 1);
977
978 out:
979 mutex_unlock(&ubd_lock);
980 return len;
981}
982
983static int ubd_id(char **str, int *start_out, int *end_out)
984{
985 int n;
986
987 n = parse_unit(str);
988 *start_out = 0;
989 *end_out = MAX_DEV - 1;
990 return n;
991}
992
993static int ubd_remove(int n, char **error_out)
994{
995 struct ubd *ubd_dev;
996 int err = -ENODEV;
997
998 mutex_lock(&ubd_lock);
999
1000 ubd_dev = &ubd_devs[n];
1001
1002 if(ubd_dev->file == NULL)
1003 goto out;
1004
1005 if (ubd_dev->disk) {
1006 /* you cannot remove a open disk */
1007 err = -EBUSY;
1008 if (disk_openers(ubd_dev->disk))
1009 goto out;
1010
1011 del_gendisk(ubd_dev->disk);
1012 ubd_close_dev(ubd_dev);
1013 put_disk(ubd_dev->disk);
1014 }
1015
1016 err = 0;
1017 platform_device_unregister(&ubd_dev->pdev);
1018out:
1019 mutex_unlock(&ubd_lock);
1020 return err;
1021}
1022
1023/* All these are called by mconsole in process context and without
1024 * ubd-specific locks. The structure itself is const except for .list.
1025 */
1026static struct mc_device ubd_mc = {
1027 .list = LIST_HEAD_INIT(ubd_mc.list),
1028 .name = "ubd",
1029 .config = ubd_config,
1030 .get_config = ubd_get_config,
1031 .id = ubd_id,
1032 .remove = ubd_remove,
1033};
1034
1035static int __init ubd_mc_init(void)
1036{
1037 mconsole_register_dev(&ubd_mc);
1038 return 0;
1039}
1040
1041__initcall(ubd_mc_init);
1042
1043static int __init ubd0_init(void)
1044{
1045 struct ubd *ubd_dev = &ubd_devs[0];
1046
1047 mutex_lock(&ubd_lock);
1048 if(ubd_dev->file == NULL)
1049 ubd_dev->file = "root_fs";
1050 mutex_unlock(&ubd_lock);
1051
1052 return 0;
1053}
1054
1055__initcall(ubd0_init);
1056
1057/* Used in ubd_init, which is an initcall */
1058static struct platform_driver ubd_driver = {
1059 .driver = {
1060 .name = DRIVER_NAME,
1061 },
1062};
1063
1064static int __init ubd_init(void)
1065{
1066 char *error;
1067 int i, err;
1068
1069 if (register_blkdev(UBD_MAJOR, "ubd"))
1070 return -1;
1071
1072 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1073 sizeof(struct io_thread_req *),
1074 GFP_KERNEL
1075 );
1076 irq_remainder = 0;
1077
1078 if (irq_req_buffer == NULL) {
1079 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1080 return -ENOMEM;
1081 }
1082 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1083 sizeof(struct io_thread_req *),
1084 GFP_KERNEL
1085 );
1086
1087 io_remainder = 0;
1088
1089 if (io_req_buffer == NULL) {
1090 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1091 return -ENOMEM;
1092 }
1093 platform_driver_register(&ubd_driver);
1094 mutex_lock(&ubd_lock);
1095 for (i = 0; i < MAX_DEV; i++){
1096 err = ubd_add(i, &error);
1097 if(err)
1098 printk(KERN_ERR "Failed to initialize ubd device %d :"
1099 "%s\n", i, error);
1100 }
1101 mutex_unlock(&ubd_lock);
1102 return 0;
1103}
1104
1105late_initcall(ubd_init);
1106
1107static int __init ubd_driver_init(void)
1108{
1109 int err;
1110
1111 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1112 if(global_openflags.s){
1113 printk(KERN_INFO "ubd: Synchronous mode\n");
1114 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1115 * enough. So use anyway the io thread. */
1116 }
1117 err = start_io_thread(&io_td, &thread_fd);
1118 if (err < 0) {
1119 printk(KERN_ERR
1120 "ubd : Failed to start I/O thread (errno = %d) - "
1121 "falling back to synchronous I/O\n", -err);
1122 return 0;
1123 }
1124 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1125 0, "ubd", ubd_devs);
1126 if(err < 0)
1127 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1128 return 0;
1129}
1130
1131device_initcall(ubd_driver_init);
1132
1133static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1134 __u64 *cow_offset, unsigned long *bitmap,
1135 __u64 bitmap_offset, unsigned long *bitmap_words,
1136 __u64 bitmap_len)
1137{
1138 __u64 sector = io_offset >> SECTOR_SHIFT;
1139 int i, update_bitmap = 0;
1140
1141 for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1142 if(cow_mask != NULL)
1143 ubd_set_bit(i, (unsigned char *) cow_mask);
1144 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1145 continue;
1146
1147 update_bitmap = 1;
1148 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1149 }
1150
1151 if(!update_bitmap)
1152 return;
1153
1154 *cow_offset = sector / (sizeof(unsigned long) * 8);
1155
1156 /* This takes care of the case where we're exactly at the end of the
1157 * device, and *cow_offset + 1 is off the end. So, just back it up
1158 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1159 * for the original diagnosis.
1160 */
1161 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1162 sizeof(unsigned long)) - 1))
1163 (*cow_offset)--;
1164
1165 bitmap_words[0] = bitmap[*cow_offset];
1166 bitmap_words[1] = bitmap[*cow_offset + 1];
1167
1168 *cow_offset *= sizeof(unsigned long);
1169 *cow_offset += bitmap_offset;
1170}
1171
1172static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1173 unsigned long offset, unsigned long *bitmap,
1174 __u64 bitmap_offset, __u64 bitmap_len)
1175{
1176 __u64 sector = offset >> SECTOR_SHIFT;
1177 int i;
1178
1179 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1180 panic("Operation too long");
1181
1182 if (req_op(req->req) == REQ_OP_READ) {
1183 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1184 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1185 ubd_set_bit(i, (unsigned char *)
1186 &segment->sector_mask);
1187 }
1188 } else {
1189 cowify_bitmap(offset, segment->length, &segment->sector_mask,
1190 &segment->cow_offset, bitmap, bitmap_offset,
1191 segment->bitmap_words, bitmap_len);
1192 }
1193}
1194
1195static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1196 struct request *req)
1197{
1198 struct bio_vec bvec;
1199 struct req_iterator iter;
1200 int i = 0;
1201 unsigned long byte_offset = io_req->offset;
1202 enum req_op op = req_op(req);
1203
1204 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1205 io_req->io_desc[0].buffer = NULL;
1206 io_req->io_desc[0].length = blk_rq_bytes(req);
1207 } else {
1208 rq_for_each_segment(bvec, req, iter) {
1209 BUG_ON(i >= io_req->desc_cnt);
1210
1211 io_req->io_desc[i].buffer = bvec_virt(&bvec);
1212 io_req->io_desc[i].length = bvec.bv_len;
1213 i++;
1214 }
1215 }
1216
1217 if (dev->cow.file) {
1218 for (i = 0; i < io_req->desc_cnt; i++) {
1219 cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1220 dev->cow.bitmap, dev->cow.bitmap_offset,
1221 dev->cow.bitmap_len);
1222 byte_offset += io_req->io_desc[i].length;
1223 }
1224
1225 }
1226}
1227
1228static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1229 int desc_cnt)
1230{
1231 struct io_thread_req *io_req;
1232 int i;
1233
1234 io_req = kmalloc(sizeof(*io_req) +
1235 (desc_cnt * sizeof(struct io_desc)),
1236 GFP_ATOMIC);
1237 if (!io_req)
1238 return NULL;
1239
1240 io_req->req = req;
1241 if (dev->cow.file)
1242 io_req->fds[0] = dev->cow.fd;
1243 else
1244 io_req->fds[0] = dev->fd;
1245 io_req->error = 0;
1246 io_req->sectorsize = SECTOR_SIZE;
1247 io_req->fds[1] = dev->fd;
1248 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1249 io_req->offsets[0] = 0;
1250 io_req->offsets[1] = dev->cow.data_offset;
1251
1252 for (i = 0 ; i < desc_cnt; i++) {
1253 io_req->io_desc[i].sector_mask = 0;
1254 io_req->io_desc[i].cow_offset = -1;
1255 }
1256
1257 return io_req;
1258}
1259
1260static int ubd_submit_request(struct ubd *dev, struct request *req)
1261{
1262 int segs = 0;
1263 struct io_thread_req *io_req;
1264 int ret;
1265 enum req_op op = req_op(req);
1266
1267 if (op == REQ_OP_FLUSH)
1268 segs = 0;
1269 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1270 segs = 1;
1271 else
1272 segs = blk_rq_nr_phys_segments(req);
1273
1274 io_req = ubd_alloc_req(dev, req, segs);
1275 if (!io_req)
1276 return -ENOMEM;
1277
1278 io_req->desc_cnt = segs;
1279 if (segs)
1280 ubd_map_req(dev, io_req, req);
1281
1282 ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1283 if (ret != sizeof(io_req)) {
1284 if (ret != -EAGAIN)
1285 pr_err("write to io thread failed: %d\n", -ret);
1286 kfree(io_req);
1287 }
1288 return ret;
1289}
1290
1291static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1292 const struct blk_mq_queue_data *bd)
1293{
1294 struct ubd *ubd_dev = hctx->queue->queuedata;
1295 struct request *req = bd->rq;
1296 int ret = 0, res = BLK_STS_OK;
1297
1298 blk_mq_start_request(req);
1299
1300 spin_lock_irq(&ubd_dev->lock);
1301
1302 switch (req_op(req)) {
1303 case REQ_OP_FLUSH:
1304 case REQ_OP_READ:
1305 case REQ_OP_WRITE:
1306 case REQ_OP_DISCARD:
1307 case REQ_OP_WRITE_ZEROES:
1308 ret = ubd_submit_request(ubd_dev, req);
1309 break;
1310 default:
1311 WARN_ON_ONCE(1);
1312 res = BLK_STS_NOTSUPP;
1313 }
1314
1315 spin_unlock_irq(&ubd_dev->lock);
1316
1317 if (ret < 0) {
1318 if (ret == -ENOMEM)
1319 res = BLK_STS_RESOURCE;
1320 else
1321 res = BLK_STS_DEV_RESOURCE;
1322 }
1323
1324 return res;
1325}
1326
1327static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1328{
1329 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1330
1331 geo->heads = 128;
1332 geo->sectors = 32;
1333 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1334 return 0;
1335}
1336
1337static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode,
1338 unsigned int cmd, unsigned long arg)
1339{
1340 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1341 u16 ubd_id[ATA_ID_WORDS];
1342
1343 switch (cmd) {
1344 struct cdrom_volctrl volume;
1345 case HDIO_GET_IDENTITY:
1346 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1347 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1348 ubd_id[ATA_ID_HEADS] = 128;
1349 ubd_id[ATA_ID_SECTORS] = 32;
1350 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1351 sizeof(ubd_id)))
1352 return -EFAULT;
1353 return 0;
1354
1355 case CDROMVOLREAD:
1356 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1357 return -EFAULT;
1358 volume.channel0 = 255;
1359 volume.channel1 = 255;
1360 volume.channel2 = 255;
1361 volume.channel3 = 255;
1362 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1363 return -EFAULT;
1364 return 0;
1365 }
1366 return -EINVAL;
1367}
1368
1369static int map_error(int error_code)
1370{
1371 switch (error_code) {
1372 case 0:
1373 return BLK_STS_OK;
1374 case ENOSYS:
1375 case EOPNOTSUPP:
1376 return BLK_STS_NOTSUPP;
1377 case ENOSPC:
1378 return BLK_STS_NOSPC;
1379 }
1380 return BLK_STS_IOERR;
1381}
1382
1383/*
1384 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1385 *
1386 * The following functions are part of UML hypervisor code.
1387 * All functions from here onwards are executed as a helper
1388 * thread and are not allowed to execute any kernel functions.
1389 *
1390 * Any communication must occur strictly via shared memory and IPC.
1391 *
1392 * Do not add printks, locks, kernel memory operations, etc - it
1393 * will result in unpredictable behaviour and/or crashes.
1394 */
1395
1396static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1397{
1398 int n;
1399
1400 if (segment->cow_offset == -1)
1401 return map_error(0);
1402
1403 n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1404 sizeof(segment->bitmap_words), segment->cow_offset);
1405 if (n != sizeof(segment->bitmap_words))
1406 return map_error(-n);
1407
1408 return map_error(0);
1409}
1410
1411static void do_io(struct io_thread_req *req, struct io_desc *desc)
1412{
1413 char *buf = NULL;
1414 unsigned long len;
1415 int n, nsectors, start, end, bit;
1416 __u64 off;
1417
1418 /* FLUSH is really a special case, we cannot "case" it with others */
1419
1420 if (req_op(req->req) == REQ_OP_FLUSH) {
1421 /* fds[0] is always either the rw image or our cow file */
1422 req->error = map_error(-os_sync_file(req->fds[0]));
1423 return;
1424 }
1425
1426 nsectors = desc->length / req->sectorsize;
1427 start = 0;
1428 do {
1429 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1430 end = start;
1431 while((end < nsectors) &&
1432 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1433 end++;
1434
1435 off = req->offset + req->offsets[bit] +
1436 start * req->sectorsize;
1437 len = (end - start) * req->sectorsize;
1438 if (desc->buffer != NULL)
1439 buf = &desc->buffer[start * req->sectorsize];
1440
1441 switch (req_op(req->req)) {
1442 case REQ_OP_READ:
1443 n = 0;
1444 do {
1445 buf = &buf[n];
1446 len -= n;
1447 n = os_pread_file(req->fds[bit], buf, len, off);
1448 if (n < 0) {
1449 req->error = map_error(-n);
1450 return;
1451 }
1452 } while((n < len) && (n != 0));
1453 if (n < len) memset(&buf[n], 0, len - n);
1454 break;
1455 case REQ_OP_WRITE:
1456 n = os_pwrite_file(req->fds[bit], buf, len, off);
1457 if(n != len){
1458 req->error = map_error(-n);
1459 return;
1460 }
1461 break;
1462 case REQ_OP_DISCARD:
1463 n = os_falloc_punch(req->fds[bit], off, len);
1464 if (n) {
1465 req->error = map_error(-n);
1466 return;
1467 }
1468 break;
1469 case REQ_OP_WRITE_ZEROES:
1470 n = os_falloc_zeroes(req->fds[bit], off, len);
1471 if (n) {
1472 req->error = map_error(-n);
1473 return;
1474 }
1475 break;
1476 default:
1477 WARN_ON_ONCE(1);
1478 req->error = BLK_STS_NOTSUPP;
1479 return;
1480 }
1481
1482 start = end;
1483 } while(start < nsectors);
1484
1485 req->offset += len;
1486 req->error = update_bitmap(req, desc);
1487}
1488
/*
 * Written in start_io_thread. That function is only called from ubd_init,
 * an initcall, which is what serializes the update.
 */
int kernel_fd = -1;

/* Modified by the io thread only. XXX: currently unused. */
static int io_count;
1496
1497void *io_thread(void *arg)
1498{
1499 int n, count, written, res;
1500
1501 os_fix_helper_thread_signals();
1502
1503 while(1){
1504 n = bulk_req_safe_read(
1505 kernel_fd,
1506 io_req_buffer,
1507 &io_remainder,
1508 &io_remainder_size,
1509 UBD_REQ_BUFFER_SIZE
1510 );
1511 if (n <= 0) {
1512 if (n == -EAGAIN)
1513 ubd_read_poll(-1);
1514
1515 continue;
1516 }
1517
1518 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1519 struct io_thread_req *req = (*io_req_buffer)[count];
1520 int i;
1521
1522 io_count++;
1523 for (i = 0; !req->error && i < req->desc_cnt; i++)
1524 do_io(req, &(req->io_desc[i]));
1525
1526 }
1527
1528 written = 0;
1529
1530 do {
1531 res = os_write_file(kernel_fd,
1532 ((char *) io_req_buffer) + written,
1533 n - written);
1534 if (res >= 0) {
1535 written += res;
1536 }
1537 if (written < n) {
1538 ubd_write_poll(-1);
1539 }
1540 } while (written < n);
1541 }
1542
1543 return NULL;
1544}