treewide: kmalloc() -> kmalloc_array()
[linux-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
f88f0bdf 2 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
1da177e4
LT
3 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
4 * Licensed under the GPL
5 */
6
7/* 2001-09-28...2002-04-17
8 * Partition stuff by James_McMechan@hotmail.com
9 * old style ubd by setting UBD_SHIFT to 0
10 * 2002-09-27...2002-10-18 massive tinkering for 2.5
11 * partitions have changed in 2.5
12 * 2003-01-29 more tinkering for 2.5.59-1
13 * This should now address the sysfs problems and has
14 * the symlink for devfs to allow for booting with
15 * the common /dev/ubd/discX/... names rather than
16 * only /dev/ubdN/discN this version also has lots of
17 * clean ups preparing for ubd-many.
18 * James McMechan
19 */
20
1da177e4
LT
/* Minor-number bits per ubd unit: first_minor is unit << UBD_SHIFT. */
#define UBD_SHIFT 4
22
8ea3c06a
AV
23#include <linux/module.h>
24#include <linux/init.h>
25#include <linux/blkdev.h>
26#include <linux/ata.h>
27#include <linux/hdreg.h>
28#include <linux/cdrom.h>
29#include <linux/proc_fs.h>
30#include <linux/seq_file.h>
31#include <linux/ctype.h>
32#include <linux/slab.h>
33#include <linux/vmalloc.h>
34#include <linux/platform_device.h>
35#include <linux/scatterlist.h>
36#include <asm/tlbflush.h>
37185b33 37#include <kern_util.h>
1da177e4 38#include "mconsole_kern.h"
37185b33
AV
39#include <init.h>
40#include <irq_kern.h>
8ea3c06a 41#include "ubd.h"
37185b33 42#include <os.h>
1da177e4
LT
43#include "cow.h"
44
/* Operation type stored in io_thread_req.op. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE,
	UBD_FLUSH
};
1da177e4
LT
46
47struct io_thread_req {
62f96cb0 48 struct request *req;
91acb21f 49 enum ubd_req op;
1da177e4
LT
50 int fds[2];
51 unsigned long offsets[2];
52 unsigned long long offset;
53 unsigned long length;
54 char *buffer;
55 int sectorsize;
91acb21f
JD
56 unsigned long sector_mask;
57 unsigned long long cow_offset;
58 unsigned long bitmap_words[2];
1da177e4
LT
59 int error;
60};
61
f88f0bdf
AI
62
/*
 * Bulk-read state used by ubd_handler(): an array of request pointers
 * (allocated in ubd_init()) plus storage for a partially read pointer.
 */
static struct io_thread_req *(*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * Second buffer of the same shape, also allocated in ubd_init().
 * NOTE(review): its user is not visible in this part of the file.
 */
static struct io_thread_req *(*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
70
71
72
91acb21f 73static inline int ubd_test_bit(__u64 bit, unsigned char *data)
1da177e4
LT
74{
75 __u64 n;
76 int bits, off;
77
91acb21f 78 bits = sizeof(data[0]) * 8;
1da177e4
LT
79 n = bit / bits;
80 off = bit % bits;
dc764e50 81 return (data[n] & (1 << off)) != 0;
1da177e4
LT
82}
83
91acb21f 84static inline void ubd_set_bit(__u64 bit, unsigned char *data)
1da177e4
LT
85{
86 __u64 n;
87 int bits, off;
88
91acb21f 89 bits = sizeof(data[0]) * 8;
1da177e4
LT
90 n = bit / bits;
91 off = bit % bits;
91acb21f 92 data[n] |= (1 << off);
1da177e4
LT
93}
94/*End stuff from ubd_user.h*/
95
96#define DRIVER_NAME "uml-blkdev"
97
d7fb2c38 98static DEFINE_MUTEX(ubd_lock);
9a181c58 99static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
1da177e4 100
a625c998 101static int ubd_open(struct block_device *bdev, fmode_t mode);
db2a144b 102static void ubd_release(struct gendisk *disk, fmode_t mode);
a625c998 103static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1da177e4 104 unsigned int cmd, unsigned long arg);
a885c8c4 105static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 106
97d88ac8 107#define MAX_DEV (16)
1da177e4 108
83d5cde4 109static const struct block_device_operations ubd_blops = {
1da177e4 110 .owner = THIS_MODULE,
a625c998
AV
111 .open = ubd_open,
112 .release = ubd_release,
113 .ioctl = ubd_ioctl,
a885c8c4 114 .getgeo = ubd_getgeo,
1da177e4
LT
115};
116
1da177e4 117/* Protected by ubd_lock */
792dd4fc 118static int fake_major = UBD_MAJOR;
1da177e4
LT
119static struct gendisk *ubd_gendisk[MAX_DEV];
120static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 121
1da177e4
LT
122#ifdef CONFIG_BLK_DEV_UBD_SYNC
123#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
124 .cl = 1 })
125#else
126#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
127 .cl = 1 })
128#endif
1da177e4
LT
129static struct openflags global_openflags = OPEN_FLAGS;
130
/* COW-related state of one ubd device. */
struct cow {
	char *file;			/* backing file name */
	int fd;				/* backing file fd */
	unsigned long *bitmap;		/* vmalloc'ed in ubd_open_dev() */
	unsigned long bitmap_len;	/* size passed to vmalloc() for bitmap */
	int bitmap_offset;		/* file offset the bitmap is read from */
	int data_offset;
};
141
a0044bdf
JD
142#define MAX_SG 64
143
1da177e4 144struct ubd {
a0044bdf 145 struct list_head restart;
2a9d32f6
PBG
146 /* name (and fd, below) of the file opened for writing, either the
147 * backing or the cow file. */
1da177e4
LT
148 char *file;
149 int count;
150 int fd;
151 __u64 size;
152 struct openflags boot_openflags;
153 struct openflags openflags;
84e945e3
PBG
154 unsigned shared:1;
155 unsigned no_cow:1;
1da177e4
LT
156 struct cow cow;
157 struct platform_device pdev;
62f96cb0
JD
158 struct request_queue *queue;
159 spinlock_t lock;
a0044bdf
JD
160 struct scatterlist sg[MAX_SG];
161 struct request *request;
162 int start_sg, end_sg;
47526903 163 sector_t rq_pos;
1da177e4
LT
164};
165
/* Initializer for an unconfigured struct cow (no file, no fd, no bitmap). */
#define DEFAULT_COW { \
	.file		= NULL, \
	.fd		= -1, \
	.bitmap		= NULL, \
	.bitmap_offset	= 0, \
	.data_offset	= 0, \
}
173
/*
 * Initializer for an unconfigured ubd unit.  Also used by
 * ubd_device_release() to reset a slot after its device is gone.
 */
#define DEFAULT_UBD { \
	.file		= NULL, \
	.count		= 0, \
	.fd		= -1, \
	.size		= -1, \
	.boot_openflags	= OPEN_FLAGS, \
	.openflags	= OPEN_FLAGS, \
	.shared		= 0, \
	.no_cow		= 0, \
	.cow		= DEFAULT_COW, \
	.lock		= __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
	.request	= NULL, \
	.start_sg	= 0, \
	.end_sg		= 0, \
	.rq_pos		= 0, \
}
190
b8831a1d 191/* Protected by ubd_lock */
5dc62b1b 192static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 193
1da177e4
LT
/* Only changed by fake_ide_setup which is a setup */
static int fake_ide;
/* /proc/ide and /proc/ide/ide0, created on demand by make_proc_ide(). */
static struct proc_dir_entry *proc_ide_root;
static struct proc_dir_entry *proc_ide;
198
199static void make_proc_ide(void)
200{
201 proc_ide_root = proc_mkdir("ide", NULL);
202 proc_ide = proc_mkdir("ide0", proc_ide_root);
203}
204
/* seq_file show callback for the fake "media" proc entry: always "disk". */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	seq_puts(m, "disk\n");

	return 0;
}
210
c0a9290e 211static void make_ide_entries(const char *dev_name)
1da177e4
LT
212{
213 struct proc_dir_entry *dir, *ent;
214 char name[64];
215
216 if(proc_ide_root == NULL) make_proc_ide();
217
218 dir = proc_mkdir(dev_name, proc_ide);
219 if(!dir) return;
220
3f3942ac
CH
221 ent = proc_create_single("media", S_IRUGO, dir,
222 fake_ide_media_proc_show);
1da177e4 223 if(!ent) return;
c0a9290e 224 snprintf(name, sizeof(name), "ide0/%s", dev_name);
1da177e4
LT
225 proc_symlink(dev_name, proc_ide_root, name);
226}
227
228static int fake_ide_setup(char *str)
229{
230 fake_ide = 1;
dc764e50 231 return 1;
1da177e4
LT
232}
233
234__setup("fake_ide", fake_ide_setup);
235
236__uml_help(fake_ide_setup,
237"fake_ide\n"
238" Create ide0 entries that map onto ubd devices.\n\n"
239);
240
/*
 * Parse a unit specifier at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'z' meaning 0..25.
 *
 * On success *ptr is advanced past the specifier and the unit number is
 * returned.  Returns -1, leaving *ptr untouched, when nothing parses.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr, *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur >= 'a') && (*cur <= 'z')) {
		*ptr = cur + 1;
		return *cur - 'a';
	}

	return -1;
}
259
d8d7c28e
PBG
260/* If *index_out == -1 at exit, the passed option was a general one;
261 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
262 * should not be freed on exit.
263 */
f28169d2 264static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 265{
7d314e34 266 struct ubd *ubd_dev;
1da177e4
LT
267 struct openflags flags = global_openflags;
268 char *backing_file;
b8831a1d 269 int n, err = 0, i;
1da177e4
LT
270
271 if(index_out) *index_out = -1;
272 n = *str;
273 if(n == '='){
274 char *end;
275 int major;
276
277 str++;
1da177e4
LT
278 if(!strcmp(str, "sync")){
279 global_openflags = of_sync(global_openflags);
b8831a1d 280 goto out1;
1da177e4 281 }
b8831a1d
JD
282
283 err = -EINVAL;
1da177e4
LT
284 major = simple_strtoul(str, &end, 0);
285 if((*end != '\0') || (end == str)){
f28169d2 286 *error_out = "Didn't parse major number";
b8831a1d 287 goto out1;
1da177e4
LT
288 }
289
f28169d2 290 mutex_lock(&ubd_lock);
792dd4fc 291 if (fake_major != UBD_MAJOR) {
f28169d2
JD
292 *error_out = "Can't assign a fake major twice";
293 goto out1;
294 }
6c29256c 295
f28169d2 296 fake_major = major;
1da177e4
LT
297
298 printk(KERN_INFO "Setting extra ubd major number to %d\n",
299 major);
f28169d2
JD
300 err = 0;
301 out1:
302 mutex_unlock(&ubd_lock);
303 return err;
1da177e4
LT
304 }
305
306 n = parse_unit(&str);
307 if(n < 0){
f28169d2
JD
308 *error_out = "Couldn't parse device number";
309 return -EINVAL;
1da177e4
LT
310 }
311 if(n >= MAX_DEV){
f28169d2
JD
312 *error_out = "Device number out of range";
313 return 1;
1da177e4
LT
314 }
315
f28169d2 316 err = -EBUSY;
d7fb2c38 317 mutex_lock(&ubd_lock);
1da177e4 318
7d314e34
PBG
319 ubd_dev = &ubd_devs[n];
320 if(ubd_dev->file != NULL){
f28169d2 321 *error_out = "Device is already configured";
1da177e4
LT
322 goto out;
323 }
324
325 if (index_out)
326 *index_out = n;
327
f28169d2 328 err = -EINVAL;
6c29256c 329 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
330 switch (*str) {
331 case 'r':
332 flags.w = 0;
333 break;
334 case 's':
335 flags.s = 1;
336 break;
337 case 'd':
7d314e34 338 ubd_dev->no_cow = 1;
1da177e4 339 break;
6c29256c 340 case 'c':
7d314e34 341 ubd_dev->shared = 1;
6c29256c 342 break;
1da177e4
LT
343 case '=':
344 str++;
345 goto break_loop;
346 default:
f28169d2
JD
347 *error_out = "Expected '=' or flag letter "
348 "(r, s, c, or d)";
1da177e4
LT
349 goto out;
350 }
351 str++;
352 }
353
f28169d2
JD
354 if (*str == '=')
355 *error_out = "Too many flags specified";
356 else
357 *error_out = "Missing '='";
1da177e4
LT
358 goto out;
359
360break_loop:
1da177e4
LT
361 backing_file = strchr(str, ',');
362
f28169d2 363 if (backing_file == NULL)
1da177e4 364 backing_file = strchr(str, ':');
1da177e4 365
f28169d2
JD
366 if(backing_file != NULL){
367 if(ubd_dev->no_cow){
368 *error_out = "Can't specify both 'd' and a cow file";
369 goto out;
370 }
1da177e4
LT
371 else {
372 *backing_file = '\0';
373 backing_file++;
374 }
375 }
f28169d2 376 err = 0;
7d314e34
PBG
377 ubd_dev->file = str;
378 ubd_dev->cow.file = backing_file;
379 ubd_dev->boot_openflags = flags;
1da177e4 380out:
d7fb2c38 381 mutex_unlock(&ubd_lock);
f28169d2 382 return err;
1da177e4
LT
383}
384
385static int ubd_setup(char *str)
386{
f28169d2
JD
387 char *error;
388 int err;
389
390 err = ubd_setup_common(str, NULL, &error);
391 if(err)
392 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
393 "%s\n", str, error);
394 return 1;
1da177e4
LT
395}
396
397__setup("ubd", ubd_setup);
398__uml_help(ubd_setup,
399"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
400" This is used to associate a device with a file in the underlying\n"
401" filesystem. When specifying two filenames, the first one is the\n"
402" COW name and the second is the backing file name. As separator you can\n"
403" use either a ':' or a ',': the first one allows writing things like;\n"
404" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
405" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 406" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
407" a COW file or a backing file. To override this detection, add the 'd'\n"
408" flag:\n"
409" ubd0d=BackingFile\n"
410" Usually, there is a filesystem in the file, but \n"
411" that's not required. Swap devices containing swap files can be\n"
412" specified like this. Also, a file which doesn't contain a\n"
413" filesystem can have its contents read in the virtual \n"
414" machine by running 'dd' on the device. <n> must be in the range\n"
415" 0 to 7. Appending an 'r' to the number will cause that device\n"
416" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
417" an 's' will cause data to be written to disk on the host immediately.\n"
418" 'c' will cause the device to be treated as being shared between multiple\n"
419" UMLs and file locking will be turned off - this is appropriate for a\n"
420" cluster filesystem and inappropriate at almost all other times.\n\n"
1da177e4
LT
421);
422
8299ca5c 423static int udb_setup(char *str)
1da177e4
LT
424{
425 printk("udb%s specified on command line is almost certainly a ubd -> "
426 "udb TYPO\n", str);
dc764e50 427 return 1;
1da177e4
LT
428}
429
430__setup("udb", udb_setup);
431__uml_help(udb_setup,
432"udb\n"
0894e27e
JD
433" This option is here solely to catch ubd -> udb typos, which can be\n"
434" to impossible to catch visually unless you specifically look for\n"
435" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
436" in the boot output.\n\n"
437);
438
/* Request function handed to blk_init_queue() in ubd_add(). */
static void do_ubd_request(struct request_queue *q);

/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;
/* Devices whose queues ubd_handler() restarts after draining completions. */
static LIST_HEAD(restart);
444
f88f0bdf
AI
445/* Function to read several request pointers at a time
446* handling fractional reads if (and as) needed
447*/
448
449static int bulk_req_safe_read(
450 int fd,
451 struct io_thread_req * (*request_buffer)[],
452 struct io_thread_req **remainder,
453 int *remainder_size,
454 int max_recs
455 )
456{
457 int n = 0;
458 int res = 0;
459
460 if (*remainder_size > 0) {
461 memmove(
462 (char *) request_buffer,
463 (char *) remainder, *remainder_size
464 );
465 n = *remainder_size;
466 }
467
468 res = os_read_file(
469 fd,
470 ((char *) request_buffer) + *remainder_size,
471 sizeof(struct io_thread_req *)*max_recs
472 - *remainder_size
473 );
474 if (res > 0) {
475 n += res;
476 if ((n % sizeof(struct io_thread_req *)) > 0) {
477 /*
478 * Read somehow returned not a multiple of dword
479 * theoretically possible, but never observed in the
480 * wild, so read routine must be able to handle it
481 */
482 *remainder_size = n % sizeof(struct io_thread_req *);
483 WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
484 memmove(
485 remainder,
486 ((char *) request_buffer) +
487 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *),
488 *remainder_size
489 );
490 n = n - *remainder_size;
491 }
492 } else {
493 n = res;
494 }
495 return n;
496}
497
62f96cb0 498/* Called without dev->lock held, and only in interrupt context. */
91acb21f 499static void ubd_handler(void)
1da177e4 500{
a0044bdf
JD
501 struct ubd *ubd;
502 struct list_head *list, *next_ele;
503 unsigned long flags;
91acb21f 504 int n;
f88f0bdf 505 int count;
91acb21f 506
a0044bdf 507 while(1){
f88f0bdf
AI
508 n = bulk_req_safe_read(
509 thread_fd,
510 irq_req_buffer,
511 &irq_remainder,
512 &irq_remainder_size,
513 UBD_REQ_BUFFER_SIZE
514 );
515 if (n < 0) {
a0044bdf
JD
516 if(n == -EAGAIN)
517 break;
518 printk(KERN_ERR "spurious interrupt in ubd_handler, "
519 "err = %d\n", -n);
520 return;
521 }
f88f0bdf
AI
522 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
523 blk_end_request(
524 (*irq_req_buffer)[count]->req,
2a842aca 525 BLK_STS_OK,
f88f0bdf
AI
526 (*irq_req_buffer)[count]->length
527 );
528 kfree((*irq_req_buffer)[count]);
529 }
a0044bdf 530 }
62f96cb0 531 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
532
533 list_for_each_safe(list, next_ele, &restart){
534 ubd = container_of(list, struct ubd, restart);
535 list_del_init(&ubd->restart);
536 spin_lock_irqsave(&ubd->lock, flags);
537 do_ubd_request(ubd->queue);
538 spin_unlock_irqrestore(&ubd->lock, flags);
539 }
1da177e4
LT
540}
541
7bea96fd 542static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 543{
91acb21f 544 ubd_handler();
dc764e50 545 return IRQ_HANDLED;
91acb21f 546}
09ace81c 547
91acb21f
JD
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O process if one was recorded in io_pid. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
558
d8d7c28e 559static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
560{
561 char *file;
85356398
RW
562 int fd;
563 int err;
564
565 __u32 version;
566 __u32 align;
567 char *backing_file;
568 time_t mtime;
569 unsigned long long size;
570 int sector_size;
571 int bitmap_offset;
572
573 if (ubd_dev->file && ubd_dev->cow.file) {
574 file = ubd_dev->cow.file;
575
576 goto out;
577 }
578
d4afcba9 579 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
85356398
RW
580 if (fd < 0)
581 return fd;
582
583 err = read_cow_header(file_reader, &fd, &version, &backing_file, \
584 &mtime, &size, &sector_size, &align, &bitmap_offset);
585 os_close_file(fd);
1da177e4 586
85356398
RW
587 if(err == -EINVAL)
588 file = ubd_dev->file;
589 else
590 file = backing_file;
591
592out:
dc764e50 593 return os_file_size(file, size_out);
1da177e4
LT
594}
595
5dc62b1b
WC
/*
 * Read len bytes at file offset "offset" of fd into buf.
 * Returns 0 on success or the negative value from os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
606
607static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
608{
609 unsigned long modtime;
610 unsigned long long actual;
611 int err;
612
613 err = os_file_modtime(file, &modtime);
614 if (err < 0) {
615 printk(KERN_ERR "Failed to get modification time of backing "
616 "file \"%s\", err = %d\n", file, -err);
617 return err;
618 }
619
620 err = os_file_size(file, &actual);
621 if (err < 0) {
622 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
623 "err = %d\n", file, -err);
624 return err;
625 }
626
627 if (actual != size) {
628 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
629 * the typecast.*/
630 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
631 "vs backing file\n", (unsigned long long) size, actual);
632 return -EINVAL;
633 }
634 if (modtime != mtime) {
635 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
636 "backing file\n", mtime, modtime);
637 return -EINVAL;
638 }
639 return 0;
640}
641
642static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
643{
644 struct uml_stat buf1, buf2;
645 int err;
646
647 if (from_cmdline == NULL)
648 return 0;
649 if (!strcmp(from_cmdline, from_cow))
650 return 0;
651
652 err = os_stat_file(from_cmdline, &buf1);
653 if (err < 0) {
654 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
655 -err);
656 return 0;
657 }
658 err = os_stat_file(from_cow, &buf2);
659 if (err < 0) {
660 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
661 -err);
662 return 1;
663 }
664 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
665 return 0;
666
667 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
668 "\"%s\" specified in COW header of \"%s\"\n",
669 from_cmdline, from_cow, cow);
670 return 1;
671}
672
673static int open_ubd_file(char *file, struct openflags *openflags, int shared,
674 char **backing_file_out, int *bitmap_offset_out,
675 unsigned long *bitmap_len_out, int *data_offset_out,
676 int *create_cow_out)
677{
678 time_t mtime;
679 unsigned long long size;
680 __u32 version, align;
681 char *backing_file;
682 int fd, err, sectorsize, asked_switch, mode = 0644;
683
684 fd = os_open_file(file, *openflags, mode);
685 if (fd < 0) {
686 if ((fd == -ENOENT) && (create_cow_out != NULL))
687 *create_cow_out = 1;
688 if (!openflags->w ||
689 ((fd != -EROFS) && (fd != -EACCES)))
690 return fd;
691 openflags->w = 0;
692 fd = os_open_file(file, *openflags, mode);
693 if (fd < 0)
694 return fd;
695 }
696
697 if (shared)
698 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
699 else {
700 err = os_lock_file(fd, openflags->w);
701 if (err < 0) {
702 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
703 file, -err);
704 goto out_close;
705 }
706 }
707
708 /* Successful return case! */
709 if (backing_file_out == NULL)
710 return fd;
711
712 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
713 &size, &sectorsize, &align, bitmap_offset_out);
714 if (err && (*backing_file_out != NULL)) {
715 printk(KERN_ERR "Failed to read COW header from COW file "
716 "\"%s\", errno = %d\n", file, -err);
717 goto out_close;
718 }
719 if (err)
720 return fd;
721
722 asked_switch = path_requires_switch(*backing_file_out, backing_file,
723 file);
724
725 /* Allow switching only if no mismatch. */
726 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
727 mtime)) {
728 printk(KERN_ERR "Switching backing file to '%s'\n",
729 *backing_file_out);
730 err = write_cow_header(file, fd, *backing_file_out,
731 sectorsize, align, &size);
732 if (err) {
733 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
734 goto out_close;
735 }
736 } else {
737 *backing_file_out = backing_file;
738 err = backing_file_mismatch(*backing_file_out, size, mtime);
739 if (err)
740 goto out_close;
741 }
742
743 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
744 bitmap_len_out, data_offset_out);
745
746 return fd;
747 out_close:
748 os_close_file(fd);
749 return err;
750}
751
752static int create_cow_file(char *cow_file, char *backing_file,
753 struct openflags flags,
754 int sectorsize, int alignment, int *bitmap_offset_out,
755 unsigned long *bitmap_len_out, int *data_offset_out)
756{
757 int err, fd;
758
759 flags.c = 1;
760 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
761 if (fd < 0) {
762 err = fd;
763 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
764 cow_file, -err);
765 goto out;
766 }
767
768 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
769 bitmap_offset_out, bitmap_len_out,
770 data_offset_out);
771 if (!err)
772 return fd;
773 os_close_file(fd);
774 out:
775 return err;
776}
777
5f75a4f8 778static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 779{
7d314e34
PBG
780 os_close_file(ubd_dev->fd);
781 if(ubd_dev->cow.file == NULL)
1da177e4
LT
782 return;
783
7d314e34
PBG
784 os_close_file(ubd_dev->cow.fd);
785 vfree(ubd_dev->cow.bitmap);
786 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
787}
788
7d314e34 789static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
790{
791 struct openflags flags;
792 char **back_ptr;
793 int err, create_cow, *create_ptr;
0bf16bff 794 int fd;
1da177e4 795
7d314e34 796 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 797 create_cow = 0;
7d314e34
PBG
798 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
799 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
800
801 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
802 back_ptr, &ubd_dev->cow.bitmap_offset,
803 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 804 create_ptr);
1da177e4 805
0bf16bff
PBG
806 if((fd == -ENOENT) && create_cow){
807 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
808 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
809 &ubd_dev->cow.bitmap_offset,
810 &ubd_dev->cow.bitmap_len,
811 &ubd_dev->cow.data_offset);
0bf16bff 812 if(fd >= 0){
1da177e4 813 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 814 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
815 }
816 }
817
0bf16bff 818 if(fd < 0){
7d314e34 819 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
820 -fd);
821 return fd;
1da177e4 822 }
0bf16bff 823 ubd_dev->fd = fd;
1da177e4 824
7d314e34 825 if(ubd_dev->cow.file != NULL){
086fa5ff 826 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
f4768ffd 827
1da177e4 828 err = -ENOMEM;
da2486ba 829 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 830 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
831 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
832 goto error;
833 }
834 flush_tlb_kernel_vm();
835
7d314e34
PBG
836 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
837 ubd_dev->cow.bitmap_offset,
838 ubd_dev->cow.bitmap_len);
1da177e4
LT
839 if(err < 0)
840 goto error;
841
7d314e34 842 flags = ubd_dev->openflags;
1da177e4 843 flags.w = 0;
7d314e34 844 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 845 NULL, NULL, NULL, NULL);
1da177e4 846 if(err < 0) goto error;
7d314e34 847 ubd_dev->cow.fd = err;
1da177e4 848 }
dc764e50 849 return 0;
1da177e4 850 error:
7d314e34 851 os_close_file(ubd_dev->fd);
dc764e50 852 return err;
1da177e4
LT
853}
854
2e3f5251
JD
855static void ubd_device_release(struct device *dev)
856{
8691b97b 857 struct ubd *ubd_dev = dev_get_drvdata(dev);
2e3f5251
JD
858
859 blk_cleanup_queue(ubd_dev->queue);
860 *ubd_dev = ((struct ubd) DEFAULT_UBD);
861}
862
5f75a4f8 863static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 864 struct gendisk **disk_out)
1da177e4 865{
d72a5783 866 struct device *parent = NULL;
1da177e4 867 struct gendisk *disk;
1da177e4
LT
868
869 disk = alloc_disk(1 << UBD_SHIFT);
870 if(disk == NULL)
dc764e50 871 return -ENOMEM;
1da177e4
LT
872
873 disk->major = major;
874 disk->first_minor = unit << UBD_SHIFT;
875 disk->fops = &ubd_blops;
876 set_capacity(disk, size / 512);
792dd4fc 877 if (major == UBD_MAJOR)
1da177e4 878 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 879 else
1da177e4 880 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
881
882 /* sysfs register (not for ide fake devices) */
792dd4fc 883 if (major == UBD_MAJOR) {
7d314e34
PBG
884 ubd_devs[unit].pdev.id = unit;
885 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251 886 ubd_devs[unit].pdev.dev.release = ubd_device_release;
8691b97b 887 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
7d314e34 888 platform_device_register(&ubd_devs[unit].pdev);
d72a5783 889 parent = &ubd_devs[unit].pdev.dev;
1da177e4
LT
890 }
891
7d314e34 892 disk->private_data = &ubd_devs[unit];
62f96cb0 893 disk->queue = ubd_devs[unit].queue;
d72a5783 894 device_add_disk(parent, disk);
1da177e4
LT
895
896 *disk_out = disk;
897 return 0;
898}
899
/*
 * Round n up to the next multiple of 512.  The argument is parenthesized
 * and the mask is built in n's own type, avoiding a left shift of -1.
 */
#define ROUND_BLOCK(n) \
	(((n) + ((1 << 9) - 1)) & ~((__typeof__(n))((1 << 9) - 1)))
901
f28169d2 902static int ubd_add(int n, char **error_out)
1da177e4 903{
7d314e34 904 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 905 int err = 0;
1da177e4 906
7d314e34 907 if(ubd_dev->file == NULL)
ec7cf783 908 goto out;
1da177e4 909
7d314e34 910 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
911 if(err < 0){
912 *error_out = "Couldn't determine size of device's file";
80c13749 913 goto out;
f28169d2 914 }
1da177e4 915
7d314e34 916 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 917
a0044bdf 918 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 919 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 920
62f96cb0
JD
921 err = -ENOMEM;
922 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
923 if (ubd_dev->queue == NULL) {
924 *error_out = "Failed to initialize device queue";
80c13749 925 goto out;
62f96cb0
JD
926 }
927 ubd_dev->queue->queuedata = ubd_dev;
f935a8ce 928 blk_queue_write_cache(ubd_dev->queue, true, false);
62f96cb0 929
8a78362c 930 blk_queue_max_segments(ubd_dev->queue, MAX_SG);
792dd4fc 931 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
62f96cb0
JD
932 if(err){
933 *error_out = "Failed to register device";
934 goto out_cleanup;
935 }
6c29256c 936
792dd4fc 937 if (fake_major != UBD_MAJOR)
5f75a4f8 938 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 939 &fake_gendisk[n]);
1da177e4 940
83380cc1
JD
941 /*
942 * Perhaps this should also be under the "if (fake_major)" above
943 * using the fake_disk->disk_name
944 */
1da177e4
LT
945 if (fake_ide)
946 make_ide_entries(ubd_gendisk[n]->disk_name);
947
ec7cf783 948 err = 0;
ec7cf783
JD
949out:
950 return err;
62f96cb0
JD
951
952out_cleanup:
953 blk_cleanup_queue(ubd_dev->queue);
954 goto out;
1da177e4
LT
955}
956
f28169d2 957static int ubd_config(char *str, char **error_out)
1da177e4 958{
e7f6552f 959 int n, ret;
1da177e4 960
f28169d2
JD
961 /* This string is possibly broken up and stored, so it's only
962 * freed if ubd_setup_common fails, or if only general options
963 * were set.
964 */
970d6e3a 965 str = kstrdup(str, GFP_KERNEL);
e7f6552f 966 if (str == NULL) {
f28169d2
JD
967 *error_out = "Failed to allocate memory";
968 return -ENOMEM;
1da177e4 969 }
f28169d2
JD
970
971 ret = ubd_setup_common(str, &n, error_out);
972 if (ret)
e7f6552f 973 goto err_free;
f28169d2 974
e7f6552f
PBG
975 if (n == -1) {
976 ret = 0;
d8d7c28e 977 goto err_free;
1da177e4 978 }
1da177e4 979
dc764e50 980 mutex_lock(&ubd_lock);
f28169d2 981 ret = ubd_add(n, error_out);
e7f6552f 982 if (ret)
7d314e34 983 ubd_devs[n].file = NULL;
dc764e50 984 mutex_unlock(&ubd_lock);
1da177e4 985
e7f6552f 986out:
dc764e50 987 return ret;
e7f6552f
PBG
988
989err_free:
990 kfree(str);
991 goto out;
1da177e4
LT
992}
993
994static int ubd_get_config(char *name, char *str, int size, char **error_out)
995{
7d314e34 996 struct ubd *ubd_dev;
1da177e4
LT
997 int n, len = 0;
998
999 n = parse_unit(&name);
1000 if((n >= MAX_DEV) || (n < 0)){
1001 *error_out = "ubd_get_config : device number out of range";
dc764e50 1002 return -1;
1da177e4
LT
1003 }
1004
7d314e34 1005 ubd_dev = &ubd_devs[n];
d7fb2c38 1006 mutex_lock(&ubd_lock);
1da177e4 1007
7d314e34 1008 if(ubd_dev->file == NULL){
1da177e4
LT
1009 CONFIG_CHUNK(str, size, len, "", 1);
1010 goto out;
1011 }
1012
7d314e34 1013 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 1014
7d314e34 1015 if(ubd_dev->cow.file != NULL){
1da177e4 1016 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 1017 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
1018 }
1019 else CONFIG_CHUNK(str, size, len, "", 1);
1020
1021 out:
d7fb2c38 1022 mutex_unlock(&ubd_lock);
dc764e50 1023 return len;
1da177e4
LT
1024}
1025
29d56cfe
JD
1026static int ubd_id(char **str, int *start_out, int *end_out)
1027{
dc764e50 1028 int n;
29d56cfe
JD
1029
1030 n = parse_unit(str);
dc764e50
JD
1031 *start_out = 0;
1032 *end_out = MAX_DEV - 1;
1033 return n;
29d56cfe
JD
1034}
1035
f28169d2 1036static int ubd_remove(int n, char **error_out)
1da177e4 1037{
2e3f5251 1038 struct gendisk *disk = ubd_gendisk[n];
7d314e34 1039 struct ubd *ubd_dev;
29d56cfe 1040 int err = -ENODEV;
1da177e4 1041
d7fb2c38 1042 mutex_lock(&ubd_lock);
1da177e4 1043
7d314e34 1044 ubd_dev = &ubd_devs[n];
1da177e4 1045
7d314e34 1046 if(ubd_dev->file == NULL)
29d56cfe 1047 goto out;
1da177e4 1048
29d56cfe
JD
1049 /* you cannot remove a open disk */
1050 err = -EBUSY;
7d314e34 1051 if(ubd_dev->count > 0)
1da177e4
LT
1052 goto out;
1053
dc764e50 1054 ubd_gendisk[n] = NULL;
b47d2deb
JD
1055 if(disk != NULL){
1056 del_gendisk(disk);
1057 put_disk(disk);
1058 }
1da177e4
LT
1059
1060 if(fake_gendisk[n] != NULL){
1061 del_gendisk(fake_gendisk[n]);
1062 put_disk(fake_gendisk[n]);
1063 fake_gendisk[n] = NULL;
1064 }
1065
1da177e4 1066 err = 0;
2e3f5251 1067 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1068out:
d7fb2c38 1069 mutex_unlock(&ubd_lock);
29d56cfe 1070 return err;
1da177e4
LT
1071}
1072
f28169d2 1073/* All these are called by mconsole in process context and without
b8831a1d 1074 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1075 */
1da177e4 1076static struct mc_device ubd_mc = {
84f48d4f 1077 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1078 .name = "ubd",
1079 .config = ubd_config,
dc764e50 1080 .get_config = ubd_get_config,
29d56cfe 1081 .id = ubd_id,
1da177e4
LT
1082 .remove = ubd_remove,
1083};
1084
d8d7c28e 1085static int __init ubd_mc_init(void)
1da177e4
LT
1086{
1087 mconsole_register_dev(&ubd_mc);
1088 return 0;
1089}
1090
1091__initcall(ubd_mc_init);
1092
d8d7c28e
PBG
1093static int __init ubd0_init(void)
1094{
1095 struct ubd *ubd_dev = &ubd_devs[0];
1096
b8831a1d 1097 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1098 if(ubd_dev->file == NULL)
1099 ubd_dev->file = "root_fs";
b8831a1d
JD
1100 mutex_unlock(&ubd_lock);
1101
dc764e50 1102 return 0;
d8d7c28e
PBG
1103}
1104
1105__initcall(ubd0_init);
1106
b8831a1d 1107/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1108static struct platform_driver ubd_driver = {
1109 .driver = {
1110 .name = DRIVER_NAME,
1111 },
1da177e4
LT
1112};
1113
d8d7c28e 1114static int __init ubd_init(void)
1da177e4 1115{
f28169d2
JD
1116 char *error;
1117 int i, err;
1da177e4 1118
792dd4fc 1119 if (register_blkdev(UBD_MAJOR, "ubd"))
1da177e4
LT
1120 return -1;
1121
792dd4fc 1122 if (fake_major != UBD_MAJOR) {
1da177e4
LT
1123 char name[sizeof("ubd_nnn\0")];
1124
1125 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
1126 if (register_blkdev(fake_major, "ubd"))
1127 return -1;
1128 }
f88f0bdf 1129
6da2ec56
KC
1130 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1131 sizeof(struct io_thread_req *),
1132 GFP_KERNEL
f88f0bdf
AI
1133 );
1134 irq_remainder = 0;
1135
1136 if (irq_req_buffer == NULL) {
1137 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1138 return -1;
1139 }
6da2ec56
KC
1140 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1141 sizeof(struct io_thread_req *),
1142 GFP_KERNEL
f88f0bdf
AI
1143 );
1144
1145 io_remainder = 0;
1146
1147 if (io_req_buffer == NULL) {
1148 printk(KERN_ERR "Failed to initialize ubd buffering\n");
1149 return -1;
1150 }
3ae5eaec 1151 platform_driver_register(&ubd_driver);
dc764e50 1152 mutex_lock(&ubd_lock);
f28169d2
JD
1153 for (i = 0; i < MAX_DEV; i++){
1154 err = ubd_add(i, &error);
1155 if(err)
1156 printk(KERN_ERR "Failed to initialize ubd device %d :"
1157 "%s\n", i, error);
1158 }
dc764e50 1159 mutex_unlock(&ubd_lock);
1da177e4
LT
1160 return 0;
1161}
1162
1163late_initcall(ubd_init);
1164
d8d7c28e 1165static int __init ubd_driver_init(void){
91acb21f
JD
1166 unsigned long stack;
1167 int err;
1168
1169 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1170 if(global_openflags.s){
1171 printk(KERN_INFO "ubd: Synchronous mode\n");
1172 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1173 * enough. So use anyway the io thread. */
1174 }
1175 stack = alloc_stack(0, 0);
6c29256c 1176 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
1177 &thread_fd);
1178 if(io_pid < 0){
6c29256c 1179 printk(KERN_ERR
91acb21f
JD
1180 "ubd : Failed to start I/O thread (errno = %d) - "
1181 "falling back to synchronous I/O\n", -io_pid);
1182 io_pid = -1;
dc764e50 1183 return 0;
91acb21f 1184 }
6c29256c 1185 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
c0b79a90 1186 0, "ubd", ubd_devs);
91acb21f
JD
1187 if(err != 0)
1188 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1189 return 0;
91acb21f
JD
1190}
1191
1192device_initcall(ubd_driver_init);
1193
a625c998 1194static int ubd_open(struct block_device *bdev, fmode_t mode)
1da177e4 1195{
a625c998 1196 struct gendisk *disk = bdev->bd_disk;
7d314e34 1197 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
1198 int err = 0;
1199
9a181c58 1200 mutex_lock(&ubd_mutex);
7d314e34
PBG
1201 if(ubd_dev->count == 0){
1202 err = ubd_open_dev(ubd_dev);
1da177e4
LT
1203 if(err){
1204 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 1205 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
1206 goto out;
1207 }
1208 }
7d314e34
PBG
1209 ubd_dev->count++;
1210 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
1211
1212 /* This should no more be needed. And it didn't work anyway to exclude
1213 * read-write remounting of filesystems.*/
a625c998 1214 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 1215 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 1216 err = -EROFS;
2c49be99 1217 }*/
6e9624b8 1218out:
9a181c58 1219 mutex_unlock(&ubd_mutex);
dc764e50 1220 return err;
1da177e4
LT
1221}
1222
db2a144b 1223static void ubd_release(struct gendisk *disk, fmode_t mode)
1da177e4 1224{
7d314e34 1225 struct ubd *ubd_dev = disk->private_data;
1da177e4 1226
9a181c58 1227 mutex_lock(&ubd_mutex);
7d314e34 1228 if(--ubd_dev->count == 0)
5f75a4f8 1229 ubd_close_dev(ubd_dev);
9a181c58 1230 mutex_unlock(&ubd_mutex);
1da177e4
LT
1231}
1232
91acb21f
JD
1233static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1234 __u64 *cow_offset, unsigned long *bitmap,
1235 __u64 bitmap_offset, unsigned long *bitmap_words,
1236 __u64 bitmap_len)
1da177e4 1237{
91acb21f
JD
1238 __u64 sector = io_offset >> 9;
1239 int i, update_bitmap = 0;
1240
1241 for(i = 0; i < length >> 9; i++){
1242 if(cow_mask != NULL)
1243 ubd_set_bit(i, (unsigned char *) cow_mask);
1244 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1245 continue;
1da177e4 1246
91acb21f
JD
1247 update_bitmap = 1;
1248 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1249 }
1250
1251 if(!update_bitmap)
1252 return;
1da177e4 1253
91acb21f 1254 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1255
91acb21f
JD
1256 /* This takes care of the case where we're exactly at the end of the
1257 * device, and *cow_offset + 1 is off the end. So, just back it up
1258 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1259 * for the original diagnosis.
1260 */
6d074242
JO
1261 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1262 sizeof(unsigned long)) - 1))
91acb21f
JD
1263 (*cow_offset)--;
1264
1265 bitmap_words[0] = bitmap[*cow_offset];
1266 bitmap_words[1] = bitmap[*cow_offset + 1];
1267
1268 *cow_offset *= sizeof(unsigned long);
1269 *cow_offset += bitmap_offset;
1270}
1271
1272static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1273 __u64 bitmap_offset, __u64 bitmap_len)
1274{
1275 __u64 sector = req->offset >> 9;
1276 int i;
1277
1278 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1279 panic("Operation too long");
1280
1281 if(req->op == UBD_READ) {
1282 for(i = 0; i < req->length >> 9; i++){
1283 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1284 ubd_set_bit(i, (unsigned char *)
91acb21f 1285 &req->sector_mask);
dc764e50 1286 }
91acb21f
JD
1287 }
1288 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1289 &req->cow_offset, bitmap, bitmap_offset,
1290 req->bitmap_words, bitmap_len);
1da177e4
LT
1291}
1292
62f96cb0 1293/* Called with dev->lock held */
a0044bdf
JD
1294static void prepare_request(struct request *req, struct io_thread_req *io_req,
1295 unsigned long long offset, int page_offset,
1296 int len, struct page *page)
1da177e4
LT
1297{
1298 struct gendisk *disk = req->rq_disk;
7d314e34 1299 struct ubd *ubd_dev = disk->private_data;
91acb21f 1300
62f96cb0 1301 io_req->req = req;
a0044bdf
JD
1302 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1303 ubd_dev->fd;
7d314e34 1304 io_req->fds[1] = ubd_dev->fd;
91acb21f 1305 io_req->cow_offset = -1;
1da177e4
LT
1306 io_req->offset = offset;
1307 io_req->length = len;
1308 io_req->error = 0;
91acb21f
JD
1309 io_req->sector_mask = 0;
1310
1311 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1312 io_req->offsets[0] = 0;
7d314e34 1313 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1314 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1315 io_req->sectorsize = 1 << 9;
1316
7d314e34 1317 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1318 cowify_req(io_req, ubd_dev->cow.bitmap,
1319 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1320
1da177e4
LT
1321}
1322
805f11a0
RW
1323/* Called with dev->lock held */
1324static void prepare_flush_request(struct request *req,
1325 struct io_thread_req *io_req)
1326{
1327 struct gendisk *disk = req->rq_disk;
1328 struct ubd *ubd_dev = disk->private_data;
1329
1330 io_req->req = req;
1331 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1332 ubd_dev->fd;
1333 io_req->op = UBD_FLUSH;
1334}
1335
bc1d72e7
RW
1336static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
1337{
1338 int n = os_write_file(thread_fd, &io_req,
1339 sizeof(io_req));
1340 if (n != sizeof(io_req)) {
1341 if (n != -EAGAIN)
1342 printk("write to io thread failed, "
1343 "errno = %d\n", -n);
1344 else if (list_empty(&dev->restart))
1345 list_add(&dev->restart, &restart);
1346
1347 kfree(io_req);
1348 return false;
1349 }
1350 return true;
1351}
1352
62f96cb0 1353/* Called with dev->lock held */
165125e1 1354static void do_ubd_request(struct request_queue *q)
1da177e4 1355{
2adcec21 1356 struct io_thread_req *io_req;
1da177e4 1357 struct request *req;
a0044bdf
JD
1358
1359 while(1){
2a9529a0 1360 struct ubd *dev = q->queuedata;
2a236122 1361 if(dev->request == NULL){
9934c8c0 1362 struct request *req = blk_fetch_request(q);
a0044bdf
JD
1363 if(req == NULL)
1364 return;
1365
1366 dev->request = req;
47526903 1367 dev->rq_pos = blk_rq_pos(req);
a0044bdf
JD
1368 dev->start_sg = 0;
1369 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1370 }
1371
1372 req = dev->request;
805f11a0 1373
3a5e02ce 1374 if (req_op(req) == REQ_OP_FLUSH) {
805f11a0
RW
1375 io_req = kmalloc(sizeof(struct io_thread_req),
1376 GFP_ATOMIC);
1377 if (io_req == NULL) {
1378 if (list_empty(&dev->restart))
1379 list_add(&dev->restart, &restart);
1380 return;
1381 }
1382 prepare_flush_request(req, io_req);
2a236122
TK
1383 if (submit_request(io_req, dev) == false)
1384 return;
805f11a0
RW
1385 }
1386
a0044bdf
JD
1387 while(dev->start_sg < dev->end_sg){
1388 struct scatterlist *sg = &dev->sg[dev->start_sg];
1389
2adcec21 1390 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1391 GFP_ATOMIC);
2adcec21
JD
1392 if(io_req == NULL){
1393 if(list_empty(&dev->restart))
1394 list_add(&dev->restart, &restart);
1395 return;
1396 }
1397 prepare_request(req, io_req,
47526903 1398 (unsigned long long)dev->rq_pos << 9,
45711f1a 1399 sg->offset, sg->length, sg_page(sg));
a0044bdf 1400
bc1d72e7 1401 if (submit_request(io_req, dev) == false)
a0044bdf 1402 return;
a0044bdf 1403
47526903 1404 dev->rq_pos += sg->length >> 9;
a0044bdf 1405 dev->start_sg++;
1da177e4 1406 }
a0044bdf
JD
1407 dev->end_sg = 0;
1408 dev->request = NULL;
1da177e4
LT
1409 }
1410}
1411
a885c8c4
CH
1412static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1413{
7d314e34 1414 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1415
1416 geo->heads = 128;
1417 geo->sectors = 32;
7d314e34 1418 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1419 return 0;
1420}
1421
a625c998 1422static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1da177e4
LT
1423 unsigned int cmd, unsigned long arg)
1424{
a625c998 1425 struct ubd *ubd_dev = bdev->bd_disk->private_data;
73855e13 1426 u16 ubd_id[ATA_ID_WORDS];
1da177e4
LT
1427
1428 switch (cmd) {
1da177e4 1429 struct cdrom_volctrl volume;
1da177e4 1430 case HDIO_GET_IDENTITY:
73855e13
BZ
1431 memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1432 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
1433 ubd_id[ATA_ID_HEADS] = 128;
1434 ubd_id[ATA_ID_SECTORS] = 32;
1da177e4
LT
1435 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1436 sizeof(ubd_id)))
dc764e50
JD
1437 return -EFAULT;
1438 return 0;
b8831a1d 1439
1da177e4
LT
1440 case CDROMVOLREAD:
1441 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1442 return -EFAULT;
1da177e4
LT
1443 volume.channel0 = 255;
1444 volume.channel1 = 255;
1445 volume.channel2 = 255;
1446 volume.channel3 = 255;
1447 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1448 return -EFAULT;
1449 return 0;
1da177e4 1450 }
dc764e50 1451 return -EINVAL;
1da177e4
LT
1452}
1453
91acb21f 1454static int update_bitmap(struct io_thread_req *req)
1da177e4 1455{
91acb21f 1456 int n;
1da177e4 1457
91acb21f 1458 if(req->cow_offset == -1)
dc764e50 1459 return 0;
1da177e4 1460
8c6157b6
AI
1461 n = os_pwrite_file(req->fds[1], &req->bitmap_words,
1462 sizeof(req->bitmap_words), req->cow_offset);
91acb21f
JD
1463 if(n != sizeof(req->bitmap_words)){
1464 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1465 req->fds[1]);
dc764e50 1466 return 1;
91acb21f 1467 }
1da177e4 1468
dc764e50 1469 return 0;
91acb21f 1470}
1da177e4 1471
5dc62b1b 1472static void do_io(struct io_thread_req *req)
91acb21f
JD
1473{
1474 char *buf;
1475 unsigned long len;
1476 int n, nsectors, start, end, bit;
91acb21f
JD
1477 __u64 off;
1478
805f11a0
RW
1479 if (req->op == UBD_FLUSH) {
1480 /* fds[0] is always either the rw image or our cow file */
1481 n = os_sync_file(req->fds[0]);
1482 if (n != 0) {
1483 printk("do_io - sync failed err = %d "
1484 "fd = %d\n", -n, req->fds[0]);
1485 req->error = 1;
1486 }
1487 return;
1488 }
1489
91acb21f
JD
1490 nsectors = req->length / req->sectorsize;
1491 start = 0;
1492 do {
1493 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1494 end = start;
1495 while((end < nsectors) &&
1496 (ubd_test_bit(end, (unsigned char *)
1497 &req->sector_mask) == bit))
1498 end++;
1499
1500 off = req->offset + req->offsets[bit] +
1501 start * req->sectorsize;
1502 len = (end - start) * req->sectorsize;
1503 buf = &req->buffer[start * req->sectorsize];
1504
91acb21f
JD
1505 if(req->op == UBD_READ){
1506 n = 0;
1507 do {
1508 buf = &buf[n];
1509 len -= n;
8c6157b6 1510 n = os_pread_file(req->fds[bit], buf, len, off);
91acb21f
JD
1511 if (n < 0) {
1512 printk("do_io - read failed, err = %d "
1513 "fd = %d\n", -n, req->fds[bit]);
1514 req->error = 1;
1515 return;
1516 }
1517 } while((n < len) && (n != 0));
1518 if (n < len) memset(&buf[n], 0, len - n);
1519 } else {
8c6157b6 1520 n = os_pwrite_file(req->fds[bit], buf, len, off);
91acb21f
JD
1521 if(n != len){
1522 printk("do_io - write failed err = %d "
1523 "fd = %d\n", -n, req->fds[bit]);
1524 req->error = 1;
1525 return;
1526 }
1527 }
1528
1529 start = end;
1530 } while(start < nsectors);
1da177e4 1531
91acb21f 1532 req->error = update_bitmap(req);
1da177e4 1533}
91acb21f
JD
1534
/*
 * File descriptor the I/O thread reads requests from and writes completions
 * to (see io_thread()).  start_io_thread() is only called from
 * ubd_driver_init(), which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread. XXX: currently unused. */
static int io_count;
91acb21f
JD
1542
1543int io_thread(void *arg)
1544{
f88f0bdf 1545 int n, count, written, res;
91acb21f 1546
91d44ff8
RW
1547 os_fix_helper_signals();
1548
91acb21f 1549 while(1){
f88f0bdf
AI
1550 n = bulk_req_safe_read(
1551 kernel_fd,
1552 io_req_buffer,
1553 &io_remainder,
1554 &io_remainder_size,
1555 UBD_REQ_BUFFER_SIZE
1556 );
1557 if (n < 0) {
1558 if (n == -EAGAIN) {
1559 ubd_read_poll(-1);
1560 continue;
1561 } else {
91acb21f 1562 printk("io_thread - read failed, fd = %d, "
f88f0bdf
AI
1563 "err = %d,"
1564 "reminder = %d\n",
1565 kernel_fd, -n, io_remainder_size);
91acb21f 1566 }
91acb21f 1567 }
f88f0bdf
AI
1568
1569 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1570 io_count++;
1571 do_io((*io_req_buffer)[count]);
1572 }
1573
1574 written = 0;
1575
1576 do {
1577 res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
ff6a1798 1578 if (res >= 0) {
f88f0bdf
AI
1579 written += res;
1580 } else {
1581 if (res != -EAGAIN) {
ff6a1798 1582 printk("io_thread - write failed, fd = %d, "
f88f0bdf
AI
1583 "err = %d\n", kernel_fd, -n);
1584 }
1585 }
1586 if (written < n) {
1587 ubd_write_poll(-1);
1588 }
1589 } while (written < n);
91acb21f 1590 }
91acb21f 1591
1b57e9c2
JD
1592 return 0;
1593}