uml: use DIV_ROUND_UP
[linux-2.6-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
1da177e4
LT
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
e16f5350 23#include "linux/kernel.h"
1da177e4
LT
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
1da177e4
LT
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
d052d1be 37#include "linux/platform_device.h"
23464ffa 38#include "linux/scatterlist.h"
1da177e4
LT
39#include "asm/segment.h"
40#include "asm/uaccess.h"
41#include "asm/irq.h"
42#include "asm/types.h"
43#include "asm/tlbflush.h"
1da177e4
LT
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
edea1385 52#include "kern_util.h"
1da177e4
LT
53#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
/* Operation code carried in io_thread_req.op. */
enum ubd_req { UBD_READ, UBD_WRITE };

/*
 * Work item exchanged with the I/O thread.  ubd_handler() reads pointers to
 * these back from thread_fd, accounts ->length against ->req and then
 * kfree()s the item.
 */
struct io_thread_req {
	/* Block-layer request this item belongs to. */
	struct request *req;
	enum ubd_req op;
	int fds[2];
	unsigned long offsets[2];
	unsigned long long offset;
	/* ubd_handler() converts this to sectors with ">> 9". */
	unsigned long length;
	char *buffer;
	int sectorsize;
	unsigned long sector_mask;
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
	int error;
};
74
/*
 * Return 1 if bit number "bit" is set in the byte array "data", 0 otherwise.
 * Bit 0 is the least significant bit of data[0].
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
85
/*
 * Set bit number "bit" in the byte array "data".
 * Bit 0 is the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
96/*End stuff from ubd_user.h*/
97
98#define DRIVER_NAME "uml-blkdev"
99
d7fb2c38 100static DEFINE_MUTEX(ubd_lock);
1da177e4 101
1da177e4
LT
102static int ubd_open(struct inode * inode, struct file * filp);
103static int ubd_release(struct inode * inode, struct file * file);
104static int ubd_ioctl(struct inode * inode, struct file * file,
105 unsigned int cmd, unsigned long arg);
a885c8c4 106static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
1da177e4 107
97d88ac8 108#define MAX_DEV (16)
1da177e4 109
1da177e4
LT
110static struct block_device_operations ubd_blops = {
111 .owner = THIS_MODULE,
112 .open = ubd_open,
113 .release = ubd_release,
114 .ioctl = ubd_ioctl,
a885c8c4 115 .getgeo = ubd_getgeo,
1da177e4
LT
116};
117
1da177e4
LT
118/* Protected by ubd_lock */
119static int fake_major = MAJOR_NR;
1da177e4
LT
120static struct gendisk *ubd_gendisk[MAX_DEV];
121static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 122
1da177e4
LT
123#ifdef CONFIG_BLK_DEV_UBD_SYNC
124#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
125 .cl = 1 })
126#else
127#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
128 .cl = 1 })
129#endif
1da177e4
LT
130static struct openflags global_openflags = OPEN_FLAGS;
131
/* Copy-on-write state of a ubd device; unused when cow.file is NULL. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'ed in ubd_open_dev(). */
	unsigned long *bitmap;
	/* Size passed to vmalloc() and to read_cow_bitmap() for the bitmap. */
	unsigned long bitmap_len;
	/* Offset in the COW file at which the bitmap is read. */
	int bitmap_offset;
	int data_offset;
};
142
a0044bdf
JD
143#define MAX_SG 64
144
1da177e4 145struct ubd {
a0044bdf 146 struct list_head restart;
2a9d32f6
PBG
147 /* name (and fd, below) of the file opened for writing, either the
148 * backing or the cow file. */
1da177e4
LT
149 char *file;
150 int count;
151 int fd;
152 __u64 size;
153 struct openflags boot_openflags;
154 struct openflags openflags;
84e945e3
PBG
155 unsigned shared:1;
156 unsigned no_cow:1;
1da177e4
LT
157 struct cow cow;
158 struct platform_device pdev;
62f96cb0
JD
159 struct request_queue *queue;
160 spinlock_t lock;
a0044bdf
JD
161 struct scatterlist sg[MAX_SG];
162 struct request *request;
163 int start_sg, end_sg;
1da177e4
LT
164};
165
166#define DEFAULT_COW { \
167 .file = NULL, \
dc764e50
JD
168 .fd = -1, \
169 .bitmap = NULL, \
1da177e4 170 .bitmap_offset = 0, \
dc764e50 171 .data_offset = 0, \
1da177e4
LT
172}
173
174#define DEFAULT_UBD { \
175 .file = NULL, \
176 .count = 0, \
177 .fd = -1, \
178 .size = -1, \
179 .boot_openflags = OPEN_FLAGS, \
180 .openflags = OPEN_FLAGS, \
dc764e50 181 .no_cow = 0, \
6c29256c 182 .shared = 0, \
dc764e50 183 .cow = DEFAULT_COW, \
62f96cb0 184 .lock = SPIN_LOCK_UNLOCKED, \
a0044bdf
JD
185 .request = NULL, \
186 .start_sg = 0, \
187 .end_sg = 0, \
1da177e4
LT
188}
189
b8831a1d 190/* Protected by ubd_lock */
5dc62b1b 191static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
1da177e4 192
1da177e4
LT
193/* Only changed by fake_ide_setup which is a setup */
194static int fake_ide = 0;
195static struct proc_dir_entry *proc_ide_root = NULL;
196static struct proc_dir_entry *proc_ide = NULL;
197
198static void make_proc_ide(void)
199{
200 proc_ide_root = proc_mkdir("ide", NULL);
201 proc_ide = proc_mkdir("ide0", proc_ide_root);
202}
203
/*
 * read_proc handler for the fake "media" entry: the content is always
 * "disk\n".
 *
 * The string is copied to the start of "page".  The return value is the
 * number of bytes available from offset "off", capped at "count".  *eof is
 * set when fewer than "count" bytes remain; *start is only written when at
 * least one byte is returned.  "data" is unused.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	int remaining;

	strcpy(page, "disk\n");
	remaining = strlen("disk\n");
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
220
c0a9290e 221static void make_ide_entries(const char *dev_name)
1da177e4
LT
222{
223 struct proc_dir_entry *dir, *ent;
224 char name[64];
225
226 if(proc_ide_root == NULL) make_proc_ide();
227
228 dir = proc_mkdir(dev_name, proc_ide);
229 if(!dir) return;
230
231 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
232 if(!ent) return;
1da177e4
LT
233 ent->data = NULL;
234 ent->read_proc = proc_ide_read_media;
235 ent->write_proc = NULL;
c0a9290e 236 snprintf(name, sizeof(name), "ide0/%s", dev_name);
1da177e4
LT
237 proc_symlink(dev_name, proc_ide_root, name);
238}
239
240static int fake_ide_setup(char *str)
241{
242 fake_ide = 1;
dc764e50 243 return 1;
1da177e4
LT
244}
245
246__setup("fake_ide", fake_ide_setup);
247
248__uml_help(fake_ide_setup,
249"fake_ide\n"
250" Create ide0 entries that map onto ubd devices.\n\n"
251);
252
/*
 * Parse a unit designator at *ptr: either a number (handed to
 * simple_strtoul() with base 0) or a single lowercase letter, where 'a' is
 * unit 0.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned.  Returns -1, leaving *ptr untouched, if the text starts with
 * neither a digit nor a lowercase letter.  No range check is done here.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr, *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
271
d8d7c28e
PBG
272/* If *index_out == -1 at exit, the passed option was a general one;
273 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
274 * should not be freed on exit.
275 */
f28169d2 276static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 277{
7d314e34 278 struct ubd *ubd_dev;
1da177e4
LT
279 struct openflags flags = global_openflags;
280 char *backing_file;
b8831a1d 281 int n, err = 0, i;
1da177e4
LT
282
283 if(index_out) *index_out = -1;
284 n = *str;
285 if(n == '='){
286 char *end;
287 int major;
288
289 str++;
1da177e4
LT
290 if(!strcmp(str, "sync")){
291 global_openflags = of_sync(global_openflags);
b8831a1d 292 goto out1;
1da177e4 293 }
b8831a1d
JD
294
295 err = -EINVAL;
1da177e4
LT
296 major = simple_strtoul(str, &end, 0);
297 if((*end != '\0') || (end == str)){
f28169d2 298 *error_out = "Didn't parse major number";
b8831a1d 299 goto out1;
1da177e4
LT
300 }
301
f28169d2
JD
302 mutex_lock(&ubd_lock);
303 if(fake_major != MAJOR_NR){
304 *error_out = "Can't assign a fake major twice";
305 goto out1;
306 }
6c29256c 307
f28169d2 308 fake_major = major;
1da177e4
LT
309
310 printk(KERN_INFO "Setting extra ubd major number to %d\n",
311 major);
f28169d2
JD
312 err = 0;
313 out1:
314 mutex_unlock(&ubd_lock);
315 return err;
1da177e4
LT
316 }
317
318 n = parse_unit(&str);
319 if(n < 0){
f28169d2
JD
320 *error_out = "Couldn't parse device number";
321 return -EINVAL;
1da177e4
LT
322 }
323 if(n >= MAX_DEV){
f28169d2
JD
324 *error_out = "Device number out of range";
325 return 1;
1da177e4
LT
326 }
327
f28169d2 328 err = -EBUSY;
d7fb2c38 329 mutex_lock(&ubd_lock);
1da177e4 330
7d314e34
PBG
331 ubd_dev = &ubd_devs[n];
332 if(ubd_dev->file != NULL){
f28169d2 333 *error_out = "Device is already configured";
1da177e4
LT
334 goto out;
335 }
336
337 if (index_out)
338 *index_out = n;
339
f28169d2 340 err = -EINVAL;
6c29256c 341 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
342 switch (*str) {
343 case 'r':
344 flags.w = 0;
345 break;
346 case 's':
347 flags.s = 1;
348 break;
349 case 'd':
7d314e34 350 ubd_dev->no_cow = 1;
1da177e4 351 break;
6c29256c 352 case 'c':
7d314e34 353 ubd_dev->shared = 1;
6c29256c 354 break;
1da177e4
LT
355 case '=':
356 str++;
357 goto break_loop;
358 default:
f28169d2
JD
359 *error_out = "Expected '=' or flag letter "
360 "(r, s, c, or d)";
1da177e4
LT
361 goto out;
362 }
363 str++;
364 }
365
f28169d2
JD
366 if (*str == '=')
367 *error_out = "Too many flags specified";
368 else
369 *error_out = "Missing '='";
1da177e4
LT
370 goto out;
371
372break_loop:
1da177e4
LT
373 backing_file = strchr(str, ',');
374
f28169d2 375 if (backing_file == NULL)
1da177e4 376 backing_file = strchr(str, ':');
1da177e4 377
f28169d2
JD
378 if(backing_file != NULL){
379 if(ubd_dev->no_cow){
380 *error_out = "Can't specify both 'd' and a cow file";
381 goto out;
382 }
1da177e4
LT
383 else {
384 *backing_file = '\0';
385 backing_file++;
386 }
387 }
f28169d2 388 err = 0;
7d314e34
PBG
389 ubd_dev->file = str;
390 ubd_dev->cow.file = backing_file;
391 ubd_dev->boot_openflags = flags;
1da177e4 392out:
d7fb2c38 393 mutex_unlock(&ubd_lock);
f28169d2 394 return err;
1da177e4
LT
395}
396
397static int ubd_setup(char *str)
398{
f28169d2
JD
399 char *error;
400 int err;
401
402 err = ubd_setup_common(str, NULL, &error);
403 if(err)
404 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
405 "%s\n", str, error);
406 return 1;
1da177e4
LT
407}
408
409__setup("ubd", ubd_setup);
410__uml_help(ubd_setup,
411"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
412" This is used to associate a device with a file in the underlying\n"
413" filesystem. When specifying two filenames, the first one is the\n"
414" COW name and the second is the backing file name. As separator you can\n"
415" use either a ':' or a ',': the first one allows writing things like;\n"
416" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
417" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 418" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
419" a COW file or a backing file. To override this detection, add the 'd'\n"
420" flag:\n"
421" ubd0d=BackingFile\n"
422" Usually, there is a filesystem in the file, but \n"
423" that's not required. Swap devices containing swap files can be\n"
424" specified like this. Also, a file which doesn't contain a\n"
425" filesystem can have its contents read in the virtual \n"
426" machine by running 'dd' on the device. <n> must be in the range\n"
427" 0 to 7. Appending an 'r' to the number will cause that device\n"
428" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
429" an 's' will cause data to be written to disk on the host immediately.\n"
430" 'c' will cause the device to be treated as being shared between multiple\n"
431" UMLs and file locking will be turned off - this is appropriate for a\n"
432" cluster filesystem and inappropriate at almost all other times.\n\n"
1da177e4
LT
433);
434
8299ca5c 435static int udb_setup(char *str)
1da177e4
LT
436{
437 printk("udb%s specified on command line is almost certainly a ubd -> "
438 "udb TYPO\n", str);
dc764e50 439 return 1;
1da177e4
LT
440}
441
442__setup("udb", udb_setup);
443__uml_help(udb_setup,
444"udb\n"
0894e27e
JD
445" This option is here solely to catch ubd -> udb typos, which can be\n"
446" to impossible to catch visually unless you specifically look for\n"
447" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
448" in the boot output.\n\n"
449);
450
static void do_ubd_request(struct request_queue * q);

/*
 * Descriptor on which ubd_handler() reads finished requests back from the
 * I/O thread.  Filled in by start_io_thread() in ubd_driver_init(), which
 * is an initcall.
 */
static int thread_fd = -1;

/* Thin wrapper around blk_end_request(); note the swapped argument order. */
static void ubd_end_request(struct request *req, int bytes, int error)
{
	blk_end_request(req, error, bytes);
}
460
33f775ee
PBG
461/* Callable only from interrupt context - otherwise you need to do
462 * spin_lock_irq()/spin_lock_irqsave() */
a0044bdf 463static inline void ubd_finish(struct request *req, int bytes)
1da177e4 464{
a0044bdf 465 if(bytes < 0){
4898b53a 466 ubd_end_request(req, 0, -EIO);
a0044bdf
JD
467 return;
468 }
4898b53a 469 ubd_end_request(req, bytes, 0);
1da177e4
LT
470}
471
a0044bdf
JD
472static LIST_HEAD(restart);
473
2fe30a34 474/* XXX - move this inside ubd_intr. */
62f96cb0 475/* Called without dev->lock held, and only in interrupt context. */
91acb21f 476static void ubd_handler(void)
1da177e4 477{
2adcec21 478 struct io_thread_req *req;
62f96cb0 479 struct request *rq;
a0044bdf
JD
480 struct ubd *ubd;
481 struct list_head *list, *next_ele;
482 unsigned long flags;
91acb21f
JD
483 int n;
484
a0044bdf 485 while(1){
a6ea4cce
JD
486 n = os_read_file(thread_fd, &req,
487 sizeof(struct io_thread_req *));
a0044bdf
JD
488 if(n != sizeof(req)){
489 if(n == -EAGAIN)
490 break;
491 printk(KERN_ERR "spurious interrupt in ubd_handler, "
492 "err = %d\n", -n);
493 return;
494 }
62f96cb0 495
2adcec21
JD
496 rq = req->req;
497 rq->nr_sectors -= req->length >> 9;
a0044bdf
JD
498 if(rq->nr_sectors == 0)
499 ubd_finish(rq, rq->hard_nr_sectors << 9);
2adcec21 500 kfree(req);
a0044bdf 501 }
62f96cb0 502 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
503
504 list_for_each_safe(list, next_ele, &restart){
505 ubd = container_of(list, struct ubd, restart);
506 list_del_init(&ubd->restart);
507 spin_lock_irqsave(&ubd->lock, flags);
508 do_ubd_request(ubd->queue);
509 spin_unlock_irqrestore(&ubd->lock, flags);
510 }
1da177e4
LT
511}
512
7bea96fd 513static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 514{
91acb21f 515 ubd_handler();
dc764e50 516 return IRQ_HANDLED;
91acb21f 517}
09ace81c 518
91acb21f
JD
/*
 * Pid of the I/O thread, or -1 if there is none.  Only changed by
 * ubd_driver_init, which is an initcall.
 */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
529
d8d7c28e 530static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
531{
532 char *file;
533
7d314e34 534 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
dc764e50 535 return os_file_size(file, size_out);
1da177e4
LT
536}
537
5dc62b1b
WC
/*
 * Seek "fd" to "offset" and read "len" bytes into "buf".
 * Returns 0 on success, or the negative error from the seek or the read.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret;

	ret = os_seek_file(fd, offset);
	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
552
553static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
554{
555 unsigned long modtime;
556 unsigned long long actual;
557 int err;
558
559 err = os_file_modtime(file, &modtime);
560 if (err < 0) {
561 printk(KERN_ERR "Failed to get modification time of backing "
562 "file \"%s\", err = %d\n", file, -err);
563 return err;
564 }
565
566 err = os_file_size(file, &actual);
567 if (err < 0) {
568 printk(KERN_ERR "Failed to get size of backing file \"%s\", "
569 "err = %d\n", file, -err);
570 return err;
571 }
572
573 if (actual != size) {
574 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
575 * the typecast.*/
576 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
577 "vs backing file\n", (unsigned long long) size, actual);
578 return -EINVAL;
579 }
580 if (modtime != mtime) {
581 printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
582 "backing file\n", mtime, modtime);
583 return -EINVAL;
584 }
585 return 0;
586}
587
588static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
589{
590 struct uml_stat buf1, buf2;
591 int err;
592
593 if (from_cmdline == NULL)
594 return 0;
595 if (!strcmp(from_cmdline, from_cow))
596 return 0;
597
598 err = os_stat_file(from_cmdline, &buf1);
599 if (err < 0) {
600 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
601 -err);
602 return 0;
603 }
604 err = os_stat_file(from_cow, &buf2);
605 if (err < 0) {
606 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
607 -err);
608 return 1;
609 }
610 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
611 return 0;
612
613 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
614 "\"%s\" specified in COW header of \"%s\"\n",
615 from_cmdline, from_cow, cow);
616 return 1;
617}
618
619static int open_ubd_file(char *file, struct openflags *openflags, int shared,
620 char **backing_file_out, int *bitmap_offset_out,
621 unsigned long *bitmap_len_out, int *data_offset_out,
622 int *create_cow_out)
623{
624 time_t mtime;
625 unsigned long long size;
626 __u32 version, align;
627 char *backing_file;
628 int fd, err, sectorsize, asked_switch, mode = 0644;
629
630 fd = os_open_file(file, *openflags, mode);
631 if (fd < 0) {
632 if ((fd == -ENOENT) && (create_cow_out != NULL))
633 *create_cow_out = 1;
634 if (!openflags->w ||
635 ((fd != -EROFS) && (fd != -EACCES)))
636 return fd;
637 openflags->w = 0;
638 fd = os_open_file(file, *openflags, mode);
639 if (fd < 0)
640 return fd;
641 }
642
643 if (shared)
644 printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
645 else {
646 err = os_lock_file(fd, openflags->w);
647 if (err < 0) {
648 printk(KERN_ERR "Failed to lock '%s', err = %d\n",
649 file, -err);
650 goto out_close;
651 }
652 }
653
654 /* Successful return case! */
655 if (backing_file_out == NULL)
656 return fd;
657
658 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
659 &size, &sectorsize, &align, bitmap_offset_out);
660 if (err && (*backing_file_out != NULL)) {
661 printk(KERN_ERR "Failed to read COW header from COW file "
662 "\"%s\", errno = %d\n", file, -err);
663 goto out_close;
664 }
665 if (err)
666 return fd;
667
668 asked_switch = path_requires_switch(*backing_file_out, backing_file,
669 file);
670
671 /* Allow switching only if no mismatch. */
672 if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
673 mtime)) {
674 printk(KERN_ERR "Switching backing file to '%s'\n",
675 *backing_file_out);
676 err = write_cow_header(file, fd, *backing_file_out,
677 sectorsize, align, &size);
678 if (err) {
679 printk(KERN_ERR "Switch failed, errno = %d\n", -err);
680 goto out_close;
681 }
682 } else {
683 *backing_file_out = backing_file;
684 err = backing_file_mismatch(*backing_file_out, size, mtime);
685 if (err)
686 goto out_close;
687 }
688
689 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
690 bitmap_len_out, data_offset_out);
691
692 return fd;
693 out_close:
694 os_close_file(fd);
695 return err;
696}
697
698static int create_cow_file(char *cow_file, char *backing_file,
699 struct openflags flags,
700 int sectorsize, int alignment, int *bitmap_offset_out,
701 unsigned long *bitmap_len_out, int *data_offset_out)
702{
703 int err, fd;
704
705 flags.c = 1;
706 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
707 if (fd < 0) {
708 err = fd;
709 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
710 cow_file, -err);
711 goto out;
712 }
713
714 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
715 bitmap_offset_out, bitmap_len_out,
716 data_offset_out);
717 if (!err)
718 return fd;
719 os_close_file(fd);
720 out:
721 return err;
722}
723
5f75a4f8 724static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 725{
7d314e34
PBG
726 os_close_file(ubd_dev->fd);
727 if(ubd_dev->cow.file == NULL)
1da177e4
LT
728 return;
729
7d314e34
PBG
730 os_close_file(ubd_dev->cow.fd);
731 vfree(ubd_dev->cow.bitmap);
732 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
733}
734
7d314e34 735static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
736{
737 struct openflags flags;
738 char **back_ptr;
739 int err, create_cow, *create_ptr;
0bf16bff 740 int fd;
1da177e4 741
7d314e34 742 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 743 create_cow = 0;
7d314e34
PBG
744 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
745 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
746
747 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
748 back_ptr, &ubd_dev->cow.bitmap_offset,
749 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 750 create_ptr);
1da177e4 751
0bf16bff
PBG
752 if((fd == -ENOENT) && create_cow){
753 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
754 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
755 &ubd_dev->cow.bitmap_offset,
756 &ubd_dev->cow.bitmap_len,
757 &ubd_dev->cow.data_offset);
0bf16bff 758 if(fd >= 0){
1da177e4 759 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 760 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
761 }
762 }
763
0bf16bff 764 if(fd < 0){
7d314e34 765 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
766 -fd);
767 return fd;
1da177e4 768 }
0bf16bff 769 ubd_dev->fd = fd;
1da177e4 770
7d314e34 771 if(ubd_dev->cow.file != NULL){
f4768ffd
JD
772 blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
773
1da177e4 774 err = -ENOMEM;
da2486ba 775 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 776 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
777 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
778 goto error;
779 }
780 flush_tlb_kernel_vm();
781
7d314e34
PBG
782 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
783 ubd_dev->cow.bitmap_offset,
784 ubd_dev->cow.bitmap_len);
1da177e4
LT
785 if(err < 0)
786 goto error;
787
7d314e34 788 flags = ubd_dev->openflags;
1da177e4 789 flags.w = 0;
7d314e34 790 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 791 NULL, NULL, NULL, NULL);
1da177e4 792 if(err < 0) goto error;
7d314e34 793 ubd_dev->cow.fd = err;
1da177e4 794 }
dc764e50 795 return 0;
1da177e4 796 error:
7d314e34 797 os_close_file(ubd_dev->fd);
dc764e50 798 return err;
1da177e4
LT
799}
800
2e3f5251
JD
801static void ubd_device_release(struct device *dev)
802{
803 struct ubd *ubd_dev = dev->driver_data;
804
805 blk_cleanup_queue(ubd_dev->queue);
806 *ubd_dev = ((struct ubd) DEFAULT_UBD);
807}
808
5f75a4f8 809static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 810 struct gendisk **disk_out)
1da177e4
LT
811{
812 struct gendisk *disk;
1da177e4
LT
813
814 disk = alloc_disk(1 << UBD_SHIFT);
815 if(disk == NULL)
dc764e50 816 return -ENOMEM;
1da177e4
LT
817
818 disk->major = major;
819 disk->first_minor = unit << UBD_SHIFT;
820 disk->fops = &ubd_blops;
821 set_capacity(disk, size / 512);
ce7b0f46 822 if(major == MAJOR_NR)
1da177e4 823 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 824 else
1da177e4 825 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
826
827 /* sysfs register (not for ide fake devices) */
828 if (major == MAJOR_NR) {
7d314e34
PBG
829 ubd_devs[unit].pdev.id = unit;
830 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251
JD
831 ubd_devs[unit].pdev.dev.release = ubd_device_release;
832 ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit];
7d314e34
PBG
833 platform_device_register(&ubd_devs[unit].pdev);
834 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
1da177e4
LT
835 }
836
7d314e34 837 disk->private_data = &ubd_devs[unit];
62f96cb0 838 disk->queue = ubd_devs[unit].queue;
1da177e4
LT
839 add_disk(disk);
840
841 *disk_out = disk;
842 return 0;
843}
844
/*
 * Round n up to the next multiple of 512.  The argument is parenthesized so
 * that any expression can be passed, and the mask is written as ~511 rather
 * than (-1 << 9), which left-shifts a negative value.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
846
f28169d2 847static int ubd_add(int n, char **error_out)
1da177e4 848{
7d314e34 849 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 850 int err = 0;
1da177e4 851
7d314e34 852 if(ubd_dev->file == NULL)
ec7cf783 853 goto out;
1da177e4 854
7d314e34 855 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
856 if(err < 0){
857 *error_out = "Couldn't determine size of device's file";
80c13749 858 goto out;
f28169d2 859 }
1da177e4 860
7d314e34 861 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 862
a0044bdf 863 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 864 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 865
62f96cb0
JD
866 err = -ENOMEM;
867 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
868 if (ubd_dev->queue == NULL) {
869 *error_out = "Failed to initialize device queue";
80c13749 870 goto out;
62f96cb0
JD
871 }
872 ubd_dev->queue->queuedata = ubd_dev;
873
a0044bdf 874 blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
62f96cb0
JD
875 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
876 if(err){
877 *error_out = "Failed to register device";
878 goto out_cleanup;
879 }
6c29256c 880
1da177e4 881 if(fake_major != MAJOR_NR)
5f75a4f8 882 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 883 &fake_gendisk[n]);
1da177e4 884
83380cc1
JD
885 /*
886 * Perhaps this should also be under the "if (fake_major)" above
887 * using the fake_disk->disk_name
888 */
1da177e4
LT
889 if (fake_ide)
890 make_ide_entries(ubd_gendisk[n]->disk_name);
891
ec7cf783 892 err = 0;
ec7cf783
JD
893out:
894 return err;
62f96cb0
JD
895
896out_cleanup:
897 blk_cleanup_queue(ubd_dev->queue);
898 goto out;
1da177e4
LT
899}
900
f28169d2 901static int ubd_config(char *str, char **error_out)
1da177e4 902{
e7f6552f 903 int n, ret;
1da177e4 904
f28169d2
JD
905 /* This string is possibly broken up and stored, so it's only
906 * freed if ubd_setup_common fails, or if only general options
907 * were set.
908 */
970d6e3a 909 str = kstrdup(str, GFP_KERNEL);
e7f6552f 910 if (str == NULL) {
f28169d2
JD
911 *error_out = "Failed to allocate memory";
912 return -ENOMEM;
1da177e4 913 }
f28169d2
JD
914
915 ret = ubd_setup_common(str, &n, error_out);
916 if (ret)
e7f6552f 917 goto err_free;
f28169d2 918
e7f6552f
PBG
919 if (n == -1) {
920 ret = 0;
d8d7c28e 921 goto err_free;
1da177e4 922 }
1da177e4 923
dc764e50 924 mutex_lock(&ubd_lock);
f28169d2 925 ret = ubd_add(n, error_out);
e7f6552f 926 if (ret)
7d314e34 927 ubd_devs[n].file = NULL;
dc764e50 928 mutex_unlock(&ubd_lock);
1da177e4 929
e7f6552f 930out:
dc764e50 931 return ret;
e7f6552f
PBG
932
933err_free:
934 kfree(str);
935 goto out;
1da177e4
LT
936}
937
938static int ubd_get_config(char *name, char *str, int size, char **error_out)
939{
7d314e34 940 struct ubd *ubd_dev;
1da177e4
LT
941 int n, len = 0;
942
943 n = parse_unit(&name);
944 if((n >= MAX_DEV) || (n < 0)){
945 *error_out = "ubd_get_config : device number out of range";
dc764e50 946 return -1;
1da177e4
LT
947 }
948
7d314e34 949 ubd_dev = &ubd_devs[n];
d7fb2c38 950 mutex_lock(&ubd_lock);
1da177e4 951
7d314e34 952 if(ubd_dev->file == NULL){
1da177e4
LT
953 CONFIG_CHUNK(str, size, len, "", 1);
954 goto out;
955 }
956
7d314e34 957 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 958
7d314e34 959 if(ubd_dev->cow.file != NULL){
1da177e4 960 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 961 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
962 }
963 else CONFIG_CHUNK(str, size, len, "", 1);
964
965 out:
d7fb2c38 966 mutex_unlock(&ubd_lock);
dc764e50 967 return len;
1da177e4
LT
968}
969
29d56cfe
JD
970static int ubd_id(char **str, int *start_out, int *end_out)
971{
dc764e50 972 int n;
29d56cfe
JD
973
974 n = parse_unit(str);
dc764e50
JD
975 *start_out = 0;
976 *end_out = MAX_DEV - 1;
977 return n;
29d56cfe
JD
978}
979
f28169d2 980static int ubd_remove(int n, char **error_out)
1da177e4 981{
2e3f5251 982 struct gendisk *disk = ubd_gendisk[n];
7d314e34 983 struct ubd *ubd_dev;
29d56cfe 984 int err = -ENODEV;
1da177e4 985
d7fb2c38 986 mutex_lock(&ubd_lock);
1da177e4 987
7d314e34 988 ubd_dev = &ubd_devs[n];
1da177e4 989
7d314e34 990 if(ubd_dev->file == NULL)
29d56cfe 991 goto out;
1da177e4 992
29d56cfe
JD
993 /* you cannot remove a open disk */
994 err = -EBUSY;
7d314e34 995 if(ubd_dev->count > 0)
1da177e4
LT
996 goto out;
997
dc764e50 998 ubd_gendisk[n] = NULL;
b47d2deb
JD
999 if(disk != NULL){
1000 del_gendisk(disk);
1001 put_disk(disk);
1002 }
1da177e4
LT
1003
1004 if(fake_gendisk[n] != NULL){
1005 del_gendisk(fake_gendisk[n]);
1006 put_disk(fake_gendisk[n]);
1007 fake_gendisk[n] = NULL;
1008 }
1009
1da177e4 1010 err = 0;
2e3f5251 1011 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 1012out:
d7fb2c38 1013 mutex_unlock(&ubd_lock);
29d56cfe 1014 return err;
1da177e4
LT
1015}
1016
f28169d2 1017/* All these are called by mconsole in process context and without
b8831a1d 1018 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 1019 */
1da177e4 1020static struct mc_device ubd_mc = {
84f48d4f 1021 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
1022 .name = "ubd",
1023 .config = ubd_config,
dc764e50 1024 .get_config = ubd_get_config,
29d56cfe 1025 .id = ubd_id,
1da177e4
LT
1026 .remove = ubd_remove,
1027};
1028
d8d7c28e 1029static int __init ubd_mc_init(void)
1da177e4
LT
1030{
1031 mconsole_register_dev(&ubd_mc);
1032 return 0;
1033}
1034
1035__initcall(ubd_mc_init);
1036
d8d7c28e
PBG
1037static int __init ubd0_init(void)
1038{
1039 struct ubd *ubd_dev = &ubd_devs[0];
1040
b8831a1d 1041 mutex_lock(&ubd_lock);
d8d7c28e
PBG
1042 if(ubd_dev->file == NULL)
1043 ubd_dev->file = "root_fs";
b8831a1d
JD
1044 mutex_unlock(&ubd_lock);
1045
dc764e50 1046 return 0;
d8d7c28e
PBG
1047}
1048
1049__initcall(ubd0_init);
1050
b8831a1d 1051/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
1052static struct platform_driver ubd_driver = {
1053 .driver = {
1054 .name = DRIVER_NAME,
1055 },
1da177e4
LT
1056};
1057
d8d7c28e 1058static int __init ubd_init(void)
1da177e4 1059{
f28169d2
JD
1060 char *error;
1061 int i, err;
1da177e4 1062
1da177e4
LT
1063 if (register_blkdev(MAJOR_NR, "ubd"))
1064 return -1;
1065
1da177e4
LT
1066 if (fake_major != MAJOR_NR) {
1067 char name[sizeof("ubd_nnn\0")];
1068
1069 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
1070 if (register_blkdev(fake_major, "ubd"))
1071 return -1;
1072 }
3ae5eaec 1073 platform_driver_register(&ubd_driver);
dc764e50 1074 mutex_lock(&ubd_lock);
f28169d2
JD
1075 for (i = 0; i < MAX_DEV; i++){
1076 err = ubd_add(i, &error);
1077 if(err)
1078 printk(KERN_ERR "Failed to initialize ubd device %d :"
1079 "%s\n", i, error);
1080 }
dc764e50 1081 mutex_unlock(&ubd_lock);
1da177e4
LT
1082 return 0;
1083}
1084
1085late_initcall(ubd_init);
1086
d8d7c28e 1087static int __init ubd_driver_init(void){
91acb21f
JD
1088 unsigned long stack;
1089 int err;
1090
1091 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1092 if(global_openflags.s){
1093 printk(KERN_INFO "ubd: Synchronous mode\n");
1094 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1095 * enough. So use anyway the io thread. */
1096 }
1097 stack = alloc_stack(0, 0);
6c29256c 1098 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
1099 &thread_fd);
1100 if(io_pid < 0){
6c29256c 1101 printk(KERN_ERR
91acb21f
JD
1102 "ubd : Failed to start I/O thread (errno = %d) - "
1103 "falling back to synchronous I/O\n", -io_pid);
1104 io_pid = -1;
dc764e50 1105 return 0;
91acb21f 1106 }
6c29256c 1107 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
7d314e34 1108 IRQF_DISABLED, "ubd", ubd_devs);
91acb21f
JD
1109 if(err != 0)
1110 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 1111 return 0;
91acb21f
JD
1112}
1113
1114device_initcall(ubd_driver_init);
1115
1da177e4
LT
1116static int ubd_open(struct inode *inode, struct file *filp)
1117{
1118 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 1119 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
1120 int err = 0;
1121
7d314e34
PBG
1122 if(ubd_dev->count == 0){
1123 err = ubd_open_dev(ubd_dev);
1da177e4
LT
1124 if(err){
1125 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 1126 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
1127 goto out;
1128 }
1129 }
7d314e34
PBG
1130 ubd_dev->count++;
1131 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
1132
1133 /* This should no more be needed. And it didn't work anyway to exclude
1134 * read-write remounting of filesystems.*/
7d314e34 1135 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 1136 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 1137 err = -EROFS;
2c49be99 1138 }*/
1da177e4 1139 out:
dc764e50 1140 return err;
1da177e4
LT
1141}
1142
1143static int ubd_release(struct inode * inode, struct file * file)
1144{
1145 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 1146 struct ubd *ubd_dev = disk->private_data;
1da177e4 1147
7d314e34 1148 if(--ubd_dev->count == 0)
5f75a4f8 1149 ubd_close_dev(ubd_dev);
dc764e50 1150 return 0;
1da177e4
LT
1151}
1152
91acb21f
JD
1153static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1154 __u64 *cow_offset, unsigned long *bitmap,
1155 __u64 bitmap_offset, unsigned long *bitmap_words,
1156 __u64 bitmap_len)
1da177e4 1157{
91acb21f
JD
1158 __u64 sector = io_offset >> 9;
1159 int i, update_bitmap = 0;
1160
1161 for(i = 0; i < length >> 9; i++){
1162 if(cow_mask != NULL)
1163 ubd_set_bit(i, (unsigned char *) cow_mask);
1164 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1165 continue;
1da177e4 1166
91acb21f
JD
1167 update_bitmap = 1;
1168 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1169 }
1170
1171 if(!update_bitmap)
1172 return;
1da177e4 1173
91acb21f 1174 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1175
91acb21f
JD
1176 /* This takes care of the case where we're exactly at the end of the
1177 * device, and *cow_offset + 1 is off the end. So, just back it up
1178 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1179 * for the original diagnosis.
1180 */
6d074242
JO
1181 if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1182 sizeof(unsigned long)) - 1))
91acb21f
JD
1183 (*cow_offset)--;
1184
1185 bitmap_words[0] = bitmap[*cow_offset];
1186 bitmap_words[1] = bitmap[*cow_offset + 1];
1187
1188 *cow_offset *= sizeof(unsigned long);
1189 *cow_offset += bitmap_offset;
1190}
1191
1192static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1193 __u64 bitmap_offset, __u64 bitmap_len)
1194{
1195 __u64 sector = req->offset >> 9;
1196 int i;
1197
1198 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1199 panic("Operation too long");
1200
1201 if(req->op == UBD_READ) {
1202 for(i = 0; i < req->length >> 9; i++){
1203 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1204 ubd_set_bit(i, (unsigned char *)
91acb21f 1205 &req->sector_mask);
dc764e50 1206 }
91acb21f
JD
1207 }
1208 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1209 &req->cow_offset, bitmap, bitmap_offset,
1210 req->bitmap_words, bitmap_len);
1da177e4
LT
1211}
1212
62f96cb0 1213/* Called with dev->lock held */
a0044bdf
JD
1214static void prepare_request(struct request *req, struct io_thread_req *io_req,
1215 unsigned long long offset, int page_offset,
1216 int len, struct page *page)
1da177e4
LT
1217{
1218 struct gendisk *disk = req->rq_disk;
7d314e34 1219 struct ubd *ubd_dev = disk->private_data;
91acb21f 1220
62f96cb0 1221 io_req->req = req;
a0044bdf
JD
1222 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1223 ubd_dev->fd;
7d314e34 1224 io_req->fds[1] = ubd_dev->fd;
91acb21f 1225 io_req->cow_offset = -1;
1da177e4
LT
1226 io_req->offset = offset;
1227 io_req->length = len;
1228 io_req->error = 0;
91acb21f
JD
1229 io_req->sector_mask = 0;
1230
1231 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1232 io_req->offsets[0] = 0;
7d314e34 1233 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1234 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1235 io_req->sectorsize = 1 << 9;
1236
7d314e34 1237 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1238 cowify_req(io_req, ubd_dev->cow.bitmap,
1239 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1240
1da177e4
LT
1241}
1242
62f96cb0 1243/* Called with dev->lock held */
165125e1 1244static void do_ubd_request(struct request_queue *q)
1da177e4 1245{
2adcec21 1246 struct io_thread_req *io_req;
1da177e4 1247 struct request *req;
0a6d3a2a 1248 int n, last_sectors;
a0044bdf
JD
1249
1250 while(1){
2a9529a0 1251 struct ubd *dev = q->queuedata;
a0044bdf
JD
1252 if(dev->end_sg == 0){
1253 struct request *req = elv_next_request(q);
1254 if(req == NULL)
1255 return;
1256
1257 dev->request = req;
1258 blkdev_dequeue_request(req);
1259 dev->start_sg = 0;
1260 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1261 }
1262
1263 req = dev->request;
0a6d3a2a 1264 last_sectors = 0;
a0044bdf
JD
1265 while(dev->start_sg < dev->end_sg){
1266 struct scatterlist *sg = &dev->sg[dev->start_sg];
1267
0a6d3a2a 1268 req->sector += last_sectors;
2adcec21 1269 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1270 GFP_ATOMIC);
2adcec21
JD
1271 if(io_req == NULL){
1272 if(list_empty(&dev->restart))
1273 list_add(&dev->restart, &restart);
1274 return;
1275 }
1276 prepare_request(req, io_req,
a0044bdf 1277 (unsigned long long) req->sector << 9,
45711f1a 1278 sg->offset, sg->length, sg_page(sg));
a0044bdf 1279
0a6d3a2a 1280 last_sectors = sg->length >> 9;
a6ea4cce
JD
1281 n = os_write_file(thread_fd, &io_req,
1282 sizeof(struct io_thread_req *));
2adcec21 1283 if(n != sizeof(struct io_thread_req *)){
a0044bdf
JD
1284 if(n != -EAGAIN)
1285 printk("write to io thread failed, "
1286 "errno = %d\n", -n);
1287 else if(list_empty(&dev->restart))
1288 list_add(&dev->restart, &restart);
12429bf9 1289 kfree(io_req);
a0044bdf
JD
1290 return;
1291 }
1292
a0044bdf 1293 dev->start_sg++;
1da177e4 1294 }
a0044bdf
JD
1295 dev->end_sg = 0;
1296 dev->request = NULL;
1da177e4
LT
1297 }
1298}
1299
a885c8c4
CH
1300static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1301{
7d314e34 1302 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1303
1304 geo->heads = 128;
1305 geo->sectors = 32;
7d314e34 1306 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1307 return 0;
1308}
1309
1da177e4
LT
1310static int ubd_ioctl(struct inode * inode, struct file * file,
1311 unsigned int cmd, unsigned long arg)
1312{
7d314e34 1313 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1da177e4
LT
1314 struct hd_driveid ubd_id = {
1315 .cyls = 0,
1316 .heads = 128,
1317 .sectors = 32,
1318 };
1319
1320 switch (cmd) {
1da177e4 1321 struct cdrom_volctrl volume;
1da177e4 1322 case HDIO_GET_IDENTITY:
7d314e34 1323 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1da177e4
LT
1324 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1325 sizeof(ubd_id)))
dc764e50
JD
1326 return -EFAULT;
1327 return 0;
b8831a1d 1328
1da177e4
LT
1329 case CDROMVOLREAD:
1330 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1331 return -EFAULT;
1da177e4
LT
1332 volume.channel0 = 255;
1333 volume.channel1 = 255;
1334 volume.channel2 = 255;
1335 volume.channel3 = 255;
1336 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1337 return -EFAULT;
1338 return 0;
1da177e4 1339 }
dc764e50 1340 return -EINVAL;
1da177e4
LT
1341}
1342
91acb21f 1343static int update_bitmap(struct io_thread_req *req)
1da177e4 1344{
91acb21f 1345 int n;
1da177e4 1346
91acb21f 1347 if(req->cow_offset == -1)
dc764e50 1348 return 0;
1da177e4 1349
91acb21f
JD
1350 n = os_seek_file(req->fds[1], req->cow_offset);
1351 if(n < 0){
1352 printk("do_io - bitmap lseek failed : err = %d\n", -n);
dc764e50 1353 return 1;
91acb21f 1354 }
1da177e4 1355
a6ea4cce
JD
1356 n = os_write_file(req->fds[1], &req->bitmap_words,
1357 sizeof(req->bitmap_words));
91acb21f
JD
1358 if(n != sizeof(req->bitmap_words)){
1359 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1360 req->fds[1]);
dc764e50 1361 return 1;
91acb21f 1362 }
1da177e4 1363
dc764e50 1364 return 0;
91acb21f 1365}
1da177e4 1366
5dc62b1b 1367static void do_io(struct io_thread_req *req)
91acb21f
JD
1368{
1369 char *buf;
1370 unsigned long len;
1371 int n, nsectors, start, end, bit;
1372 int err;
1373 __u64 off;
1374
1375 nsectors = req->length / req->sectorsize;
1376 start = 0;
1377 do {
1378 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1379 end = start;
1380 while((end < nsectors) &&
1381 (ubd_test_bit(end, (unsigned char *)
1382 &req->sector_mask) == bit))
1383 end++;
1384
1385 off = req->offset + req->offsets[bit] +
1386 start * req->sectorsize;
1387 len = (end - start) * req->sectorsize;
1388 buf = &req->buffer[start * req->sectorsize];
1389
1390 err = os_seek_file(req->fds[bit], off);
1391 if(err < 0){
1392 printk("do_io - lseek failed : err = %d\n", -err);
1393 req->error = 1;
1394 return;
1395 }
1396 if(req->op == UBD_READ){
1397 n = 0;
1398 do {
1399 buf = &buf[n];
1400 len -= n;
a6ea4cce 1401 n = os_read_file(req->fds[bit], buf, len);
91acb21f
JD
1402 if (n < 0) {
1403 printk("do_io - read failed, err = %d "
1404 "fd = %d\n", -n, req->fds[bit]);
1405 req->error = 1;
1406 return;
1407 }
1408 } while((n < len) && (n != 0));
1409 if (n < len) memset(&buf[n], 0, len - n);
1410 } else {
a6ea4cce 1411 n = os_write_file(req->fds[bit], buf, len);
91acb21f
JD
1412 if(n != len){
1413 printk("do_io - write failed err = %d "
1414 "fd = %d\n", -n, req->fds[bit]);
1415 req->error = 1;
1416 return;
1417 }
1418 }
1419
1420 start = end;
1421 } while(start < nsectors);
1da177e4 1422
91acb21f 1423 req->error = update_bitmap(req);
1da177e4 1424}
91acb21f
JD
1425
/*
 * Descriptor the io thread reads requests from and writes completions to.
 * Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;
1430
/*
 * Only changed by the io thread, which bumps it once per request it picks
 * up.  XXX: nothing reads the value yet.
 */
static int io_count;
91acb21f
JD
1433
1434int io_thread(void *arg)
1435{
2adcec21 1436 struct io_thread_req *req;
91acb21f
JD
1437 int n;
1438
1439 ignore_sigwinch_sig();
1440 while(1){
a6ea4cce 1441 n = os_read_file(kernel_fd, &req,
2adcec21
JD
1442 sizeof(struct io_thread_req *));
1443 if(n != sizeof(struct io_thread_req *)){
91acb21f
JD
1444 if(n < 0)
1445 printk("io_thread - read failed, fd = %d, "
1446 "err = %d\n", kernel_fd, -n);
1447 else {
1448 printk("io_thread - short read, fd = %d, "
1449 "length = %d\n", kernel_fd, n);
1450 }
1451 continue;
1452 }
1453 io_count++;
2adcec21 1454 do_io(req);
a6ea4cce 1455 n = os_write_file(kernel_fd, &req,
2adcec21
JD
1456 sizeof(struct io_thread_req *));
1457 if(n != sizeof(struct io_thread_req *))
91acb21f
JD
1458 printk("io_thread - write failed, fd = %d, err = %d\n",
1459 kernel_fd, -n);
1460 }
91acb21f 1461
1b57e9c2
JD
1462 return 0;
1463}