uml: const and other tidying
[linux-2.6-block.git] / arch / um / drivers / ubd_kern.c
CommitLineData
6c29256c 1/*
1da177e4
LT
2 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
4 */
5
6/* 2001-09-28...2002-04-17
7 * Partition stuff by James_McMechan@hotmail.com
8 * old style ubd by setting UBD_SHIFT to 0
9 * 2002-09-27...2002-10-18 massive tinkering for 2.5
10 * partitions have changed in 2.5
11 * 2003-01-29 more tinkering for 2.5.59-1
12 * This should now address the sysfs problems and has
13 * the symlink for devfs to allow for booting with
14 * the common /dev/ubd/discX/... names rather than
15 * only /dev/ubdN/discN this version also has lots of
16 * clean ups preparing for ubd-many.
17 * James McMechan
18 */
19
20#define MAJOR_NR UBD_MAJOR
21#define UBD_SHIFT 4
22
e16f5350 23#include "linux/kernel.h"
1da177e4
LT
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
1da177e4
LT
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
d052d1be 37#include "linux/platform_device.h"
23464ffa 38#include "linux/scatterlist.h"
1da177e4
LT
39#include "asm/segment.h"
40#include "asm/uaccess.h"
41#include "asm/irq.h"
42#include "asm/types.h"
43#include "asm/tlbflush.h"
1da177e4
LT
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
1da177e4
LT
52#include "os.h"
53#include "mem.h"
54#include "mem_kern.h"
55#include "cow.h"
56
/* Direction of a request handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};
1da177e4
LT
58
59struct io_thread_req {
62f96cb0 60 struct request *req;
91acb21f 61 enum ubd_req op;
1da177e4
LT
62 int fds[2];
63 unsigned long offsets[2];
64 unsigned long long offset;
65 unsigned long length;
66 char *buffer;
67 int sectorsize;
91acb21f
JD
68 unsigned long sector_mask;
69 unsigned long long cow_offset;
70 unsigned long bitmap_words[2];
1da177e4
LT
71 int error;
72};
73
/*
 * Helpers implemented outside this file.  They are declared here by hand
 * rather than through a header.
 */
int open_ubd_file(char *file, struct openflags *openflags, int shared,
		  char **backing_file_out, int *bitmap_offset_out,
		  unsigned long *bitmap_len_out, int *data_offset_out,
		  int *create_cow_out);

int create_cow_file(char *cow_file, char *backing_file,
		    struct openflags flags, int sectorsize,
		    int alignment, int *bitmap_offset_out,
		    unsigned long *bitmap_len_out,
		    int *data_offset_out);

int read_cow_bitmap(int fd, void *buf, int offset, int len);

void do_io(struct io_thread_req *req);
1da177e4 85
91acb21f 86static inline int ubd_test_bit(__u64 bit, unsigned char *data)
1da177e4
LT
87{
88 __u64 n;
89 int bits, off;
90
91acb21f 91 bits = sizeof(data[0]) * 8;
1da177e4
LT
92 n = bit / bits;
93 off = bit % bits;
dc764e50 94 return (data[n] & (1 << off)) != 0;
1da177e4
LT
95}
96
91acb21f 97static inline void ubd_set_bit(__u64 bit, unsigned char *data)
1da177e4
LT
98{
99 __u64 n;
100 int bits, off;
101
91acb21f 102 bits = sizeof(data[0]) * 8;
1da177e4
LT
103 n = bit / bits;
104 off = bit % bits;
91acb21f 105 data[n] |= (1 << off);
1da177e4
LT
106}
107/*End stuff from ubd_user.h*/
108
/* Name shared by the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* Guards ubd_devs[], the gendisk tables and fake_major. */
static DEFINE_MUTEX(ubd_lock);

/* Block device operations; the definitions are further down. */
static int ubd_open(struct inode *inode, struct file *filp);
static int ubd_release(struct inode *inode, struct file *file);
static int ubd_ioctl(struct inode *inode, struct file *file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of ubd units (size of ubd_devs[] and of the gendisk tables). */
#define MAX_DEV (16)
1da177e4 120
1da177e4
LT
121static struct block_device_operations ubd_blops = {
122 .owner = THIS_MODULE,
123 .open = ubd_open,
124 .release = ubd_release,
125 .ioctl = ubd_ioctl,
a885c8c4 126 .getgeo = ubd_getgeo,
1da177e4
LT
127};
128
1da177e4
LT
129/* Protected by ubd_lock */
130static int fake_major = MAJOR_NR;
1da177e4
LT
131static struct gendisk *ubd_gendisk[MAX_DEV];
132static struct gendisk *fake_gendisk[MAX_DEV];
6c29256c 133
1da177e4
LT
134#ifdef CONFIG_BLK_DEV_UBD_SYNC
135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
136 .cl = 1 })
137#else
138#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
139 .cl = 1 })
140#endif
1da177e4
LT
141static struct openflags global_openflags = OPEN_FLAGS;
142
/* Copy-on-write state of a device that has a backing file. */
struct cow {
	/* backing file name */
	char *file;
	/* backing file fd */
	int fd;
	/* in-memory copy of the COW bitmap (vmalloc'ed by ubd_open_dev) */
	unsigned long *bitmap;
	/* length passed to vmalloc() and read_cow_bitmap() */
	unsigned long bitmap_len;
	/* offset of the bitmap within the COW file */
	int bitmap_offset;
	/* used as io_thread_req.offsets[1] by prepare_request() */
	int data_offset;
};
153
a0044bdf
JD
154#define MAX_SG 64
155
1da177e4 156struct ubd {
a0044bdf 157 struct list_head restart;
2a9d32f6
PBG
158 /* name (and fd, below) of the file opened for writing, either the
159 * backing or the cow file. */
1da177e4
LT
160 char *file;
161 int count;
162 int fd;
163 __u64 size;
164 struct openflags boot_openflags;
165 struct openflags openflags;
84e945e3
PBG
166 unsigned shared:1;
167 unsigned no_cow:1;
1da177e4
LT
168 struct cow cow;
169 struct platform_device pdev;
62f96cb0
JD
170 struct request_queue *queue;
171 spinlock_t lock;
a0044bdf
JD
172 struct scatterlist sg[MAX_SG];
173 struct request *request;
174 int start_sg, end_sg;
1da177e4
LT
175};
176
/* Initialiser for struct cow: no backing file, nothing open. */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}
184
/*
 * Initialiser for an unconfigured struct ubd.  ubd_device_release() also
 * uses it to reset a unit.
 */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =		0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			SPIN_LOCK_UNLOCKED, \
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
}
200
b8831a1d 201/* Protected by ubd_lock */
7d314e34 202struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
1da177e4 203
1da177e4
LT
204/* Only changed by fake_ide_setup which is a setup */
205static int fake_ide = 0;
206static struct proc_dir_entry *proc_ide_root = NULL;
207static struct proc_dir_entry *proc_ide = NULL;
208
209static void make_proc_ide(void)
210{
211 proc_ide_root = proc_mkdir("ide", NULL);
212 proc_ide = proc_mkdir("ide0", proc_ide_root);
213}
214
/*
 * read_proc handler for the fake "media" entry: the content is always
 * "disk\n".
 *
 * The whole string is copied to 'page'.  The return value is the number of
 * bytes available from offset 'off', capped at 'count'.  *eof is set when
 * fewer than 'count' bytes remain; *start is only written when at least one
 * byte is returned.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		/* More left than the caller asked for: hand out 'count'. */
		*start = page + off;
		return count;
	}

	*eof = 1;
	if (remaining <= 0)
		return 0;

	*start = page + off;
	return remaining;
}
231
c0a9290e 232static void make_ide_entries(const char *dev_name)
1da177e4
LT
233{
234 struct proc_dir_entry *dir, *ent;
235 char name[64];
236
237 if(proc_ide_root == NULL) make_proc_ide();
238
239 dir = proc_mkdir(dev_name, proc_ide);
240 if(!dir) return;
241
242 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
243 if(!ent) return;
1da177e4
LT
244 ent->data = NULL;
245 ent->read_proc = proc_ide_read_media;
246 ent->write_proc = NULL;
c0a9290e 247 snprintf(name, sizeof(name), "ide0/%s", dev_name);
1da177e4
LT
248 proc_symlink(dev_name, proc_ide_root, name);
249}
250
251static int fake_ide_setup(char *str)
252{
253 fake_ide = 1;
dc764e50 254 return 1;
1da177e4
LT
255}
256
257__setup("fake_ide", fake_ide_setup);
258
259__uml_help(fake_ide_setup,
260"fake_ide\n"
261" Create ide0 entries that map onto ubd devices.\n\n"
262);
263
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul) or a single letter 'a'..'z' meaning 0..25.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned.  Returns -1, leaving *ptr untouched, if neither form matches.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
282
d8d7c28e
PBG
283/* If *index_out == -1 at exit, the passed option was a general one;
284 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
285 * should not be freed on exit.
286 */
f28169d2 287static int ubd_setup_common(char *str, int *index_out, char **error_out)
1da177e4 288{
7d314e34 289 struct ubd *ubd_dev;
1da177e4
LT
290 struct openflags flags = global_openflags;
291 char *backing_file;
b8831a1d 292 int n, err = 0, i;
1da177e4
LT
293
294 if(index_out) *index_out = -1;
295 n = *str;
296 if(n == '='){
297 char *end;
298 int major;
299
300 str++;
1da177e4
LT
301 if(!strcmp(str, "sync")){
302 global_openflags = of_sync(global_openflags);
b8831a1d 303 goto out1;
1da177e4 304 }
b8831a1d
JD
305
306 err = -EINVAL;
1da177e4
LT
307 major = simple_strtoul(str, &end, 0);
308 if((*end != '\0') || (end == str)){
f28169d2 309 *error_out = "Didn't parse major number";
b8831a1d 310 goto out1;
1da177e4
LT
311 }
312
f28169d2
JD
313 mutex_lock(&ubd_lock);
314 if(fake_major != MAJOR_NR){
315 *error_out = "Can't assign a fake major twice";
316 goto out1;
317 }
6c29256c 318
f28169d2 319 fake_major = major;
1da177e4
LT
320
321 printk(KERN_INFO "Setting extra ubd major number to %d\n",
322 major);
f28169d2
JD
323 err = 0;
324 out1:
325 mutex_unlock(&ubd_lock);
326 return err;
1da177e4
LT
327 }
328
329 n = parse_unit(&str);
330 if(n < 0){
f28169d2
JD
331 *error_out = "Couldn't parse device number";
332 return -EINVAL;
1da177e4
LT
333 }
334 if(n >= MAX_DEV){
f28169d2
JD
335 *error_out = "Device number out of range";
336 return 1;
1da177e4
LT
337 }
338
f28169d2 339 err = -EBUSY;
d7fb2c38 340 mutex_lock(&ubd_lock);
1da177e4 341
7d314e34
PBG
342 ubd_dev = &ubd_devs[n];
343 if(ubd_dev->file != NULL){
f28169d2 344 *error_out = "Device is already configured";
1da177e4
LT
345 goto out;
346 }
347
348 if (index_out)
349 *index_out = n;
350
f28169d2 351 err = -EINVAL;
6c29256c 352 for (i = 0; i < sizeof("rscd="); i++) {
1da177e4
LT
353 switch (*str) {
354 case 'r':
355 flags.w = 0;
356 break;
357 case 's':
358 flags.s = 1;
359 break;
360 case 'd':
7d314e34 361 ubd_dev->no_cow = 1;
1da177e4 362 break;
6c29256c 363 case 'c':
7d314e34 364 ubd_dev->shared = 1;
6c29256c 365 break;
1da177e4
LT
366 case '=':
367 str++;
368 goto break_loop;
369 default:
f28169d2
JD
370 *error_out = "Expected '=' or flag letter "
371 "(r, s, c, or d)";
1da177e4
LT
372 goto out;
373 }
374 str++;
375 }
376
f28169d2
JD
377 if (*str == '=')
378 *error_out = "Too many flags specified";
379 else
380 *error_out = "Missing '='";
1da177e4
LT
381 goto out;
382
383break_loop:
1da177e4
LT
384 backing_file = strchr(str, ',');
385
f28169d2 386 if (backing_file == NULL)
1da177e4 387 backing_file = strchr(str, ':');
1da177e4 388
f28169d2
JD
389 if(backing_file != NULL){
390 if(ubd_dev->no_cow){
391 *error_out = "Can't specify both 'd' and a cow file";
392 goto out;
393 }
1da177e4
LT
394 else {
395 *backing_file = '\0';
396 backing_file++;
397 }
398 }
f28169d2 399 err = 0;
7d314e34
PBG
400 ubd_dev->file = str;
401 ubd_dev->cow.file = backing_file;
402 ubd_dev->boot_openflags = flags;
1da177e4 403out:
d7fb2c38 404 mutex_unlock(&ubd_lock);
f28169d2 405 return err;
1da177e4
LT
406}
407
408static int ubd_setup(char *str)
409{
f28169d2
JD
410 char *error;
411 int err;
412
413 err = ubd_setup_common(str, NULL, &error);
414 if(err)
415 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
416 "%s\n", str, error);
417 return 1;
1da177e4
LT
418}
419
420__setup("ubd", ubd_setup);
421__uml_help(ubd_setup,
422"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
423" This is used to associate a device with a file in the underlying\n"
424" filesystem. When specifying two filenames, the first one is the\n"
425" COW name and the second is the backing file name. As separator you can\n"
426" use either a ':' or a ',': the first one allows writing things like;\n"
427" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
428" while with a ',' the shell would not expand the 2nd '~'.\n"
f28169d2 429" When using only one filename, UML will detect whether to treat it like\n"
1da177e4
LT
430" a COW file or a backing file. To override this detection, add the 'd'\n"
431" flag:\n"
432" ubd0d=BackingFile\n"
433" Usually, there is a filesystem in the file, but \n"
434" that's not required. Swap devices containing swap files can be\n"
435" specified like this. Also, a file which doesn't contain a\n"
436" filesystem can have its contents read in the virtual \n"
437" machine by running 'dd' on the device. <n> must be in the range\n"
438" 0 to 7. Appending an 'r' to the number will cause that device\n"
439" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
20ede453
JD
440" an 's' will cause data to be written to disk on the host immediately.\n"
441" 'c' will cause the device to be treated as being shared between multiple\n"
442" UMLs and file locking will be turned off - this is appropriate for a\n"
443" cluster filesystem and inappropriate at almost all other times.\n\n"
1da177e4
LT
444);
445
c0a9290e 446static int udb_setup(const char *str)
1da177e4
LT
447{
448 printk("udb%s specified on command line is almost certainly a ubd -> "
449 "udb TYPO\n", str);
dc764e50 450 return 1;
1da177e4
LT
451}
452
453__setup("udb", udb_setup);
454__uml_help(udb_setup,
455"udb\n"
0894e27e
JD
456" This option is here solely to catch ubd -> udb typos, which can be\n"
457" to impossible to catch visually unless you specifically look for\n"
458" them. The only result of any option starting with 'udb' is an error\n"
1da177e4
LT
459" in the boot output.\n\n"
460);
461
462static int fakehd_set = 0;
463static int fakehd(char *str)
464{
465 printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
466 fakehd_set = 1;
467 return 1;
468}
469
470__setup("fakehd", fakehd);
471__uml_help(fakehd,
472"fakehd\n"
473" Change the ubd device name to \"hd\".\n\n"
474);
475
/* Request function; needed early by ubd_handler(). */
static void do_ubd_request(struct request_queue *q);

/*
 * fd from which ubd_handler() reads back completed requests.  Filled in
 * by start_io_thread(), which is called from the ubd_driver_init initcall.
 */
int thread_fd = -1;
1da177e4 480
/*
 * Complete 'bytes' bytes of 'req' with status 'error'.  Thin wrapper that
 * only reorders the arguments for blk_end_request().
 */
static void ubd_end_request(struct request *req, int bytes, int error)
{
	blk_end_request(req, error, bytes);
}
485
33f775ee
PBG
486/* Callable only from interrupt context - otherwise you need to do
487 * spin_lock_irq()/spin_lock_irqsave() */
a0044bdf 488static inline void ubd_finish(struct request *req, int bytes)
1da177e4 489{
a0044bdf 490 if(bytes < 0){
4898b53a 491 ubd_end_request(req, 0, -EIO);
a0044bdf
JD
492 return;
493 }
4898b53a 494 ubd_end_request(req, bytes, 0);
1da177e4
LT
495}
496
a0044bdf
JD
497static LIST_HEAD(restart);
498
2fe30a34 499/* XXX - move this inside ubd_intr. */
62f96cb0 500/* Called without dev->lock held, and only in interrupt context. */
91acb21f 501static void ubd_handler(void)
1da177e4 502{
2adcec21 503 struct io_thread_req *req;
62f96cb0 504 struct request *rq;
a0044bdf
JD
505 struct ubd *ubd;
506 struct list_head *list, *next_ele;
507 unsigned long flags;
91acb21f
JD
508 int n;
509
a0044bdf 510 while(1){
a6ea4cce
JD
511 n = os_read_file(thread_fd, &req,
512 sizeof(struct io_thread_req *));
a0044bdf
JD
513 if(n != sizeof(req)){
514 if(n == -EAGAIN)
515 break;
516 printk(KERN_ERR "spurious interrupt in ubd_handler, "
517 "err = %d\n", -n);
518 return;
519 }
62f96cb0 520
2adcec21
JD
521 rq = req->req;
522 rq->nr_sectors -= req->length >> 9;
a0044bdf
JD
523 if(rq->nr_sectors == 0)
524 ubd_finish(rq, rq->hard_nr_sectors << 9);
2adcec21 525 kfree(req);
a0044bdf 526 }
62f96cb0 527 reactivate_fd(thread_fd, UBD_IRQ);
a0044bdf
JD
528
529 list_for_each_safe(list, next_ele, &restart){
530 ubd = container_of(list, struct ubd, restart);
531 list_del_init(&ubd->restart);
532 spin_lock_irqsave(&ubd->lock, flags);
533 do_ubd_request(ubd->queue);
534 spin_unlock_irqrestore(&ubd->lock, flags);
535 }
1da177e4
LT
536}
537
7bea96fd 538static irqreturn_t ubd_intr(int irq, void *dev)
1da177e4 539{
91acb21f 540 ubd_handler();
dc764e50 541 return IRQ_HANDLED;
91acb21f 542}
09ace81c 543
91acb21f
JD
/*
 * pid of the I/O thread, or -1 when there is none.  Assigned in the
 * ubd_driver_init initcall.
 */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
554
d8d7c28e 555static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
1da177e4
LT
556{
557 char *file;
558
7d314e34 559 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
dc764e50 560 return os_file_size(file, size_out);
1da177e4
LT
561}
562
5f75a4f8 563static void ubd_close_dev(struct ubd *ubd_dev)
1da177e4 564{
7d314e34
PBG
565 os_close_file(ubd_dev->fd);
566 if(ubd_dev->cow.file == NULL)
1da177e4
LT
567 return;
568
7d314e34
PBG
569 os_close_file(ubd_dev->cow.fd);
570 vfree(ubd_dev->cow.bitmap);
571 ubd_dev->cow.bitmap = NULL;
1da177e4
LT
572}
573
7d314e34 574static int ubd_open_dev(struct ubd *ubd_dev)
1da177e4
LT
575{
576 struct openflags flags;
577 char **back_ptr;
578 int err, create_cow, *create_ptr;
0bf16bff 579 int fd;
1da177e4 580
7d314e34 581 ubd_dev->openflags = ubd_dev->boot_openflags;
1da177e4 582 create_cow = 0;
7d314e34
PBG
583 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
584 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
0bf16bff
PBG
585
586 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
7d314e34
PBG
587 back_ptr, &ubd_dev->cow.bitmap_offset,
588 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
6c29256c 589 create_ptr);
1da177e4 590
0bf16bff
PBG
591 if((fd == -ENOENT) && create_cow){
592 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
7d314e34
PBG
593 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
594 &ubd_dev->cow.bitmap_offset,
595 &ubd_dev->cow.bitmap_len,
596 &ubd_dev->cow.data_offset);
0bf16bff 597 if(fd >= 0){
1da177e4 598 printk(KERN_INFO "Creating \"%s\" as COW file for "
7d314e34 599 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
1da177e4
LT
600 }
601 }
602
0bf16bff 603 if(fd < 0){
7d314e34 604 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
0bf16bff
PBG
605 -fd);
606 return fd;
1da177e4 607 }
0bf16bff 608 ubd_dev->fd = fd;
1da177e4 609
7d314e34 610 if(ubd_dev->cow.file != NULL){
f4768ffd
JD
611 blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
612
1da177e4 613 err = -ENOMEM;
da2486ba 614 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
7d314e34 615 if(ubd_dev->cow.bitmap == NULL){
1da177e4
LT
616 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
617 goto error;
618 }
619 flush_tlb_kernel_vm();
620
7d314e34
PBG
621 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
622 ubd_dev->cow.bitmap_offset,
623 ubd_dev->cow.bitmap_len);
1da177e4
LT
624 if(err < 0)
625 goto error;
626
7d314e34 627 flags = ubd_dev->openflags;
1da177e4 628 flags.w = 0;
7d314e34 629 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
6c29256c 630 NULL, NULL, NULL, NULL);
1da177e4 631 if(err < 0) goto error;
7d314e34 632 ubd_dev->cow.fd = err;
1da177e4 633 }
dc764e50 634 return 0;
1da177e4 635 error:
7d314e34 636 os_close_file(ubd_dev->fd);
dc764e50 637 return err;
1da177e4
LT
638}
639
2e3f5251
JD
640static void ubd_device_release(struct device *dev)
641{
642 struct ubd *ubd_dev = dev->driver_data;
643
644 blk_cleanup_queue(ubd_dev->queue);
645 *ubd_dev = ((struct ubd) DEFAULT_UBD);
646}
647
5f75a4f8 648static int ubd_disk_register(int major, u64 size, int unit,
b8831a1d 649 struct gendisk **disk_out)
1da177e4
LT
650{
651 struct gendisk *disk;
1da177e4
LT
652
653 disk = alloc_disk(1 << UBD_SHIFT);
654 if(disk == NULL)
dc764e50 655 return -ENOMEM;
1da177e4
LT
656
657 disk->major = major;
658 disk->first_minor = unit << UBD_SHIFT;
659 disk->fops = &ubd_blops;
660 set_capacity(disk, size / 512);
ce7b0f46 661 if(major == MAJOR_NR)
1da177e4 662 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
ce7b0f46 663 else
1da177e4 664 sprintf(disk->disk_name, "ubd_fake%d", unit);
1da177e4
LT
665
666 /* sysfs register (not for ide fake devices) */
667 if (major == MAJOR_NR) {
7d314e34
PBG
668 ubd_devs[unit].pdev.id = unit;
669 ubd_devs[unit].pdev.name = DRIVER_NAME;
2e3f5251
JD
670 ubd_devs[unit].pdev.dev.release = ubd_device_release;
671 ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit];
7d314e34
PBG
672 platform_device_register(&ubd_devs[unit].pdev);
673 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
1da177e4
LT
674 }
675
7d314e34 676 disk->private_data = &ubd_devs[unit];
62f96cb0 677 disk->queue = ubd_devs[unit].queue;
1da177e4
LT
678 add_disk(disk);
679
680 *disk_out = disk;
681 return 0;
682}
683
/*
 * Round n up to the next multiple of 512 (1 << 9).  The argument is
 * parenthesized so that any expression can be passed, and the mask is
 * built with ~ instead of left-shifting -1.
 */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))
685
f28169d2 686static int ubd_add(int n, char **error_out)
1da177e4 687{
7d314e34 688 struct ubd *ubd_dev = &ubd_devs[n];
f28169d2 689 int err = 0;
1da177e4 690
7d314e34 691 if(ubd_dev->file == NULL)
ec7cf783 692 goto out;
1da177e4 693
7d314e34 694 err = ubd_file_size(ubd_dev, &ubd_dev->size);
f28169d2
JD
695 if(err < 0){
696 *error_out = "Couldn't determine size of device's file";
80c13749 697 goto out;
f28169d2 698 }
1da177e4 699
7d314e34 700 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
1da177e4 701
a0044bdf 702 INIT_LIST_HEAD(&ubd_dev->restart);
4f40c055 703 sg_init_table(ubd_dev->sg, MAX_SG);
a0044bdf 704
62f96cb0
JD
705 err = -ENOMEM;
706 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
707 if (ubd_dev->queue == NULL) {
708 *error_out = "Failed to initialize device queue";
80c13749 709 goto out;
62f96cb0
JD
710 }
711 ubd_dev->queue->queuedata = ubd_dev;
712
a0044bdf 713 blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
62f96cb0
JD
714 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
715 if(err){
716 *error_out = "Failed to register device";
717 goto out_cleanup;
718 }
6c29256c 719
1da177e4 720 if(fake_major != MAJOR_NR)
5f75a4f8 721 ubd_disk_register(fake_major, ubd_dev->size, n,
62f96cb0 722 &fake_gendisk[n]);
1da177e4
LT
723
724 /* perhaps this should also be under the "if (fake_major)" above */
725 /* using the fake_disk->disk_name and also the fakehd_set name */
726 if (fake_ide)
727 make_ide_entries(ubd_gendisk[n]->disk_name);
728
ec7cf783 729 err = 0;
ec7cf783
JD
730out:
731 return err;
62f96cb0
JD
732
733out_cleanup:
734 blk_cleanup_queue(ubd_dev->queue);
735 goto out;
1da177e4
LT
736}
737
f28169d2 738static int ubd_config(char *str, char **error_out)
1da177e4 739{
e7f6552f 740 int n, ret;
1da177e4 741
f28169d2
JD
742 /* This string is possibly broken up and stored, so it's only
743 * freed if ubd_setup_common fails, or if only general options
744 * were set.
745 */
970d6e3a 746 str = kstrdup(str, GFP_KERNEL);
e7f6552f 747 if (str == NULL) {
f28169d2
JD
748 *error_out = "Failed to allocate memory";
749 return -ENOMEM;
1da177e4 750 }
f28169d2
JD
751
752 ret = ubd_setup_common(str, &n, error_out);
753 if (ret)
e7f6552f 754 goto err_free;
f28169d2 755
e7f6552f
PBG
756 if (n == -1) {
757 ret = 0;
d8d7c28e 758 goto err_free;
1da177e4 759 }
1da177e4 760
dc764e50 761 mutex_lock(&ubd_lock);
f28169d2 762 ret = ubd_add(n, error_out);
e7f6552f 763 if (ret)
7d314e34 764 ubd_devs[n].file = NULL;
dc764e50 765 mutex_unlock(&ubd_lock);
1da177e4 766
e7f6552f 767out:
dc764e50 768 return ret;
e7f6552f
PBG
769
770err_free:
771 kfree(str);
772 goto out;
1da177e4
LT
773}
774
775static int ubd_get_config(char *name, char *str, int size, char **error_out)
776{
7d314e34 777 struct ubd *ubd_dev;
1da177e4
LT
778 int n, len = 0;
779
780 n = parse_unit(&name);
781 if((n >= MAX_DEV) || (n < 0)){
782 *error_out = "ubd_get_config : device number out of range";
dc764e50 783 return -1;
1da177e4
LT
784 }
785
7d314e34 786 ubd_dev = &ubd_devs[n];
d7fb2c38 787 mutex_lock(&ubd_lock);
1da177e4 788
7d314e34 789 if(ubd_dev->file == NULL){
1da177e4
LT
790 CONFIG_CHUNK(str, size, len, "", 1);
791 goto out;
792 }
793
7d314e34 794 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1da177e4 795
7d314e34 796 if(ubd_dev->cow.file != NULL){
1da177e4 797 CONFIG_CHUNK(str, size, len, ",", 0);
7d314e34 798 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1da177e4
LT
799 }
800 else CONFIG_CHUNK(str, size, len, "", 1);
801
802 out:
d7fb2c38 803 mutex_unlock(&ubd_lock);
dc764e50 804 return len;
1da177e4
LT
805}
806
29d56cfe
JD
807static int ubd_id(char **str, int *start_out, int *end_out)
808{
dc764e50 809 int n;
29d56cfe
JD
810
811 n = parse_unit(str);
dc764e50
JD
812 *start_out = 0;
813 *end_out = MAX_DEV - 1;
814 return n;
29d56cfe
JD
815}
816
f28169d2 817static int ubd_remove(int n, char **error_out)
1da177e4 818{
2e3f5251 819 struct gendisk *disk = ubd_gendisk[n];
7d314e34 820 struct ubd *ubd_dev;
29d56cfe 821 int err = -ENODEV;
1da177e4 822
d7fb2c38 823 mutex_lock(&ubd_lock);
1da177e4 824
7d314e34 825 ubd_dev = &ubd_devs[n];
1da177e4 826
7d314e34 827 if(ubd_dev->file == NULL)
29d56cfe 828 goto out;
1da177e4 829
29d56cfe
JD
830 /* you cannot remove a open disk */
831 err = -EBUSY;
7d314e34 832 if(ubd_dev->count > 0)
1da177e4
LT
833 goto out;
834
dc764e50 835 ubd_gendisk[n] = NULL;
b47d2deb
JD
836 if(disk != NULL){
837 del_gendisk(disk);
838 put_disk(disk);
839 }
1da177e4
LT
840
841 if(fake_gendisk[n] != NULL){
842 del_gendisk(fake_gendisk[n]);
843 put_disk(fake_gendisk[n]);
844 fake_gendisk[n] = NULL;
845 }
846
1da177e4 847 err = 0;
2e3f5251 848 platform_device_unregister(&ubd_dev->pdev);
29d56cfe 849out:
d7fb2c38 850 mutex_unlock(&ubd_lock);
29d56cfe 851 return err;
1da177e4
LT
852}
853
f28169d2 854/* All these are called by mconsole in process context and without
b8831a1d 855 * ubd-specific locks. The structure itself is const except for .list.
f28169d2 856 */
1da177e4 857static struct mc_device ubd_mc = {
84f48d4f 858 .list = LIST_HEAD_INIT(ubd_mc.list),
1da177e4
LT
859 .name = "ubd",
860 .config = ubd_config,
dc764e50 861 .get_config = ubd_get_config,
29d56cfe 862 .id = ubd_id,
1da177e4
LT
863 .remove = ubd_remove,
864};
865
d8d7c28e 866static int __init ubd_mc_init(void)
1da177e4
LT
867{
868 mconsole_register_dev(&ubd_mc);
869 return 0;
870}
871
872__initcall(ubd_mc_init);
873
d8d7c28e
PBG
874static int __init ubd0_init(void)
875{
876 struct ubd *ubd_dev = &ubd_devs[0];
877
b8831a1d 878 mutex_lock(&ubd_lock);
d8d7c28e
PBG
879 if(ubd_dev->file == NULL)
880 ubd_dev->file = "root_fs";
b8831a1d
JD
881 mutex_unlock(&ubd_lock);
882
dc764e50 883 return 0;
d8d7c28e
PBG
884}
885
886__initcall(ubd0_init);
887
b8831a1d 888/* Used in ubd_init, which is an initcall */
3ae5eaec
RK
889static struct platform_driver ubd_driver = {
890 .driver = {
891 .name = DRIVER_NAME,
892 },
1da177e4
LT
893};
894
d8d7c28e 895static int __init ubd_init(void)
1da177e4 896{
f28169d2
JD
897 char *error;
898 int i, err;
1da177e4 899
1da177e4
LT
900 if (register_blkdev(MAJOR_NR, "ubd"))
901 return -1;
902
1da177e4
LT
903 if (fake_major != MAJOR_NR) {
904 char name[sizeof("ubd_nnn\0")];
905
906 snprintf(name, sizeof(name), "ubd_%d", fake_major);
1da177e4
LT
907 if (register_blkdev(fake_major, "ubd"))
908 return -1;
909 }
3ae5eaec 910 platform_driver_register(&ubd_driver);
dc764e50 911 mutex_lock(&ubd_lock);
f28169d2
JD
912 for (i = 0; i < MAX_DEV; i++){
913 err = ubd_add(i, &error);
914 if(err)
915 printk(KERN_ERR "Failed to initialize ubd device %d :"
916 "%s\n", i, error);
917 }
dc764e50 918 mutex_unlock(&ubd_lock);
1da177e4
LT
919 return 0;
920}
921
922late_initcall(ubd_init);
923
d8d7c28e 924static int __init ubd_driver_init(void){
91acb21f
JD
925 unsigned long stack;
926 int err;
927
928 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
929 if(global_openflags.s){
930 printk(KERN_INFO "ubd: Synchronous mode\n");
931 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is
932 * enough. So use anyway the io thread. */
933 }
934 stack = alloc_stack(0, 0);
6c29256c 935 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
91acb21f
JD
936 &thread_fd);
937 if(io_pid < 0){
6c29256c 938 printk(KERN_ERR
91acb21f
JD
939 "ubd : Failed to start I/O thread (errno = %d) - "
940 "falling back to synchronous I/O\n", -io_pid);
941 io_pid = -1;
dc764e50 942 return 0;
91acb21f 943 }
6c29256c 944 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
7d314e34 945 IRQF_DISABLED, "ubd", ubd_devs);
91acb21f
JD
946 if(err != 0)
947 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
f4c57a78 948 return 0;
91acb21f
JD
949}
950
951device_initcall(ubd_driver_init);
952
1da177e4
LT
953static int ubd_open(struct inode *inode, struct file *filp)
954{
955 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 956 struct ubd *ubd_dev = disk->private_data;
1da177e4
LT
957 int err = 0;
958
7d314e34
PBG
959 if(ubd_dev->count == 0){
960 err = ubd_open_dev(ubd_dev);
1da177e4
LT
961 if(err){
962 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
7d314e34 963 disk->disk_name, ubd_dev->file, -err);
1da177e4
LT
964 goto out;
965 }
966 }
7d314e34
PBG
967 ubd_dev->count++;
968 set_disk_ro(disk, !ubd_dev->openflags.w);
2c49be99
PBG
969
970 /* This should no more be needed. And it didn't work anyway to exclude
971 * read-write remounting of filesystems.*/
7d314e34 972 /*if((filp->f_mode & FMODE_WRITE) && !ubd_dev->openflags.w){
5f75a4f8 973 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1da177e4 974 err = -EROFS;
2c49be99 975 }*/
1da177e4 976 out:
dc764e50 977 return err;
1da177e4
LT
978}
979
980static int ubd_release(struct inode * inode, struct file * file)
981{
982 struct gendisk *disk = inode->i_bdev->bd_disk;
7d314e34 983 struct ubd *ubd_dev = disk->private_data;
1da177e4 984
7d314e34 985 if(--ubd_dev->count == 0)
5f75a4f8 986 ubd_close_dev(ubd_dev);
dc764e50 987 return 0;
1da177e4
LT
988}
989
91acb21f
JD
990static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
991 __u64 *cow_offset, unsigned long *bitmap,
992 __u64 bitmap_offset, unsigned long *bitmap_words,
993 __u64 bitmap_len)
1da177e4 994{
91acb21f
JD
995 __u64 sector = io_offset >> 9;
996 int i, update_bitmap = 0;
997
998 for(i = 0; i < length >> 9; i++){
999 if(cow_mask != NULL)
1000 ubd_set_bit(i, (unsigned char *) cow_mask);
1001 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1002 continue;
1da177e4 1003
91acb21f
JD
1004 update_bitmap = 1;
1005 ubd_set_bit(sector + i, (unsigned char *) bitmap);
1006 }
1007
1008 if(!update_bitmap)
1009 return;
1da177e4 1010
91acb21f 1011 *cow_offset = sector / (sizeof(unsigned long) * 8);
1da177e4 1012
91acb21f
JD
1013 /* This takes care of the case where we're exactly at the end of the
1014 * device, and *cow_offset + 1 is off the end. So, just back it up
1015 * by one word. Thanks to Lynn Kerby for the fix and James McMechan
1016 * for the original diagnosis.
1017 */
1018 if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
1019 sizeof(unsigned long) - 1))
1020 (*cow_offset)--;
1021
1022 bitmap_words[0] = bitmap[*cow_offset];
1023 bitmap_words[1] = bitmap[*cow_offset + 1];
1024
1025 *cow_offset *= sizeof(unsigned long);
1026 *cow_offset += bitmap_offset;
1027}
1028
1029static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1030 __u64 bitmap_offset, __u64 bitmap_len)
1031{
1032 __u64 sector = req->offset >> 9;
1033 int i;
1034
1035 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1036 panic("Operation too long");
1037
1038 if(req->op == UBD_READ) {
1039 for(i = 0; i < req->length >> 9; i++){
1040 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
6c29256c 1041 ubd_set_bit(i, (unsigned char *)
91acb21f 1042 &req->sector_mask);
dc764e50 1043 }
91acb21f
JD
1044 }
1045 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1046 &req->cow_offset, bitmap, bitmap_offset,
1047 req->bitmap_words, bitmap_len);
1da177e4
LT
1048}
1049
62f96cb0 1050/* Called with dev->lock held */
a0044bdf
JD
1051static void prepare_request(struct request *req, struct io_thread_req *io_req,
1052 unsigned long long offset, int page_offset,
1053 int len, struct page *page)
1da177e4
LT
1054{
1055 struct gendisk *disk = req->rq_disk;
7d314e34 1056 struct ubd *ubd_dev = disk->private_data;
91acb21f 1057
62f96cb0 1058 io_req->req = req;
a0044bdf
JD
1059 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1060 ubd_dev->fd;
7d314e34 1061 io_req->fds[1] = ubd_dev->fd;
91acb21f 1062 io_req->cow_offset = -1;
1da177e4
LT
1063 io_req->offset = offset;
1064 io_req->length = len;
1065 io_req->error = 0;
91acb21f
JD
1066 io_req->sector_mask = 0;
1067
1068 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1da177e4 1069 io_req->offsets[0] = 0;
7d314e34 1070 io_req->offsets[1] = ubd_dev->cow.data_offset;
a0044bdf 1071 io_req->buffer = page_address(page) + page_offset;
1da177e4
LT
1072 io_req->sectorsize = 1 << 9;
1073
7d314e34 1074 if(ubd_dev->cow.file != NULL)
a0044bdf
JD
1075 cowify_req(io_req, ubd_dev->cow.bitmap,
1076 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
91acb21f 1077
1da177e4
LT
1078}
1079
62f96cb0 1080/* Called with dev->lock held */
165125e1 1081static void do_ubd_request(struct request_queue *q)
1da177e4 1082{
2adcec21 1083 struct io_thread_req *io_req;
1da177e4 1084 struct request *req;
0a6d3a2a 1085 int n, last_sectors;
a0044bdf
JD
1086
1087 while(1){
2a9529a0 1088 struct ubd *dev = q->queuedata;
a0044bdf
JD
1089 if(dev->end_sg == 0){
1090 struct request *req = elv_next_request(q);
1091 if(req == NULL)
1092 return;
1093
1094 dev->request = req;
1095 blkdev_dequeue_request(req);
1096 dev->start_sg = 0;
1097 dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
1098 }
1099
1100 req = dev->request;
0a6d3a2a 1101 last_sectors = 0;
a0044bdf
JD
1102 while(dev->start_sg < dev->end_sg){
1103 struct scatterlist *sg = &dev->sg[dev->start_sg];
1104
0a6d3a2a 1105 req->sector += last_sectors;
2adcec21 1106 io_req = kmalloc(sizeof(struct io_thread_req),
990c5587 1107 GFP_ATOMIC);
2adcec21
JD
1108 if(io_req == NULL){
1109 if(list_empty(&dev->restart))
1110 list_add(&dev->restart, &restart);
1111 return;
1112 }
1113 prepare_request(req, io_req,
a0044bdf 1114 (unsigned long long) req->sector << 9,
45711f1a 1115 sg->offset, sg->length, sg_page(sg));
a0044bdf 1116
0a6d3a2a 1117 last_sectors = sg->length >> 9;
a6ea4cce
JD
1118 n = os_write_file(thread_fd, &io_req,
1119 sizeof(struct io_thread_req *));
2adcec21 1120 if(n != sizeof(struct io_thread_req *)){
a0044bdf
JD
1121 if(n != -EAGAIN)
1122 printk("write to io thread failed, "
1123 "errno = %d\n", -n);
1124 else if(list_empty(&dev->restart))
1125 list_add(&dev->restart, &restart);
12429bf9 1126 kfree(io_req);
a0044bdf
JD
1127 return;
1128 }
1129
a0044bdf 1130 dev->start_sg++;
1da177e4 1131 }
a0044bdf
JD
1132 dev->end_sg = 0;
1133 dev->request = NULL;
1da177e4
LT
1134 }
1135}
1136
a885c8c4
CH
1137static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1138{
7d314e34 1139 struct ubd *ubd_dev = bdev->bd_disk->private_data;
a885c8c4
CH
1140
1141 geo->heads = 128;
1142 geo->sectors = 32;
7d314e34 1143 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
a885c8c4
CH
1144 return 0;
1145}
1146
1da177e4
LT
1147static int ubd_ioctl(struct inode * inode, struct file * file,
1148 unsigned int cmd, unsigned long arg)
1149{
7d314e34 1150 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1da177e4
LT
1151 struct hd_driveid ubd_id = {
1152 .cyls = 0,
1153 .heads = 128,
1154 .sectors = 32,
1155 };
1156
1157 switch (cmd) {
1da177e4 1158 struct cdrom_volctrl volume;
1da177e4 1159 case HDIO_GET_IDENTITY:
7d314e34 1160 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1da177e4
LT
1161 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1162 sizeof(ubd_id)))
dc764e50
JD
1163 return -EFAULT;
1164 return 0;
b8831a1d 1165
1da177e4
LT
1166 case CDROMVOLREAD:
1167 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
dc764e50 1168 return -EFAULT;
1da177e4
LT
1169 volume.channel0 = 255;
1170 volume.channel1 = 255;
1171 volume.channel2 = 255;
1172 volume.channel3 = 255;
1173 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
dc764e50
JD
1174 return -EFAULT;
1175 return 0;
1da177e4 1176 }
dc764e50 1177 return -EINVAL;
1da177e4
LT
1178}
1179
4833aff7 1180static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1da177e4
LT
1181{
1182 struct uml_stat buf1, buf2;
1183 int err;
1184
4833aff7
PBG
1185 if(from_cmdline == NULL)
1186 return 0;
1187 if(!strcmp(from_cmdline, from_cow))
1188 return 0;
1da177e4
LT
1189
1190 err = os_stat_file(from_cmdline, &buf1);
1191 if(err < 0){
1192 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
4833aff7 1193 return 0;
1da177e4
LT
1194 }
1195 err = os_stat_file(from_cow, &buf2);
1196 if(err < 0){
1197 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
4833aff7 1198 return 1;
1da177e4
LT
1199 }
1200 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
4833aff7 1201 return 0;
1da177e4
LT
1202
1203 printk("Backing file mismatch - \"%s\" requested,\n"
1204 "\"%s\" specified in COW header of \"%s\"\n",
1205 from_cmdline, from_cow, cow);
4833aff7 1206 return 1;
1da177e4
LT
1207}
1208
1209static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1210{
1211 unsigned long modtime;
fe1db50c 1212 unsigned long long actual;
1da177e4
LT
1213 int err;
1214
1215 err = os_file_modtime(file, &modtime);
1216 if(err < 0){
1217 printk("Failed to get modification time of backing file "
1218 "\"%s\", err = %d\n", file, -err);
dc764e50 1219 return err;
1da177e4
LT
1220 }
1221
1222 err = os_file_size(file, &actual);
1223 if(err < 0){
1224 printk("Failed to get size of backing file \"%s\", "
1225 "err = %d\n", file, -err);
dc764e50 1226 return err;
1da177e4
LT
1227 }
1228
dc764e50 1229 if(actual != size){
1da177e4
LT
1230 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1231 * the typecast.*/
1232 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1233 "file\n", (unsigned long long) size, actual);
dc764e50 1234 return -EINVAL;
1da177e4
LT
1235 }
1236 if(modtime != mtime){
1237 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1238 "file\n", mtime, modtime);
dc764e50 1239 return -EINVAL;
1da177e4 1240 }
dc764e50 1241 return 0;
1da177e4
LT
1242}
1243
1244int read_cow_bitmap(int fd, void *buf, int offset, int len)
1245{
1246 int err;
1247
1248 err = os_seek_file(fd, offset);
1249 if(err < 0)
dc764e50 1250 return err;
1da177e4 1251
dc764e50 1252 err = os_read_file(fd, buf, len);
1da177e4 1253 if(err < 0)
dc764e50 1254 return err;
1da177e4 1255
dc764e50 1256 return 0;
1da177e4
LT
1257}
1258
6c29256c 1259int open_ubd_file(char *file, struct openflags *openflags, int shared,
1da177e4
LT
1260 char **backing_file_out, int *bitmap_offset_out,
1261 unsigned long *bitmap_len_out, int *data_offset_out,
1262 int *create_cow_out)
1263{
1264 time_t mtime;
1265 unsigned long long size;
1266 __u32 version, align;
1267 char *backing_file;
4833aff7 1268 int fd, err, sectorsize, asked_switch, mode = 0644;
1da177e4
LT
1269
1270 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1271 if (fd < 0) {
1272 if ((fd == -ENOENT) && (create_cow_out != NULL))
1da177e4 1273 *create_cow_out = 1;
dc764e50
JD
1274 if (!openflags->w ||
1275 ((fd != -EROFS) && (fd != -EACCES)))
a374a48f 1276 return fd;
1da177e4
LT
1277 openflags->w = 0;
1278 fd = os_open_file(file, *openflags, mode);
a374a48f
PBG
1279 if (fd < 0)
1280 return fd;
dc764e50 1281 }
1da177e4 1282
6c29256c
JD
1283 if(shared)
1284 printk("Not locking \"%s\" on the host\n", file);
1285 else {
1286 err = os_lock_file(fd, openflags->w);
1287 if(err < 0){
1288 printk("Failed to lock '%s', err = %d\n", file, -err);
1289 goto out_close;
1290 }
1da177e4
LT
1291 }
1292
d6e05edc 1293 /* Successful return case! */
a374a48f 1294 if(backing_file_out == NULL)
dc764e50 1295 return fd;
1da177e4
LT
1296
1297 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1298 &size, &sectorsize, &align, bitmap_offset_out);
1299 if(err && (*backing_file_out != NULL)){
1300 printk("Failed to read COW header from COW file \"%s\", "
1301 "errno = %d\n", file, -err);
1302 goto out_close;
1303 }
a374a48f 1304 if(err)
dc764e50 1305 return fd;
1da177e4 1306
4833aff7 1307 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1da177e4 1308
4833aff7
PBG
1309 /* Allow switching only if no mismatch. */
1310 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1da177e4
LT
1311 printk("Switching backing file to '%s'\n", *backing_file_out);
1312 err = write_cow_header(file, fd, *backing_file_out,
1313 sectorsize, align, &size);
a374a48f 1314 if (err) {
1da177e4 1315 printk("Switch failed, errno = %d\n", -err);
4833aff7 1316 goto out_close;
1da177e4 1317 }
a374a48f 1318 } else {
1da177e4
LT
1319 *backing_file_out = backing_file;
1320 err = backing_file_mismatch(*backing_file_out, size, mtime);
a374a48f
PBG
1321 if (err)
1322 goto out_close;
1da177e4
LT
1323 }
1324
1325 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1326 bitmap_len_out, data_offset_out);
1327
dc764e50 1328 return fd;
1da177e4
LT
1329 out_close:
1330 os_close_file(fd);
a374a48f 1331 return err;
1da177e4
LT
1332}
1333
1334int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1335 int sectorsize, int alignment, int *bitmap_offset_out,
1336 unsigned long *bitmap_len_out, int *data_offset_out)
1337{
1338 int err, fd;
1339
1340 flags.c = 1;
6c29256c 1341 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1da177e4
LT
1342 if(fd < 0){
1343 err = fd;
1344 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1345 -err);
1346 goto out;
1347 }
1348
1349 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1350 bitmap_offset_out, bitmap_len_out,
1351 data_offset_out);
1352 if(!err)
dc764e50 1353 return fd;
1da177e4
LT
1354 os_close_file(fd);
1355 out:
dc764e50 1356 return err;
1da177e4
LT
1357}
1358
91acb21f 1359static int update_bitmap(struct io_thread_req *req)
1da177e4 1360{
91acb21f 1361 int n;
1da177e4 1362
91acb21f 1363 if(req->cow_offset == -1)
dc764e50 1364 return 0;
1da177e4 1365
91acb21f
JD
1366 n = os_seek_file(req->fds[1], req->cow_offset);
1367 if(n < 0){
1368 printk("do_io - bitmap lseek failed : err = %d\n", -n);
dc764e50 1369 return 1;
91acb21f 1370 }
1da177e4 1371
a6ea4cce
JD
1372 n = os_write_file(req->fds[1], &req->bitmap_words,
1373 sizeof(req->bitmap_words));
91acb21f
JD
1374 if(n != sizeof(req->bitmap_words)){
1375 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1376 req->fds[1]);
dc764e50 1377 return 1;
91acb21f 1378 }
1da177e4 1379
dc764e50 1380 return 0;
91acb21f 1381}
1da177e4 1382
91acb21f
JD
1383void do_io(struct io_thread_req *req)
1384{
1385 char *buf;
1386 unsigned long len;
1387 int n, nsectors, start, end, bit;
1388 int err;
1389 __u64 off;
1390
1391 nsectors = req->length / req->sectorsize;
1392 start = 0;
1393 do {
1394 bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
1395 end = start;
1396 while((end < nsectors) &&
1397 (ubd_test_bit(end, (unsigned char *)
1398 &req->sector_mask) == bit))
1399 end++;
1400
1401 off = req->offset + req->offsets[bit] +
1402 start * req->sectorsize;
1403 len = (end - start) * req->sectorsize;
1404 buf = &req->buffer[start * req->sectorsize];
1405
1406 err = os_seek_file(req->fds[bit], off);
1407 if(err < 0){
1408 printk("do_io - lseek failed : err = %d\n", -err);
1409 req->error = 1;
1410 return;
1411 }
1412 if(req->op == UBD_READ){
1413 n = 0;
1414 do {
1415 buf = &buf[n];
1416 len -= n;
a6ea4cce 1417 n = os_read_file(req->fds[bit], buf, len);
91acb21f
JD
1418 if (n < 0) {
1419 printk("do_io - read failed, err = %d "
1420 "fd = %d\n", -n, req->fds[bit]);
1421 req->error = 1;
1422 return;
1423 }
1424 } while((n < len) && (n != 0));
1425 if (n < len) memset(&buf[n], 0, len - n);
1426 } else {
a6ea4cce 1427 n = os_write_file(req->fds[bit], buf, len);
91acb21f
JD
1428 if(n != len){
1429 printk("do_io - write failed err = %d "
1430 "fd = %d\n", -n, req->fds[bit]);
1431 req->error = 1;
1432 return;
1433 }
1434 }
1435
1436 start = end;
1437 } while(start < nsectors);
1da177e4 1438
91acb21f 1439 req->error = update_bitmap(req);
1da177e4 1440}
91acb21f
JD
1441
/*
 * Set by start_io_thread.  No locking is needed: start_io_thread is called
 * only from ubd_init, which is an initcall.
 */
int kernel_fd = -1;

/*
 * Modified only by the io thread.
 * XXX: currently unused.
 */
static int io_count;
91acb21f
JD
1449
1450int io_thread(void *arg)
1451{
2adcec21 1452 struct io_thread_req *req;
91acb21f
JD
1453 int n;
1454
1455 ignore_sigwinch_sig();
1456 while(1){
a6ea4cce 1457 n = os_read_file(kernel_fd, &req,
2adcec21
JD
1458 sizeof(struct io_thread_req *));
1459 if(n != sizeof(struct io_thread_req *)){
91acb21f
JD
1460 if(n < 0)
1461 printk("io_thread - read failed, fd = %d, "
1462 "err = %d\n", kernel_fd, -n);
1463 else {
1464 printk("io_thread - short read, fd = %d, "
1465 "length = %d\n", kernel_fd, n);
1466 }
1467 continue;
1468 }
1469 io_count++;
2adcec21 1470 do_io(req);
a6ea4cce 1471 n = os_write_file(kernel_fd, &req,
2adcec21
JD
1472 sizeof(struct io_thread_req *));
1473 if(n != sizeof(struct io_thread_req *))
91acb21f
JD
1474 printk("io_thread - write failed, fd = %d, err = %d\n",
1475 kernel_fd, -n);
1476 }
91acb21f 1477
1b57e9c2
JD
1478 return 0;
1479}