Merge branch 'for-linus-4.21-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jan 2019 02:39:22 +0000 (18:39 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 3 Jan 2019 02:39:22 +0000 (18:39 -0800)
Pull UML updates from Richard Weinberger:

 - DISCARD support for our block device driver

 - Many TLB flush optimizations

 - Various smaller fixes

 - And most important, Anton agreed to help me maintain UML

* 'for-linus-4.21-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
  um: Remove obsolete reenable_XX calls
  um: writev needs <sys/uio.h>
  Add Anton Ivanov to UML maintainers
  um: remove redundant generic-y
  um: Optimize Flush TLB for force/fork case
  um: Avoid marking pages with "changed protection"
  um: Skip TLB flushing where not needed
  um: Optimize TLB operations v2
  um: Remove unnecessary faulted check in uaccess.c
  um: Add support for DISCARD in the UBD Driver
  um: Remove unsafe printks from the io thread
  um: Clean-up command processing in UML UBD driver
  um: Switch to block-mq constants in the UML UBD driver
  um: Make GCOV depend on !KCOV
  um: Include sys/uio.h to have writev()
  um: Add HAVE_DEBUG_BUGVERBOSE
  um: Update maintainers file entry

20 files changed:
MAINTAINERS
arch/um/Kconfig
arch/um/Kconfig.debug
arch/um/drivers/chan_kern.c
arch/um/drivers/line.c
arch/um/drivers/mconsole_kern.c
arch/um/drivers/net_kern.c
arch/um/drivers/port_kern.c
arch/um/drivers/random.c
arch/um/drivers/ubd_kern.c
arch/um/drivers/vector_user.c
arch/um/include/asm/Kbuild
arch/um/include/asm/pgtable.h
arch/um/include/shared/irq_user.h
arch/um/include/shared/os.h
arch/um/kernel/irq.c
arch/um/kernel/sigio.c
arch/um/kernel/skas/uaccess.c
arch/um/kernel/tlb.c
arch/um/os-Linux/file.c

index a69c127e357818126c7983799f9247451f7b4d8d..99113b9fcdd289ca594c5960c616851755c7c3ed 100644 (file)
@@ -15951,15 +15951,16 @@ F:    drivers/media/usb/zr364xx/
 USER-MODE LINUX (UML)
 M:     Jeff Dike <jdike@addtoit.com>
 M:     Richard Weinberger <richard@nod.at>
+M:     Anton Ivanov <anton.ivanov@cambridgegreys.com>
 L:     linux-um@lists.infradead.org
 W:     http://user-mode-linux.sourceforge.net
+Q:     https://patchwork.ozlabs.org/project/linux-um/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git
 S:     Maintained
 F:     Documentation/virtual/uml/
 F:     arch/um/
 F:     arch/x86/um/
 F:     fs/hostfs/
-F:     fs/hppfs/
 
 USERSPACE COPYIN/COPYOUT (UIOVEC)
 M:     Alexander Viro <viro@zeniv.linux.org.uk>
index a238547671d616672636ee6f9dd72a6a1fb0202e..ec9711d068b7f9ab9a4f721d46a0db53b4c62414 100644 (file)
@@ -12,6 +12,7 @@ config UML
        select HAVE_UID16
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_DEBUG_KMEMLEAK
+       select HAVE_DEBUG_BUGVERBOSE
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
        select GENERIC_CLOCKEVENTS
index 2014597605ea9cd24ff881370ce063ddb57992e0..85726eeec34512cde5fa0dd66a4485a9b64af311 100644 (file)
@@ -16,6 +16,7 @@ config GPROF
 config GCOV
        bool "Enable gcov support"
        depends on DEBUG_INFO
+       depends on !KCOV
        help
          This option allows developers to retrieve coverage data from a UML
          session.
index 05588f9466c7ff1a1b028cb710cab37b65b0d7ba..a4e64edb8f3825ac02cfd0bdd78d82a3d087d8f2 100644 (file)
@@ -211,12 +211,6 @@ void deactivate_chan(struct chan *chan, int irq)
                deactivate_fd(chan->fd, irq);
 }
 
-void reactivate_chan(struct chan *chan, int irq)
-{
-       if (chan && chan->enabled)
-               reactivate_fd(chan->fd, irq);
-}
-
 int write_chan(struct chan *chan, const char *buf, int len,
               int write_irq)
 {
@@ -228,8 +222,6 @@ int write_chan(struct chan *chan, const char *buf, int len,
        n = chan->ops->write(chan->fd, buf, len, chan->data);
        if (chan->primary) {
                ret = n;
-               if ((ret == -EAGAIN) || ((ret >= 0) && (ret < len)))
-                       reactivate_fd(chan->fd, write_irq);
        }
        return ret;
 }
@@ -527,8 +519,6 @@ void chan_interrupt(struct line *line, int irq)
                        tty_insert_flip_char(port, c, TTY_NORMAL);
        } while (err > 0);
 
-       if (err == 0)
-               reactivate_fd(chan->fd, irq);
        if (err == -EIO) {
                if (chan->primary) {
                        tty_port_tty_hangup(&line->port, false);
index 7e524efed58484c394beefd2d6d3651ee0eeb5fa..e0e63931fb2b46829e5a1b6ba8eada3a1c4c9e0b 100644 (file)
@@ -235,14 +235,6 @@ void line_unthrottle(struct tty_struct *tty)
 
        line->throttled = 0;
        chan_interrupt(line, line->driver->read_irq);
-
-       /*
-        * Maybe there is enough stuff pending that calling the interrupt
-        * throttles us again.  In this case, line->throttled will be 1
-        * again and we shouldn't turn the interrupt back on.
-        */
-       if (!line->throttled)
-               reactivate_chan(line->chan_in, line->driver->read_irq);
 }
 
 static irqreturn_t line_write_interrupt(int irq, void *data)
@@ -667,8 +659,6 @@ static irqreturn_t winch_interrupt(int irq, void *data)
                tty_kref_put(tty);
        }
  out:
-       if (winch->fd != -1)
-               reactivate_fd(winch->fd, WINCH_IRQ);
        return IRQ_HANDLED;
 }
 
index d5f9a2d1da1ba8f8e667e3fc781db1a5136432af..ff3ab72fd90ff192504205535cc29422c902420c 100644 (file)
@@ -96,7 +96,6 @@ static irqreturn_t mconsole_interrupt(int irq, void *dev_id)
        }
        if (!list_empty(&mc_requests))
                schedule_work(&mconsole_work);
-       reactivate_fd(fd, MCONSOLE_IRQ);
        return IRQ_HANDLED;
 }
 
@@ -240,7 +239,6 @@ void mconsole_stop(struct mc_request *req)
                (*req->cmd->handler)(req);
        }
        os_set_fd_block(req->originating_fd, 0);
-       reactivate_fd(req->originating_fd, MCONSOLE_IRQ);
        mconsole_reply(req, "", 0, 0);
 }
 
index 624cb47cc9cd35b56c10a27e565c64b078cd84fa..d80cfb1d943077c5551bd9c1a60528cf7eec4c62 100644 (file)
@@ -137,8 +137,6 @@ static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
                schedule_work(&lp->work);
                goto out;
        }
-       reactivate_fd(lp->fd, UM_ETH_IRQ);
-
 out:
        spin_unlock(&lp->lock);
        return IRQ_HANDLED;
index 40ca5cc275e9eaa68bb1d3b758fb856aba31b816..b0e9ff35daee29bd5f574a7c819940586598c05d 100644 (file)
@@ -137,7 +137,6 @@ static void port_work_proc(struct work_struct *unused)
                if (!port->has_connection)
                        continue;
 
-               reactivate_fd(port->fd, ACCEPT_IRQ);
                while (port_accept(port))
                        ;
                port->has_connection = 0;
index 778a0e52d5a5c618fbeebf82679060ab3e7a6520..1d5d3057e6f1f607ecec1ba0b941a80150c0dcb6 100644 (file)
@@ -73,7 +73,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
                                return ret ? : -EAGAIN;
 
                        atomic_inc(&host_sleep_count);
-                       reactivate_fd(random_fd, RANDOM_IRQ);
                        add_sigio_fd(random_fd);
 
                        add_wait_queue(&host_read_wait, &wait);
index 28c40624bcb6f0e9b15030037d6f199b46c5fa0f..a4a41421c5e2a005cca783cfd652e41076b65a93 100644 (file)
@@ -1,4 +1,5 @@
 /*
+ * Copyright (C) 2018 Cambridge Greys Ltd
  * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
 #include <os.h>
 #include "cow.h"
 
-enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
+/* Max request size is determined by sector mask - 32K */
+#define UBD_MAX_REQUEST (8 * sizeof(long))
 
 struct io_thread_req {
        struct request *req;
-       enum ubd_req op;
        int fds[2];
        unsigned long offsets[2];
        unsigned long long offset;
@@ -153,6 +154,7 @@ struct ubd {
        struct openflags openflags;
        unsigned shared:1;
        unsigned no_cow:1;
+       unsigned no_trim:1;
        struct cow cow;
        struct platform_device pdev;
        struct request_queue *queue;
@@ -176,6 +178,7 @@ struct ubd {
        .boot_openflags =       OPEN_FLAGS, \
        .openflags =            OPEN_FLAGS, \
        .no_cow =               0, \
+       .no_trim =              0, \
        .shared =               0, \
        .cow =                  DEFAULT_COW, \
        .lock =                 __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
@@ -322,7 +325,7 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
                *index_out = n;
 
        err = -EINVAL;
-       for (i = 0; i < sizeof("rscd="); i++) {
+       for (i = 0; i < sizeof("rscdt="); i++) {
                switch (*str) {
                case 'r':
                        flags.w = 0;
@@ -336,12 +339,15 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
                case 'c':
                        ubd_dev->shared = 1;
                        break;
+               case 't':
+                       ubd_dev->no_trim = 1;
+                       break;
                case '=':
                        str++;
                        goto break_loop;
                default:
                        *error_out = "Expected '=' or flag letter "
-                               "(r, s, c, or d)";
+                               "(r, s, c, t or d)";
                        goto out;
                }
                str++;
@@ -414,6 +420,7 @@ __uml_help(ubd_setup,
 "    'c' will cause the device to be treated as being shared between multiple\n"
 "    UMLs and file locking will be turned off - this is appropriate for a\n"
 "    cluster filesystem and inappropriate at almost all other times.\n\n"
+"    't' will disable trim/discard support on the device (enabled by default).\n\n"
 );
 
 static int udb_setup(char *str)
@@ -511,16 +518,21 @@ static void ubd_handler(void)
                }
                for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
                        struct io_thread_req *io_req = (*irq_req_buffer)[count];
-                       int err = io_req->error ? BLK_STS_IOERR : BLK_STS_OK;
-
-                       if (!blk_update_request(io_req->req, err, io_req->length))
-                               __blk_mq_end_request(io_req->req, err);
 
+                       if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
+                               blk_queue_max_discard_sectors(io_req->req->q, 0);
+                               blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
+                               blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
+                       }
+                       if ((io_req->error) || (io_req->buffer == NULL))
+                               blk_mq_end_request(io_req->req, io_req->error);
+                       else {
+                               if (!blk_update_request(io_req->req, io_req->error, io_req->length))
+                                       __blk_mq_end_request(io_req->req, io_req->error);
+                       }
                        kfree(io_req);
                }
        }
-
-       reactivate_fd(thread_fd, UBD_IRQ);
 }
 
 static irqreturn_t ubd_intr(int irq, void *dev)
@@ -789,7 +801,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 
        if((fd == -ENOENT) && create_cow){
                fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
-                                         ubd_dev->openflags, 1 << 9, PAGE_SIZE,
+                                         ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
                                          &ubd_dev->cow.bitmap_offset,
                                          &ubd_dev->cow.bitmap_len,
                                          &ubd_dev->cow.data_offset);
@@ -830,6 +842,14 @@ static int ubd_open_dev(struct ubd *ubd_dev)
                if(err < 0) goto error;
                ubd_dev->cow.fd = err;
        }
+       if (ubd_dev->no_trim == 0) {
+               ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
+               ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
+               blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+               blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
+               blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
+       }
+       blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
        return 0;
  error:
        os_close_file(ubd_dev->fd);
@@ -882,7 +902,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
        return 0;
 }
 
-#define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9))
+#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
 
 static const struct blk_mq_ops ubd_mq_ops = {
        .queue_rq = ubd_queue_rq,
@@ -1234,10 +1254,10 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
                          __u64 bitmap_offset, unsigned long *bitmap_words,
                          __u64 bitmap_len)
 {
-       __u64 sector = io_offset >> 9;
+       __u64 sector = io_offset >> SECTOR_SHIFT;
        int i, update_bitmap = 0;
 
-       for(i = 0; i < length >> 9; i++){
+       for (i = 0; i < length >> SECTOR_SHIFT; i++) {
                if(cow_mask != NULL)
                        ubd_set_bit(i, (unsigned char *) cow_mask);
                if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
@@ -1271,14 +1291,14 @@ static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
 static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
                       __u64 bitmap_offset, __u64 bitmap_len)
 {
-       __u64 sector = req->offset >> 9;
+       __u64 sector = req->offset >> SECTOR_SHIFT;
        int i;
 
-       if(req->length > (sizeof(req->sector_mask) * 8) << 9)
+       if (req->length > (sizeof(req->sector_mask) * 8) << SECTOR_SHIFT)
                panic("Operation too long");
 
-       if(req->op == UBD_READ) {
-               for(i = 0; i < req->length >> 9; i++){
+       if (req_op(req->req) == REQ_OP_READ) {
+               for (i = 0; i < req->length >> SECTOR_SHIFT; i++) {
                        if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
                                ubd_set_bit(i, (unsigned char *)
                                            &req->sector_mask);
@@ -1307,68 +1327,86 @@ static int ubd_queue_one_vec(struct blk_mq_hw_ctx *hctx, struct request *req,
                io_req->fds[0] = dev->fd;
        io_req->error = 0;
 
-       if (req_op(req) == REQ_OP_FLUSH) {
-               io_req->op = UBD_FLUSH;
-       } else {
-               io_req->fds[1] = dev->fd;
-               io_req->cow_offset = -1;
-               io_req->offset = off;
-               io_req->length = bvec->bv_len;
-               io_req->sector_mask = 0;
-               io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
-               io_req->offsets[0] = 0;
-               io_req->offsets[1] = dev->cow.data_offset;
+       if (bvec != NULL) {
                io_req->buffer = page_address(bvec->bv_page) + bvec->bv_offset;
-               io_req->sectorsize = 1 << 9;
-
-               if (dev->cow.file) {
-                       cowify_req(io_req, dev->cow.bitmap,
-                                  dev->cow.bitmap_offset, dev->cow.bitmap_len);
-               }
+               io_req->length = bvec->bv_len;
+       } else {
+               io_req->buffer = NULL;
+               io_req->length = blk_rq_bytes(req);
        }
 
+       io_req->sectorsize = SECTOR_SIZE;
+       io_req->fds[1] = dev->fd;
+       io_req->cow_offset = -1;
+       io_req->offset = off;
+       io_req->sector_mask = 0;
+       io_req->offsets[0] = 0;
+       io_req->offsets[1] = dev->cow.data_offset;
+
+       if (dev->cow.file)
+               cowify_req(io_req, dev->cow.bitmap,
+                          dev->cow.bitmap_offset, dev->cow.bitmap_len);
+
        ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
        if (ret != sizeof(io_req)) {
                if (ret != -EAGAIN)
                        pr_err("write to io thread failed: %d\n", -ret);
                kfree(io_req);
        }
-
        return ret;
 }
 
+static int queue_rw_req(struct blk_mq_hw_ctx *hctx, struct request *req)
+{
+       struct req_iterator iter;
+       struct bio_vec bvec;
+       int ret;
+       u64 off = (u64)blk_rq_pos(req) << SECTOR_SHIFT;
+
+       rq_for_each_segment(bvec, req, iter) {
+               ret = ubd_queue_one_vec(hctx, req, off, &bvec);
+               if (ret < 0)
+                       return ret;
+               off += bvec.bv_len;
+       }
+       return 0;
+}
+
 static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
                                 const struct blk_mq_queue_data *bd)
 {
        struct ubd *ubd_dev = hctx->queue->queuedata;
        struct request *req = bd->rq;
-       int ret = 0;
+       int ret = 0, res = BLK_STS_OK;
 
        blk_mq_start_request(req);
 
        spin_lock_irq(&ubd_dev->lock);
 
-       if (req_op(req) == REQ_OP_FLUSH) {
+       switch (req_op(req)) {
+       /* operations with no length/offset arguments */
+       case REQ_OP_FLUSH:
                ret = ubd_queue_one_vec(hctx, req, 0, NULL);
-       } else {
-               struct req_iterator iter;
-               struct bio_vec bvec;
-               u64 off = (u64)blk_rq_pos(req) << 9;
-
-               rq_for_each_segment(bvec, req, iter) {
-                       ret = ubd_queue_one_vec(hctx, req, off, &bvec);
-                       if (ret < 0)
-                               goto out;
-                       off += bvec.bv_len;
-               }
+               break;
+       case REQ_OP_READ:
+       case REQ_OP_WRITE:
+               ret = queue_rw_req(hctx, req);
+               break;
+       case REQ_OP_DISCARD:
+       case REQ_OP_WRITE_ZEROES:
+               ret = ubd_queue_one_vec(hctx, req, (u64)blk_rq_pos(req) << 9, NULL);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               res = BLK_STS_NOTSUPP;
        }
-out:
+
        spin_unlock_irq(&ubd_dev->lock);
 
        if (ret < 0)
                blk_mq_requeue_request(req, true);
 
-       return BLK_STS_OK;
+       return res;
 }
 
 static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1413,39 +1451,60 @@ static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
        return -EINVAL;
 }
 
+static int map_error(int error_code)
+{
+       switch (error_code) {
+       case 0:
+               return BLK_STS_OK;
+       case ENOSYS:
+       case EOPNOTSUPP:
+               return BLK_STS_NOTSUPP;
+       case ENOSPC:
+               return BLK_STS_NOSPC;
+       }
+       return BLK_STS_IOERR;
+}
+
+/*
+ * Everything from here onwards *IS NOT PART OF THE KERNEL*
+ *
+ * The following functions are part of UML hypervisor code.
+ * All functions from here onwards are executed as a helper
+ * thread and are not allowed to execute any kernel functions.
+ *
+ * Any communication must occur strictly via shared memory and IPC.
+ *
+ * Do not add printks, locks, kernel memory operations, etc - it
+ * will result in unpredictable behaviour and/or crashes.
+ */
+
 static int update_bitmap(struct io_thread_req *req)
 {
        int n;
 
        if(req->cow_offset == -1)
-               return 0;
+               return map_error(0);
 
        n = os_pwrite_file(req->fds[1], &req->bitmap_words,
                          sizeof(req->bitmap_words), req->cow_offset);
-       if(n != sizeof(req->bitmap_words)){
-               printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
-                      req->fds[1]);
-               return 1;
-       }
+       if (n != sizeof(req->bitmap_words))
+               return map_error(-n);
 
-       return 0;
+       return map_error(0);
 }
 
 static void do_io(struct io_thread_req *req)
 {
-       char *buf;
+       char *buf = NULL;
        unsigned long len;
        int n, nsectors, start, end, bit;
        __u64 off;
 
-       if (req->op == UBD_FLUSH) {
+       /* FLUSH is really a special case, we cannot "case" it with others */
+
+       if (req_op(req->req) == REQ_OP_FLUSH) {
                /* fds[0] is always either the rw image or our cow file */
-               n = os_sync_file(req->fds[0]);
-               if (n != 0) {
-                       printk("do_io - sync failed err = %d "
-                              "fd = %d\n", -n, req->fds[0]);
-                       req->error = 1;
-               }
+               req->error = map_error(-os_sync_file(req->fds[0]));
                return;
        }
 
@@ -1462,30 +1521,42 @@ static void do_io(struct io_thread_req *req)
                off = req->offset + req->offsets[bit] +
                        start * req->sectorsize;
                len = (end - start) * req->sectorsize;
-               buf = &req->buffer[start * req->sectorsize];
+               if (req->buffer != NULL)
+                       buf = &req->buffer[start * req->sectorsize];
 
-               if(req->op == UBD_READ){
+               switch (req_op(req->req)) {
+               case REQ_OP_READ:
                        n = 0;
                        do {
                                buf = &buf[n];
                                len -= n;
                                n = os_pread_file(req->fds[bit], buf, len, off);
                                if (n < 0) {
-                                       printk("do_io - read failed, err = %d "
-                                              "fd = %d\n", -n, req->fds[bit]);
-                                       req->error = 1;
+                                       req->error = map_error(-n);
                                        return;
                                }
                        } while((n < len) && (n != 0));
                        if (n < len) memset(&buf[n], 0, len - n);
-               } else {
+                       break;
+               case REQ_OP_WRITE:
                        n = os_pwrite_file(req->fds[bit], buf, len, off);
                        if(n != len){
-                               printk("do_io - write failed err = %d "
-                                      "fd = %d\n", -n, req->fds[bit]);
-                               req->error = 1;
+                               req->error = map_error(-n);
+                               return;
+                       }
+                       break;
+               case REQ_OP_DISCARD:
+               case REQ_OP_WRITE_ZEROES:
+                       n = os_falloc_punch(req->fds[bit], off, len);
+                       if (n) {
+                               req->error = map_error(-n);
                                return;
                        }
+                       break;
+               default:
+                       WARN_ON_ONCE(1);
+                       req->error = BLK_STS_NOTSUPP;
+                       return;
                }
 
                start = end;
@@ -1520,11 +1591,6 @@ int io_thread(void *arg)
                        if (n == -EAGAIN) {
                                ubd_read_poll(-1);
                                continue;
-                       } else {
-                               printk("io_thread - read failed, fd = %d, "
-                                      "err = %d,"
-                                      "reminder = %d\n",
-                                      kernel_fd, -n, io_remainder_size);
                        }
                }
 
@@ -1539,11 +1605,6 @@ int io_thread(void *arg)
                        res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n);
                        if (res >= 0) {
                                written += res;
-                       } else {
-                               if (res != -EAGAIN) {
-                                       printk("io_thread - write failed, fd = %d, "
-                                              "err = %d\n", kernel_fd, -n);
-                               }
                        }
                        if (written < n) {
                                ubd_write_poll(-1);
index 3d8cdbdb4e661988df125b9cdb68d906fb1312a6..d2c17dd746204a4147b5112cb5cef344ea08f47d 100644 (file)
 #include <linux/if_packet.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
+#include <sys/uio.h>
 #include <linux/virtio_net.h>
 #include <netdb.h>
 #include <stdlib.h>
 #include <os.h>
 #include <um_malloc.h>
+#include <sys/uio.h>
 #include "vector_user.h"
 
 #define ID_GRE 0
index b10dde6cb793b059f03e6432185050f3937aed01..00bcbe2326d9ea712bb6e824da7e998cfd9e43f2 100644 (file)
@@ -10,9 +10,7 @@ generic-y += exec.h
 generic-y += extable.h
 generic-y += ftrace.h
 generic-y += futex.h
-generic-y += hardirq.h
 generic-y += hw_irq.h
-generic-y += io.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
index 7485398d07370034e361ea1fe4173eeb9bd51d1c..9c04562310b36630bb3ec008c99fdf05e3597c85 100644 (file)
@@ -197,12 +197,17 @@ static inline pte_t pte_mkold(pte_t pte)
 
 static inline pte_t pte_wrprotect(pte_t pte)
 { 
-       pte_clear_bits(pte, _PAGE_RW);
+       if (likely(pte_get_bits(pte, _PAGE_RW)))
+               pte_clear_bits(pte, _PAGE_RW);
+       else
+               return pte;
        return(pte_mknewprot(pte)); 
 }
 
 static inline pte_t pte_mkread(pte_t pte)
 { 
+       if (unlikely(pte_get_bits(pte, _PAGE_USER)))
+               return pte;
        pte_set_bits(pte, _PAGE_USER);
        return(pte_mknewprot(pte)); 
 }
@@ -221,6 +226,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
 
 static inline pte_t pte_mkwrite(pte_t pte)     
 {
+       if (unlikely(pte_get_bits(pte,  _PAGE_RW)))
+               return pte;
        pte_set_bits(pte, _PAGE_RW);
        return(pte_mknewprot(pte)); 
 }
index a7a6120f19d55ae505114e419d1b841ea7f90cee..e7242a0ae48972edf4a05b2d4f265ac5e437e9e2 100644 (file)
@@ -31,7 +31,6 @@ struct irq_fd {
 struct siginfo;
 extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs);
 extern void free_irq_by_fd(int fd);
-extern void reactivate_fd(int fd, int irqnum);
 extern void deactivate_fd(int fd, int irqnum);
 extern int deactivate_all_fds(void);
 extern int activate_ipi(int fd, int pid);
index 048ae37eb5aa1add1d9732085d03489320a1c42b..ebf23012a59bbc2c019ecc3f4ffb324dccf2d92c 100644 (file)
@@ -175,6 +175,7 @@ extern int os_fchange_dir(int fd);
 extern unsigned os_major(unsigned long long dev);
 extern unsigned os_minor(unsigned long long dev);
 extern unsigned long long os_makedev(unsigned major, unsigned minor);
+extern int os_falloc_punch(int fd, unsigned long long offset, int count);
 
 /* start_up.c */
 extern void os_early_checks(void);
index 8360fa3f676df2ec0e051235bb5aebea68cc9f83..f4874b7ec5038a2dcf8e3cf01b6873048d0bb8e5 100644 (file)
@@ -350,11 +350,6 @@ static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 }
 
 
-void reactivate_fd(int fd, int irqnum)
-{
-       /** NOP - we do auto-EOI now **/
-}
-
 void deactivate_fd(int fd, int irqnum)
 {
        struct irq_entry *to_free;
@@ -449,7 +444,6 @@ int um_request_irq(unsigned int irq, int fd, int type,
 }
 
 EXPORT_SYMBOL(um_request_irq);
-EXPORT_SYMBOL(reactivate_fd);
 
 /*
  * irq_chip must define at least enable/disable and ack when
index b5e0cbb34382845056c9d4c8625212122b79c81e..3fb6a4041ed6ba4d5791fcddcff08a79cc0ad6cf 100644 (file)
@@ -16,7 +16,6 @@ static irqreturn_t sigio_interrupt(int irq, void *data)
        char c;
 
        os_read_file(sigio_irq_fd, &c, sizeof(c));
-       reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
        return IRQ_HANDLED;
 }
 
index d450797a3a7cd478e3cdcec2308043ffc9537684..7f06fdbc7ee110468e70ba9fbb4670253d655d63 100644 (file)
@@ -62,27 +62,28 @@ static int do_op_one_page(unsigned long addr, int len, int is_write,
        jmp_buf buf;
        struct page *page;
        pte_t *pte;
-       int n, faulted;
+       int n;
 
        pte = maybe_map(addr, is_write);
        if (pte == NULL)
                return -1;
 
        page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+       pagefault_disable();
+       addr = (unsigned long) page_address(page) +
+               (addr & ~PAGE_MASK);
+#else
        addr = (unsigned long) kmap_atomic(page) +
                (addr & ~PAGE_MASK);
+#endif
+       n = (*op)(addr, len, arg);
 
-       current->thread.fault_catcher = &buf;
-
-       faulted = UML_SETJMP(&buf);
-       if (faulted == 0)
-               n = (*op)(addr, len, arg);
-       else
-               n = -1;
-
-       current->thread.fault_catcher = NULL;
-
+#ifdef CONFIG_64BIT
+       pagefault_enable();
+#else
        kunmap_atomic((void *)addr);
+#endif
 
        return n;
 }
index 37508b190106db73a75ca20a9a52162a4849adc9..8347161c2ae0fb0557dd7c008671d5d95c7c720a 100644 (file)
@@ -37,17 +37,19 @@ struct host_vm_change {
                        } mprotect;
                } u;
        } ops[1];
+       int userspace;
        int index;
-       struct mm_id *id;
+       struct mm_struct *mm;
        void *data;
        int force;
 };
 
-#define INIT_HVC(mm, force) \
+#define INIT_HVC(mm, force, userspace) \
        ((struct host_vm_change) \
         { .ops         = { { .type = NONE } }, \
-          .id          = &mm->context.id, \
+          .mm          = mm, \
                   .data        = NULL, \
+          .userspace   = userspace, \
           .index       = 0, \
           .force       = force })
 
@@ -68,18 +70,40 @@ static int do_ops(struct host_vm_change *hvc, int end,
                op = &hvc->ops[i];
                switch (op->type) {
                case MMAP:
-                       ret = map(hvc->id, op->u.mmap.addr, op->u.mmap.len,
-                                 op->u.mmap.prot, op->u.mmap.fd,
-                                 op->u.mmap.offset, finished, &hvc->data);
+                       if (hvc->userspace)
+                               ret = map(&hvc->mm->context.id, op->u.mmap.addr,
+                                         op->u.mmap.len, op->u.mmap.prot,
+                                         op->u.mmap.fd,
+                                         op->u.mmap.offset, finished,
+                                         &hvc->data);
+                       else
+                               map_memory(op->u.mmap.addr, op->u.mmap.offset,
+                                          op->u.mmap.len, 1, 1, 1);
                        break;
                case MUNMAP:
-                       ret = unmap(hvc->id, op->u.munmap.addr,
-                                   op->u.munmap.len, finished, &hvc->data);
+                       if (hvc->userspace)
+                               ret = unmap(&hvc->mm->context.id,
+                                           op->u.munmap.addr,
+                                           op->u.munmap.len, finished,
+                                           &hvc->data);
+                       else
+                               ret = os_unmap_memory(
+                                       (void *) op->u.munmap.addr,
+                                                     op->u.munmap.len);
+
                        break;
                case MPROTECT:
-                       ret = protect(hvc->id, op->u.mprotect.addr,
-                                     op->u.mprotect.len, op->u.mprotect.prot,
-                                     finished, &hvc->data);
+                       if (hvc->userspace)
+                               ret = protect(&hvc->mm->context.id,
+                                             op->u.mprotect.addr,
+                                             op->u.mprotect.len,
+                                             op->u.mprotect.prot,
+                                             finished, &hvc->data);
+                       else
+                               ret = os_protect_memory(
+                                       (void *) op->u.mprotect.addr,
+                                                       op->u.mprotect.len,
+                                                       1, 1, 1);
                        break;
                default:
                        printk(KERN_ERR "Unknown op type %d in do_ops\n",
@@ -100,9 +124,12 @@ static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
 {
        __u64 offset;
        struct host_vm_op *last;
-       int fd, ret = 0;
+       int fd = -1, ret = 0;
 
-       fd = phys_mapping(phys, &offset);
+       if (hvc->userspace)
+               fd = phys_mapping(phys, &offset);
+       else
+               offset = phys;
        if (hvc->index != 0) {
                last = &hvc->ops[hvc->index - 1];
                if ((last->type == MMAP) &&
@@ -215,10 +242,11 @@ static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
                prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
                        (x ? UM_PROT_EXEC : 0));
                if (hvc->force || pte_newpage(*pte)) {
-                       if (pte_present(*pte))
-                               ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
-                                              PAGE_SIZE, prot, hvc);
-                       else
+                       if (pte_present(*pte)) {
+                               if (pte_newpage(*pte))
+                                       ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
+                                                      PAGE_SIZE, prot, hvc);
+                       } else
                                ret = add_munmap(addr, PAGE_SIZE, hvc);
                } else if (pte_newprot(*pte))
                        ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
@@ -277,9 +305,9 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
        pgd_t *pgd;
        struct host_vm_change hvc;
        unsigned long addr = start_addr, next;
-       int ret = 0;
+       int ret = 0, userspace = 1;
 
-       hvc = INIT_HVC(mm, force);
+       hvc = INIT_HVC(mm, force, userspace);
        pgd = pgd_offset(mm, addr);
        do {
                next = pgd_addr_end(addr, end_addr);
@@ -314,9 +342,11 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
        pmd_t *pmd;
        pte_t *pte;
        unsigned long addr, last;
-       int updated = 0, err;
+       int updated = 0, err = 0, force = 0, userspace = 0;
+       struct host_vm_change hvc;
 
        mm = &init_mm;
+       hvc = INIT_HVC(mm, force, userspace);
        for (addr = start; addr < end;) {
                pgd = pgd_offset(mm, addr);
                if (!pgd_present(*pgd)) {
@@ -325,8 +355,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
                                last = end;
                        if (pgd_newpage(*pgd)) {
                                updated = 1;
-                               err = os_unmap_memory((void *) addr,
-                                                     last - addr);
+                               err = add_munmap(addr, last - addr, &hvc);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
@@ -342,8 +371,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
                                last = end;
                        if (pud_newpage(*pud)) {
                                updated = 1;
-                               err = os_unmap_memory((void *) addr,
-                                                     last - addr);
+                               err = add_munmap(addr, last - addr, &hvc);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
@@ -359,8 +387,7 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
                                last = end;
                        if (pmd_newpage(*pmd)) {
                                updated = 1;
-                               err = os_unmap_memory((void *) addr,
-                                                     last - addr);
+                               err = add_munmap(addr, last - addr, &hvc);
                                if (err < 0)
                                        panic("munmap failed, errno = %d\n",
                                              -err);
@@ -372,22 +399,25 @@ static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
                pte = pte_offset_kernel(pmd, addr);
                if (!pte_present(*pte) || pte_newpage(*pte)) {
                        updated = 1;
-                       err = os_unmap_memory((void *) addr,
-                                             PAGE_SIZE);
+                       err = add_munmap(addr, PAGE_SIZE, &hvc);
                        if (err < 0)
                                panic("munmap failed, errno = %d\n",
                                      -err);
                        if (pte_present(*pte))
-                               map_memory(addr,
-                                          pte_val(*pte) & PAGE_MASK,
-                                          PAGE_SIZE, 1, 1, 1);
+                               err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
+                                              PAGE_SIZE, 0, &hvc);
                }
                else if (pte_newprot(*pte)) {
                        updated = 1;
-                       os_protect_memory((void *) addr, PAGE_SIZE, 1, 1, 1);
+                       err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
                }
                addr += PAGE_SIZE;
        }
+       if (!err)
+               err = do_ops(&hvc, hvc.index, 1);
+
+       if (err < 0)
+               panic("flush_tlb_kernel failed, errno = %d\n", err);
        return updated;
 }
 
@@ -491,6 +521,13 @@ pte_t *addr_pte(struct task_struct *task, unsigned long addr)
 
 void flush_tlb_all(void)
 {
+       /*
+        * Don't bother flushing if this address space is about to be
+        * destroyed.
+        */
+       if (atomic_read(&current->mm->mm_users) == 0)
+               return;
+
        flush_tlb_mm(current->mm);
 }
 
@@ -512,6 +549,13 @@ void __flush_tlb_one(unsigned long addr)
 static void fix_range(struct mm_struct *mm, unsigned long start_addr,
                      unsigned long end_addr, int force)
 {
+       /*
+        * Don't bother flushing if this address space is about to be
+        * destroyed.
+        */
+       if (atomic_read(&mm->mm_users) == 0)
+               return;
+
        fix_range_common(mm, start_addr, end_addr, force);
 }
 
@@ -527,13 +571,6 @@ EXPORT_SYMBOL(flush_tlb_range);
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
                        unsigned long end)
 {
-       /*
-        * Don't bother flushing if this address space is about to be
-        * destroyed.
-        */
-       if (atomic_read(&mm->mm_users) == 0)
-               return;
-
        fix_range(mm, start, end, 0);
 }
 
index c0197097c86e5075c146ad17142895e70b9fe25c..f25b110d4e7012712aa358e4d7c744311b6e0ede 100644 (file)
@@ -610,3 +610,13 @@ unsigned long long os_makedev(unsigned major, unsigned minor)
 {
        return makedev(major, minor);
 }
+
+int os_falloc_punch(int fd, unsigned long long offset, int len)
+{
+       int n = fallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, offset, len);
+
+       if (n < 0)
+               return -errno;
+       return n;
+}
+