smb3: missing defines and structs for reparse point handling
[linux-2.6-block.git] / fs / read_write.c
index 766bdcb381f34a0d4c3f6fb17d9481c87f3c3226..bfcb4ced5664c00f2fab706ba483094a4bc1ca5a 100644 (file)
@@ -331,7 +331,7 @@ COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned i
 }
 #endif
 
-#ifdef __ARCH_WANT_SYS_LLSEEK
+#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT)
 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
                unsigned long, offset_low, loff_t __user *, result,
                unsigned int, whence)
@@ -1407,7 +1407,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
                goto fput_in;
        if (!(out.file->f_mode & FMODE_WRITE))
                goto fput_out;
-       retval = -EINVAL;
        in_inode = file_inode(in.file);
        out_inode = file_inode(out.file);
        out_pos = out.file->f_pos;
@@ -1589,10 +1588,14 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
         * more efficient if both clone and copy are supported (e.g. NFS).
         */
        if (file_in->f_op->remap_file_range) {
-               ret = file_in->f_op->remap_file_range(file_in, pos_in,
-                               file_out, pos_out, len, 0);
-               if (ret == 0) {
-                       ret = len;
+               loff_t cloned;
+
+               cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+                               file_out, pos_out,
+                               min_t(loff_t, MAX_RW_COUNT, len),
+                               REMAP_FILE_CAN_SHORTEN);
+               if (cloned > 0) {
+                       ret = cloned;
                        goto done;
                }
        }
@@ -1686,11 +1689,12 @@ out2:
        return ret;
 }
 
-static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
+                            bool write)
 {
        struct inode *inode = file_inode(file);
 
-       if (unlikely(pos < 0))
+       if (unlikely(pos < 0 || len < 0))
                return -EINVAL;
 
         if (unlikely((loff_t) (pos + len) < 0))
@@ -1717,24 +1721,125 @@ static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * can't meaningfully compare post-EOF contents.
  *
  * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
  */
 static int generic_remap_check_len(struct inode *inode_in,
                                   struct inode *inode_out,
                                   loff_t pos_out,
-                                  u64 *len,
-                                  bool is_dedupe)
+                                  loff_t *len,
+                                  unsigned int remap_flags)
 {
        u64 blkmask = i_blocksize(inode_in) - 1;
+       loff_t new_len = *len;
 
        if ((*len & blkmask) == 0)
                return 0;
 
-       if (is_dedupe)
-               *len &= ~blkmask;
-       else if (pos_out + *len < i_size_read(inode_out))
-               return -EINVAL;
+       if ((remap_flags & REMAP_FILE_DEDUP) ||
+           pos_out + *len < i_size_read(inode_out))
+               new_len &= ~blkmask;
+
+       if (new_len == *len)
+               return 0;
+
+       if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+               *len = new_len;
+               return 0;
+       }
+
+       return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
+}
+
+/*
+ * Read the page containing @offset of @inode through the page cache.  Returns
+ * the page locked, or an error pointer (-EIO if the page is not uptodate).
+ */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+       struct page *page;
+
+       page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
+       if (IS_ERR(page))
+               return page;
+       if (!PageUptodate(page)) {
+               put_page(page);
+               return ERR_PTR(-EIO);
+       }
+       lock_page(page);
+       return page;
+}
 
+/*
+ * Compare two file ranges chunk by chunk; returns 0 with *is_same set, or a
+ * negative errno.  Caller must have locked both inodes to prevent write races.
+ */
+static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+                                        struct inode *dest, loff_t destoff,
+                                        loff_t len, bool *is_same)
+{
+       loff_t src_poff;
+       loff_t dest_poff;
+       void *src_addr;
+       void *dest_addr;
+       struct page *src_page;
+       struct page *dest_page;
+       loff_t cmp_len;
+       bool same;
+       int error;
+
+       error = -EINVAL;
+       same = true;
+       while (len) {
+               src_poff = srcoff & (PAGE_SIZE - 1);
+               dest_poff = destoff & (PAGE_SIZE - 1);
+               cmp_len = min(PAGE_SIZE - src_poff,
+                             PAGE_SIZE - dest_poff);
+               cmp_len = min(cmp_len, len);
+               if (cmp_len <= 0)
+                       goto out_error;
+
+               src_page = vfs_dedupe_get_page(src, srcoff);
+               if (IS_ERR(src_page)) {
+                       error = PTR_ERR(src_page);
+                       goto out_error;
+               }
+               dest_page = vfs_dedupe_get_page(dest, destoff);
+               if (IS_ERR(dest_page)) {
+                       error = PTR_ERR(dest_page);
+                       unlock_page(src_page);
+                       put_page(src_page);
+                       goto out_error;
+               }
+               src_addr = kmap_atomic(src_page);
+               dest_addr = kmap_atomic(dest_page);
+
+               flush_dcache_page(src_page);
+               flush_dcache_page(dest_page);
+
+               if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+                       same = false;
+
+               kunmap_atomic(dest_addr);
+               kunmap_atomic(src_addr);
+               unlock_page(dest_page);
+               unlock_page(src_page);
+               put_page(dest_page);
+               put_page(src_page);
+
+               if (!same)
+                       break;
+
+               srcoff += cmp_len;
+               destoff += cmp_len;
+               len -= cmp_len;
+       }
+
+       *is_same = same;
        return 0;
+
+out_error:
+       return error;
 }
 
 /*
@@ -1742,12 +1847,12 @@ static int generic_remap_check_len(struct inode *inode_in,
  * sense, and then flush all dirty data.  Caller must ensure that the
  * inodes have been locked against any other modifications.
  *
- * Returns: 0 for "nothing to clone", 1 for "something to clone", or
- * the usual negative error code.
+ * If there's an error, then the usual negative error code is returned.
+ * Otherwise returns 0 with *len set to the request length.
  */
 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
                                  struct file *file_out, loff_t pos_out,
-                                 u64 *len, bool is_dedupe)
+                                 loff_t *len, unsigned int remap_flags)
 {
        struct inode *inode_in = file_inode(file_in);
        struct inode *inode_out = file_inode(file_out);
@@ -1771,7 +1876,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
        if (*len == 0) {
                loff_t isize = i_size_read(inode_in);
 
-               if (is_dedupe || pos_in == isize)
+               if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
                        return 0;
                if (pos_in > isize)
                        return -EINVAL;
@@ -1782,7 +1887,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 
        /* Check that we don't violate system file offset limits. */
        ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
-                       is_dedupe);
+                       remap_flags);
        if (ret)
                return ret;
 
@@ -1804,7 +1909,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
        /*
         * Check that the extents are the same.
         */
-       if (is_dedupe) {
+       if (remap_flags & REMAP_FILE_DEDUP) {
                bool            is_same = false;
 
                ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
@@ -1816,20 +1921,42 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
        }
 
        ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
-                       is_dedupe);
+                       remap_flags);
        if (ret)
                return ret;
 
-       return 1;
+       /* Dedupe can't alter the file contents, so it skips these updates. */
+       if (!(remap_flags & REMAP_FILE_DEDUP)) {
+               /* Update the timestamps, since we can alter file contents. */
+               if (!(file_out->f_mode & FMODE_NOCMTIME)) {
+                       ret = file_update_time(file_out);
+                       if (ret)
+                               return ret;
+               }
+
+               /*
+                * Clear the security bits if the process is not being run by
+                * root.  This keeps people from modifying setuid and setgid
+                * binaries.
+                */
+               ret = file_remove_privs(file_out);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
 }
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
-int do_clone_file_range(struct file *file_in, loff_t pos_in,
-                       struct file *file_out, loff_t pos_out, u64 len)
+loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+                          struct file *file_out, loff_t pos_out,
+                          loff_t len, unsigned int remap_flags)
 {
        struct inode *inode_in = file_inode(file_in);
        struct inode *inode_out = file_inode(file_out);
-       int ret;
+       loff_t ret;
+
+       WARN_ON_ONCE(remap_flags);
 
        if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
                return -EISDIR;
@@ -1861,129 +1988,53 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
                return ret;
 
        ret = file_in->f_op->remap_file_range(file_in, pos_in,
-                       file_out, pos_out, len, 0);
-       if (!ret) {
-               fsnotify_access(file_in);
-               fsnotify_modify(file_out);
-       }
+                       file_out, pos_out, len, remap_flags);
+       if (ret < 0)
+               return ret;
 
+       fsnotify_access(file_in);
+       fsnotify_modify(file_out);
        return ret;
 }
 EXPORT_SYMBOL(do_clone_file_range);
 
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-                        struct file *file_out, loff_t pos_out, u64 len)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+                           struct file *file_out, loff_t pos_out,
+                           loff_t len, unsigned int remap_flags)
 {
-       int ret;
+       loff_t ret;
 
        file_start_write(file_out);
-       ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+       ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+                                 remap_flags);
        file_end_write(file_out);
 
        return ret;
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
 
-/*
- * Read a page's worth of file data into the page cache.  Return the page
- * locked.
- */
-static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+/* Check whether we are allowed to dedupe the destination file */
+static bool allow_file_dedupe(struct file *file)
 {
-       struct address_space *mapping;
-       struct page *page;
-       pgoff_t n;
-
-       n = offset >> PAGE_SHIFT;
-       mapping = inode->i_mapping;
-       page = read_mapping_page(mapping, n, NULL);
-       if (IS_ERR(page))
-               return page;
-       if (!PageUptodate(page)) {
-               put_page(page);
-               return ERR_PTR(-EIO);
-       }
-       lock_page(page);
-       return page;
+       if (capable(CAP_SYS_ADMIN))
+               return true;
+       if (file->f_mode & FMODE_WRITE)
+               return true;
+       if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+               return true;
+       if (!inode_permission(file_inode(file), MAY_WRITE))
+               return true;
+       return false;
 }
 
-/*
- * Compare extents of two files to see if they are the same.
- * Caller must have locked both inodes to prevent write races.
- */
-int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-                                 struct inode *dest, loff_t destoff,
-                                 loff_t len, bool *is_same)
+loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+                                struct file *dst_file, loff_t dst_pos,
+                                loff_t len, unsigned int remap_flags)
 {
-       loff_t src_poff;
-       loff_t dest_poff;
-       void *src_addr;
-       void *dest_addr;
-       struct page *src_page;
-       struct page *dest_page;
-       loff_t cmp_len;
-       bool same;
-       int error;
-
-       error = -EINVAL;
-       same = true;
-       while (len) {
-               src_poff = srcoff & (PAGE_SIZE - 1);
-               dest_poff = destoff & (PAGE_SIZE - 1);
-               cmp_len = min(PAGE_SIZE - src_poff,
-                             PAGE_SIZE - dest_poff);
-               cmp_len = min(cmp_len, len);
-               if (cmp_len <= 0)
-                       goto out_error;
+       loff_t ret;
 
-               src_page = vfs_dedupe_get_page(src, srcoff);
-               if (IS_ERR(src_page)) {
-                       error = PTR_ERR(src_page);
-                       goto out_error;
-               }
-               dest_page = vfs_dedupe_get_page(dest, destoff);
-               if (IS_ERR(dest_page)) {
-                       error = PTR_ERR(dest_page);
-                       unlock_page(src_page);
-                       put_page(src_page);
-                       goto out_error;
-               }
-               src_addr = kmap_atomic(src_page);
-               dest_addr = kmap_atomic(dest_page);
-
-               flush_dcache_page(src_page);
-               flush_dcache_page(dest_page);
-
-               if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
-                       same = false;
-
-               kunmap_atomic(dest_addr);
-               kunmap_atomic(src_addr);
-               unlock_page(dest_page);
-               unlock_page(src_page);
-               put_page(dest_page);
-               put_page(src_page);
-
-               if (!same)
-                       break;
-
-               srcoff += cmp_len;
-               destoff += cmp_len;
-               len -= cmp_len;
-       }
-
-       *is_same = same;
-       return 0;
-
-out_error:
-       return error;
-}
-EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
-
-int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-                             struct file *dst_file, loff_t dst_pos, u64 len)
-{
-       s64 ret;
+       WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+                                    REMAP_FILE_CAN_SHORTEN));
 
        ret = mnt_want_write_file(dst_file);
        if (ret)
@@ -1993,8 +2044,8 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
        if (ret < 0)
                goto out_drop_write;
 
-       ret = -EINVAL;
-       if (!(capable(CAP_SYS_ADMIN) || (dst_file->f_mode & FMODE_WRITE)))
+       ret = -EPERM;
+       if (!allow_file_dedupe(dst_file))
                goto out_drop_write;
 
        ret = -EXDEV;
@@ -2015,7 +2066,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
        }
 
        ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
-                       dst_pos, len, REMAP_FILE_DEDUP);
+                       dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
 out_drop_write:
        mnt_drop_write_file(dst_file);
 
@@ -2032,7 +2083,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
        int i;
        int ret;
        u16 count = same->dest_count;
-       int deduped;
+       loff_t deduped;
 
        if (!(file->f_mode & FMODE_READ))
                return -EINVAL;
@@ -2083,7 +2134,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
                }
 
                deduped = vfs_dedupe_file_range_one(file, off, dst_file,
-                                                   info->dest_offset, len);
+                                                   info->dest_offset, len,
+                                                   REMAP_FILE_CAN_SHORTEN);
                if (deduped == -EBADE)
                        info->status = FILE_DEDUPE_RANGE_DIFFERS;
                else if (deduped < 0)