[linux-2.6-block.git] / fs / nfs / file.c

/*
 *  linux/fs/nfs/file.c
 *
 *  Copyright (C) 1992  Rick Sladkey
 *
 *  Changes Copyright (C) 1994 by Florian La Roche
 *   - Do not copy data too often around in the kernel.
 *   - In nfs_file_read the return value of kmalloc wasn't checked.
 *   - Put in a better version of read look-ahead buffering. Original idea
 *     and implementation by Wai S Kok elekokws@ee.nus.sg.
 *
 *  Expire cache on write to a file by Wai S Kok (Oct 1994).
 *
 *  Total rewrite of read side for new NFS buffer cache.. Linus.
 *
 *  nfs regular file handling functions
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/aio.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_FILE

/*
 * Forward declarations of the handlers defined later in this file; they
 * are referenced by the nfs_file_operations table below.
 */
static int nfs_file_open(struct inode *, struct file *);
static int nfs_file_release(struct inode *, struct file *);
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int  nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t count, unsigned int flags);
static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos);
static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos);
static int  nfs_file_flush(struct file *, fl_owner_t id);
static int  nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);

/* Tentative definition; nfs_file_mmap() installs this table on mapped VMAs. */
static struct vm_operations_struct nfs_file_vm_ops;

/*
 * file_operations table for regular NFS files.
 *
 * The synchronous .read/.write slots use the generic do_sync_read() and
 * do_sync_write(); every other slot points at a handler from this file,
 * except that without CONFIG_MMU .mmap uses generic_file_mmap().
 */
const struct file_operations nfs_file_operations = {
	/* open/close life cycle */
	.open		= nfs_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,
	.check_flags	= nfs_check_flags,

	/* data path */
	.llseek		= nfs_file_llseek,
	.read		= do_sync_read,
	.aio_read	= nfs_file_read,
	.splice_read	= nfs_file_splice_read,
	.write		= do_sync_write,
	.aio_write	= nfs_file_write,
	.fsync		= nfs_fsync,
#ifdef CONFIG_MMU
	.mmap		= nfs_file_mmap,
#else
	.mmap		= generic_file_mmap,
#endif

	/* locking and leases */
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.setlease	= nfs_setlease,
};

/* inode_operations for regular NFS files: attribute and permission hooks only. */
const struct inode_operations nfs_file_inode_operations = {
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	.permission	= nfs_permission,
};

#ifdef CONFIG_NFS_V3
/*
 * NFSv3 variant of the regular-file inode_operations: the same three
 * hooks as nfs_file_inode_operations plus the nfs3_*xattr handlers.
 */
const struct inode_operations nfs3_file_inode_operations = {
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	.permission	= nfs_permission,

	/* extended attributes */
	.getxattr	= nfs3_getxattr,
	.setxattr	= nfs3_setxattr,
	.listxattr	= nfs3_listxattr,
	.removexattr	= nfs3_removexattr,
};
#endif  /* CONFIG_NFS_V3 */

/*
 * Hack for future NFS swap support.
 *
 * If nothing else has defined IS_SWAPFILE(), provide a stub that always
 * evaluates to 0, so the swap-file check in nfs_file_write() is never
 * taken.
 */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode)	(0)
#endif

/*
 * Validate a set of open flags.
 *
 * Returns -EINVAL when both O_APPEND and O_DIRECT are set, 0 for every
 * other combination.
 */
static int nfs_check_flags(int flags)
{
	const int rejected = O_APPEND | O_DIRECT;

	return ((flags & rejected) == rejected) ? -EINVAL : 0;
}

/*
 * Open file
 *
 * Rejects flag combinations refused by nfs_check_flags(), bumps the
 * NFSIOS_VFSOPEN counter, then calls the protocol's file_open() between
 * lock_kernel() and unlock_kernel().  Returns the first error seen, or
 * the protocol handler's result.
 */
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
	int err = nfs_check_flags(filp->f_flags);

	if (err != 0)
		return err;

	nfs_inc_stats(inode, NFSIOS_VFSOPEN);

	lock_kernel();
	err = NFS_PROTO(inode)->file_open(inode, filp);
	unlock_kernel();

	return err;
}

/*
 * Release file
 *
 * Writes back all dirty pages if the file was opened for writing, bumps
 * the NFSIOS_VFSRELEASE counter and returns the protocol's
 * file_release() result.
 */
static int
nfs_file_release(struct inode *inode, struct file *filp)
{
	if (filp->f_mode & FMODE_WRITE) {
		struct inode *dirty = filp->f_path.dentry->d_inode;

		/* flush now so the pages go out with the right creds */
		nfs_wb_all(dirty);
	}

	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);

	return NFS_PROTO(inode)->file_release(inode, filp);
}

/**
 * nfs_revalidate_file_size - Revalidate the file size
 * @inode: pointer to inode struct
 * @filp: pointer to struct file
 *
 * Wrapper around __nfs_revalidate_inode() that decides whether the
 * cached file length can be used as is:
 *  - a NFS_MOUNT_NOAC mount or an O_DIRECT file always revalidates
 *    (the cache should not be trusted);
 *  - otherwise, if the inode has cached pages (npages != 0), the
 *    server's idea of the length is not consulted;
 *  - otherwise revalidate only when NFS_INO_REVAL_PAGECACHE is set or
 *    the attribute cache has timed out.
 *
 * Returns 0 when no revalidation was needed, else the result of
 * __nfs_revalidate_inode().
 */
static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);

	if (!(server->flags & NFS_MOUNT_NOAC) && !(filp->f_flags & O_DIRECT)) {
		/* cached pages present: keep our own view of the length */
		if (nfsi->npages != 0)
			return 0;

		/* attributes still fresh and nothing flagged for revalidation */
		if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) &&
		    !nfs_attribute_timeout(inode))
			return 0;
	}

	return __nfs_revalidate_inode(server, inode);
}

/*
 * llseek handler.  For SEEK_END the cached file length is revalidated
 * first and a negative revalidation result is returned as the seek
 * result; the seek itself is always done by remote_llseek().
 */
static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
	int err;

	if (origin != SEEK_END)
		return remote_llseek(filp, offset, origin);

	/* seeking relative to EOF needs an up-to-date file length */
	err = nfs_revalidate_file_size(filp->f_mapping->host, filp);
	if (err < 0)
		return (loff_t)err;

	return remote_llseek(filp, offset, origin);
}

/*
 * Helper for nfs_file_flush() and nfs_fsync()
 *
 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
 * disk, but it retrieves and clears ctx->error after synching, despite
 * the two being set at the same time in nfs_context_set_write_error().
 * This is because the former is used to notify the _next_ call to
 * nfs_file_write() that a write error occured, and hence cause it to
 * fall back to doing a synchronous write.
 */
static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
{
	int have_error, status;
	int ret = 0;

	have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
	status = nfs_wb_all(inode);
	have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
	if (have_error)
		ret = xchg(&ctx->error, 0);
	if (!ret)
		ret = status;
	return ret;
}

/*
 * Flush all dirty pages, and check for write errors.
 *
 * Files not opened for writing return 0 immediately.  Otherwise the
 * data is synced through nfs_do_fsync() and, on success, the inode is
 * revalidated.  Returns the nfs_do_fsync() status.
 */
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	int err;

	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);

	if (!(file->f_mode & FMODE_WRITE))
		return 0;

	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);

	err = nfs_do_fsync(ctx, inode);
	if (err != 0)
		return err;

	/* Ensure that data+attribute caches are up to date after close() */
	nfs_revalidate_inode(NFS_SERVER(inode), inode);
	return 0;
}

/*
 * .aio_read handler for regular NFS files.
 *
 * @iocb:    request descriptor; iocb->ki_filp is the file being read
 * @iov:     destination segments
 * @nr_segs: number of entries in @iov
 * @pos:     file offset to read from
 *
 * O_DIRECT files are handed straight to nfs_file_direct_read().
 * Otherwise the mapping is revalidated and, if that succeeds, the read
 * is served by generic_file_aio_read().
 *
 * Returns whatever the helper that handled the request returned, or the
 * error from nfs_revalidate_mapping().
 */
static ssize_t
nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos);

	/*
	 * Print the offset as unsigned long long, like the splice_read
	 * handler does, so it is not truncated where long is narrower
	 * than loff_t.
	 */
	dfprintk(VFS, "nfs: read(%s/%s, %lu@%Lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (unsigned long long) pos);

	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	if (!result) {
		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
		/*
		 * Account only the bytes actually read: a failed
		 * revalidation, a failed read or a short read must not be
		 * charged with the full requested count.
		 */
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	return result;
}

/*
 * .splice_read handler: revalidate the mapping, then let
 * generic_file_splice_read() do the work.  A non-zero revalidation
 * result is returned as is.
 */
static ssize_t
nfs_file_splice_read(struct file *filp, loff_t *ppos,
		     struct pipe_inode_info *pipe, size_t count,
		     unsigned int flags)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	ssize_t err;

	dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		(unsigned long) count, (unsigned long long) *ppos);

	err = nfs_revalidate_mapping(inode, filp->f_mapping);
	if (err != 0)
		return err;

	return generic_file_splice_read(filp, ppos, pipe, count, flags);
}

/*
 * .mmap handler: revalidate the mapping and, on success, install the
 * NFS vm_operations on the VMA, mark it VM_CAN_NONLINEAR and record the
 * file access.  Returns the revalidation status.
 */
static int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	int err;

	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name);

	err = nfs_revalidate_mapping(inode, file->f_mapping);
	if (err != 0)
		return err;

	vma->vm_ops = &nfs_file_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;
	file_accessed(file);
	return 0;
}

/*
 * Flush any dirty pages for this process, and check for write errors.
 * The status returned here comes from nfs_do_fsync() and is meant as a
 * reliable indication of whether any write errors occurred for this
 * process.  @datasync is not used.
 */
static int
nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	struct inode *inode = dentry->d_inode;

	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);

	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);

	return nfs_do_fsync(nfs_file_open_context(file), inode);
}

/*
 * This does the "real" work of the write. We must allocate and lock the
 * page to be sent back to the generic routine, which then copies the
 * data from user space.
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 *
 * Returns 0 with the page stored in *pagep, -ENOMEM if no page could be
 * grabbed, or the nfs_flush_incompatible() error, in which case the
 * page has been unlocked and released again.
 */
static int nfs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	const pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	struct page *page = __grab_cache_page(mapping, index);
	int err;

	if (page == NULL)
		return -ENOMEM;
	*pagep = page;

	err = nfs_flush_incompatible(file, page);
	if (err == 0)
		return 0;

	/* undo the grab on failure */
	unlock_page(page);
	page_cache_release(page);
	return err;
}

/*
 * Second half of a buffered write: hand the copied range to
 * nfs_updatepage() (called between lock_kernel() and unlock_kernel()),
 * then unlock and release the page.
 *
 * Returns the number of bytes copied, or the negative status from
 * nfs_updatepage().
 */
static int nfs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	/* offset of the written range within its page */
	const unsigned in_page = pos & (PAGE_CACHE_SIZE - 1);
	int err;

	lock_kernel();
	err = nfs_updatepage(file, page, in_page, copied);
	unlock_kernel();

	unlock_page(page);
	page_cache_release(page);

	if (err >= 0)
		return copied;
	return err;
}

/*
 * .invalidatepage hook.  Acts only when @offset is 0; any other offset
 * is ignored.
 */
static void nfs_invalidate_page(struct page *page, unsigned long offset)
{
	if (offset == 0) {
		/* Cancel any unstarted writes on this page */
		nfs_wb_page_cancel(page->mapping->host, page);
	}
}

/*
 * .releasepage hook (see nfs_file_aops): unconditionally returns 0.
 * Neither @page nor @gfp is examined.
 * NOTE(review): 0 presumably tells the caller the page cannot be
 * released - confirm against the releasepage contract.
 */
static int nfs_release_page(struct page *page, gfp_t gfp)
{
	/* If PagePrivate() is set, then the page is not freeable */
	return 0;
}

/* .launder_page hook: write the page back via nfs_wb_page() and return its status. */
static int nfs_launder_page(struct page *page)
{
	struct inode *inode = page->mapping->host;

	return nfs_wb_page(inode, page);
}

/*
 * address_space_operations for regular NFS files.  Dirtying uses the
 * generic __set_page_dirty_nobuffers(); everything else is NFS-specific.
 */
const struct address_space_operations nfs_file_aops = {
	/* read side */
	.readpage	= nfs_readpage,
	.readpages	= nfs_readpages,

	/* write side */
	.write_begin	= nfs_write_begin,
	.write_end	= nfs_write_end,
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.writepage	= nfs_writepage,
	.writepages	= nfs_writepages,
	.direct_IO	= nfs_direct_IO,

	/* page invalidation and release */
	.invalidatepage	= nfs_invalidate_page,
	.releasepage	= nfs_release_page,
	.launder_page	= nfs_launder_page,
};

static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	struct file *filp = vma->vm_file;
	unsigned pagelen;
	int ret = -EINVAL;
	struct address_space *mapping;

	lock_page(page);
	mapping = page->mapping;
	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
		goto out_unlock;

	ret = 0;
	pagelen = nfs_page_length(page);
	if (pagelen == 0)
		goto out_unlock;

	ret = nfs_flush_incompatible(filp, page);
	if (ret != 0)
		goto out_unlock;

	ret = nfs_updatepage(filp, page, 0, pagelen);
	if (ret == 0)
		ret = pagelen;
out_unlock:
	unlock_page(page);
	return ret;
}

/*
 * VM operations installed by nfs_file_mmap(): faults use the generic
 * filemap_fault(); write-enabling a page goes through
 * nfs_vm_page_mkwrite().
 */
static struct vm_operations_struct nfs_file_vm_ops = {
	.page_mkwrite	= nfs_vm_page_mkwrite,
	.fault		= filemap_fault,
};

static int nfs_need_sync_write(struct file *filp, struct inode *inode)
{
	struct nfs_open_context *ctx;

	if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
		return 1;
	ctx = nfs_file_open_context(filp);
	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
		return 1;
	return 0;
}

/*
 * .aio_write handler for regular NFS files.
 *
 * @iocb:    request descriptor; iocb->ki_filp is the file being written
 * @iov:     source segments
 * @nr_segs: number of entries in @iov
 * @pos:     file offset to write at
 *
 * O_DIRECT files are handed straight to nfs_file_direct_write().
 * Otherwise:
 *  - a swap file is refused with -EBUSY (IS_SWAPFILE() may be the
 *    always-0 stub defined near the top of this file);
 *  - for O_APPEND the file length is revalidated first;
 *  - a zero-length write returns 0 without touching the page cache;
 *  - the data is written through generic_file_aio_write() and, when
 *    nfs_need_sync_write() says so, synced with nfs_do_fsync(), whose
 *    error (if any) replaces the byte count.
 *
 * Returns the number of bytes written or a negative error.
 */
static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
{
	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	struct inode * inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_write(iocb, iov, nr_segs, pos);

	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		inode->i_ino, (unsigned long) count, (long long) pos);

	result = -EBUSY;
	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_filp->f_flags & O_APPEND) {
		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
		if (result)
			goto out;
	}

	result = count;
	if (!count)
		goto out;

	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	/* Return error values for O_SYNC and IS_SYNC() */
	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
		int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
		if (err < 0)
			result = err;
	}
	/*
	 * Account only what the caller is told was written: a failed or
	 * short write must not be charged with the full requested count.
	 */
	if (result > 0)
		nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, result);
out:
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	goto out;
}

/*
 * Handle a "test lock" request (called from nfs_lock() for IS_GETLK
 * commands).
 *
 * The local lock table is consulted first.  If it reports a conflict,
 * @fl already describes it and 0 is returned.  Otherwise, with a read
 * delegation or on a NFS_MOUNT_NONLM mount, @fl is reported as F_UNLCK
 * without asking the server; in all remaining cases the protocol's
 * lock() handler decides.  Everything runs between lock_kernel() and
 * unlock_kernel().
 */
static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int status = 0;

	lock_kernel();

	/* Try local locking first */
	posix_test_lock(filp, fl);
	if (fl->fl_type == F_UNLCK) {
		/* no local conflict found */
		if (nfs_have_delegation(inode, FMODE_READ) ||
		    (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
			fl->fl_type = F_UNLCK;
		else
			status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	}

	unlock_kernel();
	return status;
}

/*
 * Apply a lock to the local VFS lock tables only.
 *
 * Dispatches on which of FL_POSIX / FL_FLOCK is set in fl->fl_flags;
 * any other combination is a BUG().  A negative result is reported via
 * dprintk() and returned to the caller.
 */
static int do_vfs_lock(struct file *file, struct file_lock *fl)
{
	const unsigned int kind = fl->fl_flags & (FL_POSIX|FL_FLOCK);
	int res = 0;

	if (kind == FL_POSIX)
		res = posix_lock_file_wait(file, fl);
	else if (kind == FL_FLOCK)
		res = flock_lock_file_wait(file, fl);
	else
		BUG();

	if (res < 0)
		dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
			" - error %d!\n",
				__func__, res);
	return res;
}

/*
 * Handle an unlock request.
 *
 * Pending writes are flushed first (the flush result is not checked),
 * then the unlock is done either by the protocol's lock() handler or,
 * on a NFS_MOUNT_NONLM ("-onolock") mount, purely locally.
 */
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/* Flush all pending writes before doing anything with locks.. */
	nfs_sync_mapping(filp->f_mapping);

	/* NOTE: special case
	 * 	If we're signalled while cleaning up locks on process exit, we
	 * 	still need to complete the unlock.
	 */
	lock_kernel();
	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
		status = do_vfs_lock(filp, fl);	/* local locking only */
	else
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	unlock_kernel();

	return status;
}

/*
 * Handle a request to acquire a lock.
 *
 * Pending writes are flushed first and a flush failure aborts the
 * request.  The lock is then taken by the protocol's lock() handler
 * or, on a NFS_MOUNT_NONLM ("-onolock") mount, purely locally.  Once
 * the lock is held, the mapping is synced again and the caches are
 * zapped.
 */
static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/* Flush all pending writes before doing anything with locks.. */
	status = nfs_sync_mapping(filp->f_mapping);
	if (status != 0)
		return status;

	lock_kernel();
	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
		status = do_vfs_lock(filp, fl);	/* local locking only */
	else
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	unlock_kernel();

	if (status >= 0) {
		/*
		 * Make sure we clear the cache whenever we try to get the
		 * lock.  This makes locking act as a cache coherency point.
		 */
		nfs_sync_mapping(filp->f_mapping);
		nfs_zap_caches(inode);
	}
	return status;
}

/*
 * Lock a (portion of) a file
 *
 * .lock handler.  Mandatory-lock inodes may only be unlocked
 * (-ENOLCK otherwise); the protocol may veto the range through
 * lock_check_bounds(); after that the request is routed to do_getlk(),
 * do_unlk() or do_setlk().
 */
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode * inode = filp->f_mapping->host;

	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
			inode->i_sb->s_id, inode->i_ino,
			fl->fl_type, fl->fl_flags,
			(long long)fl->fl_start, (long long)fl->fl_end);
	nfs_inc_stats(inode, NFSIOS_VFSLOCK);

	/* No mandatory locks over NFS */
	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
		return -ENOLCK;

	if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
		int err = NFS_PROTO(inode)->lock_check_bounds(fl);

		if (err < 0)
			return err;
	}

	if (IS_GETLK(cmd))
		return do_getlk(filp, cmd, fl);
	if (fl->fl_type == F_UNLCK)
		return do_unlk(filp, cmd, fl);
	return do_setlk(filp, cmd, fl);
}

/*
 * flock()-style lock on a whole file
 *
 * .flock handler.  The lock is turned into one covering the range
 * 0..OFFSET_MAX and owned by the struct file, then handled by
 * do_unlk() or do_setlk() like any other lock.
 */
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
			filp->f_path.dentry->d_inode->i_sb->s_id,
			filp->f_path.dentry->d_inode->i_ino,
			fl->fl_type, fl->fl_flags);

	/*
	 * Only requests flagged FL_FLOCK are handled here; anything else
	 * is refused.
	 * Note: we could try to fake a POSIX lock request here by
	 * using ((u32) filp | 0x80000000) or some such as the pid.
	 * Not sure whether that would be unique, though, or whether
	 * that would break in other places.
	 */
	if ((fl->fl_flags & FL_FLOCK) == 0)
		return -ENOLCK;

	/* We're simulating flock() locks using posix locks on the server */
	fl->fl_owner = (fl_owner_t)filp;
	fl->fl_start = 0;
	fl->fl_end = OFFSET_MAX;

	return (fl->fl_type == F_UNLCK) ? do_unlk(filp, cmd, fl)
					: do_setlk(filp, cmd, fl);
}

/*
 * .setlease handler: leases are not supported, so every request is
 * rejected with -EINVAL.  None of the arguments is examined.
 */
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
{
	/*
	 * There is no protocol support for leases, so we have no way
	 * to implement them correctly in the face of opens by other
	 * clients.
	 */
	return -EINVAL;
}
Commit	Line	Data
	1	/*
	2	* linux/fs/nfs/file.c
	3	*
	4	* Copyright (C) 1992 Rick Sladkey
	5	*
	6	* Changes Copyright (C) 1994 by Florian La Roche
	7	* - Do not copy data too often around in the kernel.
	8	* - In nfs_file_read the return value of kmalloc wasn't checked.
	9	* - Put in a better version of read look-ahead buffering. Original idea
	10	* and implementation by Wai S Kok elekokws@ee.nus.sg.
	11	*
	12	* Expire cache on write to a file by Wai S Kok (Oct 1994).
	13	*
	14	* Total rewrite of read side for new NFS buffer cache.. Linus.
	15	*
	16	* nfs regular file handling functions
	17	*/
	18
	19	#include <linux/time.h>
	20	#include <linux/kernel.h>
	21	#include <linux/errno.h>
	22	#include <linux/fcntl.h>
	23	#include <linux/stat.h>
	24	#include <linux/nfs_fs.h>
	25	#include <linux/nfs_mount.h>
	26	#include <linux/mm.h>
	27	#include <linux/slab.h>
	28	#include <linux/pagemap.h>
	29	#include <linux/smp_lock.h>
	30	#include <linux/aio.h>
	31
	32	#include <asm/uaccess.h>
	33	#include <asm/system.h>
	34
	35	#include "delegation.h"
	36	#include "internal.h"
	37	#include "iostat.h"
	38
	39	#define NFSDBG_FACILITY NFSDBG_FILE
	40
	41	static int nfs_file_open(struct inode , struct file );
	42	static int nfs_file_release(struct inode , struct file );
	43	static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
	44	static int nfs_file_mmap(struct file , struct vm_area_struct );
	45	static ssize_t nfs_file_splice_read(struct file filp, loff_t ppos,
	46	struct pipe_inode_info *pipe,
	47	size_t count, unsigned int flags);
	48	static ssize_t nfs_file_read(struct kiocb , const struct iovec iov,
	49	unsigned long nr_segs, loff_t pos);
	50	static ssize_t nfs_file_write(struct kiocb , const struct iovec iov,
	51	unsigned long nr_segs, loff_t pos);
	52	static int nfs_file_flush(struct file *, fl_owner_t id);
	53	static int nfs_fsync(struct file , struct dentry dentry, int datasync);
	54	static int nfs_check_flags(int flags);
	55	static int nfs_lock(struct file filp, int cmd, struct file_lock fl);
	56	static int nfs_flock(struct file filp, int cmd, struct file_lock fl);
	57	static int nfs_setlease(struct file file, long arg, struct file_lock *fl);
	58
	59	static struct vm_operations_struct nfs_file_vm_ops;
	60
	61	const struct file_operations nfs_file_operations = {
	62	.llseek = nfs_file_llseek,
	63	.read = do_sync_read,
	64	.write = do_sync_write,
	65	.aio_read = nfs_file_read,
	66	.aio_write = nfs_file_write,
	67	#ifdef CONFIG_MMU
	68	.mmap = nfs_file_mmap,
	69	#else
	70	.mmap = generic_file_mmap,
	71	#endif
	72	.open = nfs_file_open,
	73	.flush = nfs_file_flush,
	74	.release = nfs_file_release,
	75	.fsync = nfs_fsync,
	76	.lock = nfs_lock,
	77	.flock = nfs_flock,
	78	.splice_read = nfs_file_splice_read,
	79	.check_flags = nfs_check_flags,
	80	.setlease = nfs_setlease,
	81	};
	82
	83	const struct inode_operations nfs_file_inode_operations = {
	84	.permission = nfs_permission,
	85	.getattr = nfs_getattr,
	86	.setattr = nfs_setattr,
	87	};
	88
	89	#ifdef CONFIG_NFS_V3
	90	const struct inode_operations nfs3_file_inode_operations = {
	91	.permission = nfs_permission,
	92	.getattr = nfs_getattr,
	93	.setattr = nfs_setattr,
	94	.listxattr = nfs3_listxattr,
	95	.getxattr = nfs3_getxattr,
	96	.setxattr = nfs3_setxattr,
	97	.removexattr = nfs3_removexattr,
	98	};
	99	#endif /* CONFIG_NFS_v3 */
	100
	101	/* Hack for future NFS swap support */
	102	#ifndef IS_SWAPFILE
	103	# define IS_SWAPFILE(inode) (0)
	104	#endif
	105
	106	static int nfs_check_flags(int flags)
	107	{
	108	if ((flags & (O_APPEND \| O_DIRECT)) == (O_APPEND \| O_DIRECT))
	109	return -EINVAL;
	110
	111	return 0;
	112	}
	113
	114	/*
	115	* Open file
	116	*/
	117	static int
	118	nfs_file_open(struct inode inode, struct file filp)
	119	{
	120	int res;
	121
	122	res = nfs_check_flags(filp->f_flags);
	123	if (res)
	124	return res;
	125
	126	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
	127	lock_kernel();
	128	res = NFS_PROTO(inode)->file_open(inode, filp);
	129	unlock_kernel();
	130	return res;
	131	}
	132
	133	static int
	134	nfs_file_release(struct inode inode, struct file filp)
	135	{
	136	/* Ensure that dirty pages are flushed out with the right creds */
	137	if (filp->f_mode & FMODE_WRITE)
	138	nfs_wb_all(filp->f_path.dentry->d_inode);
	139	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
	140	return NFS_PROTO(inode)->file_release(inode, filp);
	141	}
	142
	143	/**
	144	* nfs_revalidate_size - Revalidate the file size
	145	* @inode - pointer to inode struct
	146	* @file - pointer to struct file
	147	*
	148	* Revalidates the file length. This is basically a wrapper around
	149	* nfs_revalidate_inode() that takes into account the fact that we may
	150	* have cached writes (in which case we don't care about the server's
	151	* idea of what the file length is), or O_DIRECT (in which case we
	152	* shouldn't trust the cache).
	153	*/
	154	static int nfs_revalidate_file_size(struct inode inode, struct file filp)
	155	{
	156	struct nfs_server *server = NFS_SERVER(inode);
	157	struct nfs_inode *nfsi = NFS_I(inode);
	158
	159	if (server->flags & NFS_MOUNT_NOAC)
	160	goto force_reval;
	161	if (filp->f_flags & O_DIRECT)
	162	goto force_reval;
	163	if (nfsi->npages != 0)
	164	return 0;
	165	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
	166	return 0;
	167	force_reval:
	168	return __nfs_revalidate_inode(server, inode);
	169	}
	170
	171	static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
	172	{
	173	/* origin == SEEK_END => we must revalidate the cached file length */
	174	if (origin == SEEK_END) {
	175	struct inode *inode = filp->f_mapping->host;
	176	int retval = nfs_revalidate_file_size(inode, filp);
	177	if (retval < 0)
	178	return (loff_t)retval;
	179	}
	180	return remote_llseek(filp, offset, origin);
	181	}
	182
	183	/*
	184	* Helper for nfs_file_flush() and nfs_fsync()
	185	*
	186	* Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
	187	* disk, but it retrieves and clears ctx->error after synching, despite
	188	* the two being set at the same time in nfs_context_set_write_error().
	189	* This is because the former is used to notify the _next_ call to
	190	* nfs_file_write() that a write error occured, and hence cause it to
	191	* fall back to doing a synchronous write.
	192	*/
	193	static int nfs_do_fsync(struct nfs_open_context ctx, struct inode inode)
	194	{
	195	int have_error, status;
	196	int ret = 0;
	197
	198	have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
	199	status = nfs_wb_all(inode);
	200	have_error \|= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
	201	if (have_error)
	202	ret = xchg(&ctx->error, 0);
	203	if (!ret)
	204	ret = status;
	205	return ret;
	206	}
	207
	208	/*
	209	* Flush all dirty pages, and check for write errors.
	210	*
	211	*/
	212	static int
	213	nfs_file_flush(struct file *file, fl_owner_t id)
	214	{
	215	struct nfs_open_context *ctx = nfs_file_open_context(file);
	216	struct inode *inode = file->f_path.dentry->d_inode;
	217	int status;
	218
	219	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
	220
	221	if ((file->f_mode & FMODE_WRITE) == 0)
	222	return 0;
	223	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
	224
	225	/* Ensure that data+attribute caches are up to date after close() */
	226	status = nfs_do_fsync(ctx, inode);
	227	if (!status)
	228	nfs_revalidate_inode(NFS_SERVER(inode), inode);
	229	return status;
	230	}
	231
	232	static ssize_t
	233	nfs_file_read(struct kiocb iocb, const struct iovec iov,
	234	unsigned long nr_segs, loff_t pos)
	235	{
	236	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	237	struct inode * inode = dentry->d_inode;
	238	ssize_t result;
	239	size_t count = iov_length(iov, nr_segs);
	240
	241	if (iocb->ki_filp->f_flags & O_DIRECT)
	242	return nfs_file_direct_read(iocb, iov, nr_segs, pos);
	243
	244	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
	245	dentry->d_parent->d_name.name, dentry->d_name.name,
	246	(unsigned long) count, (unsigned long) pos);
	247
	248	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	249	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
	250	if (!result)
	251	result = generic_file_aio_read(iocb, iov, nr_segs, pos);
	252	return result;
	253	}
	254
	255	static ssize_t
	256	nfs_file_splice_read(struct file filp, loff_t ppos,
	257	struct pipe_inode_info *pipe, size_t count,
	258	unsigned int flags)
	259	{
	260	struct dentry *dentry = filp->f_path.dentry;
	261	struct inode *inode = dentry->d_inode;
	262	ssize_t res;
	263
	264	dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
	265	dentry->d_parent->d_name.name, dentry->d_name.name,
	266	(unsigned long) count, (unsigned long long) *ppos);
	267
	268	res = nfs_revalidate_mapping(inode, filp->f_mapping);
	269	if (!res)
	270	res = generic_file_splice_read(filp, ppos, pipe, count, flags);
	271	return res;
	272	}
	273
	274	static int
	275	nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
	276	{
	277	struct dentry *dentry = file->f_path.dentry;
	278	struct inode *inode = dentry->d_inode;
	279	int status;
	280
	281	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
	282	dentry->d_parent->d_name.name, dentry->d_name.name);
	283
	284	status = nfs_revalidate_mapping(inode, file->f_mapping);
	285	if (!status) {
	286	vma->vm_ops = &nfs_file_vm_ops;
	287	vma->vm_flags \|= VM_CAN_NONLINEAR;
	288	file_accessed(file);
	289	}
	290	return status;
	291	}
	292
	293	/*
	294	* Flush any dirty pages for this process, and check for write errors.
	295	* The return status from this call provides a reliable indication of
	296	* whether any write errors occurred for this process.
	297	*/
	298	static int
	299	nfs_fsync(struct file file, struct dentry dentry, int datasync)
	300	{
	301	struct nfs_open_context *ctx = nfs_file_open_context(file);
	302	struct inode *inode = dentry->d_inode;
	303
	304	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
	305
	306	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
	307	return nfs_do_fsync(ctx, inode);
	308	}
	309
	310	/*
	311	* This does the "real" work of the write. We must allocate and lock the
	312	* page to be sent back to the generic routine, which then copies the
	313	* data from user space.
	314	*
	315	* If the writer ends up delaying the write, the writer needs to
	316	* increment the page use counts until he is done with the page.
	317	*/
	318	static int nfs_write_begin(struct file file, struct address_space mapping,
	319	loff_t pos, unsigned len, unsigned flags,
	320	struct page pagep, void fsdata)
	321	{
	322	int ret;
	323	pgoff_t index;
	324	struct page *page;
	325	index = pos >> PAGE_CACHE_SHIFT;
	326
	327	page = __grab_cache_page(mapping, index);
	328	if (!page)
	329	return -ENOMEM;
	330	*pagep = page;
	331
	332	ret = nfs_flush_incompatible(file, page);
	333	if (ret) {
	334	unlock_page(page);
	335	page_cache_release(page);
	336	}
	337	return ret;
	338	}
	339
	340	static int nfs_write_end(struct file file, struct address_space mapping,
	341	loff_t pos, unsigned len, unsigned copied,
	342	struct page page, void fsdata)
	343	{
	344	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	345	int status;
	346
	347	lock_kernel();
	348	status = nfs_updatepage(file, page, offset, copied);
	349	unlock_kernel();
	350
	351	unlock_page(page);
	352	page_cache_release(page);
	353
	354	if (status < 0)
	355	return status;
	356	return copied;
	357	}
	358
	359	static void nfs_invalidate_page(struct page *page, unsigned long offset)
	360	{
	361	if (offset != 0)
	362	return;
	363	/* Cancel any unstarted writes on this page */
	364	nfs_wb_page_cancel(page->mapping->host, page);
	365	}
	366
	367	static int nfs_release_page(struct page *page, gfp_t gfp)
	368	{
	369	/* If PagePrivate() is set, then the page is not freeable */
	370	return 0;
	371	}
	372
	373	static int nfs_launder_page(struct page *page)
	374	{
	375	return nfs_wb_page(page->mapping->host, page);
	376	}
	377
	378	const struct address_space_operations nfs_file_aops = {
	379	.readpage = nfs_readpage,
	380	.readpages = nfs_readpages,
	381	.set_page_dirty = __set_page_dirty_nobuffers,
	382	.writepage = nfs_writepage,
	383	.writepages = nfs_writepages,
	384	.write_begin = nfs_write_begin,
	385	.write_end = nfs_write_end,
	386	.invalidatepage = nfs_invalidate_page,
	387	.releasepage = nfs_release_page,
	388	.direct_IO = nfs_direct_IO,
	389	.launder_page = nfs_launder_page,
	390	};
	391
	392	static int nfs_vm_page_mkwrite(struct vm_area_struct vma, struct page page)
	393	{
	394	struct file *filp = vma->vm_file;
	395	unsigned pagelen;
	396	int ret = -EINVAL;
	397	struct address_space *mapping;
	398
	399	lock_page(page);
	400	mapping = page->mapping;
	401	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
	402	goto out_unlock;
	403
	404	ret = 0;
	405	pagelen = nfs_page_length(page);
	406	if (pagelen == 0)
	407	goto out_unlock;
	408
	409	ret = nfs_flush_incompatible(filp, page);
	410	if (ret != 0)
	411	goto out_unlock;
	412
	413	ret = nfs_updatepage(filp, page, 0, pagelen);
	414	if (ret == 0)
	415	ret = pagelen;
	416	out_unlock:
	417	unlock_page(page);
	418	return ret;
	419	}
	420
	421	static struct vm_operations_struct nfs_file_vm_ops = {
	422	.fault = filemap_fault,
	423	.page_mkwrite = nfs_vm_page_mkwrite,
	424	};
	425
	426	static int nfs_need_sync_write(struct file filp, struct inode inode)
	427	{
	428	struct nfs_open_context *ctx;
	429
	430	if (IS_SYNC(inode) \|\| (filp->f_flags & O_SYNC))
	431	return 1;
	432	ctx = nfs_file_open_context(filp);
	433	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
	434	return 1;
	435	return 0;
	436	}
	437
	438	static ssize_t nfs_file_write(struct kiocb iocb, const struct iovec iov,
	439	unsigned long nr_segs, loff_t pos)
	440	{
	441	struct dentry * dentry = iocb->ki_filp->f_path.dentry;
	442	struct inode * inode = dentry->d_inode;
	443	ssize_t result;
	444	size_t count = iov_length(iov, nr_segs);
	445
	446	if (iocb->ki_filp->f_flags & O_DIRECT)
	447	return nfs_file_direct_write(iocb, iov, nr_segs, pos);
	448
	449	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
	450	dentry->d_parent->d_name.name, dentry->d_name.name,
	451	inode->i_ino, (unsigned long) count, (long long) pos);
	452
	453	result = -EBUSY;
	454	if (IS_SWAPFILE(inode))
	455	goto out_swapfile;
	456	/*
	457	* O_APPEND implies that we must revalidate the file length.
	458	*/
	459	if (iocb->ki_filp->f_flags & O_APPEND) {
	460	result = nfs_revalidate_file_size(inode, iocb->ki_filp);
	461	if (result)
	462	goto out;
	463	}
	464
	465	result = count;
	466	if (!count)
	467	goto out;
	468
	469	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
	470	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	471	/* Return error values for O_SYNC and IS_SYNC() */
	472	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
	473	int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
	474	if (err < 0)
	475	result = err;
	476	}
	477	out:
	478	return result;
	479
	480	out_swapfile:
	481	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	482	goto out;
	483	}
	484
	485	static int do_getlk(struct file filp, int cmd, struct file_lock fl)
	486	{
	487	struct inode *inode = filp->f_mapping->host;
	488	int status = 0;
	489
	490	lock_kernel();
	491	/* Try local locking first */
	492	posix_test_lock(filp, fl);
	493	if (fl->fl_type != F_UNLCK) {
	494	/* found a conflict */
	495	goto out;
	496	}
	497
	498	if (nfs_have_delegation(inode, FMODE_READ))
	499	goto out_noconflict;
	500
	501	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
	502	goto out_noconflict;
	503
	504	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	505	out:
	506	unlock_kernel();
	507	return status;
	508	out_noconflict:
	509	fl->fl_type = F_UNLCK;
	510	goto out;
	511	}
	512
	513	static int do_vfs_lock(struct file file, struct file_lock fl)
	514	{
	515	int res = 0;
	516	switch (fl->fl_flags & (FL_POSIX\|FL_FLOCK)) {
	517	case FL_POSIX:
	518	res = posix_lock_file_wait(file, fl);
	519	break;
	520	case FL_FLOCK:
	521	res = flock_lock_file_wait(file, fl);
	522	break;
	523	default:
	524	BUG();
	525	}
	526	if (res < 0)
	527	dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
	528	" - error %d!\n",
	529	__func__, res);
	530	return res;
	531	}
	532
	533	static int do_unlk(struct file filp, int cmd, struct file_lock fl)
	534	{
	535	struct inode *inode = filp->f_mapping->host;
	536	int status;
	537
	538	/*
	539	* Flush all pending writes before doing anything
	540	* with locks..
	541	*/
	542	nfs_sync_mapping(filp->f_mapping);
	543
	544	/* NOTE: special case
	545	* If we're signalled while cleaning up locks on process exit, we
	546	* still need to complete the unlock.
	547	*/
	548	lock_kernel();
	549	/* Use local locking if mounted with "-onolock" */
	550	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
	551	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	552	else
	553	status = do_vfs_lock(filp, fl);
	554	unlock_kernel();
	555	return status;
	556	}
	557
	558	static int do_setlk(struct file filp, int cmd, struct file_lock fl)
	559	{
	560	struct inode *inode = filp->f_mapping->host;
	561	int status;
	562
	563	/*
	564	* Flush all pending writes before doing anything
	565	* with locks..
	566	*/
	567	status = nfs_sync_mapping(filp->f_mapping);
	568	if (status != 0)
	569	goto out;
	570
	571	lock_kernel();
	572	/* Use local locking if mounted with "-onolock" */
	573	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
	574	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	575	else
	576	status = do_vfs_lock(filp, fl);
	577	unlock_kernel();
	578	if (status < 0)
	579	goto out;
	580	/*
	581	* Make sure we clear the cache whenever we try to get the lock.
	582	* This makes locking act as a cache coherency point.
	583	*/
	584	nfs_sync_mapping(filp->f_mapping);
	585	nfs_zap_caches(inode);
	586	out:
	587	return status;
	588	}
	589
	590	/*
	591	* Lock a (portion of) a file
	592	*/
	593	static int nfs_lock(struct file filp, int cmd, struct file_lock fl)
	594	{
	595	struct inode * inode = filp->f_mapping->host;
	596	int ret = -ENOLCK;
	597
	598	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
	599	inode->i_sb->s_id, inode->i_ino,
	600	fl->fl_type, fl->fl_flags,
	601	(long long)fl->fl_start, (long long)fl->fl_end);
	602	nfs_inc_stats(inode, NFSIOS_VFSLOCK);
	603
	604	/* No mandatory locks over NFS */
	605	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
	606	goto out_err;
	607
	608	if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
	609	ret = NFS_PROTO(inode)->lock_check_bounds(fl);
	610	if (ret < 0)
	611	goto out_err;
	612	}
	613
	614	if (IS_GETLK(cmd))
	615	ret = do_getlk(filp, cmd, fl);
	616	else if (fl->fl_type == F_UNLCK)
	617	ret = do_unlk(filp, cmd, fl);
	618	else
	619	ret = do_setlk(filp, cmd, fl);
	620	out_err:
	621	return ret;
	622	}
	623
	624	/*
	625	* Lock a (portion of) a file
	626	*/
	627	static int nfs_flock(struct file filp, int cmd, struct file_lock fl)
	628	{
	629	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
	630	filp->f_path.dentry->d_inode->i_sb->s_id,
	631	filp->f_path.dentry->d_inode->i_ino,
	632	fl->fl_type, fl->fl_flags);
	633
	634	/*
	635	* No BSD flocks over NFS allowed.
	636	* Note: we could try to fake a POSIX lock request here by
	637	* using ((u32) filp \| 0x80000000) or some such as the pid.
	638	* Not sure whether that would be unique, though, or whether
	639	* that would break in other places.
	640	*/
	641	if (!(fl->fl_flags & FL_FLOCK))
	642	return -ENOLCK;
	643
	644	/* We're simulating flock() locks using posix locks on the server */
	645	fl->fl_owner = (fl_owner_t)filp;
	646	fl->fl_start = 0;
	647	fl->fl_end = OFFSET_MAX;
	648
	649	if (fl->fl_type == F_UNLCK)
	650	return do_unlk(filp, cmd, fl);
	651	return do_setlk(filp, cmd, fl);
	652	}
	653
	654	static int nfs_setlease(struct file file, long arg, struct file_lock *fl)
	655	{
	656	/*
	657	* There is no protocol support for leases, so we have no way
	658	* to implement them correctly in the face of opens by other
	659	* clients.
	660	*/
	661	return -EINVAL;
	662	}