/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /*
                 * GENERIC_ALL is too much permission to request; it can
                 * cause unnecessary access denied errors on create.
                 */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT)
                posix_flags |= SMB_O_CREAT;
        if (flags & O_EXCL)
                posix_flags |= SMB_O_EXCL;
        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
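/*
 * Illustrative example (not part of the original source): with the two
 * helpers above, a userspace call such as
 *
 *      fd = open("/mnt/cifs/foo", O_CREAT | O_TRUNC | O_WRONLY, 0644);
 *
 * yields cifs_convert_flags() == GENERIC_WRITE and
 * cifs_get_disposition() == FILE_OVERWRITE_IF, i.e. "create the file if
 * it does not exist, truncate it if it does".
 */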
int cifs_posix_open(char *full_path, struct inode **pinode,
                    struct super_block *sb, int mode, unsigned int f_flags,
                    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cFYI(1, "posix open %s", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                             CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode)
                        rc = -ENOMEM;
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;

        if (!tcon->ses->server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is not a direct match for disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists);
 *      O_CREAT | O_TRUNC is similar but truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read write flags match reasonably. O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = tcon->ses->server->ops->open(xid, tcon, full_path, disposition,
                                          desired_access, create_options, fid,
                                          oplock, buf, cifs_sb);
        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        mutex_init(&cfile->fh_mutex);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_LIST_HEAD(&cfile->llist);
        tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);

        spin_lock(&cifs_file_list_lock);
        list_add(&cfile->tlist, &(tlink_tcon(tlink)->openFileList));
        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}
static void cifs_del_lock_waiters(struct cifsLockInfo *lock);
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}
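/*
 * Usage sketch (illustrative, not part of the original source): every
 * cifsFileInfo_get() must be balanced by a cifsFileInfo_put(), e.g.
 *
 *      struct cifsFileInfo *cfile = cifsFileInfo_get(open_file);
 *      ... use cfile without fear of it being freed ...
 *      cifsFileInfo_put(cfile);
 *
 * The final put (count reaching zero) closes the handle on the server
 * and frees the structure, as cifsFileInfo_put() below shows.
 */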
/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsLockInfo *li, *tmp;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                     cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;
                int rc = -ENOSYS;

                xid = get_xid();
                if (server->ops->close)
                        rc = server->ops->close(xid, tcon, &cifs_file->fid);
                free_xid(xid);
        }

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        mutex_lock(&cifsi->lock_mutex);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        mutex_unlock(&cifsi->lock_mutex);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        kfree(cifs_file);
}
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
             inode, file->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
            le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                          " unexpected error on SMB posix open"
                                          ", disabling posix open support."
                                          " Check if server update available.",
                                          tcon->ses->serverName,
                                          tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                           (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (!posix_open_ok) {
                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc)
                        goto out;
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (tcon->ses->server->ops->close)
                        tcon->ses->server->ops->close(xid, tcon, &fid);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = NO_CHANGE_64,
                        .gid    = NO_CHANGE_64,
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
        int rc = 0;

        /* BB list all locks open on this file and relock */

        return rc;
}
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
             le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                      ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}
int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cFYI(1, "Closedir inode = 0x%p", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cFYI(1, "Freeing private data in close dir");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cFYI(1, "Closing uncompleted readdir with rc %d", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cFYI(1, "closedir free smb buf in srch struct");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}
static void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}
static bool
cifs_find_fid_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cur,
                            struct cifsLockInfo **conf_lock)
{
        struct cifsLockInfo *li;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &cfile->llist, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                else if ((type & server->vals->shared_lock_type) &&
                         ((server->ops->compare_fids(cur, cfile) &&
                           current->tgid == li->pid) || type == li->type))
                        continue;
                *conf_lock = li;
                return true;
        }
        return false;
}
static bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock)
{
        bool rc = false;
        struct cifsFileInfo *fid, *tmp;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        spin_lock(&cifs_file_list_lock);
        list_for_each_entry_safe(fid, tmp, &cinode->openFileList, flist) {
                rc = cifs_find_fid_lock_conflict(fid, offset, length, type,
                                                 cfile, conf_lock);
                if (rc)
                        break;
        }
        spin_unlock(&cifs_file_list_lock);

        return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        mutex_lock(&cinode->lock_mutex);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        mutex_unlock(&cinode->lock_mutex);
        return rc;
}
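/*
 * Illustrative caller pattern (not part of the original source): a return
 * of 1 from cifs_lock_test() means the cached state was inconclusive and
 * the caller must ask the server, as cifs_getlk() below does:
 *
 *      rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
 *      if (!rc)
 *              return rc;      // answered from the local lock list
 *      // otherwise fall back to a server lock request
 */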
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        mutex_lock(&cinode->lock_mutex);
        list_add_tail(&lock->llist, &cfile->llist);
        mutex_unlock(&cinode->lock_mutex);
}
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        mutex_lock(&cinode->lock_mutex);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist);
                mutex_unlock(&cinode->lock_mutex);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                mutex_unlock(&cinode->lock_mutex);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                mutex_lock(&cinode->lock_mutex);
                list_del_init(&lock->blist);
        }

        mutex_unlock(&cinode->lock_mutex);
        return rc;
}
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
 * send a request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        mutex_lock(&cinode->lock_mutex);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        mutex_unlock(&cinode->lock_mutex);
        return rc;
}
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        mutex_unlock(&cinode->lock_mutex);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}
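/*
 * Illustrative caller pattern (not part of the original source), mirroring
 * cifs_setlk() below: 0 means the lock was cached locally, a negative
 * value is an error, and 1 means the request must still go to the server:
 *
 *      rc = cifs_posix_lock_set(file, flock);
 *      if (!rc || rc < 0)
 *              return rc;
 *      rc = CIFSSMBPosixLock(...);     // send it over the wire
 */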
static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return rc;
        }

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        cinode->can_cache_brlcks = false;
        mutex_unlock(&cinode->lock_mutex);

        kfree(buf);
        free_xid(xid);
        return rc;
}
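/*
 * Worked example (illustrative; exact sizes depend on the server and
 * architecture): with a typical maxBuf of 16384 bytes, a 32-byte
 * struct smb_hdr and a 20-byte LOCKING_ANDX_RANGE, max_num is
 * (16384 - 32) / 20 = 817, so up to 817 byte ranges are coalesced into
 * a single LOCKING_ANDX request before the buffer is flushed.
 */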
/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};
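/*
 * Illustrative expansion (not part of the original source): the macro
 * above walks the singly linked i_flock chain exactly as fs/locks.c does,
 * e.g. to count the POSIX locks on an inode:
 *
 *      struct file_lock **before;
 *      cifs_for_each_lock(inode, before) {
 *              if ((*before)->fl_flags & FL_POSIX)
 *                      count++;
 *      }
 */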
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        mutex_lock(&cinode->lock_mutex);
        if (!cinode->can_cache_brlcks) {
                mutex_unlock(&cinode->lock_mutex);
                free_xid(xid);
                return rc;
        }

        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        unlock_flocks();

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_mutex that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        lock_flocks();
        cifs_for_each_lock(cfile->dentry->d_inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cERROR(1, "Can't push all brlocks!");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        unlock_flocks();

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        cinode->can_cache_brlcks = false;
        mutex_unlock(&cinode->lock_mutex);

        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                return cifs_push_posix_locks(cfile);

        return cifs_push_mandatory_locks(cfile);
}
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cFYI(1, "Posix");
        if (flock->fl_flags & FL_FLOCK)
                cFYI(1, "Flock");
        if (flock->fl_flags & FL_SLEEP) {
                cFYI(1, "Blocking lock");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cFYI(1, "Process suspended by mandatory locking - "
                        "not implemented yet");
        if (flock->fl_flags & FL_LEASE)
                cFYI(1, "Lease on file - not implemented yet");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
                cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cFYI(1, "F_WRLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cFYI(1, "F_UNLCK");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cFYI(1, "F_RDLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cFYI(1, "F_EXLCK");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cFYI(1, "F_SHLCK");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cFYI(1, "Unknown type of lock");
}
static int
cifs_mandatory_lock(unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
                    __u64 length, __u32 type, int lock, int unlock, bool wait)
{
        return CIFSSMBLock(xid, tlink_tcon(cfile->tlink), cfile->fid.netfid,
                           current->tgid, length, offset, unlock, lock,
                           (__u8)type, wait, 0);
}
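/*
 * Illustrative call (not part of the original source): to take an
 * exclusive mandatory lock on the first 4096 bytes without blocking, and
 * then drop it again:
 *
 *      rc = cifs_mandatory_lock(xid, cfile, 0, 4096, type, 1, 0, false);
 *      ...
 *      rc = cifs_mandatory_lock(xid, cfile, 0, 4096, type, 0, 1, false);
 *
 * where type carries server->vals->exclusive_lock_type, matching the
 * lock/unlock probing done in cifs_getlk() below.
 */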
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length, type,
                                 1, 0, false);
        if (rc == 0) {
                rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                         type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                 type | server->vals->shared_lock_type, 1, 0,
                                 false);
        if (rc == 0) {
                rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                         type | server->vals->shared_lock_type,
                                         0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cERROR(1, "Error unlocking previously locked "
                                  "range %d during test of lock", rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}
static void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}
static void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}
static int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        mutex_lock(&cinode->lock_mutex);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again
                         * to the file's list if the unlock range request fails
                         * on the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist);
                                        rc = stored_rc;
                                } else
                                        /*
                                         * The unlock range request succeeded -
                                         * free the tmp list.
                                         */
                                        cifs_free_llist(&tmp_llist);
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }
                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               types[i], num, 0, buf);
                        if (stored_rc) {
                                cifs_move_llist(&tmp_llist, &cfile->llist);
                                rc = stored_rc;
                        } else
                                cifs_free_llist(&tmp_llist);
                }
        }

        mutex_unlock(&cinode->lock_mutex);

        kfree(buf);
        return rc;
}
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, int lock, int unlock,
           unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_set(file, flock);
                if (!rc || rc < 0)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                if (unlock == 1)
                        posix_lock_type = CIFS_UNLCK;

                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, NULL,
                                      posix_lock_type, wait_flag);
                goto out;
        }

        if (lock) {
                struct cifsLockInfo *lock;

                lock = cifs_lock_init(flock->fl_start, length, type);
                if (!lock)
                        return -ENOMEM;

                rc = cifs_lock_add_if(cfile, lock, wait_flag);
                if (rc < 0)
                        kfree(lock);
                if (rc <= 0)
                        goto out;

                rc = cifs_mandatory_lock(xid, cfile, flock->fl_start, length,
                                         type, 1, 0, wait_flag);
                if (rc) {
                        kfree(lock);
                        goto out;
                }

                cifs_lock_add(cfile, lock);
        } else if (unlock)
                rc = cifs_unlock_range(cfile, flock, xid);

out:
        if (flock->fl_flags & FL_POSIX)
                posix_lock_file_wait(file, flock);
        return rc;
}
int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
{
        int rc, xid;
        int lock = 0, unlock = 0;
        bool wait_flag = false;
        bool posix_lck = false;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct cifsInodeInfo *cinode;
        struct cifsFileInfo *cfile;
        __u16 netfid;
        __u32 type;

        rc = -EACCES;
        xid = get_xid();

        cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
                "end: %lld", cmd, flock->fl_flags, flock->fl_type,
                flock->fl_start, flock->fl_end);

        cfile = (struct cifsFileInfo *)file->private_data;
        tcon = tlink_tcon(cfile->tlink);

        cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
                        tcon->ses->server);

        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        netfid = cfile->fid.netfid;
        cinode = CIFS_I(file->f_path.dentry->d_inode);

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                posix_lck = true;
        /*
         * BB add code here to normalize offset and length to account for
         * negative length which we can not accept over the wire.
         */
        if (IS_GETLK(cmd)) {
                rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
                free_xid(xid);
                return rc;
        }

        if (!lock && !unlock) {
                /*
                 * if no lock or unlock then nothing to do since we do not
                 * know what it is
                 */
                free_xid(xid);
                return -EOPNOTSUPP;
        }

        rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
                        xid);
        free_xid(xid);
        return rc;
}
/*
 * Update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held.
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
                unsigned int bytes_written)
{
        loff_t end_of_write = offset + bytes_written;

        if (end_of_write > cifsi->server_eof)
                cifsi->server_eof = end_of_write;
}
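/*
 * Example (illustrative): if server_eof is 8192 and a 4096-byte write
 * lands at offset 6144, end_of_write is 10240 and server_eof advances;
 * a write entirely below 8192 leaves it alone.
 */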
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
           size_t write_size, loff_t *offset)
{
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        unsigned int xid;
        struct dentry *dentry = open_file->dentry;
        struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
        struct cifs_io_parms io_parms;

        cifs_sb = CIFS_SB(dentry->d_sb);

        cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
             *offset, dentry->d_name.name);

        tcon = tlink_tcon(open_file->tlink);
        server = tcon->ses->server;

        if (!server->ops->sync_write)
                return -ENOSYS;

        xid = get_xid();

        for (total_written = 0; write_size > total_written;
             total_written += bytes_written) {
                rc = -EAGAIN;
                while (rc == -EAGAIN) {
                        struct kvec iov[2];
                        unsigned int len;

                        if (open_file->invalidHandle) {
                                /* we could deadlock if we called
                                   filemap_fdatawait from here so tell
                                   reopen_file not to flush data to
                                   server now */
                                rc = cifs_reopen_file(open_file, false);
                                if (rc != 0)
                                        break;
                        }

                        len = min((size_t)cifs_sb->wsize,
                                  write_size - total_written);
                        /* iov[0] is reserved for smb header */
                        iov[1].iov_base = (char *)write_data + total_written;
                        iov[1].iov_len = len;
                        io_parms.pid = pid;
                        io_parms.tcon = tcon;
                        io_parms.offset = *offset;
                        io_parms.length = len;
                        rc = server->ops->sync_write(xid, open_file, &io_parms,
                                                     &bytes_written, iov, 1);
                }
                if (rc || (bytes_written == 0)) {
                        if (total_written)
                                break;
                        else {
                                free_xid(xid);
                                return rc;
                        }
                } else {
                        spin_lock(&dentry->d_inode->i_lock);
                        cifs_update_eof(cifsi, *offset, bytes_written);
                        spin_unlock(&dentry->d_inode->i_lock);
                        *offset += bytes_written;
                }
        }

        cifs_stats_bytes_written(tcon, total_written);

        if (total_written > 0) {
                spin_lock(&dentry->d_inode->i_lock);
                if (*offset > dentry->d_inode->i_size)
                        i_size_write(dentry->d_inode, *offset);
                spin_unlock(&dentry->d_inode->i_lock);
        }
        mark_inode_dirty_sync(dentry->d_inode);
        free_xid(xid);
        return total_written;
}
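/*
 * Worked example (illustrative; wsize is negotiated at mount time): with
 * wsize = 57344 (14 pages), a 200000-byte cifs_write() goes out as
 * chunks of 57344, 57344, 57344 and 27968 bytes, each chunk retried on
 * -EAGAIN after reopening an invalidated handle.
 */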
struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file = NULL;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_file_list_lock);
        /* we could simply get the first_list_entry since write-only entries
           are always at the end of the list but since the first entry might
           have a close pending, we go through the whole list */
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (fsuid_only && open_file->uid != current_fsuid())
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
                        if (!open_file->invalidHandle) {
                                /* found a good file */
                                /* lock it so it will not be closed on us */
                                cifsFileInfo_get_locked(open_file);
                                spin_unlock(&cifs_file_list_lock);
                                return open_file;
                        } /* else might as well continue, and look for
                             another, or simply have the caller reopen it
                             again rather than trying to fix this handle */
                } else /* write only file */
                        break; /* write only files are last so must be done */
        }
        spin_unlock(&cifs_file_list_lock);
        return NULL;
}
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
{
        struct cifsFileInfo *open_file, *inv_file = NULL;
        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc;
        unsigned int refind = 0;

        /* Having a null inode here (because mapping->host was set to zero by
           the VFS or MM) should not happen but we had reports of an oops (due
           to it being zero) during stress testcases so we need to check for it */

        if (cifs_inode == NULL) {
                cERROR(1, "Null inode passed to cifs_writeable_file");
                dump_stack();
                return NULL;
        }

        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);

        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;

        spin_lock(&cifs_file_list_lock);
refind_writable:
        if (refind > MAX_REOPEN_ATT) {
                spin_unlock(&cifs_file_list_lock);
                return NULL;
        }
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (!any_available && open_file->pid != current->tgid)
                        continue;
                if (fsuid_only && open_file->uid != current_fsuid())
                        continue;
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        if (!open_file->invalidHandle) {
                                /* found a good writable file */
                                cifsFileInfo_get_locked(open_file);
                                spin_unlock(&cifs_file_list_lock);
                                return open_file;
                        } else {
                                if (!inv_file)
                                        inv_file = open_file;
                        }
                }
        }
        /* couldn't find usable FH with same pid, try any available */
        if (!any_available) {
                any_available = true;
                goto refind_writable;
        }

        if (inv_file) {
                any_available = false;
                cifsFileInfo_get_locked(inv_file);
        }

        spin_unlock(&cifs_file_list_lock);

        if (inv_file) {
                rc = cifs_reopen_file(inv_file, false);
                if (!rc)
                        return inv_file;
                else {
                        spin_lock(&cifs_file_list_lock);
                        list_move_tail(&inv_file->flist,
                                       &cifs_inode->openFileList);
                        spin_unlock(&cifs_file_list_lock);
                        cifsFileInfo_put(inv_file);
                        spin_lock(&cifs_file_list_lock);
                        ++refind;
                        goto refind_writable;
                }
        }

        return NULL;
}
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
        struct address_space *mapping = page->mapping;
        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
        char *write_data;
        int rc = -EFAULT;
        int bytes_written = 0;
        struct inode *inode;
        struct cifsFileInfo *open_file;

        if (!mapping || !mapping->host)
                return -EFAULT;

        inode = page->mapping->host;

        offset += (loff_t)from;
        write_data = kmap(page);
        write_data += from;

        if ((to > PAGE_CACHE_SIZE) || (from > to)) {
                kunmap(page);
                return -EIO;
        }

        /* racing with truncate? */
        if (offset > mapping->host->i_size) {
                kunmap(page);
                return 0; /* don't care */
        }

        /* check to make sure that we are not extending the file */
        if (mapping->host->i_size - offset < (loff_t)to)
                to = (unsigned)(mapping->host->i_size - offset);

        open_file = find_writable_file(CIFS_I(mapping->host), false);
        if (open_file) {
                bytes_written = cifs_write(open_file, open_file->pid,
                                           write_data, to - from, &offset);
                cifsFileInfo_put(open_file);
                /* Does mm or vfs already set times? */
                inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
                if ((bytes_written > 0) && (offset))
                        rc = 0;
                else if (bytes_written < 0)
                        rc = bytes_written;
        } else {
                cFYI(1, "No writeable filehandles for inode");
                rc = -EIO;
        }

        kunmap(page);
        return rc;
}
/*
 * Marshal up the iov array, reserving the first one for the header. Also,
 * set wdata->bytes.
 */
static void
cifs_writepages_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
{
        int i;
        struct inode *inode = wdata->cfile->dentry->d_inode;
        loff_t size = i_size_read(inode);

        /* marshal up the pages into iov array */
        wdata->bytes = 0;
        for (i = 0; i < wdata->nr_pages; i++) {
                iov[i + 1].iov_len = min(size - page_offset(wdata->pages[i]),
                                         (loff_t)PAGE_CACHE_SIZE);
                iov[i + 1].iov_base = kmap(wdata->pages[i]);
                wdata->bytes += iov[i + 1].iov_len;
        }
}
static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
        struct TCP_Server_Info *server;
        struct page *page;
        int rc = 0;

        /*
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
                return generic_writepages(mapping, wbc);

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = true;
                scanned = true;
        }
retry:
        while (!done && index <= end) {
                unsigned int i, nr_pages, found_pages;
                pgoff_t next = 0, tofind;
                struct page **pages;

                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
                             end - index) + 1;

                wdata = cifs_writedata_alloc((unsigned int)tofind,
                                             cifs_writev_complete);
                if (!wdata) {
                        rc = -ENOMEM;
                        break;
                }

                /*
                 * find_get_pages_tag seems to return a max of 256 on each
                 * iteration, so we must call it several times in order to
                 * fill the array or the wsize is effectively limited to
                 * 256 * PAGE_CACHE_SIZE.
                 */
                found_pages = 0;
                pages = wdata->pages;
                do {
                        nr_pages = find_get_pages_tag(mapping, &index,
                                                      PAGECACHE_TAG_DIRTY,
                                                      tofind, pages);
                        found_pages += nr_pages;
                        tofind -= nr_pages;
                        pages += nr_pages;
                } while (nr_pages && tofind && index <= end);

                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        break;
                }

                nr_pages = 0;
                for (i = 0; i < found_pages; i++) {
                        page = wdata->pages[i];
                        /*
                         * At this point we hold neither mapping->tree_lock nor
                         * lock on the page itself: the page may be truncated or
                         * invalidated (changing page->mapping to NULL), or even
                         * swizzled back from swapper_space to tmpfs file
                         * mapping
                         */

                        if (nr_pages == 0)
                                lock_page(page);
                        else if (!trylock_page(page))
                                break;

                        if (unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                break;
                        }

                        if (!wbc->range_cyclic && page->index > end) {
                                done = true;
                                unlock_page(page);
                                break;
                        }

                        if (next && (page->index != next)) {
                                /* Not next consecutive page */
                                unlock_page(page);
                                break;
                        }

                        if (wbc->sync_mode != WB_SYNC_NONE)
                                wait_on_page_writeback(page);

                        if (PageWriteback(page) ||
                            !clear_page_dirty_for_io(page)) {
                                unlock_page(page);
                                break;
                        }

                        /*
                         * This actually clears the dirty bit in the radix tree.
                         * See cifs_writepage() for more commentary.
                         */
                        set_page_writeback(page);

                        if (page_offset(page) >= mapping->host->i_size) {
                                done = true;
                                unlock_page(page);
                                end_page_writeback(page);
                                break;
                        }

                        wdata->pages[i] = page;
                        next = page->index + 1;
                        ++nr_pages;
                }

                /* reset index to refind any pages skipped */
                if (nr_pages == 0)
                        index = wdata->pages[0]->index + 1;

                /* put any pages we aren't going to use */
                for (i = nr_pages; i < found_pages; i++) {
                        page_cache_release(wdata->pages[i]);
                        wdata->pages[i] = NULL;
                }

                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        continue;
                }

                wdata->sync_mode = wbc->sync_mode;
                wdata->nr_pages = nr_pages;
                wdata->offset = page_offset(wdata->pages[0]);
                wdata->marshal_iov = cifs_writepages_marshal_iov;

                do {
                        if (wdata->cfile != NULL)
                                cifsFileInfo_put(wdata->cfile);
                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
                                                          false);
                        if (!wdata->cfile) {
                                cERROR(1, "No writable handles for inode");
                                rc = -EBADF;
                                break;
                        }
                        wdata->pid = wdata->cfile->pid;
                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
                        rc = server->ops->async_writev(wdata);
                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

                for (i = 0; i < nr_pages; ++i)
                        unlock_page(wdata->pages[i]);

                /* send failure -- clean up the mess */
                if (rc != 0) {
                        for (i = 0; i < nr_pages; ++i) {
                                if (rc == -EAGAIN)
                                        redirty_page_for_writepage(wbc,
                                                           wdata->pages[i]);
                                else
                                        SetPageError(wdata->pages[i]);
                                end_page_writeback(wdata->pages[i]);
                                page_cache_release(wdata->pages[i]);
                        }
                        if (rc != -EAGAIN)
                                mapping_set_error(mapping, rc);
                }
                kref_put(&wdata->refcount, cifs_writedata_release);

                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;

                index = next;
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = true;
                index = 0;
                goto retry;
        }

        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;

        return rc;
}
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
        int rc;
        unsigned int xid;

        xid = get_xid();
/* BB add check for wbc flags */
        page_cache_get(page);
        if (!PageUptodate(page))
                cFYI(1, "ppw - page not up to date");

        /*
         * Set the "writeback" flag, and clear "dirty" in the radix tree.
         *
         * A writepage() implementation always needs to do either this,
         * or re-dirty the page with "redirty_page_for_writepage()" in
         * the case of a failure.
         *
         * Just unlocking the page will cause the radix tree tag-bits
         * to fail to update with the state of the page correctly.
         */
        set_page_writeback(page);
retry_write:
        rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
        if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
                goto retry_write;
        else if (rc == -EAGAIN)
                redirty_page_for_writepage(wbc, page);
        else if (rc != 0)
                SetPageError(page);
        else
                SetPageUptodate(page);
        end_page_writeback(page);
        page_cache_release(page);
        free_xid(xid);
        return rc;
}
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int rc = cifs_writepage_locked(page, wbc);
        unlock_page(page);
        return rc;
}
static int cifs_write_end(struct file *file, struct address_space *mapping,
                          loff_t pos, unsigned len, unsigned copied,
                          struct page *page, void *fsdata)
{
        int rc;
        struct inode *inode = mapping->host;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        __u32 pid;

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = cfile->pid;
        else
                pid = current->tgid;

        cFYI(1, "write_end for page %p from pos %lld with %d bytes",
             page, pos, copied);

        if (PageChecked(page)) {
                if (copied == len)
                        SetPageUptodate(page);
                ClearPageChecked(page);
        } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
                SetPageUptodate(page);

        if (!PageUptodate(page)) {
                char *page_data;
                unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
                unsigned int xid;

                xid = get_xid();
                /* this is probably better than directly calling
                   partialpage_write since in this function the file handle is
                   known which we might as well leverage */
                /* BB check if anything else missing out of ppw
                   such as updating last write time */
                page_data = kmap(page);
                rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
                /* if (rc < 0) should we set writebehind rc? */
                kunmap(page);
                free_xid(xid);
        } else {
                rc = copied;
                pos += copied;
                set_page_dirty(page);
        }

        if (rc > 0) {
                spin_lock(&inode->i_lock);
                if (pos > inode->i_size)
                        i_size_write(inode, pos);
                spin_unlock(&inode->i_lock);
        }

        unlock_page(page);
        page_cache_release(page);

        return rc;
}
int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
                      int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct inode *inode = file->f_path.dentry->d_inode;
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);

        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
        mutex_lock(&inode->i_mutex);

        xid = get_xid();

        cFYI(1, "Sync file - name: %s datasync: 0x%x",
             file->f_path.dentry->d_name.name, datasync);

        if (!CIFS_I(inode)->clientCanCacheRead) {
                rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        cFYI(1, "rc: %d during invalidate phase", rc);
                        rc = 0; /* don't care about it in fsync */
                }
        }

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        mutex_unlock(&inode->i_mutex);
        return rc;
}
int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
        unsigned int xid;
        int rc = 0;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsFileInfo *smbfile = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        struct inode *inode = file->f_mapping->host;

        rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
        if (rc)
                return rc;
        mutex_lock(&inode->i_mutex);

        xid = get_xid();

        cFYI(1, "Sync file - name: %s datasync: 0x%x",
             file->f_path.dentry->d_name.name, datasync);

        tcon = tlink_tcon(smbfile->tlink);
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
                server = tcon->ses->server;
                if (server->ops->flush)
                        rc = server->ops->flush(xid, tcon, &smbfile->fid);
                else
                        rc = -ENOSYS;
        }

        free_xid(xid);
        mutex_unlock(&inode->i_mutex);
        return rc;
}
/*
 * As the file closes, flush all cached write data for this inode, checking
 * for write behind errors.
 */
int cifs_flush(struct file *file, fl_owner_t id)
{
        struct inode *inode = file->f_path.dentry->d_inode;
        int rc = 0;

        if (file->f_mode & FMODE_WRITE)
                rc = filemap_write_and_wait(inode->i_mapping);

        cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
        return rc;
}
static int
cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
{
        int rc = 0;
        unsigned long i;

        for (i = 0; i < num_pages; i++) {
                pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
                if (!pages[i]) {
                        /*
                         * save number of pages we have already allocated and
                         * return with ENOMEM error
                         */
                        num_pages = i;
                        rc = -ENOMEM;
                        break;
                }
        }

        if (rc) {
                for (i = 0; i < num_pages; i++)
                        put_page(pages[i]);
        }
        return rc;
}
static size_t
get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
{
        size_t num_pages;
        size_t clen;

        clen = min_t(const size_t, len, wsize);
        num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);

        if (cur_len)
                *cur_len = clen;

        return num_pages;
}
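/*
 * Worked example (illustrative, PAGE_SIZE = 4096): for wsize = 57344 and
 * len = 200000, clen = min(200000, 57344) = 57344, so *cur_len is set to
 * 57344 and the function returns DIV_ROUND_UP(57344, 4096) = 14 pages.
 */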
static void
cifs_uncached_marshal_iov(struct kvec *iov, struct cifs_writedata *wdata)
{
        int i;
        size_t bytes = wdata->bytes;

        /* marshal up the pages into iov array */
        for (i = 0; i < wdata->nr_pages; i++) {
                iov[i + 1].iov_len = min_t(size_t, bytes, PAGE_SIZE);
                iov[i + 1].iov_base = kmap(wdata->pages[i]);
                bytes -= iov[i + 1].iov_len;
        }
}
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
        int i;
        struct cifs_writedata *wdata = container_of(work,
                                        struct cifs_writedata, work);
        struct inode *inode = wdata->cfile->dentry->d_inode;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);

        spin_lock(&inode->i_lock);
        cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
        if (cifsi->server_eof > inode->i_size)
                i_size_write(inode, cifsi->server_eof);
        spin_unlock(&inode->i_lock);

        complete(&wdata->done);

        if (wdata->result != -EAGAIN) {
                for (i = 0; i < wdata->nr_pages; i++)
                        put_page(wdata->pages[i]);
        }

        kref_put(&wdata->refcount, cifs_writedata_release);
}
/* attempt to send write to server, retry on any -EAGAIN errors */
static int
cifs_uncached_retry_writev(struct cifs_writedata *wdata)
{
        int rc;
        struct TCP_Server_Info *server;

        server = tlink_tcon(wdata->cfile->tlink)->ses->server;

        do {
                if (wdata->cfile->invalidHandle) {
                        rc = cifs_reopen_file(wdata->cfile, false);
                        if (rc != 0)
                                continue;
                }
                rc = server->ops->async_writev(wdata);
        } while (rc == -EAGAIN);

        return rc;
}
static ssize_t
cifs_iovec_write(struct file *file, const struct iovec *iov,
                 unsigned long nr_segs, loff_t *poffset)
{
        unsigned long nr_pages, i;
        size_t copied, len, cur_len;
        ssize_t total_written = 0;
        loff_t offset;
        struct iov_iter it;
        struct cifsFileInfo *open_file;
        struct cifs_tcon *tcon;
        struct cifs_sb_info *cifs_sb;
        struct cifs_writedata *wdata, *tmp;
        struct list_head wdata_list;
        int rc;
        pid_t pid;

        len = iov_length(iov, nr_segs);
        if (!len)
                return 0;

        rc = generic_write_checks(file, poffset, &len, 0);
        if (rc)
                return rc;

        INIT_LIST_HEAD(&wdata_list);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        open_file = file->private_data;
        tcon = tlink_tcon(open_file->tlink);

        if (!tcon->ses->server->ops->async_writev)
                return -ENOSYS;

        offset = *poffset;

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        iov_iter_init(&it, iov, nr_segs, len, 0);
        do {
                size_t save_len;

                nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
                wdata = cifs_writedata_alloc(nr_pages,
                                             cifs_uncached_writev_complete);
                if (!wdata) {
                        rc = -ENOMEM;
                        break;
                }

                rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
                if (rc) {
                        kfree(wdata);
                        break;
                }

                save_len = cur_len;
                for (i = 0; i < nr_pages; i++) {
                        copied = min_t(const size_t, cur_len, PAGE_SIZE);
                        copied = iov_iter_copy_from_user(wdata->pages[i], &it,
                                                         0, copied);
                        cur_len -= copied;
                        iov_iter_advance(&it, copied);
                }
                cur_len = save_len - cur_len;

                wdata->sync_mode = WB_SYNC_ALL;
                wdata->nr_pages = nr_pages;
                wdata->offset = (__u64)offset;
                wdata->cfile = cifsFileInfo_get(open_file);
                wdata->pid = pid;
                wdata->bytes = cur_len;
                wdata->marshal_iov = cifs_uncached_marshal_iov;
                rc = cifs_uncached_retry_writev(wdata);
                if (rc) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        break;
                }

                list_add_tail(&wdata->list, &wdata_list);
                offset += cur_len;
                len -= cur_len;
        } while (len > 0);

        /*
         * If at least one write was successfully sent, then discard any rc
         * value from the later writes. If the other write succeeds, then
         * we'll end up returning whatever was written. If it fails, then
         * we'll get a new rc value from that.
         */
        if (!list_empty(&wdata_list))
                rc = 0;

        /*
         * Wait for and collect replies for any successful sends in order of
         * increasing offset. Once an error is hit or we get a fatal signal
         * while waiting, then return without waiting for any more replies.
         */
restart_loop:
        list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
                if (!rc) {
                        /* FIXME: freezable too? */
                        rc = wait_for_completion_killable(&wdata->done);
                        if (rc)
                                rc = -EINTR;
                        else if (wdata->result)
                                rc = wdata->result;
                        else
                                total_written += wdata->bytes;

                        /* resend call if it's a retryable error */
                        if (rc == -EAGAIN) {
                                rc = cifs_uncached_retry_writev(wdata);
                                goto restart_loop;
                        }
                }
                list_del_init(&wdata->list);
                kref_put(&wdata->refcount, cifs_writedata_release);
        }

        if (total_written > 0)
                *poffset += total_written;

        cifs_stats_bytes_written(tcon, total_written);
        return total_written ? total_written : (ssize_t)rc;
}
ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
                         unsigned long nr_segs, loff_t pos)
{
        ssize_t written;
        struct inode *inode;

        inode = iocb->ki_filp->f_path.dentry->d_inode;

        /*
         * BB - optimize the way when signing is disabled. We can drop this
         * extra memory-to-memory copying and use iovec buffers for
         * constructing the write request.
         */

        written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
        if (written > 0) {
                CIFS_I(inode)->invalid_mapping = true;
                iocb->ki_pos = pos;
        }

        return written;
}
ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                           unsigned long nr_segs, loff_t pos)
{
        struct inode *inode;

        inode = iocb->ki_filp->f_path.dentry->d_inode;

        if (CIFS_I(inode)->clientCanCacheAll)
                return generic_file_aio_write(iocb, iov, nr_segs, pos);

        /*
         * In strict cache mode we need to write the data to the server exactly
         * from pos to pos+len-1 rather than flush all affected pages, because
         * it may cause an error with mandatory locks on those pages but not
         * on the region from pos to pos+len-1.
         */

        return cifs_user_writev(iocb, iov, nr_segs, pos);
}
static struct cifs_readdata *
cifs_readdata_alloc(unsigned int nr_vecs, work_func_t complete)
{
        struct cifs_readdata *rdata;

        rdata = kzalloc(sizeof(*rdata) +
                        sizeof(struct kvec) * nr_vecs, GFP_KERNEL);
        if (rdata != NULL) {
                kref_init(&rdata->refcount);
                INIT_LIST_HEAD(&rdata->list);
                init_completion(&rdata->done);
                INIT_WORK(&rdata->work, complete);
                INIT_LIST_HEAD(&rdata->pages);
        }
        return rdata;
}
void
cifs_readdata_release(struct kref *refcount)
{
        struct cifs_readdata *rdata = container_of(refcount,
                                        struct cifs_readdata, refcount);

        if (rdata->cfile)
                cifsFileInfo_put(rdata->cfile);

        kfree(rdata);
}
static int
cifs_read_allocate_pages(struct list_head *list, unsigned int npages)
{
        int rc = 0;
        struct page *page, *tpage;
        unsigned int i;

        for (i = 0; i < npages; i++) {
                page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
                if (!page) {
                        rc = -ENOMEM;
                        break;
                }
                list_add(&page->lru, list);
        }

        if (rc) {
                list_for_each_entry_safe(page, tpage, list, lru) {
                        list_del(&page->lru);
                        put_page(page);
                }
        }
        return rc;
}
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
        struct page *page, *tpage;
        struct cifs_readdata *rdata = container_of(refcount,
                                        struct cifs_readdata, refcount);

        list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
                list_del(&page->lru);
                put_page(page);
        }
        cifs_readdata_release(refcount);
}
static int
cifs_retry_async_readv(struct cifs_readdata *rdata)
{
	int rc;
	struct TCP_Server_Info *server;

	server = tlink_tcon(rdata->cfile->tlink)->ses->server;

	do {
		if (rdata->cfile->invalidHandle) {
			rc = cifs_reopen_file(rdata->cfile, true);
			if (rc != 0)
				continue;
		}
		rc = server->ops->async_readv(rdata);
	} while (rc == -EAGAIN);

	return rc;
}
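/*
 * Illustrative sketch (not part of the original file): the reopen-and-retry
 * loop above in generic form - retry an operation while it reports -EAGAIN,
 * revalidating the file handle first on each pass. The "sketch_retry" name
 * and the callback shape are hypothetical.
 */
#if 0	/* example only, not built */
static int sketch_retry(struct cifsFileInfo *cfile,
			int (*op)(void *), void *arg)
{
	int rc;

	do {
		if (cfile->invalidHandle) {
			rc = cifs_reopen_file(cfile, true);
			if (rc != 0)
				continue;	/* exits unless rc == -EAGAIN */
		}
		rc = op(arg);
	} while (rc == -EAGAIN);
	return rc;
}
#endif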
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:	the readdata response with list of pages holding data
 * @iov:	vector in which we should copy the data
 * @nr_segs:	number of segments in vector
 * @offset:	offset into file of the first iovec
 * @copied:	used to return the amount of data copied to the iov
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 */
static ssize_t
cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
			unsigned long nr_segs, loff_t offset, ssize_t *copied)
{
	int rc = 0;
	struct iov_iter ii;
	size_t pos = rdata->offset - offset;
	struct page *page, *tpage;
	ssize_t remaining = rdata->bytes;
	unsigned char *pdata;

	/* set up iov_iter and advance to the correct offset */
	iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
	iov_iter_advance(&ii, pos);

	*copied = 0;
	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
		ssize_t copy;

		/* copy a whole page or whatever's left */
		copy = min_t(ssize_t, remaining, PAGE_SIZE);

		/* ...but limit it to whatever space is left in the iov */
		copy = min_t(ssize_t, copy, iov_iter_count(&ii));

		/* go while there's data to be copied and no errors */
		if (copy && !rc) {
			pdata = kmap(page);
			rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
					       copy);
			kunmap(page);
			if (!rc) {
				*copied += copy;
				remaining -= copy;
				iov_iter_advance(&ii, copy);
			}
		}

		list_del(&page->lru);
		put_page(page);
	}

	return rc;
}
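/*
 * Worked example for the bookkeeping above (values are illustrative): with a
 * request issued at rdata->offset = 8192 against an iovec read that began at
 * offset = 0, pos = 8192, so the iov_iter is first advanced past the bytes
 * that earlier requests will fill in. Each page then contributes
 * min(remaining, PAGE_SIZE) bytes, further clipped to iov_iter_count() so a
 * short iovec is never overrun.
 */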
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	/* if the result is non-zero then the pages weren't kmapped */
	if (rdata->result == 0) {
		struct page *page;

		list_for_each_entry(page, &rdata->pages, lru)
			kunmap(page);
	}

	complete(&rdata->done);
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
static int
cifs_uncached_read_marshal_iov(struct cifs_readdata *rdata,
			       unsigned int remaining)
{
	int len = 0;
	struct page *page, *tpage;

	rdata->nr_iov = 1;
	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
		if (remaining >= PAGE_SIZE) {
			/* enough data to fill the page */
			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
			rdata->iov[rdata->nr_iov].iov_len = PAGE_SIZE;
			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
				rdata->nr_iov, page->index,
				rdata->iov[rdata->nr_iov].iov_base,
				rdata->iov[rdata->nr_iov].iov_len);
			++rdata->nr_iov;
			len += PAGE_SIZE;
			remaining -= PAGE_SIZE;
		} else if (remaining > 0) {
			/* enough for partial page, fill and zero the rest */
			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
			rdata->iov[rdata->nr_iov].iov_len = remaining;
			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
				rdata->nr_iov, page->index,
				rdata->iov[rdata->nr_iov].iov_base,
				rdata->iov[rdata->nr_iov].iov_len);
			memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
				'\0', PAGE_SIZE - remaining);
			++rdata->nr_iov;
			len += remaining;
			remaining = 0;
		} else {
			/* no need to hold page hostage */
			list_del(&page->lru);
			put_page(page);
		}
	}

	return len;
}
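/*
 * Note added for clarity (not in the original file): the kmap() calls above
 * stay live across the async read. The matching kunmap() for each mapped
 * page happens in cifs_uncached_readv_complete() once the response arrives,
 * and is skipped entirely when rdata->result is non-zero, since in that case
 * the pages were never kmapped.
 */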
static ssize_t
cifs_iovec_read(struct file *file, const struct iovec *iov,
		unsigned long nr_segs, loff_t *poffset)
{
	ssize_t rc;
	size_t len, cur_len;
	ssize_t total_read = 0;
	loff_t offset = *poffset;
	unsigned int npages;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *open_file;
	struct cifs_readdata *rdata, *tmp;
	struct list_head rdata_list;
	pid_t pid;

	if (!nr_segs)
		return 0;

	len = iov_length(iov, nr_segs);
	if (!len)
		return 0;

	INIT_LIST_HEAD(&rdata_list);
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);

	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cFYI(1, "attempting read on write only file instance");

	do {
		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

		/* allocate a readdata struct */
		rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
		if (!rdata) {
			rc = -ENOMEM;
			break;
		}

		rc = cifs_read_allocate_pages(&rdata->pages, npages);
		if (rc)
			goto error;

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->marshal_iov = cifs_uncached_read_marshal_iov;

		rc = cifs_retry_async_readv(rdata);
error:
		if (rc) {
			kref_put(&rdata->refcount,
				 cifs_uncached_readdata_release);
			break;
		}

		list_add_tail(&rdata->list, &rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&rdata_list))
		rc = 0;

	/* the loop below should proceed in the order of increasing offsets */
restart_loop:
	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
		if (!rc) {
			ssize_t copied;

			/* FIXME: freezable sleep too? */
			rc = wait_for_completion_killable(&rdata->done);
			if (rc)
				rc = -EINTR;
			else if (rdata->result)
				rc = rdata->result;
			else {
				rc = cifs_readdata_to_iov(rdata, iov,
							  nr_segs, *poffset,
							  &copied);
				total_read += copied;
			}

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				rc = cifs_retry_async_readv(rdata);
				goto restart_loop;
			}
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	cifs_stats_bytes_read(tcon, total_read);
	*poffset += total_read;

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	return total_read ? total_read : rc;
}
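/*
 * Worked example for the chunking above (illustrative numbers): with
 * cifs_sb->rsize = 16384 and a 40000-byte iovec, the do/while loop issues
 * cur_len chunks of 16384, 16384 and 7232 bytes, needing
 * DIV_ROUND_UP(cur_len, PAGE_SIZE) = 4, 4 and 2 pages respectively on a
 * 4k-page machine. The reply loop then copies them back in offset order.
 */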
ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
			unsigned long nr_segs, loff_t pos)
{
	ssize_t read;

	read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
	if (read > 0)
		iocb->ki_pos = pos;

	return read;
}
ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
			  unsigned long nr_segs, loff_t pos)
{
	struct inode *inode;

	inode = iocb->ki_filp->f_path.dentry->d_inode;

	if (CIFS_I(inode)->clientCanCacheRead)
		return generic_file_aio_read(iocb, iov, nr_segs, pos);

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have a level II oplock, because the server can delay
	 * the mtime change - so we can't make a decision about invalidating
	 * the inode. We can also fail reading pages if there are mandatory
	 * locks on pages affected by this read but not on the region from
	 * pos to pos+len-1.
	 */

	return cifs_user_readv(iocb, iov, nr_segs, pos);
}
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cFYI(1, "attempting read on write only file instance");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		current_read_size = min_t(uint, read_size - total_read, rsize);
		/*
		 * For Windows ME and 9x we do not want to request more than
		 * it negotiated, since it will refuse the read then.
		 */
		if ((tcon->ses) && !(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
			current_read_size = min_t(uint, current_read_size,
						  CIFSMaxBufSize);
		}
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, open_file, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		}
		if (rc || (bytes_read == 0)) {
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
/*
 * If the page is mmap'ed into a process' page tables, then we need to make
 * sure that it doesn't change while being written back.
 */
static int
cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;

	lock_page(page);
	return VM_FAULT_LOCKED;
}

static struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = cifs_page_mkwrite,
};
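/*
 * Note added for clarity (not in the original file): returning
 * VM_FAULT_LOCKED tells the fault path that ->page_mkwrite is handing the
 * page back locked; the VM marks it dirty and unlocks it afterwards. Taking
 * the page lock here is what keeps writeback from racing with the process
 * dirtying the mmapped page.
 */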
int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;
	struct inode *inode = file->f_path.dentry->d_inode;

	xid = get_xid();

	if (!CIFS_I(inode)->clientCanCacheRead) {
		rc = cifs_invalidate_mapping(inode);
		if (rc)
			return rc;
	}

	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}
int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	int rc, xid;

	xid = get_xid();
	rc = cifs_revalidate_file(file);
	if (rc) {
		cFYI(1, "Validation prior to mmap failed, error=%d", rc);
		free_xid(xid);
		return rc;
	}
	rc = generic_file_mmap(file, vma);
	if (rc == 0)
		vma->vm_ops = &cifs_file_vm_ops;
	free_xid(xid);
	return rc;
}
static void
cifs_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);
	struct page *page, *tpage;

	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
		list_del(&page->lru);
		lru_cache_add_file(page);

		if (rdata->result == 0) {
			kunmap(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0)
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		page_cache_release(page);
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
static int
cifs_readpages_marshal_iov(struct cifs_readdata *rdata, unsigned int remaining)
{
	int len = 0;
	struct page *page, *tpage;
	u64 eof;
	pgoff_t eof_index;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
	cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);

	rdata->nr_iov = 1;
	list_for_each_entry_safe(page, tpage, &rdata->pages, lru) {
		if (remaining >= PAGE_CACHE_SIZE) {
			/* enough data to fill the page */
			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
			rdata->iov[rdata->nr_iov].iov_len = PAGE_CACHE_SIZE;
			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
				rdata->nr_iov, page->index,
				rdata->iov[rdata->nr_iov].iov_base,
				rdata->iov[rdata->nr_iov].iov_len);
			++rdata->nr_iov;
			len += PAGE_CACHE_SIZE;
			remaining -= PAGE_CACHE_SIZE;
		} else if (remaining > 0) {
			/* enough for partial page, fill and zero the rest */
			rdata->iov[rdata->nr_iov].iov_base = kmap(page);
			rdata->iov[rdata->nr_iov].iov_len = remaining;
			cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
				rdata->nr_iov, page->index,
				rdata->iov[rdata->nr_iov].iov_base,
				rdata->iov[rdata->nr_iov].iov_len);
			memset(rdata->iov[rdata->nr_iov].iov_base + remaining,
				'\0', PAGE_CACHE_SIZE - remaining);
			++rdata->nr_iov;
			len += remaining;
			remaining = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_CACHE_SIZE);
			list_del(&page->lru);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			page_cache_release(page);
		} else {
			/* no need to hold page hostage */
			list_del(&page->lru);
			lru_cache_add_file(page);
			unlock_page(page);
			page_cache_release(page);
		}
	}

	return len;
}
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
	unsigned int rsize = cifs_sb->rsize;
	pid_t pid;

	/*
	 * Give up immediately if rsize is too small to read an entire page.
	 * The VFS will fall back to readpage. We should never reach this
	 * point however since we set ra_pages to 0 when the rsize is smaller
	 * than a cache page.
	 */
	if (unlikely(rsize < PAGE_CACHE_SIZE))
		return 0;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	INIT_LIST_HEAD(&tmplist);

	cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
		mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list)) {
		unsigned int bytes = PAGE_CACHE_SIZE;
		unsigned int expected_index;
		unsigned int nr_pages = 1;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;

		page = list_entry(page_list->prev, struct page, lru);

		/*
		 * Lock the page and put it in the cache. Since no one else
		 * should have access to this page, we're safe to simply set
		 * PG_locked without checking it first.
		 */
		__set_page_locked(page);
		rc = add_to_page_cache_locked(page, mapping,
					      page->index, GFP_KERNEL);

		/* give up if we can't stick it in the cache */
		if (rc) {
			__clear_page_locked(page);
			break;
		}

		/* move first page to the tmplist */
		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
		list_move_tail(&page->lru, &tmplist);

		/* now try and add more pages onto the request */
		expected_index = page->index + 1;
		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
			/* discontinuity ? */
			if (page->index != expected_index)
				break;

			/* would this page push the read over the rsize? */
			if (bytes + PAGE_CACHE_SIZE > rsize)
				break;

			__set_page_locked(page);
			if (add_to_page_cache_locked(page, mapping,
						page->index, GFP_KERNEL)) {
				__clear_page_locked(page);
				break;
			}
			list_move_tail(&page->lru, &tmplist);
			bytes += PAGE_CACHE_SIZE;
			expected_index++;
			nr_pages++;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			rc = -ENOMEM;
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->marshal_iov = cifs_readpages_marshal_iov;
		list_splice_init(&tmplist, &rdata->pages);

		rc = cifs_retry_async_readv(rdata);
		if (rc != 0) {
			list_for_each_entry_safe(page, tpage, &rdata->pages,
						 lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				page_cache_release(page);
			}
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	return rc;
}
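/*
 * Worked example for the batching above (illustrative numbers): a page with
 * index 3 maps to file offset 3 << PAGE_CACHE_SHIFT = 12288 on a 4k-page
 * machine. With rsize = 16384, at most four contiguous pages are pulled from
 * page_list into one rdata before "bytes + PAGE_CACHE_SIZE > rsize" stops
 * the inner loop and the async read is issued.
 */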
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
	if (rc == 0)
		goto read_complete;

	page_cache_get(page);
	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cFYI(1, "Bytes read %d", rc);

	file->f_path.dentry->d_inode->i_atime =
		current_fs_time(file->f_path.dentry->d_inode->i_sb);

	if (PAGE_CACHE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);

	rc = 0;

io_error:
	kunmap(page);
	page_cache_release(page);

read_complete:
	return rc;
}
static int cifs_readpage(struct file *file, struct page *page)
{
	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cFYI(1, "readpage %p at offset %d 0x%x",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	unlock_page(page);

	free_xid(xid);
	return rc;
}
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_file_list_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_file_list_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_file_list_lock);
	return 0;
}
/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing a refresh of the inode only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
	if (cifsInode == NULL)
		return true;

	if (is_inode_writable(cifsInode)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since there is no page cache to corrupt on
			   directio, we can change size safely */
			return true;
		}

		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cFYI(1, "write_begin from %lld len %d", (long long)pos, len);

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_CACHE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_I(mapping->host)->clientCanCacheRead) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_CACHE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
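/*
 * Worked example for the oplock optimization above (illustrative numbers):
 * writing len = 200 bytes at pos = 4196 lands in the page starting at
 * page_start = 4096 with offset = 100. If that page lies beyond the EOF,
 * zero_user_segments() clears bytes [0, 100) and [300, PAGE_CACHE_SIZE), and
 * SetPageChecked() records that the untouched parts count as up to date, so
 * no read from the server is needed before the copy-in.
 */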
static int cifs_release_page(struct page *page, gfp_t gfp)
{
	if (PagePrivate(page))
		return 0;

	return cifs_fscache_release_page(page, gfp);
}
static void cifs_invalidate_page(struct page *page, unsigned long offset)
{
	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

	if (offset == 0)
		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}
static int cifs_launder_page(struct page *page)
{
	int rc = 0;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cFYI(1, "Launder page: %p", page);

	if (clear_page_dirty_for_io(page))
		rc = cifs_writepage_locked(page, &wbc);

	cifs_fscache_invalidate_page(page, page->mapping->host);
	return rc;
}
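/*
 * Note added for clarity (not in the original file): the writeback_control
 * above targets exactly one page - range_start/range_end bracket the byte
 * span [page_offset, page_offset + PAGE_CACHE_SIZE - 1] - and WB_SYNC_ALL
 * makes the launder synchronous, as required before the page can be freed.
 */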
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = cfile->dentry->d_inode;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	int rc = 0;

	if (inode && S_ISREG(inode->i_mode)) {
		if (cinode->clientCanCacheRead)
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (cinode->clientCanCacheRead == 0) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			invalidate_remote_inode(inode);
		}
		cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cERROR(1, "Push locks rc = %d", rc);

	/*
	 * Releasing a stale oplock after a recent reconnect of the SMB session
	 * (using a now-incorrect file handle) is not a data integrity issue,
	 * but do not bother sending an oplock release if the session to the
	 * server is still disconnected, since the server has already released
	 * the oplock in that case.
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->fid.netfid,
				 current->tgid, 0, 0, 0, 0,
				 LOCKING_ANDX_OPLOCK_RELEASE, false,
				 cinode->clientCanCacheRead ? 1 : 0);
		cFYI(1, "Oplock release rc = %d", rc);
	}
}
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};