// SPDX-License-Identifier: LGPL-2.1
/*
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 */
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
#include "cifs_ioctl.h"
#include "cached_dir.h"
/*
 * Remove the dirty flags from a span of pages.
 */
static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
		if (xas_retry(&xas, folio))
			continue;
		xas_pause(&xas);
		rcu_read_unlock();
		folio_lock(folio);
		folio_clear_dirty_for_io(folio);
		folio_unlock(folio);
		rcu_read_lock();
	}

	rcu_read_unlock();
}

/*
 * Completion of write to server.
 */
void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_detach_private(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Failure of write to server.
 */
void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (xas_retry(&xas, folio))
			continue;
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		folio_set_error(folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

/*
 * Redirty pages after a temporary failure.
 */
void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
{
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio;
	pgoff_t end;

	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);

	if (!len)
		return;

	rcu_read_lock();

	end = (start + len - 1) / PAGE_SIZE;
	xas_for_each(&xas, folio, end) {
		if (!folio_test_writeback(folio)) {
			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
				  len, start, folio->index, end);
			continue;
		}

		filemap_dirty_folio(folio->mapping, folio);
		folio_end_writeback(folio);
	}

	rcu_read_unlock();
}

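/*
 * Note on the folio span helpers above: the three writeback-completion paths
 * map RPC outcomes onto folio state. Success detaches the folio private data
 * and ends writeback; a hard failure additionally sets the error flag; a
 * temporary failure re-dirties the folios so a later writeback pass retries
 * the same span.
 */
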
/*
 * Mark as invalid, all open files on tree connections since they
 * were closed when session to server was lost.
 */
void
cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file = NULL;
	struct list_head *tmp;
	struct list_head *tmp1;

	/* only send once per connect */
	spin_lock(&tcon->tc_lock);
	if (tcon->need_reconnect)
		tcon->status = TID_NEED_RECON;

	if (tcon->status != TID_NEED_RECON) {
		spin_unlock(&tcon->tc_lock);
		return;
	}
	tcon->status = TID_IN_FILES_INVALIDATE;
	spin_unlock(&tcon->tc_lock);

	/* list all files open on tree connection and mark them invalid */
	spin_lock(&tcon->open_file_lock);
	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
		open_file->invalidHandle = true;
		open_file->oplock_break_cancelled = true;
	}
	spin_unlock(&tcon->open_file_lock);

	invalidate_all_cached_dirs(tcon);
	spin_lock(&tcon->tc_lock);
	if (tcon->status == TID_IN_FILES_INVALIDATE)
		tcon->status = TID_NEED_TCON;
	spin_unlock(&tcon->tc_lock);

	/*
	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
	 * to this tcon.
	 */
}

static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
{
	if ((flags & O_ACCMODE) == O_RDONLY)
		return GENERIC_READ;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
	else if ((flags & O_ACCMODE) == O_RDWR) {
		/* GENERIC_ALL is too much permission to request
		   can cause unnecessary access denied on create */
		/* return GENERIC_ALL; */
		return (GENERIC_READ | GENERIC_WRITE);
	}

	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
		FILE_READ_EA | FILE_READ_DATA);
}

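/*
 * For illustration: an open(2) with O_WRONLY normally maps to GENERIC_WRITE
 * only, but when the handle may also have to serve fscache reads
 * (rdwr_for_fscache == 1) it is widened to GENERIC_READ | GENERIC_WRITE,
 * while plain O_RDONLY stays GENERIC_READ.
 */
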
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static u32 cifs_posix_convert_flags(unsigned int flags)
{
	u32 posix_flags = 0;

	if ((flags & O_ACCMODE) == O_RDONLY)
		posix_flags = SMB_O_RDONLY;
	else if ((flags & O_ACCMODE) == O_WRONLY)
		posix_flags = SMB_O_WRONLY;
	else if ((flags & O_ACCMODE) == O_RDWR)
		posix_flags = SMB_O_RDWR;

	if (flags & O_CREAT) {
		posix_flags |= SMB_O_CREAT;
		if (flags & O_EXCL)
			posix_flags |= SMB_O_EXCL;
	} else if (flags & O_EXCL)
		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
			 current->comm, current->tgid);

	if (flags & O_TRUNC)
		posix_flags |= SMB_O_TRUNC;
	/* be safe and imply O_SYNC for O_DSYNC */
	if (flags & O_DSYNC)
		posix_flags |= SMB_O_SYNC;
	if (flags & O_DIRECTORY)
		posix_flags |= SMB_O_DIRECTORY;
	if (flags & O_NOFOLLOW)
		posix_flags |= SMB_O_NOFOLLOW;
	if (flags & O_DIRECT)
		posix_flags |= SMB_O_DIRECT;

	return posix_flags;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

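/*
 * Example of the POSIX mapping above: O_WRONLY | O_CREAT | O_EXCL becomes
 * SMB_O_WRONLY | SMB_O_CREAT | SMB_O_EXCL, while O_EXCL without O_CREAT is
 * deliberately dropped (O_EXCL is only meaningful together with O_CREAT in
 * open(2) semantics).
 */
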
static inline int cifs_get_disposition(unsigned int flags)
{
	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
		return FILE_CREATE;
	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
		return FILE_OVERWRITE_IF;
	else if ((flags & O_CREAT) == O_CREAT)
		return FILE_OPEN_IF;
	else if ((flags & O_TRUNC) == O_TRUNC)
		return FILE_OVERWRITE;
	else
		return FILE_OPEN;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int cifs_posix_open(const char *full_path, struct inode **pinode,
		    struct super_block *sb, int mode, unsigned int f_flags,
		    __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		cifs_revalidate_mapping(*pinode);
		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	if (!server->ops->open)
		return -ENOSYS;

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/
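
	/*
	 * Worked example of the table above: open(path, O_RDWR | O_CREAT |
	 * O_TRUNC | O_SYNC) yields disposition FILE_OVERWRITE_IF, desired
	 * access GENERIC_READ | GENERIC_WRITE, and CREATE_WRITE_THROUGH in
	 * create_options (from the O_SYNC check below).
	 */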

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = fid,
	};

	rc = server->ops->open(xid, &oparms, oplock, buf);
	if (rc) {
		if (rc == -EACCES && rdwr_for_fscache == 1) {
			desired_access = cifs_convert_flags(f_flags, 0);
			rdwr_for_fscache = 2;
			goto retry_open;
		}
		return rc;
	}
	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

	/* TODO: Add support for calling posix query info but with passing in fid */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

	return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
	struct cifs_fid_locks *cur;
	bool has_locks = false;

	down_read(&cinode->lock_sem);
	list_for_each_entry(cur, &cinode->llist, llist) {
		if (!list_empty(&cur->locks)) {
			has_locks = true;
			break;
		}
	}
	up_read(&cinode->lock_sem);
	return has_locks;
}

void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}

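/*
 * Note (hedged rationale): cifs_down_write() polls with down_write_trylock()
 * in 10 ms steps rather than sleeping in down_write(). A writer that never
 * queues on the rwsem cannot block later readers of lock_sem, which this
 * code takes in paths that may re-enter it.
 */
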
static void cifsFileInfo_put_work(struct work_struct *work);
void serverclose_work(struct work_struct *work);

struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
				       struct tcon_link *tlink, __u32 oplock,
				       const char *symlink_target)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	if (symlink_target) {
		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
		if (!cfile->symlink_target) {
			kfree(fdlocks);
			kfree(cfile);
			return NULL;
		}
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->deferred_close_scheduled = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
	INIT_WORK(&cfile->serverclose, serverclose_work);
	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);
	atomic_inc(&tcon->num_local_opens);

	/* if readable file instance, put it first in the list */
	spin_lock(&cinode->open_file_lock);
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&cinode->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct cifsLockInfo *li, *tmp;
	struct super_block *sb = inode->i_sb;

	/*
	 * Delete any outstanding lock records. We'll lose them when the file
	 * is closed anyway.
	 */
	cifs_down_write(&cifsi->lock_sem);
	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
		list_del(&li->llist);
		cifs_del_lock_waiters(li);
		kfree(li);
	}
	list_del(&cifs_file->llist->llist);
	kfree(cifs_file->llist);
	up_write(&cifsi->lock_sem);

	cifs_put_tlink(cifs_file->tlink);
	dput(cifs_file->dentry);
	cifs_sb_deactive(sb);
	kfree(cifs_file->symlink_target);
	kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, put);

	cifsFileInfo_put_final(cifs_file);
}

void serverclose_work(struct work_struct *work)
{
	struct cifsFileInfo *cifs_file = container_of(work,
			struct cifsFileInfo, serverclose);

	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);

	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	int retries = 0;
	int MAX_RETRIES = 4;

	do {
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(0, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(0, tcon, &cifs_file->fid);

		if (rc == -EBUSY || rc == -EAGAIN)
			msleep(250);
		retries++;
	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
			);

	if (retries == MAX_RETRIES)
		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);

	if (cifs_file->offload)
		queue_work(fileinfo_put_wq, &cifs_file->put);
	else
		cifsFileInfo_put_final(cifs_file);
}

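/*
 * Timing note: with MAX_RETRIES == 4 and a 250 ms sleep between attempts,
 * serverclose_work() gives up after roughly a second of -EBUSY/-EAGAIN
 * responses and logs a warning rather than retrying forever.
 */
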
/**
 * cifsFileInfo_put - release a reference of file priv data
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference of file priv data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_for_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	not offloaded on close and oplock breaks
 *
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
		       bool wait_oplock_handler, bool offload)
{
	struct inode *inode = d_inode(cifs_file->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifsInodeInfo *cifsi = CIFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	bool oplock_break_cancelled;
	bool serverclose_offloaded = false;

	spin_lock(&tcon->open_file_lock);
	spin_lock(&cifsi->open_file_lock);
	spin_lock(&cifs_file->file_info_lock);

	cifs_file->offload = offload;
	if (--cifs_file->count > 0) {
		spin_unlock(&cifs_file->file_info_lock);
		spin_unlock(&cifsi->open_file_lock);
		spin_unlock(&tcon->open_file_lock);
		return;
	}
	spin_unlock(&cifs_file->file_info_lock);

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* store open in pending opens to make sure we don't miss lease break */
	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

	/* remove it from the lists */
	list_del(&cifs_file->flist);
	list_del(&cifs_file->tlist);
	atomic_dec(&tcon->num_local_opens);

	if (list_empty(&cifsi->openFileList)) {
		cifs_dbg(FYI, "closing last open instance for inode %p\n",
			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need invalidate mapping on the last
		 * close because it may cause an error when we open this file
		 * again and get at least level II oplock.
		 */
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
		cifs_set_oplock_level(cifsi, 0);
	}

	spin_unlock(&cifsi->open_file_lock);
	spin_unlock(&tcon->open_file_lock);

	oplock_break_cancelled = wait_oplock_handler ?
		cancel_work_sync(&cifs_file->oplock_break) : false;

	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
		struct TCP_Server_Info *server = tcon->ses->server;
		unsigned int xid;
		int rc = 0;

		xid = get_xid();
		if (server->ops->close_getattr)
			rc = server->ops->close_getattr(xid, tcon, cifs_file);
		else if (server->ops->close)
			rc = server->ops->close(xid, tcon, &cifs_file->fid);
		_free_xid(xid);

		if (rc == -EBUSY || rc == -EAGAIN) {
			// Server close failed, hence offloading it as an async op
			queue_work(serverclose_wq, &cifs_file->serverclose);
			serverclose_offloaded = true;
		}
	}

	if (oplock_break_cancelled)
		cifs_done_oplock_break(cifsi);

	cifs_del_pending_open(&open);

	// if serverclose has been offloaded to wq (on failure), it will
	// handle offloading put as well. If serverclose not offloaded,
	// we need to handle offloading put here.
	if (!serverclose_offloaded) {
		if (offload)
			queue_work(fileinfo_put_wq, &cifs_file->put);
		else
			cifsFileInfo_put_final(cifs_file);
	}
}

int cifs_open(struct inode *inode, struct file *file)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	void *page;
	const char *full_path;
	bool posix_open_ok = false;
	struct cifs_fid fid = {};
	struct cifs_pending_open open;
	struct cifs_open_info_data data = {};

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
		free_xid(xid);
		return -EIO;
	}

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	page = alloc_dentry_path();
	full_path = build_path_from_dentry(file_dentry(file), page);
	if (IS_ERR(full_path)) {
		rc = PTR_ERR(full_path);
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	/* Get the cached handle as SMB2 close is deferred */
	rc = cifs_get_readable_path(tcon, full_path, &cfile);
	if (rc == 0) {
		if (file->f_flags == cfile->f_flags) {
			file->private_data = cfile;
			spin_lock(&CIFS_I(inode)->deferred_lock);
			cifs_del_deferred_close(cfile);
			spin_unlock(&CIFS_I(inode)->deferred_lock);
			goto use_cache;
		}
		_cifsFileInfo_put(cfile, true, false);
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->ctx->file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->ip_addr,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
				  xid, &data);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
	if (cfile == NULL) {
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

use_cache:
	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
			   file->f_mode & FMODE_WRITE);
	if (!(file->f_flags & O_DIRECT))
		goto out;
	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
		goto out;
	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);

out:
	free_dentry_path(page);
	free_xid(xid);
	cifs_put_tlink(tlink);
	cifs_free_open_info(&data);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	void *page;
	const char *full_path;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;
	int rdwr_for_fscache = 0;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	page = alloc_dentry_path();
	full_path = build_path_from_dentry(cfile->dentry, page);
	if (IS_ERR(full_path)) {
		mutex_unlock(&cfile->fh_mutex);
		free_dentry_path(page);
		free_xid(xid);
		return PTR_ERR(full_path);
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->ctx->file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* If we're caching, we need to be able to fill in around partial writes. */
	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
		rdwr_for_fscache = 1;

	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

retry_open:
	oparms = (struct cifs_open_parms) {
		.tcon = tcon,
		.cifs_sb = cifs_sb,
		.desired_access = desired_access,
		.create_options = cifs_create_options(cifs_sb, create_options),
		.disposition = disposition,
		.path = full_path,
		.fid = &cfile->fid,
		.reconnect = true,
	};

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}
	if (rc == -EACCES && rdwr_for_fscache == 1) {
		desired_access = cifs_convert_flags(cfile->f_flags, 0);
		rdwr_for_fscache = 2;
		goto retry_open;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

	if (rdwr_for_fscache == 2)
		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
reopen_success:
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		if (!is_interrupt_error(rc))
			mapping_set_error(inode->i_mapping, rc);

		if (tcon->posix_extensions) {
			rc = smb311_posix_get_inode_info(&inode, full_path,
							 NULL, inode->i_sb, xid);
		} else if (tcon->unix_ext) {
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		} else {
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
		}
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	free_dentry_path(page);
	free_xid(xid);
	return rc;
}

void smb2_deferred_work_close(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work,
			struct cifsFileInfo, deferred.work);

	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	cifs_del_deferred_close(cfile);
	cfile->deferred_close_scheduled = false;
	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
	_cifsFileInfo_put(cfile, true, false);
}

static bool
smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsInodeInfo *cinode = CIFS_I(inode);

	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
}

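/*
 * Example: a close on a handle with an RH or RHW lease and a non-zero
 * closetimeo mount option is not sent to the server immediately; it is
 * queued as a deferred close (see cifs_close() below) and only issued when
 * the timeout fires or the cached handle is reclaimed by a new open.
 */
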
int cifs_close(struct inode *inode, struct file *file)
{
	struct cifsFileInfo *cfile;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifs_deferred_close *dclose;

	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	if (file->private_data != NULL) {
		cfile = file->private_data;
		file->private_data = NULL;
		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
		if ((cfile->status_file_deleted == false) &&
		    (smb2_can_defer_close(inode, dclose))) {
			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
				inode_set_mtime_to_ts(inode,
						      inode_set_ctime_current(inode));
			}
			spin_lock(&cinode->deferred_lock);
			cifs_add_deferred_close(cfile, dclose);
			if (cfile->deferred_close_scheduled &&
			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there is no pending work, mod_delayed_work queues new work.
				 * So, Increase the ref count to avoid use-after-free.
				 */
				if (!mod_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->closetimeo))
					cifsFileInfo_get(cfile);
			} else {
				/* Deferred close for files */
				queue_delayed_work(deferredclose_wq,
						&cfile->deferred, cifs_sb->ctx->closetimeo);
				cfile->deferred_close_scheduled = true;
				spin_unlock(&cinode->deferred_lock);
				return 0;
			}
			spin_unlock(&cinode->deferred_lock);
			_cifsFileInfo_put(cfile, true, false);
		} else {
			_cifsFileInfo_put(cfile, true, false);
			kfree(dclose);
		}
	}

	/* return code from the ->release op is always ignored */
	return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
	struct cifsFileInfo *open_file, *tmp;
	struct list_head tmp_list;

	if (!tcon->use_persistent || !tcon->need_reopen_files)
		return;

	tcon->need_reopen_files = false;

	cifs_dbg(FYI, "Reopen persistent handles\n");
	INIT_LIST_HEAD(&tmp_list);

	/* list all files open on tree connection, reopen resilient handles */
	spin_lock(&tcon->open_file_lock);
	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
		if (!open_file->invalidHandle)
			continue;
		cifsFileInfo_get(open_file);
		list_add_tail(&open_file->rlist, &tmp_list);
	}
	spin_unlock(&tcon->open_file_lock);

	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
		if (cifs_reopen_file(open_file, false /* do not flush */))
			tcon->need_reopen_files = true;
		list_del_init(&open_file->rlist);
		cifsFileInfo_put(open_file);
	}
}

int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
	struct cifsLockInfo *lock =
		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
	if (!lock)
		return lock;
	lock->offset = offset;
	lock->length = length;
	lock->type = type;
	lock->pid = current->tgid;
	lock->flags = flags;
	INIT_LIST_HEAD(&lock->blist);
	init_waitqueue_head(&lock->block_q);
	return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
		list_del_init(&li->blist);
		wake_up(&li->block_q);
	}
}

#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, __u16 flags,
			    struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (rw_check == CIFS_LOCK_OP &&
		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
		    server->ops->compare_fids(cfile, cur_cfile))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}

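/*
 * In short: an overlapping range only counts as a conflict if it is not one
 * of the allowed same-owner cases, e.g. two shared locks of the same type
 * coexist, and for pure lock operations an OFD lock never conflicts with
 * another OFD lock taken through the same open file.
 */
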
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
			__u8 type, __u16 flags,
			struct cifsLockInfo **conf_lock, int rw_check)
{
	bool rc = false;
	struct cifs_fid_locks *cur;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

	list_for_each_entry(cur, &cinode->llist, llist) {
		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
						 flags, cfile, conf_lock,
						 rw_check);
		if (rc)
			break;
	}

	return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
	       __u8 type, struct file_lock *flock)
{
	int rc = 0;
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	bool exist;

	down_read(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, offset, length, type,
					flock->c.flc_flags, &conf_lock,
					CIFS_LOCK_OP);
	if (exist) {
		flock->fl_start = conf_lock->offset;
		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
		flock->c.flc_pid = conf_lock->pid;
		if (conf_lock->type & server->vals->shared_lock_type)
			flock->c.flc_type = F_RDLCK;
		else
			flock->c.flc_type = F_WRLCK;
	} else if (!cinode->can_cache_brlcks)
		rc = 1;
	else
		flock->c.flc_type = F_UNLCK;

	up_read(&cinode->lock_sem);
	return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, lock->flags, &conf_lock,
					CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
	int rc = 0;
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	unsigned char saved_type = flock->c.flc_type;

	if ((flock->c.flc_flags & FL_POSIX) == 0)
		return 1;

	down_read(&cinode->lock_sem);
	posix_test_lock(file, flock);

	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
		flock->c.flc_type = saved_type;
		rc = 1;
	}

	up_read(&cinode->lock_sem);
	return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if the error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to request to the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = FILE_LOCK_DEFERRED + 1;

	if ((flock->c.flc_flags & FL_POSIX) == 0)
		return rc;

	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	return rc;
}

static int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}

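/*
 * Buffer sizing, for illustration (sizes are illustrative, assuming 4 KiB
 * pages): max_buf is clamped to PAGE_SIZE, so with a ~32-byte SMB header and
 * a 20-byte LOCKING_ANDX_RANGE roughly 200 ranges fit in one cifs_lockv()
 * request; once the array fills, it is flushed and reused for the rest.
 */
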
static __u32
hash_lockowner(fl_owner_t owner)
{
	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

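/*
 * hash_lockowner() folds the opaque fl_owner_t (typically a pointer to the
 * opener's files_struct) into 32 bits and mixes in the random
 * cifs_lock_secret, so the value used as the POSIX lock "pid" on the wire
 * neither leaks a kernel pointer nor collides predictably across owners.
 */
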
struct lock_to_push {
	struct list_head llist;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 netfid;
	__u8 type;
};

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = locks_inode_context(inode);
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	for_each_file_lock(flock, &flctx->flc_posix) {
		unsigned char ftype = flock->c.flc_type;

		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = cifs_flock_len(flock);
		if (ftype == F_RDLCK || ftype == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->c.flc_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	/* we are going to update can_cache_brlcks here - need a write access */
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		up_write(&cinode->lock_sem);
		return rc;
	}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	cinode->can_cache_brlcks = false;
	up_write(&cinode->lock_sem);
	return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->c.flc_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->c.flc_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->c.flc_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->c.flc_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->c.flc_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->c.flc_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
			 flock->c.flc_flags);

	*type = server->vals->large_lock_type;
	if (lock_is_write(flock)) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (lock_is_unlock(flock)) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (lock_is_read(flock)) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->c.flc_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->c.flc_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->c.flc_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->c.flc_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	if (type & server->vals->shared_lock_type) {
		flock->c.flc_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->c.flc_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->c.flc_type = F_WRLCK;

	return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
	struct list_head *li, *tmp;
	list_for_each_safe(li, tmp, source)
		list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
	struct cifsLockInfo *li, *tmp;
	list_for_each_entry_safe(li, tmp, llist, llist) {
		cifs_del_lock_waiters(li);
		list_del(&li->llist);
		kfree(li);
	}
}

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = cifs_flock_len(flock);
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeeded -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */

static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	__u64 length = cifs_flock_len(flock);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
	if (posix_lck) {
		int posix_lock_type;

		rc = cifs_posix_lock_set(file, flock);
		if (rc <= FILE_LOCK_DEFERRED)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->c.flc_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type,
				      flock->c.flc_flags);
		if (!lock)
			return -ENOMEM;

		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapping locks due to
		 * page reading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->c.flc_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}

int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
{
	int rc, xid;
	int lock = 0, unlock = 0;
	bool wait_flag = false;
	bool posix_lck = false;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	__u32 type;

	xid = get_xid();

	if (!(fl->c.flc_flags & FL_FLOCK)) {
		rc = -ENOLCK;
		free_xid(xid);
		return rc;
	}

	cfile = (struct cifsFileInfo *)file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
			tcon->ses->server);
	cifs_sb = CIFS_FILE_SB(file);

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		posix_lck = true;

	if (!lock && !unlock) {
		/*
		 * if no lock or unlock then nothing to do since we do not
		 * know what it is
		 */
		rc = -EOPNOTSUPP;
		free_xid(xid);
		return rc;
	}

	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
			xid);
	free_xid(xid);
	return rc;
}

2157 int lock = 0, unlock = 0;
2158 bool wait_flag = false;
2159 bool posix_lck = false;
2160 struct cifs_sb_info *cifs_sb;
2161 struct cifs_tcon *tcon;
2162 struct cifsFileInfo *cfile;
2168 cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2169 flock->c.flc_flags, flock->c.flc_type,
2170 (long long)flock->fl_start,
2171 (long long)flock->fl_end);
2173 cfile = (struct cifsFileInfo *)file->private_data;
2174 tcon = tlink_tcon(cfile->tlink);
2176 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2178 cifs_sb = CIFS_FILE_SB(file);
2179 set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2181 if (cap_unix(tcon->ses) &&
2182 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2183 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2186 * BB add code here to normalize offset and length to account for
2187 * negative length which we can not accept over the wire.
2189 if (IS_GETLK(cmd)) {
2190 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2195 if (!lock && !unlock) {
2197 * if no lock or unlock then nothing to do since we do not
2204 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
/*
 * Update the file size (if needed) after a write. Should be called with
 * the inode->i_lock held.
 */
void
cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
		unsigned int bytes_written)
{
	loff_t end_of_write = offset + bytes_written;

	if (end_of_write > cifsi->netfs.remote_i_size)
		netfs_resize_file(&cifsi->netfs, end_of_write, true);
}

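/*
 * Example: a 512-byte write at offset 4096 gives end_of_write == 4608; the
 * cached remote_i_size is only ever raised here, never shrunk, so a short or
 * overlapping write cannot truncate the tracked server-side file size.
 */
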
2225 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2226 size_t write_size, loff_t *offset)
2229 unsigned int bytes_written = 0;
2230 unsigned int total_written;
2231 struct cifs_tcon *tcon;
2232 struct TCP_Server_Info *server;
2234 struct dentry *dentry = open_file->dentry;
2235 struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2236 struct cifs_io_parms io_parms = {0};
2238 cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2239 write_size, *offset, dentry);
2241 tcon = tlink_tcon(open_file->tlink);
2242 server = tcon->ses->server;
2244 if (!server->ops->sync_write)
2249 for (total_written = 0; write_size > total_written;
2250 total_written += bytes_written) {
2252 while (rc == -EAGAIN) {
2256 if (open_file->invalidHandle) {
2257 /* we could deadlock if we called
2258 filemap_fdatawait from here so tell
2259 reopen_file not to flush data to
2261 rc = cifs_reopen_file(open_file, false);
2266 len = min(server->ops->wp_retry_size(d_inode(dentry)),
2267 (unsigned int)write_size - total_written);
2268 /* iov[0] is reserved for smb header */
2269 iov[1].iov_base = (char *)write_data + total_written;
2270 iov[1].iov_len = len;
2272 io_parms.tcon = tcon;
2273 io_parms.offset = *offset;
2274 io_parms.length = len;
2275 rc = server->ops->sync_write(xid, &open_file->fid,
2276 &io_parms, &bytes_written, iov, 1);
2278 if (rc || (bytes_written == 0)) {
2286 spin_lock(&d_inode(dentry)->i_lock);
2287 cifs_update_eof(cifsi, *offset, bytes_written);
2288 spin_unlock(&d_inode(dentry)->i_lock);
2289 *offset += bytes_written;
2293 cifs_stats_bytes_written(tcon, total_written);
2295 if (total_written > 0) {
2296 spin_lock(&d_inode(dentry)->i_lock);
2297 if (*offset > d_inode(dentry)->i_size) {
2298 i_size_write(d_inode(dentry), *offset);
2299 d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2301 spin_unlock(&d_inode(dentry)->i_lock);
2303 mark_inode_dirty_sync(d_inode(dentry));
2305 return total_written;
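/*
 * Find an open handle on @cifs_inode usable for reading, skipping
 * handles that need to be reopened. On multiuser mounts, fsuid_only
 * restricts the search to handles opened by the current fsuid. A
 * reference is taken on the returned handle; the caller must drop it
 * with cifsFileInfo_put().
 */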
2308 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2311 struct cifsFileInfo *open_file = NULL;
2312 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2314 /* only filter by fsuid on multiuser mounts */
2315 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2318 spin_lock(&cifs_inode->open_file_lock);
2319 /* we could simply get the first_list_entry since write-only entries
2320 are always at the end of the list but since the first entry might
2321 have a close pending, we go through the whole list */
2322 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2323 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2325 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2326 if (!open_file->invalidHandle) {
2327 /* found a good file */
2328 /* lock it so it will not be closed on us */
2329 cifsFileInfo_get(open_file);
2330 spin_unlock(&cifs_inode->open_file_lock);
2332 } /* else might as well continue, and look for
2333 another, or simply have the caller reopen it
2334 again rather than trying to fix this handle */
2335 } else /* write only file */
2336 break; /* write only files are last so must be done */
2338 spin_unlock(&cifs_inode->open_file_lock);
2342 /* Return -EBADF if no handle is found and general rc otherwise */
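/*
 * @flags may include FIND_WR_FSUID_ONLY (only consider handles opened
 * by the current fsuid) and FIND_WR_WITH_DELETE (require DELETE access
 * on the handle). An invalidated handle is reopened as a last resort,
 * retrying up to MAX_REOPEN_ATT times.
 */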
2344 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2345 struct cifsFileInfo **ret_file)
2347 struct cifsFileInfo *open_file, *inv_file = NULL;
2348 struct cifs_sb_info *cifs_sb;
2349 bool any_available = false;
2351 unsigned int refind = 0;
2352 bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2353 bool with_delete = flags & FIND_WR_WITH_DELETE;
2357 * Having a null inode here (because mapping->host was set to zero by
2358 * the VFS or MM) should not happen but we had reports of an oops (due
2359 * to it being zero) during stress test cases so we need to check for it
2362 if (cifs_inode == NULL) {
2363 cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
2368 cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2370 /* only filter by fsuid on multiuser mounts */
2371 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2374 spin_lock(&cifs_inode->open_file_lock);
2376 if (refind > MAX_REOPEN_ATT) {
2377 spin_unlock(&cifs_inode->open_file_lock);
2380 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2381 if (!any_available && open_file->pid != current->tgid)
2383 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2385 if (with_delete && !(open_file->fid.access & DELETE))
2387 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2388 if (!open_file->invalidHandle) {
2389 /* found a good writable file */
2390 cifsFileInfo_get(open_file);
2391 spin_unlock(&cifs_inode->open_file_lock);
2392 *ret_file = open_file;
2396 inv_file = open_file;
2400 /* couldn't find usable FH with same pid, try any available */
2401 if (!any_available) {
2402 any_available = true;
2403 goto refind_writable;
2407 any_available = false;
2408 cifsFileInfo_get(inv_file);
2411 spin_unlock(&cifs_inode->open_file_lock);
2414 rc = cifs_reopen_file(inv_file, false);
2416 *ret_file = inv_file;
2420 spin_lock(&cifs_inode->open_file_lock);
2421 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2422 spin_unlock(&cifs_inode->open_file_lock);
2423 cifsFileInfo_put(inv_file);
2426 spin_lock(&cifs_inode->open_file_lock);
2427 goto refind_writable;
2433 struct cifsFileInfo *
2434 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2436 struct cifsFileInfo *cfile;
2439 rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2441 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
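/*
 * Walk @tcon's open-file list looking for an entry whose dentry path
 * matches @name, then hand off to cifs_get_writable_file() for that
 * inode. Building a candidate's path can fail; the error is returned
 * directly in that case.
 */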
2447 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2449 struct cifsFileInfo **ret_file)
2451 struct cifsFileInfo *cfile;
2452 void *page = alloc_dentry_path();
2456 spin_lock(&tcon->open_file_lock);
2457 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2458 struct cifsInodeInfo *cinode;
2459 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2460 if (IS_ERR(full_path)) {
2461 spin_unlock(&tcon->open_file_lock);
2462 free_dentry_path(page);
2463 return PTR_ERR(full_path);
2465 if (strcmp(full_path, name))
2468 cinode = CIFS_I(d_inode(cfile->dentry));
2469 spin_unlock(&tcon->open_file_lock);
2470 free_dentry_path(page);
2471 return cifs_get_writable_file(cinode, flags, ret_file);
2474 spin_unlock(&tcon->open_file_lock);
2475 free_dentry_path(page);
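/*
 * As above, but look for a readable handle on the file named @name.
 * Returns 0 and sets *ret_file on success, or -ENOENT if no readable
 * handle exists.
 */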
2480 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2481 struct cifsFileInfo **ret_file)
2483 struct cifsFileInfo *cfile;
2484 void *page = alloc_dentry_path();
2488 spin_lock(&tcon->open_file_lock);
2489 list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2490 struct cifsInodeInfo *cinode;
2491 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2492 if (IS_ERR(full_path)) {
2493 spin_unlock(&tcon->open_file_lock);
2494 free_dentry_path(page);
2495 return PTR_ERR(full_path);
2497 if (strcmp(full_path, name))
2500 cinode = CIFS_I(d_inode(cfile->dentry));
2501 spin_unlock(&tcon->open_file_lock);
2502 free_dentry_path(page);
2503 *ret_file = find_readable_file(cinode, 0);
2504 return *ret_file ? 0 : -ENOENT;
2507 spin_unlock(&tcon->open_file_lock);
2508 free_dentry_path(page);
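/*
 * Drop a reference on a cifs_writedata, deregistering any SMB-Direct
 * memory registration and putting the file handle before the structure
 * is freed.
 */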
2513 cifs_writedata_release(struct kref *refcount)
2515 struct cifs_writedata *wdata = container_of(refcount,
2516 struct cifs_writedata, refcount);
2517 #ifdef CONFIG_CIFS_SMB_DIRECT
2519 smbd_deregister_mr(wdata->mr);
2525 cifsFileInfo_put(wdata->cfile);
2531 * Write failed with a retryable error. Resend the write request. It's also
2532 * possible that the page was redirtied so re-clean the page.
2535 cifs_writev_requeue(struct cifs_writedata *wdata)
2538 struct inode *inode = d_inode(wdata->cfile->dentry);
2539 struct TCP_Server_Info *server;
2540 unsigned int rest_len = wdata->bytes;
2541 loff_t fpos = wdata->offset;
2543 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2545 struct cifs_writedata *wdata2;
2546 unsigned int wsize, cur_len;
2548 wsize = server->ops->wp_retry_size(inode);
2549 if (wsize < rest_len) {
2550 if (wsize < PAGE_SIZE) {
2554 cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2559 wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2565 wdata2->sync_mode = wdata->sync_mode;
2566 wdata2->offset = fpos;
2567 wdata2->bytes = cur_len;
2568 wdata2->iter = wdata->iter;
2570 iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2571 iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2573 if (iov_iter_is_xarray(&wdata2->iter))
2574 /* Check for pages having been redirtied and clean
2575 * them. We can do this by walking the xarray. If
2576 * it's not an xarray, then it's a DIO and we shouldn't
2577 * be mucking around with the page bits.
2579 cifs_undirty_folios(inode, fpos, cur_len);
2581 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2583 if (!wdata2->cfile) {
2584 cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2586 if (!is_retryable_error(rc))
2589 wdata2->pid = wdata2->cfile->pid;
2590 rc = server->ops->async_writev(wdata2,
2591 cifs_writedata_release);
2594 kref_put(&wdata2->refcount, cifs_writedata_release);
2596 if (is_retryable_error(rc))
2599 rest_len -= cur_len;
2604 rest_len -= cur_len;
2605 } while (rest_len > 0);
2607 /* Clean up remaining pages from the original wdata */
2608 if (iov_iter_is_xarray(&wdata->iter))
2609 cifs_pages_write_failed(inode, fpos, rest_len);
2611 if (rc != 0 && !is_retryable_error(rc))
2612 mapping_set_error(inode->i_mapping, rc);
2613 kref_put(&wdata->refcount, cifs_writedata_release);
2617 cifs_writev_complete(struct work_struct *work)
2619 struct cifs_writedata *wdata = container_of(work,
2620 struct cifs_writedata, work);
2621 struct inode *inode = d_inode(wdata->cfile->dentry);
2623 if (wdata->result == 0) {
2624 spin_lock(&inode->i_lock);
2625 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2626 spin_unlock(&inode->i_lock);
2627 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2629 } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2630 return cifs_writev_requeue(wdata);
2632 if (wdata->result == -EAGAIN)
2633 cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2634 else if (wdata->result < 0)
2635 cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2637 cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2639 if (wdata->result != -EAGAIN)
2640 mapping_set_error(inode->i_mapping, wdata->result);
2641 kref_put(&wdata->refcount, cifs_writedata_release);
2644 struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2646 struct cifs_writedata *wdata;
2648 wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2649 if (wdata != NULL) {
2650 kref_init(&wdata->refcount);
2651 INIT_LIST_HEAD(&wdata->list);
2652 init_completion(&wdata->done);
2653 INIT_WORK(&wdata->work, complete);
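/*
 * Synchronously write the byte range [from, to) of a pagecache page to
 * the server using any available writable handle. Used by the
 * writepage path below.
 */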
2658 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2660 struct address_space *mapping = page->mapping;
2661 loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2664 int bytes_written = 0;
2665 struct inode *inode;
2666 struct cifsFileInfo *open_file;
2668 if (!mapping || !mapping->host)
2671 inode = page->mapping->host;
2673 offset += (loff_t)from;
2674 write_data = kmap(page);
2677 if ((to > PAGE_SIZE) || (from > to)) {
2682 /* racing with truncate? */
2683 if (offset > mapping->host->i_size) {
2685 return 0; /* don't care */
2688 /* check to make sure that we are not extending the file */
2689 if (mapping->host->i_size - offset < (loff_t)to)
2690 to = (unsigned)(mapping->host->i_size - offset);
2692 rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2695 bytes_written = cifs_write(open_file, open_file->pid,
2696 write_data, to - from, &offset);
2697 cifsFileInfo_put(open_file);
2698 /* Does mm or vfs already set times? */
2699 simple_inode_init_ts(inode);
2700 if ((bytes_written > 0) && (offset))
2702 else if (bytes_written < 0)
2707 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2708 if (!is_retryable_error(rc))
2717 * Extend the region to be written back to include subsequent contiguously
2718 * dirty pages if possible, but don't sleep while doing so.
2720 static void cifs_extend_writeback(struct address_space *mapping,
2721 struct xa_state *xas,
2728 struct folio_batch batch;
2729 struct folio *folio;
2730 unsigned int nr_pages;
2731 pgoff_t index = (start + *_len) / PAGE_SIZE;
2736 folio_batch_init(&batch);
2739 /* Firstly, we gather up a batch of contiguous dirty pages
2740 * under the RCU read lock - but we can't clear the dirty flags
2741 * there if any of those pages are mapped.
2745 xas_for_each(xas, folio, ULONG_MAX) {
2747 if (xas_retry(xas, folio))
2749 if (xa_is_value(folio))
2751 if (folio->index != index) {
2756 if (!folio_try_get_rcu(folio)) {
2760 nr_pages = folio_nr_pages(folio);
2761 if (nr_pages > max_pages) {
2766 /* Has the page moved or been split? */
2767 if (unlikely(folio != xas_reload(xas))) {
2773 if (!folio_trylock(folio)) {
2778 if (!folio_test_dirty(folio) ||
2779 folio_test_writeback(folio)) {
2780 folio_unlock(folio);
2786 max_pages -= nr_pages;
2787 len = folio_size(folio);
2791 *_count -= nr_pages;
2793 if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2796 if (!folio_batch_add(&batch, folio))
2805 /* Now, if we obtained any pages, we can shift them to being
2806 * writable and mark them for caching.
2808 if (!folio_batch_count(&batch))
2811 for (i = 0; i < folio_batch_count(&batch); i++) {
2812 folio = batch.folios[i];
2813 /* The folio should be locked, dirty and not undergoing
2814 * writeback from the loop above.
2816 if (!folio_clear_dirty_for_io(folio))
2818 folio_start_writeback(folio);
2819 folio_unlock(folio);
2822 folio_batch_release(&batch);
2828 * Write back the locked page and any subsequent non-locked dirty pages.
2830 static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2831 struct writeback_control *wbc,
2832 struct xa_state *xas,
2833 struct folio *folio,
2834 unsigned long long start,
2835 unsigned long long end)
2837 struct inode *inode = mapping->host;
2838 struct TCP_Server_Info *server;
2839 struct cifs_writedata *wdata;
2840 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2841 struct cifs_credits credits_on_stack;
2842 struct cifs_credits *credits = &credits_on_stack;
2843 struct cifsFileInfo *cfile = NULL;
2844 unsigned long long i_size = i_size_read(inode), max_len;
2845 unsigned int xid, wsize;
2846 size_t len = folio_size(folio);
2847 long count = wbc->nr_to_write;
2850 /* The folio should be locked, dirty and not undergoing writeback. */
2851 if (!folio_clear_dirty_for_io(folio))
2853 folio_start_writeback(folio);
2855 count -= folio_nr_pages(folio);
2858 server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2860 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2862 cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2866 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2871 wdata = cifs_writedata_alloc(cifs_writev_complete);
2877 wdata->sync_mode = wbc->sync_mode;
2878 wdata->offset = folio_pos(folio);
2879 wdata->pid = cfile->pid;
2880 wdata->credits = credits_on_stack;
2881 wdata->cfile = cfile;
2882 wdata->server = server;
2885 /* Find all consecutive lockable dirty pages that have contiguous
2886 * written regions, stopping when we find a page that is not
2887 * immediately lockable, is not dirty or is missing, or we reach the end of the range.
2890 if (start < i_size) {
2891 /* Trim the write to the EOF; the extra data is ignored. Also
2892 * put an upper limit on the size of a single storedata op.
2895 max_len = min_t(unsigned long long, max_len, end - start + 1);
2896 max_len = min_t(unsigned long long, max_len, i_size - start);
2898 if (len < max_len) {
2899 int max_pages = INT_MAX;
2901 #ifdef CONFIG_CIFS_SMB_DIRECT
2902 if (server->smbd_conn)
2903 max_pages = server->smbd_conn->max_frmr_depth;
2905 max_pages -= folio_nr_pages(folio);
2908 cifs_extend_writeback(mapping, xas, &count, start,
2909 max_pages, max_len, &len);
2912 len = min_t(unsigned long long, len, i_size - start);
2914 /* We now have a contiguous set of dirty pages, each with writeback
2915 * set; the first page is still locked at this point, but all the rest
2916 * have been unlocked.
2918 folio_unlock(folio);
2921 if (start < i_size) {
2922 iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2925 rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2929 if (wdata->cfile->invalidHandle)
2932 rc = wdata->server->ops->async_writev(wdata,
2933 cifs_writedata_release);
2935 kref_put(&wdata->refcount, cifs_writedata_release);
2939 /* The dirty region was entirely beyond the EOF. */
2940 cifs_pages_written_back(inode, start, len);
2945 kref_put(&wdata->refcount, cifs_writedata_release);
2947 add_credits_and_wake_if(server, credits, 0);
2950 cifsFileInfo_put(cfile);
2954 wbc->nr_to_write = count;
2956 } else if (is_retryable_error(rc)) {
2957 cifs_pages_write_redirty(inode, start, len);
2959 cifs_pages_write_failed(inode, start, len);
2960 mapping_set_error(mapping, rc);
2962 /* Indication to update ctime and mtime as close is deferred */
2963 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2968 * write a region of pages back to the server
2970 static ssize_t cifs_writepages_begin(struct address_space *mapping,
2971 struct writeback_control *wbc,
2972 struct xa_state *xas,
2973 unsigned long long *_start,
2974 unsigned long long end)
2976 struct folio *folio;
2977 unsigned long long start = *_start;
2982 /* Find the first dirty page. */
2986 folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2987 if (xas_retry(xas, folio) || xa_is_value(folio))
2992 if (!folio_try_get_rcu(folio)) {
2997 if (unlikely(folio != xas_reload(xas))) {
3010 start = folio_pos(folio); /* May regress with THPs */
3012 /* At this point we hold neither the i_pages lock nor the page lock:
3013 * the page may be truncated or invalidated (changing page->mapping to
3014 * NULL), or even swizzled back from swapper_space to tmpfs file mapping.
3018 if (wbc->sync_mode != WB_SYNC_NONE) {
3019 ret = folio_lock_killable(folio);
3023 if (!folio_trylock(folio))
3027 if (folio->mapping != mapping ||
3028 !folio_test_dirty(folio)) {
3029 start += folio_size(folio);
3030 folio_unlock(folio);
3034 if (folio_test_writeback(folio) ||
3035 folio_test_fscache(folio)) {
3036 folio_unlock(folio);
3037 if (wbc->sync_mode != WB_SYNC_NONE) {
3038 folio_wait_writeback(folio);
3039 #ifdef CONFIG_CIFS_FSCACHE
3040 folio_wait_fscache(folio);
3045 start += folio_size(folio);
3046 if (wbc->sync_mode == WB_SYNC_NONE) {
3047 if (skips >= 5 || need_resched()) {
3056 ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3059 *_start = start + ret;
3064 * Write a region of pages back to the server
3066 static int cifs_writepages_region(struct address_space *mapping,
3067 struct writeback_control *wbc,
3068 unsigned long long *_start,
3069 unsigned long long end)
3073 XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3076 ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3077 if (ret > 0 && wbc->nr_to_write > 0)
3079 } while (ret > 0 && wbc->nr_to_write > 0);
3081 return ret > 0 ? 0 : ret;
3085 * Write some of the pending data back to the server
3087 static int cifs_writepages(struct address_space *mapping,
3088 struct writeback_control *wbc)
3093 /* We have to be careful as we can end up racing with setattr()
3094 * truncating the pagecache since the caller doesn't take a lock here to prevent it.
3098 if (wbc->range_cyclic && mapping->writeback_index) {
3099 start = mapping->writeback_index * PAGE_SIZE;
3100 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3104 if (wbc->nr_to_write <= 0) {
3105 mapping->writeback_index = start / PAGE_SIZE;
3110 end = mapping->writeback_index * PAGE_SIZE;
3111 mapping->writeback_index = 0;
3112 ret = cifs_writepages_region(mapping, wbc, &start, end);
3114 mapping->writeback_index = start / PAGE_SIZE;
3115 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3117 ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3118 if (wbc->nr_to_write > 0 && ret == 0)
3119 mapping->writeback_index = start / PAGE_SIZE;
3121 start = wbc->range_start;
3122 ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
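/*
 * Write a single dirty page back synchronously. A retryable error
 * redirties the page (and -EAGAIN is retried in place for WB_SYNC_ALL);
 * any other error is recorded against the mapping.
 */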
3130 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3136 /* BB add check for wbc flags */
3138 if (!PageUptodate(page))
3139 cifs_dbg(FYI, "ppw - page not up to date\n");
3142 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3144 * A writepage() implementation always needs to do either this,
3145 * or re-dirty the page with "redirty_page_for_writepage()" in
3146 * the case of a failure.
3148 * Just unlocking the page will cause the radix tree tag-bits
3149 * to fail to update with the state of the page correctly.
3151 set_page_writeback(page);
3153 rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3154 if (is_retryable_error(rc)) {
3155 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3157 redirty_page_for_writepage(wbc, page);
3158 } else if (rc != 0) {
3160 mapping_set_error(page->mapping, rc);
3162 SetPageUptodate(page);
3164 end_page_writeback(page);
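/*
 * Complete a buffered write: mark the folio uptodate where the copy
 * allows it, fall back to a synchronous cifs_write() for short copies
 * into non-uptodate pages, and update i_size/i_blocks if the file grew.
 */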
3170 static int cifs_write_end(struct file *file, struct address_space *mapping,
3171 loff_t pos, unsigned len, unsigned copied,
3172 struct page *page, void *fsdata)
3175 struct inode *inode = mapping->host;
3176 struct cifsFileInfo *cfile = file->private_data;
3177 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3178 struct folio *folio = page_folio(page);
3181 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3184 pid = current->tgid;
3186 cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3189 if (folio_test_checked(folio)) {
3191 folio_mark_uptodate(folio);
3192 folio_clear_checked(folio);
3193 } else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3194 folio_mark_uptodate(folio);
3196 if (!folio_test_uptodate(folio)) {
3198 unsigned offset = pos & (PAGE_SIZE - 1);
3202 /* this is probably better than calling cifs_partialpagewrite()
3203 directly, since here the file handle is already known and we
3204 might as well use it */
3205 /* BB check if anything else missing out of ppw
3206 such as updating last write time */
3207 page_data = kmap(page);
3208 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3209 /* if (rc < 0) should we set writebehind rc? */
3216 set_page_dirty(page);
3220 spin_lock(&inode->i_lock);
3221 if (pos > inode->i_size) {
3222 loff_t additional_blocks = (512 - 1 + copied) >> 9;
3224 i_size_write(inode, pos);
3226 * Estimate new allocation size based on the amount written.
3227 * This will be updated from server on close (and on queryinfo)
3229 inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3230 inode->i_blocks + additional_blocks);
3232 spin_unlock(&inode->i_lock);
3237 /* Indication to update ctime and mtime as close is deferred */
3238 set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3243 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3248 struct cifs_tcon *tcon;
3249 struct TCP_Server_Info *server;
3250 struct cifsFileInfo *smbfile = file->private_data;
3251 struct inode *inode = file_inode(file);
3252 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3254 rc = file_write_and_wait_range(file, start, end);
3256 trace_cifs_fsync_err(inode->i_ino, rc);
3262 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3265 if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3266 rc = cifs_zap_mapping(inode);
3268 cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3269 rc = 0; /* don't care about it in fsync */
3273 tcon = tlink_tcon(smbfile->tlink);
3274 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3275 server = tcon->ses->server;
3276 if (server->ops->flush == NULL) {
3278 goto strict_fsync_exit;
3281 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3282 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3284 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3285 cifsFileInfo_put(smbfile);
3287 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3289 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3297 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3301 struct cifs_tcon *tcon;
3302 struct TCP_Server_Info *server;
3303 struct cifsFileInfo *smbfile = file->private_data;
3304 struct inode *inode = file_inode(file);
3305 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3307 rc = file_write_and_wait_range(file, start, end);
3309 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3315 cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3318 tcon = tlink_tcon(smbfile->tlink);
3319 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3320 server = tcon->ses->server;
3321 if (server->ops->flush == NULL) {
3326 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3327 smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3329 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3330 cifsFileInfo_put(smbfile);
3332 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3334 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3343 * As the file closes, flush all cached write data for this inode,
3344 * checking for write-behind errors.
3346 int cifs_flush(struct file *file, fl_owner_t id)
3348 struct inode *inode = file_inode(file);
3351 if (file->f_mode & FMODE_WRITE)
3352 rc = filemap_write_and_wait(inode->i_mapping);
3354 cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3356 /* get more nuanced writeback errors */
3357 rc = filemap_check_wb_err(file->f_mapping, 0);
3358 trace_cifs_flush_err(inode->i_ino, rc);
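/*
 * Like cifs_writedata_release(), but also drops the reference that this
 * write holds on its AIO context.
 */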
3364 cifs_uncached_writedata_release(struct kref *refcount)
3366 struct cifs_writedata *wdata = container_of(refcount,
3367 struct cifs_writedata, refcount);
3369 kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3370 cifs_writedata_release(refcount);
3373 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3376 cifs_uncached_writev_complete(struct work_struct *work)
3378 struct cifs_writedata *wdata = container_of(work,
3379 struct cifs_writedata, work);
3380 struct inode *inode = d_inode(wdata->cfile->dentry);
3381 struct cifsInodeInfo *cifsi = CIFS_I(inode);
3383 spin_lock(&inode->i_lock);
3384 cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3385 if (cifsi->netfs.remote_i_size > inode->i_size)
3386 i_size_write(inode, cifsi->netfs.remote_i_size);
3387 spin_unlock(&inode->i_lock);
3389 complete(&wdata->done);
3390 collect_uncached_write_data(wdata->ctx);
3391 /* the below call can possibly free the last ref to aio ctx */
3392 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3396 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3397 struct cifs_aio_ctx *ctx)
3400 struct cifs_credits credits;
3402 struct TCP_Server_Info *server = wdata->server;
3405 if (wdata->cfile->invalidHandle) {
3406 rc = cifs_reopen_file(wdata->cfile, false);
3415 * Wait for credits to resend this wdata.
3416 * Note: we are attempting to resend the whole wdata, not in segments.
3420 rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3425 if (wsize < wdata->bytes) {
3426 add_credits_and_wake_if(server, &credits, 0);
3429 } while (wsize < wdata->bytes);
3430 wdata->credits = credits;
3432 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3435 if (wdata->cfile->invalidHandle)
3438 wdata->replay = true;
3439 #ifdef CONFIG_CIFS_SMB_DIRECT
3441 wdata->mr->need_invalidate = true;
3442 smbd_deregister_mr(wdata->mr);
3446 rc = server->ops->async_writev(wdata,
3447 cifs_uncached_writedata_release);
3451 /* If the write was successfully sent, we are done */
3453 list_add_tail(&wdata->list, wdata_list);
3457 /* Roll back credits and retry if needed */
3458 add_credits_and_wake_if(server, &wdata->credits, 0);
3459 } while (rc == -EAGAIN);
3462 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3467 * Select span of a bvec iterator we're going to use. Limit it by both maximum
3468 * size and maximum number of segments.
3470 static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3471 size_t max_segs, unsigned int *_nsegs)
3473 const struct bio_vec *bvecs = iter->bvec;
3474 unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3475 size_t len, span = 0, n = iter->count;
3476 size_t skip = iter->iov_offset;
3478 if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3481 while (n && ix < nbv && skip) {
3482 len = bvecs[ix].bv_len;
3490 while (n && ix < nbv) {
3491 len = min3(n, bvecs[ix].bv_len - skip, max_size);
3496 if (max_size == 0 || nsegs >= max_segs)
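/*
 * Slice @len bytes of @from into chunks limited by the negotiated wsize
 * and the segment count, obtain credits for each chunk, and issue each
 * as an asynchronous write. Every wdata sent is queued on @wdata_list
 * for collect_uncached_write_data() to reap.
 */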
3507 cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3508 struct cifsFileInfo *open_file,
3509 struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3510 struct cifs_aio_ctx *ctx)
3513 size_t cur_len, max_len;
3514 struct cifs_writedata *wdata;
3516 struct TCP_Server_Info *server;
3517 unsigned int xid, max_segs = INT_MAX;
3519 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3520 pid = open_file->pid;
3522 pid = current->tgid;
3524 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3527 #ifdef CONFIG_CIFS_SMB_DIRECT
3528 if (server->smbd_conn)
3529 max_segs = server->smbd_conn->max_frmr_depth;
3533 struct cifs_credits credits_on_stack;
3534 struct cifs_credits *credits = &credits_on_stack;
3535 unsigned int wsize, nsegs = 0;
3537 if (signal_pending(current)) {
3542 if (open_file->invalidHandle) {
3543 rc = cifs_reopen_file(open_file, false);
3550 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3555 max_len = min_t(const size_t, len, wsize);
3558 add_credits_and_wake_if(server, credits, 0);
3562 cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3563 cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3564 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3567 add_credits_and_wake_if(server, credits, 0);
3571 wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3574 add_credits_and_wake_if(server, credits, 0);
3578 wdata->sync_mode = WB_SYNC_ALL;
3579 wdata->offset = (__u64)fpos;
3580 wdata->cfile = cifsFileInfo_get(open_file);
3581 wdata->server = server;
3583 wdata->bytes = cur_len;
3584 wdata->credits = credits_on_stack;
3585 wdata->iter = *from;
3587 kref_get(&ctx->refcount);
3589 iov_iter_truncate(&wdata->iter, cur_len);
3591 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3594 if (wdata->cfile->invalidHandle)
3597 rc = server->ops->async_writev(wdata,
3598 cifs_uncached_writedata_release);
3602 add_credits_and_wake_if(server, &wdata->credits, 0);
3603 kref_put(&wdata->refcount,
3604 cifs_uncached_writedata_release);
3610 list_add_tail(&wdata->list, wdata_list);
3611 iov_iter_advance(from, cur_len);
3620 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3622 struct cifs_writedata *wdata, *tmp;
3623 struct cifs_tcon *tcon;
3624 struct cifs_sb_info *cifs_sb;
3625 struct dentry *dentry = ctx->cfile->dentry;
3628 tcon = tlink_tcon(ctx->cfile->tlink);
3629 cifs_sb = CIFS_SB(dentry->d_sb);
3631 mutex_lock(&ctx->aio_mutex);
3633 if (list_empty(&ctx->list)) {
3634 mutex_unlock(&ctx->aio_mutex);
3640 * Wait for and collect replies for any successful sends in order of
3641 * increasing offset. Once an error is hit, then return without waiting
3642 * for any more replies.
3645 list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3647 if (!try_wait_for_completion(&wdata->done)) {
3648 mutex_unlock(&ctx->aio_mutex);
3655 ctx->total_len += wdata->bytes;
3657 /* resend call if it's a retryable error */
3658 if (rc == -EAGAIN) {
3659 struct list_head tmp_list;
3660 struct iov_iter tmp_from = ctx->iter;
3662 INIT_LIST_HEAD(&tmp_list);
3663 list_del_init(&wdata->list);
3666 rc = cifs_resend_wdata(
3667 wdata, &tmp_list, ctx);
3669 iov_iter_advance(&tmp_from,
3670 wdata->offset - ctx->pos);
3672 rc = cifs_write_from_iter(wdata->offset,
3673 wdata->bytes, &tmp_from,
3674 ctx->cfile, cifs_sb, &tmp_list,
3677 kref_put(&wdata->refcount,
3678 cifs_uncached_writedata_release);
3681 list_splice(&tmp_list, &ctx->list);
3685 list_del_init(&wdata->list);
3686 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3689 cifs_stats_bytes_written(tcon, ctx->total_len);
3690 set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3692 ctx->rc = (rc == 0) ? ctx->total_len : rc;
3694 mutex_unlock(&ctx->aio_mutex);
3696 if (ctx->iocb && ctx->iocb->ki_complete)
3697 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3699 complete(&ctx->done);
3702 static ssize_t __cifs_writev(
3703 struct kiocb *iocb, struct iov_iter *from, bool direct)
3705 struct file *file = iocb->ki_filp;
3706 ssize_t total_written = 0;
3707 struct cifsFileInfo *cfile;
3708 struct cifs_tcon *tcon;
3709 struct cifs_sb_info *cifs_sb;
3710 struct cifs_aio_ctx *ctx;
3713 rc = generic_write_checks(iocb, from);
3717 cifs_sb = CIFS_FILE_SB(file);
3718 cfile = file->private_data;
3719 tcon = tlink_tcon(cfile->tlink);
3721 if (!tcon->ses->server->ops->async_writev)
3724 ctx = cifs_aio_ctx_alloc();
3728 ctx->cfile = cifsFileInfo_get(cfile);
3730 if (!is_sync_kiocb(iocb))
3733 ctx->pos = iocb->ki_pos;
3734 ctx->direct_io = direct;
3735 ctx->nr_pinned_pages = 0;
3737 if (user_backed_iter(from)) {
3739 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3740 * they contain references to the calling process's virtual
3741 * memory layout which won't be available in an async worker
3742 * thread. This also takes a pin on every folio involved.
3744 rc = netfs_extract_user_iter(from, iov_iter_count(from),
3747 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3751 ctx->nr_pinned_pages = rc;
3752 ctx->bv = (void *)ctx->iter.bvec;
3753 ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3754 } else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3755 !is_sync_kiocb(iocb)) {
3757 * If the op is asynchronous, we need to copy the list attached
3758 * to a BVEC/KVEC-type iterator, but we assume that the storage
3759 * will be pinned by the caller; in any case, we may or may not
3760 * be able to pin the pages, so we don't try.
3762 ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3764 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3769 * Otherwise, we just pass the iterator down as-is and rely on
3770 * the caller to make sure the pages referred to by the
3771 * iterator don't evaporate.
3776 ctx->len = iov_iter_count(&ctx->iter);
3778 /* grab a lock here since the write response handlers can access ctx */
3779 mutex_lock(&ctx->aio_mutex);
3781 rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3782 cfile, cifs_sb, &ctx->list, ctx);
3785 * If at least one write was successfully sent, then discard any rc
3786 * value from the later writes. If the other write succeeds, then
3787 * we'll end up returning whatever was written. If it fails, then
3788 * we'll get a new rc value from that.
3790 if (!list_empty(&ctx->list))
3793 mutex_unlock(&ctx->aio_mutex);
3796 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3800 if (!is_sync_kiocb(iocb)) {
3801 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3802 return -EIOCBQUEUED;
3805 rc = wait_for_completion_killable(&ctx->done);
3807 mutex_lock(&ctx->aio_mutex);
3808 ctx->rc = rc = -EINTR;
3809 total_written = ctx->total_len;
3810 mutex_unlock(&ctx->aio_mutex);
3813 total_written = ctx->total_len;
3816 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3818 if (unlikely(!total_written))
3821 iocb->ki_pos += total_written;
3822 return total_written;
3825 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3827 struct file *file = iocb->ki_filp;
3829 cifs_revalidate_mapping(file->f_inode);
3830 return __cifs_writev(iocb, from, true);
3833 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3835 return __cifs_writev(iocb, from, false);
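/*
 * Write through the pagecache for a file on which we hold an oplock
 * that permits caching writes. The write is refused if a conflicting
 * mandatory brlock covers the range.
 */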
3839 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3841 struct file *file = iocb->ki_filp;
3842 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3843 struct inode *inode = file->f_mapping->host;
3844 struct cifsInodeInfo *cinode = CIFS_I(inode);
3845 struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3850 * We need to hold the sem to be sure nobody modifies the lock list
3851 * with a brlock that prevents writing.
3853 down_read(&cinode->lock_sem);
3855 rc = generic_write_checks(iocb, from);
3859 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3860 server->vals->exclusive_lock_type, 0,
3861 NULL, CIFS_WRITE_OP))
3862 rc = __generic_file_write_iter(iocb, from);
3866 up_read(&cinode->lock_sem);
3867 inode_unlock(inode);
3870 rc = generic_write_sync(iocb, rc);
3875 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3877 struct inode *inode = file_inode(iocb->ki_filp);
3878 struct cifsInodeInfo *cinode = CIFS_I(inode);
3879 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3880 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3881 iocb->ki_filp->private_data;
3882 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3885 written = cifs_get_writer(cinode);
3889 if (CIFS_CACHE_WRITE(cinode)) {
3890 if (cap_unix(tcon->ses) &&
3891 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3892 && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3893 written = generic_file_write_iter(iocb, from);
3896 written = cifs_writev(iocb, from);
3900 * For non-oplocked files in strict cache mode we need to write the data
3901 * to the server exactly from pos to pos+len-1 rather than flush all
3902 * affected pages, because doing so may cause an error with mandatory locks
3903 * on these pages but not on the region from pos to pos+len-1.
3905 written = cifs_user_writev(iocb, from);
3906 if (CIFS_CACHE_READ(cinode)) {
3908 * We have read level caching and we have just sent a write
3909 * request to the server thus making data in the cache stale.
3910 * Zap the cache and set oplock/lease level to NONE to avoid
3911 * reading stale data from the cache. All subsequent read
3912 * operations will read new data from the server.
3914 cifs_zap_mapping(inode);
3915 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3920 cifs_put_writer(cinode);
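/* Allocate a read request descriptor and set up its completion work. */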
3924 static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3926 struct cifs_readdata *rdata;
3928 rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3930 kref_init(&rdata->refcount);
3931 INIT_LIST_HEAD(&rdata->list);
3932 init_completion(&rdata->done);
3933 INIT_WORK(&rdata->work, complete);
3940 cifs_readdata_release(struct kref *refcount)
3942 struct cifs_readdata *rdata = container_of(refcount,
3943 struct cifs_readdata, refcount);
3946 kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3947 #ifdef CONFIG_CIFS_SMB_DIRECT
3949 smbd_deregister_mr(rdata->mr);
3954 cifsFileInfo_put(rdata->cfile);
3959 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3962 cifs_uncached_readv_complete(struct work_struct *work)
3964 struct cifs_readdata *rdata = container_of(work,
3965 struct cifs_readdata, work);
3967 complete(&rdata->done);
3968 collect_uncached_read_data(rdata->ctx);
3969 /* the below call can possibly free the last ref to aio ctx */
3970 kref_put(&rdata->refcount, cifs_readdata_release);
3973 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3974 struct list_head *rdata_list,
3975 struct cifs_aio_ctx *ctx)
3978 struct cifs_credits credits;
3980 struct TCP_Server_Info *server;
3982 /* XXX: should we pick a new channel here? */
3983 server = rdata->server;
3986 if (rdata->cfile->invalidHandle) {
3987 rc = cifs_reopen_file(rdata->cfile, true);
3995 * Wait for credits to resend this rdata.
3996 * Note: we are attempting to resend the whole rdata, not in segments.
4000 rc = server->ops->wait_mtu_credits(server, rdata->bytes,
4006 if (rsize < rdata->bytes) {
4007 add_credits_and_wake_if(server, &credits, 0);
4010 } while (rsize < rdata->bytes);
4011 rdata->credits = credits;
4013 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4015 if (rdata->cfile->invalidHandle)
4018 #ifdef CONFIG_CIFS_SMB_DIRECT
4020 rdata->mr->need_invalidate = true;
4021 smbd_deregister_mr(rdata->mr);
4025 rc = server->ops->async_readv(rdata);
4029 /* If the read was successfully sent, we are done */
4031 /* Add to aio pending list */
4032 list_add_tail(&rdata->list, rdata_list);
4036 /* Roll back credits and retry if needed */
4037 add_credits_and_wake_if(server, &rdata->credits, 0);
4038 } while (rc == -EAGAIN);
4041 kref_put(&rdata->refcount, cifs_readdata_release);
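/*
 * Issue up to @len bytes of asynchronous reads at @fpos, chunked by the
 * negotiated rsize and the segment limit, queueing each rdata on
 * @rdata_list for collect_uncached_read_data() to reap.
 */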
4046 cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4047 struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4048 struct cifs_aio_ctx *ctx)
4050 struct cifs_readdata *rdata;
4051 unsigned int rsize, nsegs, max_segs = INT_MAX;
4052 struct cifs_credits credits_on_stack;
4053 struct cifs_credits *credits = &credits_on_stack;
4054 size_t cur_len, max_len;
4057 struct TCP_Server_Info *server;
4059 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4061 #ifdef CONFIG_CIFS_SMB_DIRECT
4062 if (server->smbd_conn)
4063 max_segs = server->smbd_conn->max_frmr_depth;
4066 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067 pid = open_file->pid;
4069 pid = current->tgid;
4072 if (open_file->invalidHandle) {
4073 rc = cifs_reopen_file(open_file, true);
4080 if (cifs_sb->ctx->rsize == 0)
4081 cifs_sb->ctx->rsize =
4082 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4085 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4090 max_len = min_t(size_t, len, rsize);
4092 cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4094 cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4095 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4098 add_credits_and_wake_if(server, credits, 0);
4102 rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4104 add_credits_and_wake_if(server, credits, 0);
4109 rdata->server = server;
4110 rdata->cfile = cifsFileInfo_get(open_file);
4111 rdata->offset = fpos;
4112 rdata->bytes = cur_len;
4114 rdata->credits = credits_on_stack;
4116 kref_get(&ctx->refcount);
4118 rdata->iter = ctx->iter;
4119 iov_iter_truncate(&rdata->iter, cur_len);
4121 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4124 if (rdata->cfile->invalidHandle)
4127 rc = server->ops->async_readv(rdata);
4131 add_credits_and_wake_if(server, &rdata->credits, 0);
4132 kref_put(&rdata->refcount, cifs_readdata_release);
4138 list_add_tail(&rdata->list, rdata_list);
4139 iov_iter_advance(&ctx->iter, cur_len);
4148 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4150 struct cifs_readdata *rdata, *tmp;
4151 struct cifs_sb_info *cifs_sb;
4154 cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4156 mutex_lock(&ctx->aio_mutex);
4158 if (list_empty(&ctx->list)) {
4159 mutex_unlock(&ctx->aio_mutex);
4164 /* the loop below should proceed in the order of increasing offsets */
4166 list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4168 if (!try_wait_for_completion(&rdata->done)) {
4169 mutex_unlock(&ctx->aio_mutex);
4173 if (rdata->result == -EAGAIN) {
4174 /* resend call if it's a retryable error */
4175 struct list_head tmp_list;
4176 unsigned int got_bytes = rdata->got_bytes;
4178 list_del_init(&rdata->list);
4179 INIT_LIST_HEAD(&tmp_list);
4181 if (ctx->direct_io) {
4183 * Re-use rdata as this is a direct I/O
4186 rc = cifs_resend_rdata(
4190 rc = cifs_send_async_read(
4191 rdata->offset + got_bytes,
4192 rdata->bytes - got_bytes,
4193 rdata->cfile, cifs_sb,
4196 kref_put(&rdata->refcount,
4197 cifs_readdata_release);
4200 list_splice(&tmp_list, &ctx->list);
4203 } else if (rdata->result)
4206 /* if there was a short read -- discard anything left */
4207 if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4210 ctx->total_len += rdata->got_bytes;
4212 list_del_init(&rdata->list);
4213 kref_put(&rdata->refcount, cifs_readdata_release);
4216 /* mask nodata case */
4220 ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4222 mutex_unlock(&ctx->aio_mutex);
4224 if (ctx->iocb && ctx->iocb->ki_complete)
4225 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4227 complete(&ctx->done);
4230 static ssize_t __cifs_readv(
4231 struct kiocb *iocb, struct iov_iter *to, bool direct)
4234 struct file *file = iocb->ki_filp;
4235 struct cifs_sb_info *cifs_sb;
4236 struct cifsFileInfo *cfile;
4237 struct cifs_tcon *tcon;
4238 ssize_t rc, total_read = 0;
4239 loff_t offset = iocb->ki_pos;
4240 struct cifs_aio_ctx *ctx;
4242 len = iov_iter_count(to);
4246 cifs_sb = CIFS_FILE_SB(file);
4247 cfile = file->private_data;
4248 tcon = tlink_tcon(cfile->tlink);
4250 if (!tcon->ses->server->ops->async_readv)
4253 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4254 cifs_dbg(FYI, "attempting read on write only file instance\n");
4256 ctx = cifs_aio_ctx_alloc();
4261 ctx->direct_io = direct;
4263 ctx->cfile = cifsFileInfo_get(cfile);
4264 ctx->nr_pinned_pages = 0;
4266 if (!is_sync_kiocb(iocb))
4269 if (user_backed_iter(to)) {
4271 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4272 * they contain references to the calling process's virtual
4273 * memory layout which won't be available in an async worker
4274 * thread. This also takes a pin on every folio involved.
4276 rc = netfs_extract_user_iter(to, iov_iter_count(to),
4279 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4283 ctx->nr_pinned_pages = rc;
4284 ctx->bv = (void *)ctx->iter.bvec;
4285 ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4286 ctx->should_dirty = true;
4287 } else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4288 !is_sync_kiocb(iocb)) {
4290 * If the op is asynchronous, we need to copy the list attached
4291 * to a BVEC/KVEC-type iterator, but we assume that the storage
4292 * will be retained by the caller; in any case, we may or may
4293 * not be able to pin the pages, so we don't try.
4295 ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4297 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4302 * Otherwise, we just pass the iterator down as-is and rely on
4303 * the caller to make sure the pages referred to by the
4304 * iterator don't evaporate.
4310 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4311 offset, offset + len - 1);
4313 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4318 /* grab a lock here since the read response handlers can access ctx */
4319 mutex_lock(&ctx->aio_mutex);
4321 rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4323 /* if at least one read request was successfully sent, then reset rc */
4324 if (!list_empty(&ctx->list))
4327 mutex_unlock(&ctx->aio_mutex);
4330 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4334 if (!is_sync_kiocb(iocb)) {
4335 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336 return -EIOCBQUEUED;
4339 rc = wait_for_completion_killable(&ctx->done);
4341 mutex_lock(&ctx->aio_mutex);
4342 ctx->rc = rc = -EINTR;
4343 total_read = ctx->total_len;
4344 mutex_unlock(&ctx->aio_mutex);
4347 total_read = ctx->total_len;
4350 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4353 iocb->ki_pos += total_read;
4359 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4361 return __cifs_readv(iocb, to, true);
4364 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4366 return __cifs_readv(iocb, to, false);
4370 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4372 struct inode *inode = file_inode(iocb->ki_filp);
4373 struct cifsInodeInfo *cinode = CIFS_I(inode);
4374 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4375 struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4376 iocb->ki_filp->private_data;
4377 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4381 * In strict cache mode we need to read from the server all the time
4382 * if we don't have level II oplock because the server can delay mtime
4383 * change - so we can't make a decision about inode invalidating.
4384 * And we can also fail with pagereading if there are mandatory locks
4385 * on pages affected by this read but not on the region from pos to
4388 if (!CIFS_CACHE_READ(cinode))
4389 return cifs_user_readv(iocb, to);
4391 if (cap_unix(tcon->ses) &&
4392 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4393 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4394 return generic_file_read_iter(iocb, to);
4397 * We need to hold the sem to be sure nobody modifies the lock list
4398 * with a brlock that prevents reading.
4400 down_read(&cinode->lock_sem);
4401 if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4402 tcon->ses->server->vals->shared_lock_type,
4403 0, NULL, CIFS_READ_OP))
4404 rc = generic_file_read_iter(iocb, to);
4405 up_read(&cinode->lock_sem);
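/*
 * Synchronous read helper: pull @read_size bytes at *offset into
 * @read_data in rsize-limited chunks, advancing *offset as data
 * arrives. Returns the number of bytes read or a negative error.
 */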
4410 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4413 unsigned int bytes_read = 0;
4414 unsigned int total_read;
4415 unsigned int current_read_size;
4417 struct cifs_sb_info *cifs_sb;
4418 struct cifs_tcon *tcon;
4419 struct TCP_Server_Info *server;
4422 struct cifsFileInfo *open_file;
4423 struct cifs_io_parms io_parms = {0};
4424 int buf_type = CIFS_NO_BUFFER;
4428 cifs_sb = CIFS_FILE_SB(file);
4430 /* FIXME: set up handlers for larger reads and/or convert to async */
4431 rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4433 if (file->private_data == NULL) {
4438 open_file = file->private_data;
4439 tcon = tlink_tcon(open_file->tlink);
4440 server = cifs_pick_channel(tcon->ses);
4442 if (!server->ops->sync_read) {
4447 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4448 pid = open_file->pid;
4450 pid = current->tgid;
4452 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4453 cifs_dbg(FYI, "attempting read on write only file instance\n");
4455 for (total_read = 0, cur_offset = read_data; read_size > total_read;
4456 total_read += bytes_read, cur_offset += bytes_read) {
4458 current_read_size = min_t(uint, read_size - total_read,
4461 * For Windows ME and 9x we do not want to request more
4462 * than it negotiated, since it will refuse the read then.
4465 if (!(tcon->ses->capabilities &
4466 tcon->ses->server->vals->cap_large_files)) {
4467 current_read_size = min_t(uint,
4468 current_read_size, CIFSMaxBufSize);
4470 if (open_file->invalidHandle) {
4471 rc = cifs_reopen_file(open_file, true);
4476 io_parms.tcon = tcon;
4477 io_parms.offset = *offset;
4478 io_parms.length = current_read_size;
4479 io_parms.server = server;
4480 rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4481 &bytes_read, &cur_offset,
4483 } while (rc == -EAGAIN);
4485 if (rc || (bytes_read == 0)) {
4493 cifs_stats_bytes_read(tcon, total_read);
4494 *offset += bytes_read;
4502 * If the page is mmap'ed into a process' page tables, then we need to make
4503 * sure that it doesn't change while being written back.
4505 static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4507 struct folio *folio = page_folio(vmf->page);
4509 /* Wait for the folio to be written to the cache before we allow it to
4510 * be modified. We then assume the entire folio will need writing back.
4512 #ifdef CONFIG_CIFS_FSCACHE
4513 if (folio_test_fscache(folio) &&
4514 folio_wait_fscache_killable(folio) < 0)
4515 return VM_FAULT_RETRY;
4518 folio_wait_writeback(folio);
4520 if (folio_lock_killable(folio) < 0)
4521 return VM_FAULT_RETRY;
4522 return VM_FAULT_LOCKED;
4525 static const struct vm_operations_struct cifs_file_vm_ops = {
4526 .fault = filemap_fault,
4527 .map_pages = filemap_map_pages,
4528 .page_mkwrite = cifs_page_mkwrite,
4531 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4534 struct inode *inode = file_inode(file);
4538 if (!CIFS_CACHE_READ(CIFS_I(inode)))
4539 rc = cifs_zap_mapping(inode);
4541 rc = generic_file_mmap(file, vma);
4543 vma->vm_ops = &cifs_file_vm_ops;
4549 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4555 rc = cifs_revalidate_file(file);
4557 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4560 rc = generic_file_mmap(file, vma);
4562 vma->vm_ops = &cifs_file_vm_ops;
4569 * Unlock a bunch of folios in the pagecache.
4571 static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4573 struct folio *folio;
4574 XA_STATE(xas, &mapping->i_pages, first);
4577 xas_for_each(&xas, folio, last) {
4578 folio_unlock(folio);
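/*
 * Completion of a readahead I/O: write the data through to fscache if
 * caching, zero any unread tail of the buffer, then mark the folios
 * uptodate (on success) and unlock them.
 */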
4583 static void cifs_readahead_complete(struct work_struct *work)
4585 struct cifs_readdata *rdata = container_of(work,
4586 struct cifs_readdata, work);
4587 struct folio *folio;
4589 bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4591 XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4594 cifs_readahead_to_fscache(rdata->mapping->host,
4595 rdata->offset, rdata->bytes);
4597 if (iov_iter_count(&rdata->iter) > 0)
4598 iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4600 last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4603 xas_for_each(&xas, folio, last) {
4605 flush_dcache_folio(folio);
4606 folio_mark_uptodate(folio);
4608 folio_unlock(folio);
4612 kref_put(&rdata->refcount, cifs_readdata_release);
4615 static void cifs_readahead(struct readahead_control *ractl)
4617 struct cifsFileInfo *open_file = ractl->file->private_data;
4618 struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4619 struct TCP_Server_Info *server;
4620 unsigned int xid, nr_pages, cache_nr_pages = 0;
4621 unsigned int ra_pages;
4622 pgoff_t next_cached = ULONG_MAX, ra_index;
4623 bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4624 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4625 bool check_cache = caching;
4629 /* Note that readahead_count() lags behind our dequeuing of pages from
4630 * the ractl, so we have to keep track for ourselves.
4632 ra_pages = readahead_count(ractl);
4633 ra_index = readahead_index(ractl);
4637 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4638 pid = open_file->pid;
4640 pid = current->tgid;
4642 server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4644 cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4645 __func__, ractl->file, ractl->mapping, ra_pages);
4648 * Chop the readahead request up into rsize-sized read requests.
4650 while ((nr_pages = ra_pages)) {
4651 unsigned int i, rsize;
4652 struct cifs_readdata *rdata;
4653 struct cifs_credits credits_on_stack;
4654 struct cifs_credits *credits = &credits_on_stack;
4655 struct folio *folio;
4659 * Find out if we have anything cached in the range of
4660 * interest, and if so, where the next chunk of cached data is.
4664 rc = cifs_fscache_query_occupancy(
4665 ractl->mapping->host, ra_index, nr_pages,
4666 &next_cached, &cache_nr_pages);
4669 check_cache = false;
4672 if (ra_index == next_cached) {
4674 * TODO: Send a whole batch of pages to be read by the cache.
4677 folio = readahead_folio(ractl);
4678 fsize = folio_nr_pages(folio);
4681 if (cifs_readpage_from_fscache(ractl->mapping->host,
4682 &folio->page) < 0) {
4684 * TODO: Deal with cache read failure
4685 * here, but for the moment, delegate failure to the VM.
4690 folio_unlock(folio);
4691 next_cached += fsize;
4692 cache_nr_pages -= fsize;
4693 if (cache_nr_pages == 0)
4699 if (open_file->invalidHandle) {
4700 rc = cifs_reopen_file(open_file, true);
4708 if (cifs_sb->ctx->rsize == 0)
4709 cifs_sb->ctx->rsize =
4710 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4713 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4717 nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4718 if (next_cached != ULONG_MAX)
4719 nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4722 * Give up immediately if rsize is too small to read an entire
4723 * page. The VFS will fall back to readpage. We should never
4724 * reach this point however since we set ra_pages to 0 when the
4725 * rsize is smaller than a cache page.
4727 if (unlikely(!nr_pages)) {
4728 add_credits_and_wake_if(server, credits, 0);
4732 rdata = cifs_readdata_alloc(cifs_readahead_complete);
4734 /* best to give up if we're out of mem */
4735 add_credits_and_wake_if(server, credits, 0);
4739 rdata->offset = ra_index * PAGE_SIZE;
4740 rdata->bytes = nr_pages * PAGE_SIZE;
4741 rdata->cfile = cifsFileInfo_get(open_file);
4742 rdata->server = server;
4743 rdata->mapping = ractl->mapping;
4745 rdata->credits = credits_on_stack;
4747 for (i = 0; i < nr_pages; i++) {
4748 if (!readahead_folio(ractl))
4751 ra_pages -= nr_pages;
4752 ra_index += nr_pages;
4754 iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4755 rdata->offset, rdata->bytes);
4757 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4759 if (rdata->cfile->invalidHandle)
4762 rc = server->ops->async_readv(rdata);
4766 add_credits_and_wake_if(server, &rdata->credits, 0);
4767 cifs_unlock_folios(rdata->mapping,
4768 rdata->offset / PAGE_SIZE,
4769 (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4770 /* Fall back to readpage in error/reconnect cases */
4771 kref_put(&rdata->refcount, cifs_readdata_release);
4775 kref_put(&rdata->refcount, cifs_readdata_release);
4782 * cifs_readpage_worker must be called with the page pinned
4784 static int cifs_readpage_worker(struct file *file, struct page *page,
4787 struct inode *inode = file_inode(file);
4788 struct timespec64 atime, mtime;
4792 /* Is the page cached? */
4793 rc = cifs_readpage_from_fscache(inode, page);
4797 read_data = kmap(page);
4798 /* for reads over a certain size could initiate async read ahead */
4800 rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4805 cifs_dbg(FYI, "Bytes read %d\n", rc);
4807 /* we do not want atime to be less than mtime, it broke some apps */
4808 atime = inode_set_atime_to_ts(inode, current_time(inode));
4809 mtime = inode_get_mtime(inode);
4810 if (timespec64_compare(&atime, &mtime) < 0)
4811 inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4814 memset(read_data + rc, 0, PAGE_SIZE - rc);
4816 flush_dcache_page(page);
4817 SetPageUptodate(page);
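/* ->read_folio() entry point: synchronously fill a single page. */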
static int cifs_read_folio(struct file *file, struct folio *folio)
{
	struct page *page = &folio->page;
	loff_t offset = page_file_offset(page);
	int rc = -EACCES;
	unsigned int xid;

	xid = get_xid();

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
		 page, (int)offset, (int)offset);

	rc = cifs_readpage_worker(file, page, &offset);

	free_xid(xid);
	return rc;
}
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
	struct cifsFileInfo *open_file;

	spin_lock(&cifs_inode->open_file_lock);
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			spin_unlock(&cifs_inode->open_file_lock);
			return 1;
		}
	}
	spin_unlock(&cifs_inode->open_file_lock);
	return 0;
}
/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing a refresh of the inode only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
			    bool from_readdir)
{
	if (is_inode_writable(cifsInode) ||
	    ((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
		/* This inode is open for write at least once */
		struct cifs_sb_info *cifs_sb;

		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
			/* since there is no page cache to corrupt on
			   directio we can change the size safely */
			return true;
		}

		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
			return true;

		return false;
	} else
		return true;
}
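/*
 * Hypothetical usage sketch, not part of this file: attribute-refresh
 * code would only trust a server-reported size when doing so cannot
 * stomp on locally cached writes.
 */
static inline void cifs_maybe_update_size(struct cifsInodeInfo *cinode,
					  __u64 server_eof)
{
	/* adopt the server's EOF only when the helper above says it is safe */
	if (is_size_safe_to_change(cinode, server_eof, false))
		i_size_write(&cinode->netfs.inode, server_eof);
}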
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len,
			    struct page **pagep, void **fsdata)
{
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one,
		   but how would we lock it to prevent a close of that handle
		   racing with this read? In any case this will be written
		   out by write_end, so it is fine */
	}
out:
	*pagep = page;
	return rc;
}
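/*
 * Sketch of the read-avoidance tests above (hypothetical helper, not used
 * here): a buffered write needs no read-modify-write when it covers the
 * whole page, when the page lies entirely past EOF, or when it starts at
 * the page head and extends to or beyond EOF.
 */
static inline bool cifs_write_needs_no_read(loff_t pos, unsigned int len,
					    loff_t page_start, loff_t i_size)
{
	loff_t offset = pos & (PAGE_SIZE - 1);

	return len == PAGE_SIZE || page_start >= i_size ||
	       (offset == 0 && pos + len >= i_size);
}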
static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
{
	if (folio_test_private(folio))
		return false;
	if (folio_test_fscache(folio)) {
		if (current_is_kswapd() || !(gfp & __GFP_FS))
			return false;
		folio_wait_fscache(folio);
	}
	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
	return true;
}
static void cifs_invalidate_folio(struct folio *folio, size_t offset,
				  size_t length)
{
	folio_wait_fscache(folio);
}
static int cifs_launder_folio(struct folio *folio)
{
	int rc = 0;
	loff_t range_start = folio_pos(folio);
	loff_t range_end = range_start + folio_size(folio);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};

	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);

	if (folio_clear_dirty_for_io(folio))
		rc = cifs_writepage_locked(&folio->page, &wbc);

	folio_wait_fscache(folio);
	return rc;
}
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct tcon_link *tlink;
	int rc = 0;
	bool purge_cache = false, oplock_break_cancelled;
	__u64 persistent_fid, volatile_fid;
	__u16 net_fid;

	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink))
		goto out;
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
	    cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * When an oplock break is received and there are no active file
	 * handles, only cached ones, schedule the deferred close immediately
	 * so that a new open will not reuse the cached handle.
	 */
	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
		cifs_close_deferred_file(cinode);

	persistent_fid = cfile->fid.persistent_fid;
	volatile_fid = cfile->fid.volatile_fid;
	net_fid = cfile->fid.netfid;
	oplock_break_cancelled = cfile->oplock_break_cancelled;

	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
	/*
	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
	 * an acknowledgment to be sent when the file has already been closed.
	 */
	spin_lock(&cinode->open_file_lock);
	/* check list empty since can race with kill_sb calling tree disconnect */
	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
		spin_unlock(&cinode->open_file_lock);
		rc = server->ops->oplock_response(tcon, persistent_fid,
						  volatile_fid, net_fid, cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	} else
		spin_unlock(&cinode->open_file_lock);

	cifs_put_tlink(tlink);
out:
	cifs_done_oplock_break(cinode);
}
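/*
 * Sketch of the acknowledgment decision above (hypothetical helper, not
 * used here): per MS-SMB2 3.2.5.19.1/3.2.5.19.2 and MS-CIFS 3.2.5.42, no
 * oplock break acknowledgment is needed once the break was cancelled or
 * the file has already been closed.
 */
static inline bool cifs_must_ack_oplock_break(bool cancelled,
					      const struct list_head *open_files)
{
	/* ack only if the break still stands and a handle remains open */
	return !cancelled && !list_empty(open_files);
}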
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests, so this method should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non-forcedirectio mounts
	 */
	return -EINVAL;
}
static int cifs_swap_activate(struct swap_info_struct *sis,
			      struct file *swap_file, sector_t *span)
{
	struct cifsFileInfo *cfile = swap_file->private_data;
	struct inode *inode = swap_file->f_mapping->host;
	unsigned long blocks;
	long long isize;

	cifs_dbg(FYI, "swap activate\n");

	if (!swap_file->f_mapping->a_ops->swap_rw)
		/* Cannot support swap */
		return -EINVAL;

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (blocks * 512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}
	*span = sis->pages;

	pr_warn_once("Swap support over SMB3 is experimental\n");

	/*
	 * TODO: consider adding ACL (or documenting how) to prevent other
	 * users (on this or other systems) from reading it
	 */

	/* TODO: add sk_set_memalloc(inet) or similar */

	if (cfile)
		cfile->swapfile = true;
	/*
	 * TODO: Since file already open, we can't open with DENY_ALL here
	 * but we could add call to grab a byte range lock to prevent others
	 * from reading or writing the file
	 */

	sis->flags |= SWP_FS_OPS;
	return add_swap_extent(sis, 0, sis->max, 0);
}
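/*
 * Sketch of the sparseness check above (hypothetical helper, not used
 * here), assuming i_blocks counts 512-byte units: a file with no holes
 * backs at least isize/512 blocks, so fewer blocks imply holes, which
 * swap cannot tolerate.
 */
static inline bool cifs_swapfile_has_holes(unsigned long blocks, loff_t isize)
{
	/* e.g. a 1 MiB file must have at least 2048 512-byte blocks */
	return (loff_t)blocks * 512 < isize;
}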
static void cifs_swap_deactivate(struct file *file)
{
	struct cifsFileInfo *cfile = file->private_data;

	cifs_dbg(FYI, "swap deactivate\n");

	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */

	if (cfile)
		cfile->swapfile = false;

	/* do we need to unpin (or unlock) the file */
}
const struct address_space_operations cifs_addr_ops = {
	.read_folio = cifs_read_folio,
	.readahead = cifs_readahead,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.dirty_folio = netfs_dirty_folio,
	.release_folio = cifs_release_folio,
	.direct_IO = cifs_direct_io,
	.invalidate_folio = cifs_invalidate_folio,
	.launder_folio = cifs_launder_folio,
	.migrate_folio = filemap_migrate_folio,
	/*
	 * TODO: investigate and if useful we could add an is_dirty_writeback
	 * helper if needed
	 */
	.swap_activate = cifs_swap_activate,
	.swap_deactivate = cifs_swap_deactivate,
};
/*
 * cifs_readahead requires the server to support a buffer large enough to
 * contain the header plus one complete page of data. Otherwise, we need
 * to leave cifs_readahead out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.read_folio = cifs_read_folio,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.dirty_folio = netfs_dirty_folio,
	.release_folio = cifs_release_folio,
	.invalidate_folio = cifs_invalidate_folio,
	.launder_folio = cifs_launder_folio,
	.migrate_folio = filemap_migrate_folio,
};