/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

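/*
 * Map the O_ACCMODE bits of the VFS open flags onto the NT-style
 * desired access mask requested from the server.
 */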
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
        /* GENERIC_ALL is too much permission to request; it can cause
           unnecessary access denied errors on create */
        /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

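/*
 * Translate VFS open flags into the SMB_O_* flags used by the POSIX
 * open call of the CIFS Unix Extensions.
 */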
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

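/*
 * Derive the NT create disposition from the O_CREAT/O_EXCL/O_TRUNC
 * combination; see the open flag mapping table in cifs_nt_open().
 */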
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new file as FILE_SUPERSEDE does
 *      (which uses the attributes / metadata passed in on the open
 *      call).
 *
 *  O_SYNC is a reasonable match to the CIFS writethrough flag
 *  and the read write flags match reasonably.  O_LARGEFILE
 *  is irrelevant because largefile support is always used
 *  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *  O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

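/*
 * Return true if any file handle open on this inode currently holds
 * byte-range (mandatory) locks.
 */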
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

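/*
 * Allocate and initialize the per-open cifsFileInfo, link it into the
 * per-inode and per-tcon open file lists, and attach it to @file.
 */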
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if the file instance is readable, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

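/* Take an additional reference on @cifs_file under its spinlock. */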
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * May wait for the oplock break handler to finish. See
 * _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock and
 * cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. When calling this function from the
 * oplock break handler, you must pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        spin_lock(&cifsi->open_file_lock);
        list_del(&cifs_file->flist);
        spin_unlock(&cifsi->open_file_lock);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on
                 * network i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set the mode, which we could not set earlier
                 * due to problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

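/*
 * Reopen a file handle that has been marked invalid, e.g. after a
 * reconnect. If @can_flush is set, flush dirty pages and refresh the
 * inode before byte-range locks are reacquired.
 */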
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here: various ops, including some that
         * already hold it, can end up causing writepage to be called, and if
         * the server was down we end up here with no way to tell whether the
         * caller already holds the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * ops->open and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout has expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

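/* Allocate and initialize a byte-range lock record for the given range. */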
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to send a request to the server, or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if no locks prevent us but we need to send a request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to send a request to the server, or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to send a request to the server;
 * 2) 1, if we need to send a request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

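/*
 * Send all cached byte-range locks on this file handle to the server,
 * batching as many LOCKING_ANDX ranges per request as maxBuf allows.
 */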
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

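/*
 * Derive a 32-bit lock owner id from the fl_owner pointer; mixing in
 * cifs_lock_secret avoids sending raw kernel pointer values on the wire.
 */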
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                /* advance to the next preallocated structure */
                el = el->next;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

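/*
 * Decode a struct file_lock into the SMB lock type and the lock/unlock
 * operation and wait flag used by the locking calls below.
 */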
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

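/*
 * Test for a conflicting lock: check locally cached state first, then
 * probe the server by briefly acquiring and releasing the range.
 */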
1393 static int
1394 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1395            bool wait_flag, bool posix_lck, unsigned int xid)
1396 {
1397         int rc = 0;
1398         __u64 length = 1 + flock->fl_end - flock->fl_start;
1399         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1400         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1401         struct TCP_Server_Info *server = tcon->ses->server;
1402         __u16 netfid = cfile->fid.netfid;
1403
1404         if (posix_lck) {
1405                 int posix_lock_type;
1406
1407                 rc = cifs_posix_lock_test(file, flock);
1408                 if (!rc)
1409                         return rc;
1410
1411                 if (type & server->vals->shared_lock_type)
1412                         posix_lock_type = CIFS_RDLCK;
1413                 else
1414                         posix_lock_type = CIFS_WRLCK;
1415                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1416                                       hash_lockowner(flock->fl_owner),
1417                                       flock->fl_start, length, flock,
1418                                       posix_lock_type, wait_flag);
1419                 return rc;
1420         }
1421
1422         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1423         if (!rc)
1424                 return rc;
1425
1426         /* BB we could chain these into one lock request BB */
1427         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1428                                     1, 0, false);
1429         if (rc == 0) {
1430                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1431                                             type, 0, 1, false);
1432                 flock->fl_type = F_UNLCK;
1433                 if (rc != 0)
1434                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1435                                  rc);
1436                 return 0;
1437         }
1438
1439         if (type & server->vals->shared_lock_type) {
1440                 flock->fl_type = F_WRLCK;
1441                 return 0;
1442         }
1443
1444         type &= ~server->vals->exclusive_lock_type;
1445
1446         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1447                                     type | server->vals->shared_lock_type,
1448                                     1, 0, false);
1449         if (rc == 0) {
1450                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1451                         type | server->vals->shared_lock_type, 0, 1, false);
1452                 flock->fl_type = F_RDLCK;
1453                 if (rc != 0)
1454                         cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1455                                  rc);
1456         } else
1457                 flock->fl_type = F_WRLCK;
1458
1459         return 0;
1460 }
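/*
 * What the probe above means to userspace (hypothetical sketch, not
 * built): F_GETLK lands in cifs_getlk(), which takes and immediately
 * drops a server-side lock. On success l_type is rewritten to F_UNLCK;
 * otherwise it is set to the lock type that would block the caller.
 */
#if 0
void probe_range(int fd)
{
	struct flock fl = {
		.l_type   = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start  = 0,
		.l_len    = 1,
	};

	if (fcntl(fd, F_GETLK, &fl) == 0 && fl.l_type == F_UNLCK)
		;	/* no conflicting lock was found on the range */
}
#endif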
1461
1462 void
1463 cifs_move_llist(struct list_head *source, struct list_head *dest)
1464 {
1465         struct list_head *li, *tmp;
1466         list_for_each_safe(li, tmp, source)
1467                 list_move(li, dest);
1468 }
1469
1470 void
1471 cifs_free_llist(struct list_head *llist)
1472 {
1473         struct cifsLockInfo *li, *tmp;
1474         list_for_each_entry_safe(li, tmp, llist, llist) {
1475                 cifs_del_lock_waiters(li);
1476                 list_del(&li->llist);
1477                 kfree(li);
1478         }
1479 }
1480
1481 int
1482 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1483                   unsigned int xid)
1484 {
1485         int rc = 0, stored_rc;
1486         static const int types[] = {
1487                 LOCKING_ANDX_LARGE_FILES,
1488                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1489         };
1490         unsigned int i;
1491         unsigned int max_num, num, max_buf;
1492         LOCKING_ANDX_RANGE *buf, *cur;
1493         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1494         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1495         struct cifsLockInfo *li, *tmp;
1496         __u64 length = 1 + flock->fl_end - flock->fl_start;
1497         struct list_head tmp_llist;
1498
1499         INIT_LIST_HEAD(&tmp_llist);
1500
1501         /*
1502          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1503          * and check it before using.
1504          */
1505         max_buf = tcon->ses->server->maxBuf;
1506         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1507                 return -EINVAL;
1508
1509         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1510                      PAGE_SIZE);
1511         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1512                         PAGE_SIZE);
1513         max_num = (max_buf - sizeof(struct smb_hdr)) /
1514                                                 sizeof(LOCKING_ANDX_RANGE);
1515         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1516         if (!buf)
1517                 return -ENOMEM;
1518
1519         down_write(&cinode->lock_sem);
1520         for (i = 0; i < 2; i++) {
1521                 cur = buf;
1522                 num = 0;
1523                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1524                         if (flock->fl_start > li->offset ||
1525                             (flock->fl_start + length) <
1526                             (li->offset + li->length))
1527                                 continue;
1528                         if (current->tgid != li->pid)
1529                                 continue;
1530                         if (types[i] != li->type)
1531                                 continue;
1532                         if (cinode->can_cache_brlcks) {
1533                                 /*
1534                                  * We can cache brlock requests - simply remove
1535                                  * a lock from the file's list.
1536                                  */
1537                                 list_del(&li->llist);
1538                                 cifs_del_lock_waiters(li);
1539                                 kfree(li);
1540                                 continue;
1541                         }
1542                         cur->Pid = cpu_to_le16(li->pid);
1543                         cur->LengthLow = cpu_to_le32((u32)li->length);
1544                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1545                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1546                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1547                         /*
1548                          * We need to save a lock here to let us add it again to
1549                          * the file's list if the unlock range request fails on
1550                          * the server.
1551                          */
1552                         list_move(&li->llist, &tmp_llist);
1553                         if (++num == max_num) {
1554                                 stored_rc = cifs_lockv(xid, tcon,
1555                                                        cfile->fid.netfid,
1556                                                        li->type, num, 0, buf);
1557                                 if (stored_rc) {
1558                                         /*
1559                                          * We failed on the unlock range
1560                                          * request - add all locks from the tmp
1561                                          * list to the head of the file's list.
1562                                          */
1563                                         cifs_move_llist(&tmp_llist,
1564                                                         &cfile->llist->locks);
1565                                         rc = stored_rc;
1566                                 } else
1567                                         /*
1568                                          * The unlock range request succeed -
1569                                          * The unlock range request succeeded -
1570                                          */
1571                                         cifs_free_llist(&tmp_llist);
1572                                 cur = buf;
1573                                 num = 0;
1574                         } else
1575                                 cur++;
1576                 }
1577                 if (num) {
1578                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1579                                                types[i], num, 0, buf);
1580                         if (stored_rc) {
1581                                 cifs_move_llist(&tmp_llist,
1582                                                 &cfile->llist->locks);
1583                                 rc = stored_rc;
1584                         } else
1585                                 cifs_free_llist(&tmp_llist);
1586                 }
1587         }
1588
1589         up_write(&cinode->lock_sem);
1590         kfree(buf);
1591         return rc;
1592 }
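/*
 * The loop above batches unlock ranges: it packs LOCKING_ANDX_RANGE
 * entries until the buffer holds max_num of them, flushes, and finally
 * sends one partial batch for the remainder. A generic sketch of that
 * pattern, with hypothetical names and a made-up batch size:
 */
#if 0
#define MAX_NUM 4

static void flush_batch(const int *buf, unsigned int num)
{
	/* stands in for cifs_lockv() */
}

static void batch_all(const int *items, unsigned int count)
{
	int buf[MAX_NUM];
	unsigned int i, num = 0;

	for (i = 0; i < count; i++) {
		buf[num] = items[i];
		if (++num == MAX_NUM) {
			flush_batch(buf, num);
			num = 0;
		}
	}
	if (num)
		flush_batch(buf, num);	/* trailing partial batch */
}
#endif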
1593
1594 static int
1595 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1596            bool wait_flag, bool posix_lck, int lock, int unlock,
1597            unsigned int xid)
1598 {
1599         int rc = 0;
1600         __u64 length = 1 + flock->fl_end - flock->fl_start;
1601         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1602         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1603         struct TCP_Server_Info *server = tcon->ses->server;
1604         struct inode *inode = d_inode(cfile->dentry);
1605
1606         if (posix_lck) {
1607                 int posix_lock_type;
1608
1609                 rc = cifs_posix_lock_set(file, flock);
1610                 if (rc <= 0)
1611                         return rc;
1612
1613                 if (type & server->vals->shared_lock_type)
1614                         posix_lock_type = CIFS_RDLCK;
1615                 else
1616                         posix_lock_type = CIFS_WRLCK;
1617
1618                 if (unlock == 1)
1619                         posix_lock_type = CIFS_UNLCK;
1620
1621                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1622                                       hash_lockowner(flock->fl_owner),
1623                                       flock->fl_start, length,
1624                                       NULL, posix_lock_type, wait_flag);
1625                 goto out;
1626         }
1627
1628         if (lock) {
1629                 struct cifsLockInfo *lock;
1630
1631                 lock = cifs_lock_init(flock->fl_start, length, type,
1632                                       flock->fl_flags);
1633                 if (!lock)
1634                         return -ENOMEM;
1635
1636                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1637                 if (rc < 0) {
1638                         kfree(lock);
1639                         return rc;
1640                 }
1641                 if (!rc)
1642                         goto out;
1643
1644                 /*
1645                  * Windows 7 server can delay breaking lease from read to None
1646                  * if we set a byte-range lock on a file - break it explicitly
1647                  * before sending the lock to the server to be sure the next
1648                  * read won't conflict with non-overlapping locks due to
1649                  * page reading.
1650                  */
1651                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1652                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1653                         cifs_zap_mapping(inode);
1654                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1655                                  inode);
1656                         CIFS_I(inode)->oplock = 0;
1657                 }
1658
1659                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1660                                             type, 1, 0, wait_flag);
1661                 if (rc) {
1662                         kfree(lock);
1663                         return rc;
1664                 }
1665
1666                 cifs_lock_add(cfile, lock);
1667         } else if (unlock)
1668                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1669
1670 out:
1671         if (flock->fl_flags & FL_POSIX) {
1672                 /*
1673                  * If this is a request to remove all locks because we
1674                  * are closing the file, it doesn't matter if the
1675                  * unlocking failed as both cifs.ko and the SMB server
1676                  * remove the lock on file close
1677                  */
1678                 if (rc) {
1679                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1680                         if (!(flock->fl_flags & FL_CLOSE))
1681                                 return rc;
1682                 }
1683                 rc = locks_lock_file_wait(file, flock);
1684         }
1685         return rc;
1686 }
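/*
 * Why unlock failures are tolerated for FL_CLOSE (hypothetical sketch,
 * not built): the server drops byte-range locks together with the
 * handle, so a close-time unlock that fails (e.g. across a reconnect)
 * still leaves no stale lock behind. fd and fl are assumed here.
 */
#if 0
	fcntl(fd, F_SETLK, &fl);	/* fl.l_type == F_WRLCK */
	close(fd);			/* server tears the lock down too */
#endif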
1687
1688 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1689 {
1690         int rc, xid;
1691         int lock = 0, unlock = 0;
1692         bool wait_flag = false;
1693         bool posix_lck = false;
1694         struct cifs_sb_info *cifs_sb;
1695         struct cifs_tcon *tcon;
1696         struct cifsFileInfo *cfile;
1697         __u32 type;
1698
1699         rc = -EACCES;
1700         xid = get_xid();
1701
1702         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1703                  cmd, flock->fl_flags, flock->fl_type,
1704                  flock->fl_start, flock->fl_end);
1705
1706         cfile = (struct cifsFileInfo *)file->private_data;
1707         tcon = tlink_tcon(cfile->tlink);
1708
1709         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1710                         tcon->ses->server);
1711         cifs_sb = CIFS_FILE_SB(file);
1712
1713         if (cap_unix(tcon->ses) &&
1714             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1715             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1716                 posix_lck = true;
1717         /*
1718          * BB add code here to normalize offset and length to account for
1719          * negative length, which we cannot accept over the wire.
1720          */
1721         if (IS_GETLK(cmd)) {
1722                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1723                 free_xid(xid);
1724                 return rc;
1725         }
1726
1727         if (!lock && !unlock) {
1728                 /*
1729                  * if no lock or unlock then nothing to do since we do not
1730                  * know what it is
1731                  */
1732                 free_xid(xid);
1733                 return -EOPNOTSUPP;
1734         }
1735
1736         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1737                         xid);
1738         free_xid(xid);
1739         return rc;
1740 }
1741
1742 /*
1743  * update the file size (if needed) after a write. Should be called with
1744  * the inode->i_lock held
1745  */
1746 void
1747 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1748                       unsigned int bytes_written)
1749 {
1750         loff_t end_of_write = offset + bytes_written;
1751
1752         if (end_of_write > cifsi->server_eof)
1753                 cifsi->server_eof = end_of_write;
1754 }
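/*
 * Worked example (hypothetical values, i_lock held): server_eof only
 * ever moves forward.
 */
#if 0
	cifsi->server_eof = 8192;
	cifs_update_eof(cifsi, 4096, 512);	/* 4608 < 8192: unchanged */
	cifs_update_eof(cifsi, 8192, 1024);	/* advances to 9216 */
#endif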
1755
1756 static ssize_t
1757 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1758            size_t write_size, loff_t *offset)
1759 {
1760         int rc = 0;
1761         unsigned int bytes_written = 0;
1762         unsigned int total_written;
1763         struct cifs_tcon *tcon;
1764         struct TCP_Server_Info *server;
1765         unsigned int xid;
1766         struct dentry *dentry = open_file->dentry;
1767         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1768         struct cifs_io_parms io_parms;
1769
1770         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1771                  write_size, *offset, dentry);
1772
1773         tcon = tlink_tcon(open_file->tlink);
1774         server = tcon->ses->server;
1775
1776         if (!server->ops->sync_write)
1777                 return -ENOSYS;
1778
1779         xid = get_xid();
1780
1781         for (total_written = 0; write_size > total_written;
1782              total_written += bytes_written) {
1783                 rc = -EAGAIN;
1784                 while (rc == -EAGAIN) {
1785                         struct kvec iov[2];
1786                         unsigned int len;
1787
1788                         if (open_file->invalidHandle) {
1789                                 /* we could deadlock if we called
1790                                    filemap_fdatawait from here so tell
1791                                    reopen_file not to flush data to
1792                                    server now */
1793                                 rc = cifs_reopen_file(open_file, false);
1794                                 if (rc != 0)
1795                                         break;
1796                         }
1797
1798                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1799                                   (unsigned int)write_size - total_written);
1800                         /* iov[0] is reserved for smb header */
1801                         iov[1].iov_base = (char *)write_data + total_written;
1802                         iov[1].iov_len = len;
1803                         io_parms.pid = pid;
1804                         io_parms.tcon = tcon;
1805                         io_parms.offset = *offset;
1806                         io_parms.length = len;
1807                         rc = server->ops->sync_write(xid, &open_file->fid,
1808                                         &io_parms, &bytes_written, iov, 1);
1809                 }
1810                 if (rc || (bytes_written == 0)) {
1811                         if (total_written)
1812                                 break;
1813                         else {
1814                                 free_xid(xid);
1815                                 return rc;
1816                         }
1817                 } else {
1818                         spin_lock(&d_inode(dentry)->i_lock);
1819                         cifs_update_eof(cifsi, *offset, bytes_written);
1820                         spin_unlock(&d_inode(dentry)->i_lock);
1821                         *offset += bytes_written;
1822                 }
1823         }
1824
1825         cifs_stats_bytes_written(tcon, total_written);
1826
1827         if (total_written > 0) {
1828                 spin_lock(&d_inode(dentry)->i_lock);
1829                 if (*offset > d_inode(dentry)->i_size)
1830                         i_size_write(d_inode(dentry), *offset);
1831                 spin_unlock(&d_inode(dentry)->i_lock);
1832         }
1833         mark_inode_dirty_sync(d_inode(dentry));
1834         free_xid(xid);
1835         return total_written;
1836 }
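/*
 * cifs_write() above is the classic "write it all" loop: issue chunks
 * no larger than the server allows, treat a short write as progress,
 * and report an error only if nothing was written at all. A minimal
 * userspace analogue of the same pattern (sketch, not built):
 */
#if 0
static ssize_t write_all(int fd, const char *buf, size_t len)
{
	size_t total = 0;

	while (total < len) {
		ssize_t n = write(fd, buf + total, len - total);

		if (n <= 0)
			return total ? (ssize_t)total : n;
		total += n;
	}
	return total;
}
#endif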
1837
1838 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1839                                         bool fsuid_only)
1840 {
1841         struct cifsFileInfo *open_file = NULL;
1842         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1843         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1844
1845         /* only filter by fsuid on multiuser mounts */
1846         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1847                 fsuid_only = false;
1848
1849         spin_lock(&tcon->open_file_lock);
1850         /* we could simply take the first list entry, since write-only
1851            entries are always at the end of the list, but the first entry
1852            might have a close pending, so we walk the whole list */
1853         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1854                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1855                         continue;
1856                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1857                         if (!open_file->invalidHandle) {
1858                                 /* found a good file */
1859                                 /* lock it so it will not be closed on us */
1860                                 cifsFileInfo_get(open_file);
1861                                 spin_unlock(&tcon->open_file_lock);
1862                                 return open_file;
1863                         } /* else might as well continue, and look for
1864                              another, or simply have the caller reopen it
1865                              again rather than trying to fix this handle */
1866                 } else /* write only file */
1867                         break; /* write only files are last so must be done */
1868         }
1869         spin_unlock(&tcon->open_file_lock);
1870         return NULL;
1871 }
1872
1873 /* Return -EBADF if no handle is found and general rc otherwise */
1874 int
1875 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1876                        struct cifsFileInfo **ret_file)
1877 {
1878         struct cifsFileInfo *open_file, *inv_file = NULL;
1879         struct cifs_sb_info *cifs_sb;
1880         struct cifs_tcon *tcon;
1881         bool any_available = false;
1882         int rc = -EBADF;
1883         unsigned int refind = 0;
1884
1885         *ret_file = NULL;
1886
1887         /*
1888          * Having a null inode here (because mapping->host was set to zero by
1889          * the VFS or MM) should not happen but we had reports of an oops (due
1890          * to it being zero) during stress test cases, so we need to check for it
1891          */
1892
1893         if (cifs_inode == NULL) {
1894                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
1895                 dump_stack();
1896                 return rc;
1897         }
1898
1899         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1900         tcon = cifs_sb_master_tcon(cifs_sb);
1901
1902         /* only filter by fsuid on multiuser mounts */
1903         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1904                 fsuid_only = false;
1905
1906         spin_lock(&tcon->open_file_lock);
1907 refind_writable:
1908         if (refind > MAX_REOPEN_ATT) {
1909                 spin_unlock(&tcon->open_file_lock);
1910                 return rc;
1911         }
1912         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1913                 if (!any_available && open_file->pid != current->tgid)
1914                         continue;
1915                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1916                         continue;
1917                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1918                         if (!open_file->invalidHandle) {
1919                                 /* found a good writable file */
1920                                 cifsFileInfo_get(open_file);
1921                                 spin_unlock(&tcon->open_file_lock);
1922                                 *ret_file = open_file;
1923                                 return 0;
1924                         } else {
1925                                 if (!inv_file)
1926                                         inv_file = open_file;
1927                         }
1928                 }
1929         }
1930         /* couldn't find usable FH with same pid, try any available */
1931         if (!any_available) {
1932                 any_available = true;
1933                 goto refind_writable;
1934         }
1935
1936         if (inv_file) {
1937                 any_available = false;
1938                 cifsFileInfo_get(inv_file);
1939         }
1940
1941         spin_unlock(&tcon->open_file_lock);
1942
1943         if (inv_file) {
1944                 rc = cifs_reopen_file(inv_file, false);
1945                 if (!rc) {
1946                         *ret_file = inv_file;
1947                         return 0;
1948                 }
1949
1950                 spin_lock(&cifs_inode->open_file_lock);
1951                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
1952                 spin_unlock(&cifs_inode->open_file_lock);
1953                 cifsFileInfo_put(inv_file);
1954                 ++refind;
1955                 inv_file = NULL;
1956                 spin_lock(&tcon->open_file_lock);
1957                 goto refind_writable;
1958         }
1959
1960         return rc;
1961 }
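/*
 * Typical caller pattern for the helper above (hypothetical sketch,
 * not built): on success a reference has been taken for the caller
 * and must be dropped when done.
 */
#if 0
	struct cifsFileInfo *cfile;
	int rc = cifs_get_writable_file(cinode, false, &cfile);

	if (!rc) {
		/* ... issue writes through cfile ... */
		cifsFileInfo_put(cfile);	/* drop the extra reference */
	}
#endif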
1962
1963 struct cifsFileInfo *
1964 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
1965 {
1966         struct cifsFileInfo *cfile;
1967         int rc;
1968
1969         rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
1970         if (rc)
1971                 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
1972
1973         return cfile;
1974 }
1975
1976 int
1977 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
1978                        struct cifsFileInfo **ret_file)
1979 {
1980         struct list_head *tmp;
1981         struct cifsFileInfo *cfile;
1982         struct cifsInodeInfo *cinode;
1983         char *full_path;
1984
1985         *ret_file = NULL;
1986
1987         spin_lock(&tcon->open_file_lock);
1988         list_for_each(tmp, &tcon->openFileList) {
1989                 cfile = list_entry(tmp, struct cifsFileInfo, tlist);
1991                 full_path = build_path_from_dentry(cfile->dentry);
1992                 if (full_path == NULL) {
1993                         spin_unlock(&tcon->open_file_lock);
1994                         return -ENOMEM;
1995                 }
1996                 if (strcmp(full_path, name)) {
1997                         kfree(full_path);
1998                         continue;
1999                 }
2000
2001                 kfree(full_path);
2002                 cinode = CIFS_I(d_inode(cfile->dentry));
2003                 spin_unlock(&tcon->open_file_lock);
2004                 return cifs_get_writable_file(cinode, false, ret_file);
2005         }
2006
2007         spin_unlock(&tcon->open_file_lock);
2008         return -ENOENT;
2009 }
2010
2011 int
2012 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2013                        struct cifsFileInfo **ret_file)
2014 {
2015         struct list_head *tmp;
2016         struct cifsFileInfo *cfile;
2017         struct cifsInodeInfo *cinode;
2018         char *full_path;
2019
2020         *ret_file = NULL;
2021
2022         spin_lock(&tcon->open_file_lock);
2023         list_for_each(tmp, &tcon->openFileList) {
2024                 cfile = list_entry(tmp, struct cifsFileInfo, tlist);
2026                 full_path = build_path_from_dentry(cfile->dentry);
2027                 if (full_path == NULL) {
2028                         spin_unlock(&tcon->open_file_lock);
2029                         return -ENOMEM;
2030                 }
2031                 if (strcmp(full_path, name)) {
2032                         kfree(full_path);
2033                         continue;
2034                 }
2035
2036                 kfree(full_path);
2037                 cinode = CIFS_I(d_inode(cfile->dentry));
2038                 spin_unlock(&tcon->open_file_lock);
2039                 *ret_file = find_readable_file(cinode, false);
2040                 return *ret_file ? 0 : -ENOENT;
2041         }
2042
2043         spin_unlock(&tcon->open_file_lock);
2044         return -ENOENT;
2045 }
2046
2047 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2048 {
2049         struct address_space *mapping = page->mapping;
2050         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2051         char *write_data;
2052         int rc = -EFAULT;
2053         int bytes_written = 0;
2054         struct inode *inode;
2055         struct cifsFileInfo *open_file;
2056
2057         if (!mapping || !mapping->host)
2058                 return -EFAULT;
2059
2060         inode = page->mapping->host;
2061
2062         offset += (loff_t)from;
2063         write_data = kmap(page);
2064         write_data += from;
2065
2066         if ((to > PAGE_SIZE) || (from > to)) {
2067                 kunmap(page);
2068                 return -EIO;
2069         }
2070
2071         /* racing with truncate? */
2072         if (offset > mapping->host->i_size) {
2073                 kunmap(page);
2074                 return 0; /* don't care */
2075         }
2076
2077         /* check to make sure that we are not extending the file */
2078         if (mapping->host->i_size - offset < (loff_t)to)
2079                 to = (unsigned)(mapping->host->i_size - offset);
2080
2081         rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2082         if (!rc) {
2083                 bytes_written = cifs_write(open_file, open_file->pid,
2084                                            write_data, to - from, &offset);
2085                 cifsFileInfo_put(open_file);
2086                 /* Does mm or vfs already set times? */
2087                 inode->i_atime = inode->i_mtime = current_time(inode);
2088                 if ((bytes_written > 0) && (offset))
2089                         rc = 0;
2090                 else if (bytes_written < 0)
2091                         rc = bytes_written;
2092                 else
2093                         rc = -EFAULT;
2094         } else {
2095                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2096                 if (!is_retryable_error(rc))
2097                         rc = -EIO;
2098         }
2099
2100         kunmap(page);
2101         return rc;
2102 }
2103
2104 static struct cifs_writedata *
2105 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2106                           pgoff_t end, pgoff_t *index,
2107                           unsigned int *found_pages)
2108 {
2109         struct cifs_writedata *wdata;
2110
2111         wdata = cifs_writedata_alloc((unsigned int)tofind,
2112                                      cifs_writev_complete);
2113         if (!wdata)
2114                 return NULL;
2115
2116         *found_pages = find_get_pages_range_tag(mapping, index, end,
2117                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2118         return wdata;
2119 }
2120
2121 static unsigned int
2122 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2123                     struct address_space *mapping,
2124                     struct writeback_control *wbc,
2125                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2126 {
2127         unsigned int nr_pages = 0, i;
2128         struct page *page;
2129
2130         for (i = 0; i < found_pages; i++) {
2131                 page = wdata->pages[i];
2132                 /*
2133                  * At this point we hold neither the i_pages lock nor the
2134                  * page lock: the page may be truncated or invalidated
2135                  * (changing page->mapping to NULL), or even swizzled
2136                  * back from swapper_space to tmpfs file mapping
2137                  */
2138
2139                 if (nr_pages == 0)
2140                         lock_page(page);
2141                 else if (!trylock_page(page))
2142                         break;
2143
2144                 if (unlikely(page->mapping != mapping)) {
2145                         unlock_page(page);
2146                         break;
2147                 }
2148
2149                 if (!wbc->range_cyclic && page->index > end) {
2150                         *done = true;
2151                         unlock_page(page);
2152                         break;
2153                 }
2154
2155                 if (*next && (page->index != *next)) {
2156                         /* Not next consecutive page */
2157                         unlock_page(page);
2158                         break;
2159                 }
2160
2161                 if (wbc->sync_mode != WB_SYNC_NONE)
2162                         wait_on_page_writeback(page);
2163
2164                 if (PageWriteback(page) ||
2165                                 !clear_page_dirty_for_io(page)) {
2166                         unlock_page(page);
2167                         break;
2168                 }
2169
2170                 /*
2171                  * This actually clears the dirty bit in the radix tree.
2172                  * See cifs_writepage() for more commentary.
2173                  */
2174                 set_page_writeback(page);
2175                 if (page_offset(page) >= i_size_read(mapping->host)) {
2176                         *done = true;
2177                         unlock_page(page);
2178                         end_page_writeback(page);
2179                         break;
2180                 }
2181
2182                 wdata->pages[i] = page;
2183                 *next = page->index + 1;
2184                 ++nr_pages;
2185         }
2186
2187         /* reset index to refind any pages skipped */
2188         if (nr_pages == 0)
2189                 *index = wdata->pages[0]->index + 1;
2190
2191         /* put any pages we aren't going to use */
2192         for (i = nr_pages; i < found_pages; i++) {
2193                 put_page(wdata->pages[i]);
2194                 wdata->pages[i] = NULL;
2195         }
2196
2197         return nr_pages;
2198 }
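/*
 * The locking policy above: sleep for the first page so the loop is
 * guaranteed to make progress, but only trylock the rest so a page
 * locked elsewhere simply terminates the batch instead of risking a
 * deadlock. A generic analogue of the same pattern (sketch, not built):
 */
#if 0
static unsigned int grab_batch(struct mutex *locks, unsigned int n)
{
	unsigned int i;

	for (i = 0; i < n; i++) {
		if (i == 0)
			mutex_lock(&locks[0]);
		else if (!mutex_trylock(&locks[i]))
			break;	/* stop at the first contended entry */
	}
	return i;	/* number of locks actually held */
}
#endif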
2199
2200 static int
2201 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2202                  struct address_space *mapping, struct writeback_control *wbc)
2203 {
2204         int rc;
2205         struct TCP_Server_Info *server =
2206                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2207
2208         wdata->sync_mode = wbc->sync_mode;
2209         wdata->nr_pages = nr_pages;
2210         wdata->offset = page_offset(wdata->pages[0]);
2211         wdata->pagesz = PAGE_SIZE;
2212         wdata->tailsz = min(i_size_read(mapping->host) -
2213                         page_offset(wdata->pages[nr_pages - 1]),
2214                         (loff_t)PAGE_SIZE);
2215         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2216         wdata->pid = wdata->cfile->pid;
2217
2218         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2219         if (rc)
2220                 return rc;
2221
2222         if (wdata->cfile->invalidHandle)
2223                 rc = -EAGAIN;
2224         else
2225                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2226
2227         return rc;
2228 }
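/*
 * Worked example of the tail arithmetic above (hypothetical numbers):
 * three dirty pages at offsets 0, 4096 and 8192 with i_size == 10000
 * give
 *	tailsz = min(10000 - 8192, 4096) = 1808
 *	bytes  = (3 - 1) * 4096 + 1808   = 10000
 * so the request never writes past EOF.
 */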
2229
2230 static int cifs_writepages(struct address_space *mapping,
2231                            struct writeback_control *wbc)
2232 {
2233         struct inode *inode = mapping->host;
2234         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2235         struct TCP_Server_Info *server;
2236         bool done = false, scanned = false, range_whole = false;
2237         pgoff_t end, index;
2238         struct cifs_writedata *wdata;
2239         struct cifsFileInfo *cfile = NULL;
2240         int rc = 0;
2241         int saved_rc = 0;
2242         unsigned int xid;
2243
2244         /*
2245          * If wsize is smaller than the page cache size, default to writing
2246          * one page at a time via cifs_writepage
2247          */
2248         if (cifs_sb->wsize < PAGE_SIZE)
2249                 return generic_writepages(mapping, wbc);
2250
2251         xid = get_xid();
2252         if (wbc->range_cyclic) {
2253                 index = mapping->writeback_index; /* Start from prev offset */
2254                 end = -1;
2255         } else {
2256                 index = wbc->range_start >> PAGE_SHIFT;
2257                 end = wbc->range_end >> PAGE_SHIFT;
2258                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2259                         range_whole = true;
2260                 scanned = true;
2261         }
2262         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2263 retry:
2264         while (!done && index <= end) {
2265                 unsigned int i, nr_pages, found_pages, wsize;
2266                 pgoff_t next = 0, tofind, saved_index = index;
2267                 struct cifs_credits credits_on_stack;
2268                 struct cifs_credits *credits = &credits_on_stack;
2269                 int get_file_rc = 0;
2270
2271                 if (cfile)
2272                         cifsFileInfo_put(cfile);
2273
2274                 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2275
2276                 /* in case of an error store it to return later */
2277                 if (rc)
2278                         get_file_rc = rc;
2279
2280                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2281                                                    &wsize, credits);
2282                 if (rc != 0) {
2283                         done = true;
2284                         break;
2285                 }
2286
2287                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2288
2289                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2290                                                   &found_pages);
2291                 if (!wdata) {
2292                         rc = -ENOMEM;
2293                         done = true;
2294                         add_credits_and_wake_if(server, credits, 0);
2295                         break;
2296                 }
2297
2298                 if (found_pages == 0) {
2299                         kref_put(&wdata->refcount, cifs_writedata_release);
2300                         add_credits_and_wake_if(server, credits, 0);
2301                         break;
2302                 }
2303
2304                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2305                                                end, &index, &next, &done);
2306
2307                 /* nothing to write? */
2308                 if (nr_pages == 0) {
2309                         kref_put(&wdata->refcount, cifs_writedata_release);
2310                         add_credits_and_wake_if(server, credits, 0);
2311                         continue;
2312                 }
2313
2314                 wdata->credits = credits_on_stack;
2315                 wdata->cfile = cfile;
2316                 cfile = NULL;
2317
2318                 if (!wdata->cfile) {
2319                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2320                                  get_file_rc);
2321                         if (is_retryable_error(get_file_rc))
2322                                 rc = get_file_rc;
2323                         else
2324                                 rc = -EBADF;
2325                 } else
2326                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2327
2328                 for (i = 0; i < nr_pages; ++i)
2329                         unlock_page(wdata->pages[i]);
2330
2331                 /* send failure -- clean up the mess */
2332                 if (rc != 0) {
2333                         add_credits_and_wake_if(server, &wdata->credits, 0);
2334                         for (i = 0; i < nr_pages; ++i) {
2335                                 if (is_retryable_error(rc))
2336                                         redirty_page_for_writepage(wbc,
2337                                                            wdata->pages[i]);
2338                                 else
2339                                         SetPageError(wdata->pages[i]);
2340                                 end_page_writeback(wdata->pages[i]);
2341                                 put_page(wdata->pages[i]);
2342                         }
2343                         if (!is_retryable_error(rc))
2344                                 mapping_set_error(mapping, rc);
2345                 }
2346                 kref_put(&wdata->refcount, cifs_writedata_release);
2347
2348                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2349                         index = saved_index;
2350                         continue;
2351                 }
2352
2353                 /* Return immediately if we received a signal during writing */
2354                 if (is_interrupt_error(rc)) {
2355                         done = true;
2356                         break;
2357                 }
2358
2359                 if (rc != 0 && saved_rc == 0)
2360                         saved_rc = rc;
2361
2362                 wbc->nr_to_write -= nr_pages;
2363                 if (wbc->nr_to_write <= 0)
2364                         done = true;
2365
2366                 index = next;
2367         }
2368
2369         if (!scanned && !done) {
2370                 /*
2371                  * We hit the last page and there is more work to be done: wrap
2372                  * back to the start of the file
2373                  */
2374                 scanned = true;
2375                 index = 0;
2376                 goto retry;
2377         }
2378
2379         if (saved_rc != 0)
2380                 rc = saved_rc;
2381
2382         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2383                 mapping->writeback_index = index;
2384
2385         if (cfile)
2386                 cifsFileInfo_put(cfile);
2387         free_xid(xid);
2388         return rc;
2389 }
2390
2391 static int
2392 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2393 {
2394         int rc;
2395         unsigned int xid;
2396
2397         xid = get_xid();
2398 /* BB add check for wbc flags */
2399         get_page(page);
2400         if (!PageUptodate(page))
2401                 cifs_dbg(FYI, "ppw - page not up to date\n");
2402
2403         /*
2404          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2405          *
2406          * A writepage() implementation always needs to do either this,
2407          * or re-dirty the page with "redirty_page_for_writepage()" in
2408          * the case of a failure.
2409          *
2410          * Just unlocking the page will cause the radix tree tag-bits
2411          * to fail to update with the state of the page correctly.
2412          */
2413         set_page_writeback(page);
2414 retry_write:
2415         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2416         if (is_retryable_error(rc)) {
2417                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2418                         goto retry_write;
2419                 redirty_page_for_writepage(wbc, page);
2420         } else if (rc != 0) {
2421                 SetPageError(page);
2422                 mapping_set_error(page->mapping, rc);
2423         } else {
2424                 SetPageUptodate(page);
2425         }
2426         end_page_writeback(page);
2427         put_page(page);
2428         free_xid(xid);
2429         return rc;
2430 }
2431
2432 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2433 {
2434         int rc = cifs_writepage_locked(page, wbc);
2435         unlock_page(page);
2436         return rc;
2437 }
2438
2439 static int cifs_write_end(struct file *file, struct address_space *mapping,
2440                         loff_t pos, unsigned len, unsigned copied,
2441                         struct page *page, void *fsdata)
2442 {
2443         int rc;
2444         struct inode *inode = mapping->host;
2445         struct cifsFileInfo *cfile = file->private_data;
2446         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2447         __u32 pid;
2448
2449         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2450                 pid = cfile->pid;
2451         else
2452                 pid = current->tgid;
2453
2454         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2455                  page, pos, copied);
2456
2457         if (PageChecked(page)) {
2458                 if (copied == len)
2459                         SetPageUptodate(page);
2460                 ClearPageChecked(page);
2461         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2462                 SetPageUptodate(page);
2463
2464         if (!PageUptodate(page)) {
2465                 char *page_data;
2466                 unsigned offset = pos & (PAGE_SIZE - 1);
2467                 unsigned int xid;
2468
2469                 xid = get_xid();
2470                 /* this is probably better than directly calling
2471                    partialpage_write since in this function the file handle is
2472                    known, which we might as well leverage */
2473                 /* BB check if anything else missing out of ppw
2474                    such as updating last write time */
2475                 page_data = kmap(page);
2476                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2477                 /* if (rc < 0) should we set writebehind rc? */
2478                 kunmap(page);
2479
2480                 free_xid(xid);
2481         } else {
2482                 rc = copied;
2483                 pos += copied;
2484                 set_page_dirty(page);
2485         }
2486
2487         if (rc > 0) {
2488                 spin_lock(&inode->i_lock);
2489                 if (pos > inode->i_size)
2490                         i_size_write(inode, pos);
2491                 spin_unlock(&inode->i_lock);
2492         }
2493
2494         unlock_page(page);
2495         put_page(page);
2496
2497         return rc;
2498 }
2499
2500 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2501                       int datasync)
2502 {
2503         unsigned int xid;
2504         int rc = 0;
2505         struct cifs_tcon *tcon;
2506         struct TCP_Server_Info *server;
2507         struct cifsFileInfo *smbfile = file->private_data;
2508         struct inode *inode = file_inode(file);
2509         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2510
2511         rc = file_write_and_wait_range(file, start, end);
2512         if (rc)
2513                 return rc;
2514
2515         xid = get_xid();
2516
2517         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2518                  file, datasync);
2519
2520         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2521                 rc = cifs_zap_mapping(inode);
2522                 if (rc) {
2523                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2524                         rc = 0; /* don't care about it in fsync */
2525                 }
2526         }
2527
2528         tcon = tlink_tcon(smbfile->tlink);
2529         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2530                 server = tcon->ses->server;
2531                 if (server->ops->flush)
2532                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2533                 else
2534                         rc = -ENOSYS;
2535         }
2536
2537         free_xid(xid);
2538         return rc;
2539 }
2540
2541 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2542 {
2543         unsigned int xid;
2544         int rc = 0;
2545         struct cifs_tcon *tcon;
2546         struct TCP_Server_Info *server;
2547         struct cifsFileInfo *smbfile = file->private_data;
2548         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2549
2550         rc = file_write_and_wait_range(file, start, end);
2551         if (rc)
2552                 return rc;
2553
2554         xid = get_xid();
2555
2556         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2557                  file, datasync);
2558
2559         tcon = tlink_tcon(smbfile->tlink);
2560         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2561                 server = tcon->ses->server;
2562                 if (server->ops->flush)
2563                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2564                 else
2565                         rc = -ENOSYS;
2566         }
2567
2568         free_xid(xid);
2569         return rc;
2570 }
2571
2572 /*
2573  * As the file closes, flush all cached write data for this inode,
2574  * checking for write-behind errors.
2575  */
2576 int cifs_flush(struct file *file, fl_owner_t id)
2577 {
2578         struct inode *inode = file_inode(file);
2579         int rc = 0;
2580
2581         if (file->f_mode & FMODE_WRITE)
2582                 rc = filemap_write_and_wait(inode->i_mapping);
2583
2584         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2585
2586         return rc;
2587 }
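/*
 * The rc computed above is what userspace gets back from close(), so
 * checking it is the only way short of fsync() to catch deferred
 * write-behind errors. Sketch (not built, fd hypothetical):
 */
#if 0
	if (close(fd) != 0)
		perror("close");	/* may report an earlier async write error */
#endif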
2588
2589 static int
2590 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2591 {
2592         int rc = 0;
2593         unsigned long i;
2594
2595         for (i = 0; i < num_pages; i++) {
2596                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2597                 if (!pages[i]) {
2598                         /*
2599                          * save number of pages we have already allocated and
2600                          * return with ENOMEM error
2601                          */
2602                         num_pages = i;
2603                         rc = -ENOMEM;
2604                         break;
2605                 }
2606         }
2607
2608         if (rc) {
2609                 for (i = 0; i < num_pages; i++)
2610                         put_page(pages[i]);
2611         }
2612         return rc;
2613 }
2614
2615 static inline
2616 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2617 {
2618         size_t num_pages;
2619         size_t clen;
2620
2621         clen = min_t(const size_t, len, wsize);
2622         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2623
2624         if (cur_len)
2625                 *cur_len = clen;
2626
2627         return num_pages;
2628 }
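/*
 * Worked example for get_numpages() (hypothetical values): with
 * wsize == 65536 and len == 10000,
 *	clen      = min(10000, 65536)         = 10000
 *	num_pages = DIV_ROUND_UP(10000, 4096) = 3
 * so the last of the three pages is only partially used.
 */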
2629
2630 static void
2631 cifs_uncached_writedata_release(struct kref *refcount)
2632 {
2633         int i;
2634         struct cifs_writedata *wdata = container_of(refcount,
2635                                         struct cifs_writedata, refcount);
2636
2637         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2638         for (i = 0; i < wdata->nr_pages; i++)
2639                 put_page(wdata->pages[i]);
2640         cifs_writedata_release(refcount);
2641 }
2642
2643 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2644
2645 static void
2646 cifs_uncached_writev_complete(struct work_struct *work)
2647 {
2648         struct cifs_writedata *wdata = container_of(work,
2649                                         struct cifs_writedata, work);
2650         struct inode *inode = d_inode(wdata->cfile->dentry);
2651         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2652
2653         spin_lock(&inode->i_lock);
2654         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2655         if (cifsi->server_eof > inode->i_size)
2656                 i_size_write(inode, cifsi->server_eof);
2657         spin_unlock(&inode->i_lock);
2658
2659         complete(&wdata->done);
2660         collect_uncached_write_data(wdata->ctx);
2661         /* the below call can possibly free the last ref to aio ctx */
2662         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2663 }
2664
2665 static int
2666 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2667                       size_t *len, unsigned long *num_pages)
2668 {
2669         size_t save_len, copied, bytes, cur_len = *len;
2670         unsigned long i, nr_pages = *num_pages;
2671
2672         save_len = cur_len;
2673         for (i = 0; i < nr_pages; i++) {
2674                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2675                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2676                 cur_len -= copied;
2677                 /*
2678                  * If we didn't copy as much as we expected, then that
2679                  * may mean we trod into an unmapped area. Stop copying
2680                  * at that point. On the next pass through the big
2681                  * loop, we'll likely end up getting a zero-length
2682                  * write and bailing out of it.
2683                  */
2684                 if (copied < bytes)
2685                         break;
2686         }
2687         cur_len = save_len - cur_len;
2688         *len = cur_len;
2689
2690         /*
2691          * If we have no data to send, then that probably means that
2692          * the copy above failed altogether. That's most likely because
2693          * the address in the iovec was bogus. Return -EFAULT and let
2694          * the caller free anything we allocated and bail out.
2695          */
2696         if (!cur_len)
2697                 return -EFAULT;
2698
2699         /*
2700          * i + 1 now represents the number of pages we actually used in
2701          * the copy phase above.
2702          */
2703         *num_pages = i + 1;
2704         return 0;
2705 }
2706
2707 static int
2708 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2709         struct cifs_aio_ctx *ctx)
2710 {
2711         unsigned int wsize;
2712         struct cifs_credits credits;
2713         int rc;
2714         struct TCP_Server_Info *server =
2715                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2716
2717         do {
2718                 if (wdata->cfile->invalidHandle) {
2719                         rc = cifs_reopen_file(wdata->cfile, false);
2720                         if (rc == -EAGAIN)
2721                                 continue;
2722                         else if (rc)
2723                                 break;
2724                 }
2725
2727                 /*
2728                  * Wait for credits to resend this wdata.
2729                  * Note: we are attempting to resend the whole wdata, not in
2730                  * segments.
2731                  */
2732                 do {
2733                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2734                                                 &wsize, &credits);
2735                         if (rc)
2736                                 goto fail;
2737
2738                         if (wsize < wdata->bytes) {
2739                                 add_credits_and_wake_if(server, &credits, 0);
2740                                 msleep(1000);
2741                         }
2742                 } while (wsize < wdata->bytes);
2743                 wdata->credits = credits;
2744
2745                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2746
2747                 if (!rc) {
2748                         if (wdata->cfile->invalidHandle)
2749                                 rc = -EAGAIN;
2750                         else
2751                                 rc = server->ops->async_writev(wdata,
2752                                         cifs_uncached_writedata_release);
2753                 }
2754
2755                 /* If the write was successfully sent, we are done */
2756                 if (!rc) {
2757                         list_add_tail(&wdata->list, wdata_list);
2758                         return 0;
2759                 }
2760
2761                 /* Roll back credits and retry if needed */
2762                 add_credits_and_wake_if(server, &wdata->credits, 0);
2763         } while (rc == -EAGAIN);
2764
2765 fail:
2766         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2767         return rc;
2768 }
2769
2770 static int
2771 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2772                      struct cifsFileInfo *open_file,
2773                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2774                      struct cifs_aio_ctx *ctx)
2775 {
2776         int rc = 0;
2777         size_t cur_len;
2778         unsigned long nr_pages, num_pages, i;
2779         struct cifs_writedata *wdata;
2780         struct iov_iter saved_from = *from;
2781         loff_t saved_offset = offset;
2782         pid_t pid;
2783         struct TCP_Server_Info *server;
2784         struct page **pagevec;
2785         size_t start;
2786         unsigned int xid;
2787
2788         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2789                 pid = open_file->pid;
2790         else
2791                 pid = current->tgid;
2792
2793         server = tlink_tcon(open_file->tlink)->ses->server;
2794         xid = get_xid();
2795
2796         do {
2797                 unsigned int wsize;
2798                 struct cifs_credits credits_on_stack;
2799                 struct cifs_credits *credits = &credits_on_stack;
2800
2801                 if (open_file->invalidHandle) {
2802                         rc = cifs_reopen_file(open_file, false);
2803                         if (rc == -EAGAIN)
2804                                 continue;
2805                         else if (rc)
2806                                 break;
2807                 }
2808
2809                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2810                                                    &wsize, credits);
2811                 if (rc)
2812                         break;
2813
2814                 cur_len = min_t(const size_t, len, wsize);
2815
2816                 if (ctx->direct_io) {
2817                         ssize_t result;
2818
2819                         result = iov_iter_get_pages_alloc(
2820                                 from, &pagevec, cur_len, &start);
2821                         if (result < 0) {
2822                                 cifs_dbg(VFS,
2823                                         "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
2824                                         result, from->type,
2825                                         from->iov_offset, from->count);
2828                                 dump_stack();
2829
2830                                 rc = result;
2831                                 add_credits_and_wake_if(server, credits, 0);
2832                                 break;
2833                         }
2834                         cur_len = (size_t)result;
2835                         iov_iter_advance(from, cur_len);
2836
2837                         nr_pages =
2838                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2839
2840                         wdata = cifs_writedata_direct_alloc(pagevec,
2841                                              cifs_uncached_writev_complete);
2842                         if (!wdata) {
2843                                 rc = -ENOMEM;
2844                                 add_credits_and_wake_if(server, credits, 0);
2845                                 break;
2846                         }
2847
2849                         wdata->page_offset = start;
2850                         wdata->tailsz =
2851                                 nr_pages > 1 ?
2852                                         cur_len - (PAGE_SIZE - start) -
2853                                         (nr_pages - 2) * PAGE_SIZE :
2854                                         cur_len;
2855                 } else {
2856                         nr_pages = get_numpages(wsize, len, &cur_len);
2857                         wdata = cifs_writedata_alloc(nr_pages,
2858                                              cifs_uncached_writev_complete);
2859                         if (!wdata) {
2860                                 rc = -ENOMEM;
2861                                 add_credits_and_wake_if(server, credits, 0);
2862                                 break;
2863                         }
2864
2865                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2866                         if (rc) {
2867                                 kvfree(wdata->pages);
2868                                 kfree(wdata);
2869                                 add_credits_and_wake_if(server, credits, 0);
2870                                 break;
2871                         }
2872
2873                         num_pages = nr_pages;
2874                         rc = wdata_fill_from_iovec(
2875                                 wdata, from, &cur_len, &num_pages);
2876                         if (rc) {
2877                                 for (i = 0; i < nr_pages; i++)
2878                                         put_page(wdata->pages[i]);
2879                                 kvfree(wdata->pages);
2880                                 kfree(wdata);
2881                                 add_credits_and_wake_if(server, credits, 0);
2882                                 break;
2883                         }
2884
2885                         /*
2886                          * Bring nr_pages down to the number of pages we
2887                          * actually used, and free any pages that we didn't use.
2888                          */
2889                         for ( ; nr_pages > num_pages; nr_pages--)
2890                                 put_page(wdata->pages[nr_pages - 1]);
2891
2892                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2893                 }
2894
2895                 wdata->sync_mode = WB_SYNC_ALL;
2896                 wdata->nr_pages = nr_pages;
2897                 wdata->offset = (__u64)offset;
2898                 wdata->cfile = cifsFileInfo_get(open_file);
2899                 wdata->pid = pid;
2900                 wdata->bytes = cur_len;
2901                 wdata->pagesz = PAGE_SIZE;
2902                 wdata->credits = credits_on_stack;
2903                 wdata->ctx = ctx;
2904                 kref_get(&ctx->refcount);
2905
2906                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2907
2908                 if (!rc) {
2909                         if (wdata->cfile->invalidHandle)
2910                                 rc = -EAGAIN;
2911                         else
2912                                 rc = server->ops->async_writev(wdata,
2913                                         cifs_uncached_writedata_release);
2914                 }
2915
2916                 if (rc) {
2917                         add_credits_and_wake_if(server, &wdata->credits, 0);
2918                         kref_put(&wdata->refcount,
2919                                  cifs_uncached_writedata_release);
2920                         if (rc == -EAGAIN) {
2921                                 *from = saved_from;
2922                                 iov_iter_advance(from, offset - saved_offset);
2923                                 continue;
2924                         }
2925                         break;
2926                 }
2927
2928                 list_add_tail(&wdata->list, wdata_list);
2929                 offset += cur_len;
2930                 len -= cur_len;
2931         } while (len > 0);
2932
2933         free_xid(xid);
2934         return rc;
2935 }
2936
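/*
 * Reap the replies for the writes queued on ctx->list under ctx->aio_mutex:
 * wait for each wdata to complete in order of increasing offset, accumulate
 * ctx->total_len, resend pieces that failed with -EAGAIN, then invalidate
 * the cached mapping and complete the iocb (or wake the synchronous waiter).
 */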
2937 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2938 {
2939         struct cifs_writedata *wdata, *tmp;
2940         struct cifs_tcon *tcon;
2941         struct cifs_sb_info *cifs_sb;
2942         struct dentry *dentry = ctx->cfile->dentry;
2943         int rc;
2944
2945         tcon = tlink_tcon(ctx->cfile->tlink);
2946         cifs_sb = CIFS_SB(dentry->d_sb);
2947
2948         mutex_lock(&ctx->aio_mutex);
2949
2950         if (list_empty(&ctx->list)) {
2951                 mutex_unlock(&ctx->aio_mutex);
2952                 return;
2953         }
2954
2955         rc = ctx->rc;
2956         /*
2957          * Wait for and collect replies for any successful sends in order of
2958          * increasing offset. Once an error is hit, return without waiting
2959          * for any more replies.
2960          */
2961 restart_loop:
2962         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2963                 if (!rc) {
2964                         if (!try_wait_for_completion(&wdata->done)) {
2965                                 mutex_unlock(&ctx->aio_mutex);
2966                                 return;
2967                         }
2968
2969                         if (wdata->result)
2970                                 rc = wdata->result;
2971                         else
2972                                 ctx->total_len += wdata->bytes;
2973
2974                         /* resend call if it's a retryable error */
2975                         if (rc == -EAGAIN) {
2976                                 struct list_head tmp_list;
2977                                 struct iov_iter tmp_from = ctx->iter;
2978
2979                                 INIT_LIST_HEAD(&tmp_list);
2980                                 list_del_init(&wdata->list);
2981
2982                                 if (ctx->direct_io)
2983                                         rc = cifs_resend_wdata(
2984                                                 wdata, &tmp_list, ctx);
2985                                 else {
2986                                         iov_iter_advance(&tmp_from,
2987                                                  wdata->offset - ctx->pos);
2988
2989                                         rc = cifs_write_from_iter(wdata->offset,
2990                                                 wdata->bytes, &tmp_from,
2991                                                 ctx->cfile, cifs_sb, &tmp_list,
2992                                                 ctx);
2993
2994                                         kref_put(&wdata->refcount,
2995                                                 cifs_uncached_writedata_release);
2996                                 }
2997
2998                                 list_splice(&tmp_list, &ctx->list);
2999                                 goto restart_loop;
3000                         }
3001                 }
3002                 list_del_init(&wdata->list);
3003                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3004         }
3005
3006         cifs_stats_bytes_written(tcon, ctx->total_len);
3007         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3008
3009         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3010
3011         mutex_unlock(&ctx->aio_mutex);
3012
3013         if (ctx->iocb && ctx->iocb->ki_complete)
3014                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3015         else
3016                 complete(&ctx->done);
3017 }
3018
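/*
 * Common uncached write implementation behind cifs_direct_writev() and
 * cifs_user_writev(): set up a cifs_aio_ctx, send the whole range with
 * cifs_write_from_iter(), then return -EIOCBQUEUED for async kiocbs or
 * wait for collect_uncached_write_data() to complete ctx->done.
 */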
3019 static ssize_t __cifs_writev(
3020         struct kiocb *iocb, struct iov_iter *from, bool direct)
3021 {
3022         struct file *file = iocb->ki_filp;
3023         ssize_t total_written = 0;
3024         struct cifsFileInfo *cfile;
3025         struct cifs_tcon *tcon;
3026         struct cifs_sb_info *cifs_sb;
3027         struct cifs_aio_ctx *ctx;
3028         struct iov_iter saved_from = *from;
3029         size_t len = iov_iter_count(from);
3030         int rc;
3031
3032         /*
3033          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3034          * In this case, fall back to the non-direct write path.
3035          * This could be improved by getting pages directly in ITER_KVEC.
3036          */
3037         if (direct && from->type & ITER_KVEC) {
3038                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3039                 direct = false;
3040         }
3041
3042         rc = generic_write_checks(iocb, from);
3043         if (rc <= 0)
3044                 return rc;
3045
3046         cifs_sb = CIFS_FILE_SB(file);
3047         cfile = file->private_data;
3048         tcon = tlink_tcon(cfile->tlink);
3049
3050         if (!tcon->ses->server->ops->async_writev)
3051                 return -ENOSYS;
3052
3053         ctx = cifs_aio_ctx_alloc();
3054         if (!ctx)
3055                 return -ENOMEM;
3056
3057         ctx->cfile = cifsFileInfo_get(cfile);
3058
3059         if (!is_sync_kiocb(iocb))
3060                 ctx->iocb = iocb;
3061
3062         ctx->pos = iocb->ki_pos;
3063
3064         if (direct) {
3065                 ctx->direct_io = true;
3066                 ctx->iter = *from;
3067                 ctx->len = len;
3068         } else {
3069                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3070                 if (rc) {
3071                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3072                         return rc;
3073                 }
3074         }
3075
3076         /* grab the mutex here because write response handlers can access ctx */
3077         mutex_lock(&ctx->aio_mutex);
3078
3079         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3080                                   cfile, cifs_sb, &ctx->list, ctx);
3081
3082         /*
3083          * If at least one write was successfully sent, then discard any rc
3084          * value from the later writes. If the subsequent writes succeed, then
3085          * we'll end up returning whatever was written. If they fail, then
3086          * we'll get a new rc value from that.
3087          */
3088         if (!list_empty(&ctx->list))
3089                 rc = 0;
3090
3091         mutex_unlock(&ctx->aio_mutex);
3092
3093         if (rc) {
3094                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3095                 return rc;
3096         }
3097
3098         if (!is_sync_kiocb(iocb)) {
3099                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3100                 return -EIOCBQUEUED;
3101         }
3102
3103         rc = wait_for_completion_killable(&ctx->done);
3104         if (rc) {
3105                 mutex_lock(&ctx->aio_mutex);
3106                 ctx->rc = rc = -EINTR;
3107                 total_written = ctx->total_len;
3108                 mutex_unlock(&ctx->aio_mutex);
3109         } else {
3110                 rc = ctx->rc;
3111                 total_written = ctx->total_len;
3112         }
3113
3114         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3115
3116         if (unlikely(!total_written))
3117                 return rc;
3118
3119         iocb->ki_pos += total_written;
3120         return total_written;
3121 }
3122
3123 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3124 {
3125         return __cifs_writev(iocb, from, true);
3126 }
3127
3128 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3129 {
3130         return __cifs_writev(iocb, from, false);
3131 }
3132
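/*
 * Write through the page cache while holding the inode lock, with
 * cinode->lock_sem held for reading so that no conflicting brlock can be
 * added while the range is checked against the lock list.
 */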
3133 static ssize_t
3134 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3135 {
3136         struct file *file = iocb->ki_filp;
3137         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3138         struct inode *inode = file->f_mapping->host;
3139         struct cifsInodeInfo *cinode = CIFS_I(inode);
3140         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3141         ssize_t rc;
3142
3143         inode_lock(inode);
3144         /*
3145          * We need to hold the semaphore to be sure nobody modifies the lock
3146          * list with a brlock that prevents writing.
3147          */
3148         down_read(&cinode->lock_sem);
3149
3150         rc = generic_write_checks(iocb, from);
3151         if (rc <= 0)
3152                 goto out;
3153
3154         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3155                                      server->vals->exclusive_lock_type, 0,
3156                                      NULL, CIFS_WRITE_OP))
3157                 rc = __generic_file_write_iter(iocb, from);
3158         else
3159                 rc = -EACCES;
3160 out:
3161         up_read(&cinode->lock_sem);
3162         inode_unlock(inode);
3163
3164         if (rc > 0)
3165                 rc = generic_write_sync(iocb, rc);
3166         return rc;
3167 }
3168
3169 ssize_t
3170 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3171 {
3172         struct inode *inode = file_inode(iocb->ki_filp);
3173         struct cifsInodeInfo *cinode = CIFS_I(inode);
3174         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3175         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3176                                                 iocb->ki_filp->private_data;
3177         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3178         ssize_t written;
3179
3180         written = cifs_get_writer(cinode);
3181         if (written)
3182                 return written;
3183
3184         if (CIFS_CACHE_WRITE(cinode)) {
3185                 if (cap_unix(tcon->ses) &&
3186                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3187                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3188                         written = generic_file_write_iter(iocb, from);
3189                         goto out;
3190                 }
3191                 written = cifs_writev(iocb, from);
3192                 goto out;
3193         }
3194         /*
3195          * For non-oplocked files in strict cache mode we need to write the data
3196          * to the server exactly from the pos to pos+len-1 rather than flush all
3197          * affected pages because it may cause an error with mandatory locks on
3198          * these pages but not on the region from pos to pos+len-1.
3199          */
3200         written = cifs_user_writev(iocb, from);
3201         if (CIFS_CACHE_READ(cinode)) {
3202                 /*
3203                  * We have read level caching and we have just sent a write
3204                  * request to the server thus making data in the cache stale.
3205                  * Zap the cache and set oplock/lease level to NONE to avoid
3206                  * reading stale data from the cache. All subsequent read
3207                  * operations will read new data from the server.
3208                  */
3209                 cifs_zap_mapping(inode);
3210                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3211                          inode);
3212                 cinode->oplock = 0;
3213         }
3214 out:
3215         cifs_put_writer(cinode);
3216         return written;
3217 }
3218
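/*
 * Allocate a cifs_readdata around a caller-supplied page array (used by the
 * direct I/O path); cifs_readdata_alloc() below also allocates the array.
 */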
3219 static struct cifs_readdata *
3220 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3221 {
3222         struct cifs_readdata *rdata;
3223
3224         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3225         if (rdata != NULL) {
3226                 rdata->pages = pages;
3227                 kref_init(&rdata->refcount);
3228                 INIT_LIST_HEAD(&rdata->list);
3229                 init_completion(&rdata->done);
3230                 INIT_WORK(&rdata->work, complete);
3231         }
3232
3233         return rdata;
3234 }
3235
3236 static struct cifs_readdata *
3237 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3238 {
3239         struct page **pages =
3240                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3241         struct cifs_readdata *ret = NULL;
3242
3243         if (pages) {
3244                 ret = cifs_readdata_direct_alloc(pages, complete);
3245                 if (!ret)
3246                         kfree(pages);
3247         }
3248
3249         return ret;
3250 }
3251
3252 void
3253 cifs_readdata_release(struct kref *refcount)
3254 {
3255         struct cifs_readdata *rdata = container_of(refcount,
3256                                         struct cifs_readdata, refcount);
3257 #ifdef CONFIG_CIFS_SMB_DIRECT
3258         if (rdata->mr) {
3259                 smbd_deregister_mr(rdata->mr);
3260                 rdata->mr = NULL;
3261         }
3262 #endif
3263         if (rdata->cfile)
3264                 cifsFileInfo_put(rdata->cfile);
3265
3266         kvfree(rdata->pages);
3267         kfree(rdata);
3268 }
3269
3270 static int
3271 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3272 {
3273         int rc = 0;
3274         struct page *page;
3275         unsigned int i;
3276
3277         for (i = 0; i < nr_pages; i++) {
3278                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3279                 if (!page) {
3280                         rc = -ENOMEM;
3281                         break;
3282                 }
3283                 rdata->pages[i] = page;
3284         }
3285
3286         if (rc) {
3287                 unsigned int nr_page_failed = i;
3288
3289                 for (i = 0; i < nr_page_failed; i++) {
3290                         put_page(rdata->pages[i]);
3291                         rdata->pages[i] = NULL;
3292                 }
3293         }
3294         return rc;
3295 }
3296
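/*
 * Final release for uncached reads: drop the aio ctx reference and the
 * per-page references before freeing the readdata itself.
 */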
3297 static void
3298 cifs_uncached_readdata_release(struct kref *refcount)
3299 {
3300         struct cifs_readdata *rdata = container_of(refcount,
3301                                         struct cifs_readdata, refcount);
3302         unsigned int i;
3303
3304         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3305         for (i = 0; i < rdata->nr_pages; i++) {
3306                 put_page(rdata->pages[i]);
3307         }
3308         cifs_readdata_release(refcount);
3309 }
3310
3311 /**
3312  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3313  * @rdata:      the readdata response with list of pages holding data
3314  * @iter:       destination for our data
3315  *
3316  * This function copies data from a list of pages in a readdata response into
3317  * the destination iov_iter. It will first calculate where the data should go
3318  * based on the info in the readdata and then copy the data into that spot.
3319  */
3320 static int
3321 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3322 {
3323         size_t remaining = rdata->got_bytes;
3324         unsigned int i;
3325
3326         for (i = 0; i < rdata->nr_pages; i++) {
3327                 struct page *page = rdata->pages[i];
3328                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3329                 size_t written;
3330
3331                 if (unlikely(iov_iter_is_pipe(iter))) {
3332                         void *addr = kmap_atomic(page);
3333
3334                         written = copy_to_iter(addr, copy, iter);
3335                         kunmap_atomic(addr);
3336                 } else
3337                         written = copy_page_to_iter(page, 0, copy, iter);
3338                 remaining -= written;
3339                 if (written < copy && iov_iter_count(iter) > 0)
3340                         break;
3341         }
3342         return remaining ? -EFAULT : 0;
3343 }
3344
3345 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3346
3347 static void
3348 cifs_uncached_readv_complete(struct work_struct *work)
3349 {
3350         struct cifs_readdata *rdata = container_of(work,
3351                                                 struct cifs_readdata, work);
3352
3353         complete(&rdata->done);
3354         collect_uncached_read_data(rdata->ctx);
3355         /* the call below can possibly free the last ref to the aio ctx */
3356         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3357 }
3358
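/*
 * Fill rdata->pages with up to @len bytes of response data: from @iter when
 * the caller already holds the data in a buffer (e.g. after decryption),
 * already placed by RDMA for SMB Direct (rdata->mr), or read straight from
 * the socket otherwise. Pages beyond @len are released rather than held.
 */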
3359 static int
3360 uncached_fill_pages(struct TCP_Server_Info *server,
3361                     struct cifs_readdata *rdata, struct iov_iter *iter,
3362                     unsigned int len)
3363 {
3364         int result = 0;
3365         unsigned int i;
3366         unsigned int nr_pages = rdata->nr_pages;
3367         unsigned int page_offset = rdata->page_offset;
3368
3369         rdata->got_bytes = 0;
3370         rdata->tailsz = PAGE_SIZE;
3371         for (i = 0; i < nr_pages; i++) {
3372                 struct page *page = rdata->pages[i];
3373                 size_t n;
3374                 unsigned int segment_size = rdata->pagesz;
3375
3376                 if (i == 0)
3377                         segment_size -= page_offset;
3378                 else
3379                         page_offset = 0;
3380
3382                 if (len == 0) {
3383                         /* no need to hold page hostage */
3384                         rdata->pages[i] = NULL;
3385                         rdata->nr_pages--;
3386                         put_page(page);
3387                         continue;
3388                 }
3389
3390                 n = len;
3391                 if (len >= segment_size)
3392                         /* enough data to fill the page */
3393                         n = segment_size;
3394                 else
3395                         rdata->tailsz = len;
3396                 len -= n;
3397
3398                 if (iter)
3399                         result = copy_page_from_iter(
3400                                         page, page_offset, n, iter);
3401 #ifdef CONFIG_CIFS_SMB_DIRECT
3402                 else if (rdata->mr)
3403                         result = n;
3404 #endif
3405                 else
3406                         result = cifs_read_page_from_socket(
3407                                         server, page, page_offset, n);
3408                 if (result < 0)
3409                         break;
3410
3411                 rdata->got_bytes += result;
3412         }
3413
3414         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3415                                                 rdata->got_bytes : result;
3416 }
3417
3418 static int
3419 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3420                               struct cifs_readdata *rdata, unsigned int len)
3421 {
3422         return uncached_fill_pages(server, rdata, NULL, len);
3423 }
3424
3425 static int
3426 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3427                               struct cifs_readdata *rdata,
3428                               struct iov_iter *iter)
3429 {
3430         return uncached_fill_pages(server, rdata, iter, iter->count);
3431 }
3432
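/*
 * Resend a read that failed with -EAGAIN (typically after a reconnect).
 * The rdata is resent as a single unit, so wait until the server grants
 * enough credits to cover rdata->bytes before reissuing the async read.
 */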
3433 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3434                         struct list_head *rdata_list,
3435                         struct cifs_aio_ctx *ctx)
3436 {
3437         unsigned int rsize;
3438         struct cifs_credits credits;
3439         int rc;
3440         struct TCP_Server_Info *server =
3441                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3442
3443         do {
3444                 if (rdata->cfile->invalidHandle) {
3445                         rc = cifs_reopen_file(rdata->cfile, true);
3446                         if (rc == -EAGAIN)
3447                                 continue;
3448                         else if (rc)
3449                                 break;
3450                 }
3451
3452                 /*
3453                  * Wait for credits to resend this rdata.
3454                  * Note: we are attempting to resend the whole rdata as one
3455                  * unit rather than in smaller segments.
3456                  */
3457                 do {
3458                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3459                                                 &rsize, &credits);
3460
3461                         if (rc)
3462                                 goto fail;
3463
3464                         if (rsize < rdata->bytes) {
3465                                 add_credits_and_wake_if(server, &credits, 0);
3466                                 msleep(1000);
3467                         }
3468                 } while (rsize < rdata->bytes);
3469                 rdata->credits = credits;
3470
3471                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3472                 if (!rc) {
3473                         if (rdata->cfile->invalidHandle)
3474                                 rc = -EAGAIN;
3475                         else
3476                                 rc = server->ops->async_readv(rdata);
3477                 }
3478
3479                 /* If the read was successfully sent, we are done */
3480                 if (!rc) {
3481                         /* Add to aio pending list */
3482                         list_add_tail(&rdata->list, rdata_list);
3483                         return 0;
3484                 }
3485
3486                 /* Roll back credits and retry if needed */
3487                 add_credits_and_wake_if(server, &rdata->credits, 0);
3488         } while (rc == -EAGAIN);
3489
3490 fail:
3491         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3492         return rc;
3493 }
3494
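/*
 * Read-side counterpart of cifs_write_from_iter(): split [offset, offset+len)
 * into rsize-sized pieces, pin user pages (direct I/O) or allocate page
 * arrays, and queue an async read for each piece on @rdata_list.
 */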
3495 static int
3496 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3497                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3498                      struct cifs_aio_ctx *ctx)
3499 {
3500         struct cifs_readdata *rdata;
3501         unsigned int npages, rsize;
3502         struct cifs_credits credits_on_stack;
3503         struct cifs_credits *credits = &credits_on_stack;
3504         size_t cur_len;
3505         int rc;
3506         pid_t pid;
3507         struct TCP_Server_Info *server;
3508         struct page **pagevec;
3509         size_t start;
3510         struct iov_iter direct_iov = ctx->iter;
3511
3512         server = tlink_tcon(open_file->tlink)->ses->server;
3513
3514         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3515                 pid = open_file->pid;
3516         else
3517                 pid = current->tgid;
3518
3519         if (ctx->direct_io)
3520                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3521
3522         do {
3523                 if (open_file->invalidHandle) {
3524                         rc = cifs_reopen_file(open_file, true);
3525                         if (rc == -EAGAIN)
3526                                 continue;
3527                         else if (rc)
3528                                 break;
3529                 }
3530
3531                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3532                                                    &rsize, credits);
3533                 if (rc)
3534                         break;
3535
3536                 cur_len = min_t(const size_t, len, rsize);
3537
3538                 if (ctx->direct_io) {
3539                         ssize_t result;
3540
3541                         result = iov_iter_get_pages_alloc(
3542                                         &direct_iov, &pagevec,
3543                                         cur_len, &start);
3544                         if (result < 0) {
3545                                 cifs_dbg(VFS,
3546                                         "couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3549                                         result, direct_iov.type,
3550                                         direct_iov.iov_offset,
3551                                         direct_iov.count);
3552                                 dump_stack();
3553
3554                                 rc = result;
3555                                 add_credits_and_wake_if(server, credits, 0);
3556                                 break;
3557                         }
3558                         cur_len = (size_t)result;
3559                         iov_iter_advance(&direct_iov, cur_len);
3560
3561                         rdata = cifs_readdata_direct_alloc(
3562                                         pagevec, cifs_uncached_readv_complete);
3563                         if (!rdata) {
3564                                 add_credits_and_wake_if(server, credits, 0);
3565                                 rc = -ENOMEM;
3566                                 break;
3567                         }
3568
3569                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3570                         rdata->page_offset = start;
3571                         rdata->tailsz = npages > 1 ?
3572                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3573                                 cur_len;
3574
3575                 } else {
3577                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3578                         /* allocate a readdata struct */
3579                         rdata = cifs_readdata_alloc(npages,
3580                                             cifs_uncached_readv_complete);
3581                         if (!rdata) {
3582                                 add_credits_and_wake_if(server, credits, 0);
3583                                 rc = -ENOMEM;
3584                                 break;
3585                         }
3586
3587                         rc = cifs_read_allocate_pages(rdata, npages);
3588                         if (rc) {
3589                                 kvfree(rdata->pages);
3590                                 kfree(rdata);
3591                                 add_credits_and_wake_if(server, credits, 0);
3592                                 break;
3593                         }
3594
3595                         rdata->tailsz = PAGE_SIZE;
3596                 }
3597
3598                 rdata->cfile = cifsFileInfo_get(open_file);
3599                 rdata->nr_pages = npages;
3600                 rdata->offset = offset;
3601                 rdata->bytes = cur_len;
3602                 rdata->pid = pid;
3603                 rdata->pagesz = PAGE_SIZE;
3604                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3605                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3606                 rdata->credits = credits_on_stack;
3607                 rdata->ctx = ctx;
3608                 kref_get(&ctx->refcount);
3609
3610                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3611
3612                 if (!rc) {
3613                         if (rdata->cfile->invalidHandle)
3614                                 rc = -EAGAIN;
3615                         else
3616                                 rc = server->ops->async_readv(rdata);
3617                 }
3618
3619                 if (rc) {
3620                         add_credits_and_wake_if(server, &rdata->credits, 0);
3621                         kref_put(&rdata->refcount,
3622                                 cifs_uncached_readdata_release);
3623                         if (rc == -EAGAIN) {
3624                                 iov_iter_revert(&direct_iov, cur_len);
3625                                 continue;
3626                         }
3627                         break;
3628                 }
3629
3630                 list_add_tail(&rdata->list, rdata_list);
3631                 offset += cur_len;
3632                 len -= cur_len;
3633         } while (len > 0);
3634
3635         return rc;
3636 }
3637
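/*
 * Reap the replies queued on ctx->list in order of increasing offset:
 * copy the data into the destination iterator for non-direct I/O, resend
 * -EAGAIN failures (reusing the rdata for direct I/O), and complete the
 * iocb or wake the synchronous waiter once everything has been consumed.
 */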
3638 static void
3639 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3640 {
3641         struct cifs_readdata *rdata, *tmp;
3642         struct iov_iter *to = &ctx->iter;
3643         struct cifs_sb_info *cifs_sb;
3644         int rc;
3645
3646         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3647
3648         mutex_lock(&ctx->aio_mutex);
3649
3650         if (list_empty(&ctx->list)) {
3651                 mutex_unlock(&ctx->aio_mutex);
3652                 return;
3653         }
3654
3655         rc = ctx->rc;
3656         /* the loop below should proceed in the order of increasing offsets */
3657 again:
3658         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3659                 if (!rc) {
3660                         if (!try_wait_for_completion(&rdata->done)) {
3661                                 mutex_unlock(&ctx->aio_mutex);
3662                                 return;
3663                         }
3664
3665                         if (rdata->result == -EAGAIN) {
3666                                 /* resend call if it's a retryable error */
3667                                 struct list_head tmp_list;
3668                                 unsigned int got_bytes = rdata->got_bytes;
3669
3670                                 list_del_init(&rdata->list);
3671                                 INIT_LIST_HEAD(&tmp_list);
3672
3673                                 /*
3674                                  * Got a part of data and then reconnect has
3675                                  * happened -- fill the buffer and continue
3676                                  * reading.
3677                                  */
3678                                 if (got_bytes && got_bytes < rdata->bytes) {
3679                                         rc = 0;
3680                                         if (!ctx->direct_io)
3681                                                 rc = cifs_readdata_to_iov(rdata, to);
3682                                         if (rc) {
3683                                                 kref_put(&rdata->refcount,
3684                                                         cifs_uncached_readdata_release);
3685                                                 continue;
3686                                         }
3687                                 }
3688
3689                                 if (ctx->direct_io) {
3690                                         /*
3691                                          * Reuse rdata, as this is
3692                                          * direct I/O.
3693                                          */
3694                                         rc = cifs_resend_rdata(
3695                                                 rdata,
3696                                                 &tmp_list, ctx);
3697                                 } else {
3698                                         rc = cifs_send_async_read(
3699                                                 rdata->offset + got_bytes,
3700                                                 rdata->bytes - got_bytes,
3701                                                 rdata->cfile, cifs_sb,
3702                                                 &tmp_list, ctx);
3703
3704                                         kref_put(&rdata->refcount,
3705                                                 cifs_uncached_readdata_release);
3706                                 }
3707
3708                                 list_splice(&tmp_list, &ctx->list);
3709
3710                                 goto again;
3711                         } else if (rdata->result)
3712                                 rc = rdata->result;
3713                         else if (!ctx->direct_io)
3714                                 rc = cifs_readdata_to_iov(rdata, to);
3715
3716                         /* if there was a short read -- discard anything left */
3717                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3718                                 rc = -ENODATA;
3719
3720                         ctx->total_len += rdata->got_bytes;
3721                 }
3722                 list_del_init(&rdata->list);
3723                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3724         }
3725
3726         if (!ctx->direct_io)
3727                 ctx->total_len = ctx->len - iov_iter_count(to);
3728
3729         /* mask nodata case */
3730         if (rc == -ENODATA)
3731                 rc = 0;
3732
3733         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3734
3735         mutex_unlock(&ctx->aio_mutex);
3736
3737         if (ctx->iocb && ctx->iocb->ki_complete)
3738                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3739         else
3740                 complete(&ctx->done);
3741 }
3742
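/*
 * Common uncached read implementation behind cifs_direct_readv() and
 * cifs_user_readv(), mirroring __cifs_writev(): build a cifs_aio_ctx,
 * dispatch the reads with cifs_send_async_read(), then return
 * -EIOCBQUEUED for async kiocbs or wait for ctx->done.
 */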
3743 static ssize_t __cifs_readv(
3744         struct kiocb *iocb, struct iov_iter *to, bool direct)
3745 {
3746         size_t len;
3747         struct file *file = iocb->ki_filp;
3748         struct cifs_sb_info *cifs_sb;
3749         struct cifsFileInfo *cfile;
3750         struct cifs_tcon *tcon;
3751         ssize_t rc, total_read = 0;
3752         loff_t offset = iocb->ki_pos;
3753         struct cifs_aio_ctx *ctx;
3754
3755         /*
3756          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
3757          * so fall back to the data copy read path.
3758          * This could be improved by getting pages directly in ITER_KVEC.
3759          */
3760         if (direct && to->type & ITER_KVEC) {
3761                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3762                 direct = false;
3763         }
3764
3765         len = iov_iter_count(to);
3766         if (!len)
3767                 return 0;
3768
3769         cifs_sb = CIFS_FILE_SB(file);
3770         cfile = file->private_data;
3771         tcon = tlink_tcon(cfile->tlink);
3772
3773         if (!tcon->ses->server->ops->async_readv)
3774                 return -ENOSYS;
3775
3776         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3777                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3778
3779         ctx = cifs_aio_ctx_alloc();
3780         if (!ctx)
3781                 return -ENOMEM;
3782
3783         ctx->cfile = cifsFileInfo_get(cfile);
3784
3785         if (!is_sync_kiocb(iocb))
3786                 ctx->iocb = iocb;
3787
3788         if (iter_is_iovec(to))
3789                 ctx->should_dirty = true;
3790
3791         if (direct) {
3792                 ctx->pos = offset;
3793                 ctx->direct_io = true;
3794                 ctx->iter = *to;
3795                 ctx->len = len;
3796         } else {
3797                 rc = setup_aio_ctx_iter(ctx, to, READ);
3798                 if (rc) {
3799                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3800                         return rc;
3801                 }
3802                 len = ctx->len;
3803         }
3804
3805         /* grab the mutex here because read response handlers can access ctx */
3806         mutex_lock(&ctx->aio_mutex);
3807
3808         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3809
3810         /* if at least one read request send succeeded, then reset rc */
3811         if (!list_empty(&ctx->list))
3812                 rc = 0;
3813
3814         mutex_unlock(&ctx->aio_mutex);
3815
3816         if (rc) {
3817                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3818                 return rc;
3819         }
3820
3821         if (!is_sync_kiocb(iocb)) {
3822                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3823                 return -EIOCBQUEUED;
3824         }
3825
3826         rc = wait_for_completion_killable(&ctx->done);
3827         if (rc) {
3828                 mutex_lock(&ctx->aio_mutex);
3829                 ctx->rc = rc = -EINTR;
3830                 total_read = ctx->total_len;
3831                 mutex_unlock(&ctx->aio_mutex);
3832         } else {
3833                 rc = ctx->rc;
3834                 total_read = ctx->total_len;
3835         }
3836
3837         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3838
3839         if (total_read) {
3840                 iocb->ki_pos += total_read;
3841                 return total_read;
3842         }
3843         return rc;
3844 }
3845
3846 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3847 {
3848         return __cifs_readv(iocb, to, true);
3849 }
3850
3851 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3852 {
3853         return __cifs_readv(iocb, to, false);
3854 }
3855
3856 ssize_t
3857 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3858 {
3859         struct inode *inode = file_inode(iocb->ki_filp);
3860         struct cifsInodeInfo *cinode = CIFS_I(inode);
3861         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3862         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3863                                                 iocb->ki_filp->private_data;
3864         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3865         int rc = -EACCES;
3866
3867         /*
3868          * In strict cache mode we need to read from the server all the time
3869          * if we don't have a level II oplock, because the server can delay the
3870          * mtime change, leaving us unable to decide whether to invalidate the
3871          * inode. Reading cached pages can also fail if there are mandatory
3872          * locks on pages affected by this read but not on the region from
3873          * pos to pos+len-1.
3874          */
3875         if (!CIFS_CACHE_READ(cinode))
3876                 return cifs_user_readv(iocb, to);
3877
3878         if (cap_unix(tcon->ses) &&
3879             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3880             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3881                 return generic_file_read_iter(iocb, to);
3882
3883         /*
3884          * We need to hold the semaphore to be sure nobody modifies the lock
3885          * list with a brlock that prevents reading.
3886          */
3887         down_read(&cinode->lock_sem);
3888         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3889                                      tcon->ses->server->vals->shared_lock_type,
3890                                      0, NULL, CIFS_READ_OP))
3891                 rc = generic_file_read_iter(iocb, to);
3892         up_read(&cinode->lock_sem);
3893         return rc;
3894 }
3895
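/*
 * Synchronous read path: loop issuing ->sync_read() calls of at most rsize
 * bytes, reopening an invalidated handle and retrying on -EAGAIN, until the
 * request is satisfied, an error occurs, or the server returns zero bytes.
 */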
3896 static ssize_t
3897 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3898 {
3899         int rc = -EACCES;
3900         unsigned int bytes_read = 0;
3901         unsigned int total_read;
3902         unsigned int current_read_size;
3903         unsigned int rsize;
3904         struct cifs_sb_info *cifs_sb;
3905         struct cifs_tcon *tcon;
3906         struct TCP_Server_Info *server;
3907         unsigned int xid;
3908         char *cur_offset;
3909         struct cifsFileInfo *open_file;
3910         struct cifs_io_parms io_parms;
3911         int buf_type = CIFS_NO_BUFFER;
3912         __u32 pid;
3913
3914         xid = get_xid();
3915         cifs_sb = CIFS_FILE_SB(file);
3916
3917         /* FIXME: set up handlers for larger reads and/or convert to async */
3918         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3919
3920         if (file->private_data == NULL) {
3921                 rc = -EBADF;
3922                 free_xid(xid);
3923                 return rc;
3924         }
3925         open_file = file->private_data;
3926         tcon = tlink_tcon(open_file->tlink);
3927         server = tcon->ses->server;
3928
3929         if (!server->ops->sync_read) {
3930                 free_xid(xid);
3931                 return -ENOSYS;
3932         }
3933
3934         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3935                 pid = open_file->pid;
3936         else
3937                 pid = current->tgid;
3938
3939         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3940                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3941
3942         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3943              total_read += bytes_read, cur_offset += bytes_read) {
3944                 do {
3945                         current_read_size = min_t(uint, read_size - total_read,
3946                                                   rsize);
3947                         /*
3948                          * For Windows ME and 9x we do not want to request
3949                          * more than was negotiated, since the server will
3950                          * otherwise refuse the read.
3951                          */
3952                         if ((tcon->ses) && !(tcon->ses->capabilities &
3953                                 tcon->ses->server->vals->cap_large_files)) {
3954                                 current_read_size = min_t(uint,
3955                                         current_read_size, CIFSMaxBufSize);
3956                         }
3957                         if (open_file->invalidHandle) {
3958                                 rc = cifs_reopen_file(open_file, true);
3959                                 if (rc != 0)
3960                                         break;
3961                         }
3962                         io_parms.pid = pid;
3963                         io_parms.tcon = tcon;
3964                         io_parms.offset = *offset;
3965                         io_parms.length = current_read_size;
3966                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3967                                                     &bytes_read, &cur_offset,
3968                                                     &buf_type);
3969                 } while (rc == -EAGAIN);
3970
3971                 if (rc || (bytes_read == 0)) {
3972                         if (total_read) {
3973                                 break;
3974                         } else {
3975                                 free_xid(xid);
3976                                 return rc;
3977                         }
3978                 } else {
3979                         cifs_stats_bytes_read(tcon, total_read);
3980                         *offset += bytes_read;
3981                 }
3982         }
3983         free_xid(xid);
3984         return total_read;
3985 }
3986
3987 /*
3988  * If the page is mmap'ed into a process' page tables, then we need to make
3989  * sure that it doesn't change while being written back.
3990  */
3991 static vm_fault_t
3992 cifs_page_mkwrite(struct vm_fault *vmf)
3993 {
3994         struct page *page = vmf->page;
3995
3996         lock_page(page);
3997         return VM_FAULT_LOCKED;
3998 }
3999
4000 static const struct vm_operations_struct cifs_file_vm_ops = {
4001         .fault = filemap_fault,
4002         .map_pages = filemap_map_pages,
4003         .page_mkwrite = cifs_page_mkwrite,
4004 };
4005
4006 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4007 {
4008         int xid, rc = 0;
4009         struct inode *inode = file_inode(file);
4010
4011         xid = get_xid();
4012
4013         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4014                 rc = cifs_zap_mapping(inode);
4015         if (!rc)
4016                 rc = generic_file_mmap(file, vma);
4017         if (!rc)
4018                 vma->vm_ops = &cifs_file_vm_ops;
4019
4020         free_xid(xid);
4021         return rc;
4022 }
4023
4024 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4025 {
4026         int rc, xid;
4027
4028         xid = get_xid();
4029
4030         rc = cifs_revalidate_file(file);
4031         if (rc)
4032                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4033                          rc);
4034         if (!rc)
4035                 rc = generic_file_mmap(file, vma);
4036         if (!rc)
4037                 vma->vm_ops = &cifs_file_vm_ops;
4038
4039         free_xid(xid);
4040         return rc;
4041 }
4042
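/*
 * Completion work for cached (readpages) reads: add each page to the LRU,
 * mark it uptodate if the read (fully or partially) succeeded, push it to
 * fscache where appropriate, then unlock and release it.
 */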
4043 static void
4044 cifs_readv_complete(struct work_struct *work)
4045 {
4046         unsigned int i, got_bytes;
4047         struct cifs_readdata *rdata = container_of(work,
4048                                                 struct cifs_readdata, work);
4049
4050         got_bytes = rdata->got_bytes;
4051         for (i = 0; i < rdata->nr_pages; i++) {
4052                 struct page *page = rdata->pages[i];
4053
4054                 lru_cache_add_file(page);
4055
4056                 if (rdata->result == 0 ||
4057                     (rdata->result == -EAGAIN && got_bytes)) {
4058                         flush_dcache_page(page);
4059                         SetPageUptodate(page);
4060                 }
4061
4062                 unlock_page(page);
4063
4064                 if (rdata->result == 0 ||
4065                     (rdata->result == -EAGAIN && got_bytes))
4066                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4067
4068                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4069
4070                 put_page(page);
4071                 rdata->pages[i] = NULL;
4072         }
4073         kref_put(&rdata->refcount, cifs_readdata_release);
4074 }
4075
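/*
 * Like uncached_fill_pages() but for the readpages path: a short read zeroes
 * the tail of the partially-filled page, pages past the server's EOF are
 * zeroed and marked uptodate, and any remaining unneeded pages are released.
 */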
4076 static int
4077 readpages_fill_pages(struct TCP_Server_Info *server,
4078                      struct cifs_readdata *rdata, struct iov_iter *iter,
4079                      unsigned int len)
4080 {
4081         int result = 0;
4082         unsigned int i;
4083         u64 eof;
4084         pgoff_t eof_index;
4085         unsigned int nr_pages = rdata->nr_pages;
4086         unsigned int page_offset = rdata->page_offset;
4087
4088         /* determine the eof that the server (probably) has */
4089         eof = CIFS_I(rdata->mapping->host)->server_eof;
4090         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4091         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4092
4093         rdata->got_bytes = 0;
4094         rdata->tailsz = PAGE_SIZE;
4095         for (i = 0; i < nr_pages; i++) {
4096                 struct page *page = rdata->pages[i];
4097                 unsigned int to_read = rdata->pagesz;
4098                 size_t n;
4099
4100                 if (i == 0)
4101                         to_read -= page_offset;
4102                 else
4103                         page_offset = 0;
4104
4105                 n = to_read;
4106
4107                 if (len >= to_read) {
4108                         len -= to_read;
4109                 } else if (len > 0) {
4110                         /* enough for partial page, fill and zero the rest */
4111                         zero_user(page, len + page_offset, to_read - len);
4112                         n = rdata->tailsz = len;
4113                         len = 0;
4114                 } else if (page->index > eof_index) {
4115                         /*
4116                          * The VFS will not try to do readahead past the
4117                          * i_size, but it's possible that we have outstanding
4118                          * writes with gaps in the middle and the i_size hasn't
4119                          * caught up yet. Populate those with zeroed out pages
4120                          * to prevent the VFS from repeatedly attempting to
4121                          * fill them until the writes are flushed.
4122                          */
4123                         zero_user(page, 0, PAGE_SIZE);
4124                         lru_cache_add_file(page);
4125                         flush_dcache_page(page);
4126                         SetPageUptodate(page);
4127                         unlock_page(page);
4128                         put_page(page);
4129                         rdata->pages[i] = NULL;
4130                         rdata->nr_pages--;
4131                         continue;
4132                 } else {
4133                         /* no need to hold page hostage */
4134                         lru_cache_add_file(page);
4135                         unlock_page(page);
4136                         put_page(page);
4137                         rdata->pages[i] = NULL;
4138                         rdata->nr_pages--;
4139                         continue;
4140                 }
4141
4142                 if (iter)
4143                         result = copy_page_from_iter(
4144                                         page, page_offset, n, iter);
4145 #ifdef CONFIG_CIFS_SMB_DIRECT
4146                 else if (rdata->mr)
4147                         result = n;
4148 #endif
4149                 else
4150                         result = cifs_read_page_from_socket(
4151                                         server, page, page_offset, n);
4152                 if (result < 0)
4153                         break;
4154
4155                 rdata->got_bytes += result;
4156         }
4157
4158         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4159                                                 rdata->got_bytes : result;
4160 }
4161
4162 static int
4163 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4164                                struct cifs_readdata *rdata, unsigned int len)
4165 {
4166         return readpages_fill_pages(server, rdata, NULL, len);
4167 }
4168
4169 static int
4170 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4171                                struct cifs_readdata *rdata,
4172                                struct iov_iter *iter)
4173 {
4174         return readpages_fill_pages(server, rdata, iter, iter->count);
4175 }
4176
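/*
 * Carve a run of index-contiguous pages (up to @rsize bytes) off @page_list,
 * add them to the page cache, and move them onto @tmplist; *offset, *bytes
 * and *nr_pages describe the resulting read request.
 */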
4177 static int
4178 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4179                     unsigned int rsize, struct list_head *tmplist,
4180                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4181 {
4182         struct page *page, *tpage;
4183         unsigned int expected_index;
4184         int rc;
4185         gfp_t gfp = readahead_gfp_mask(mapping);
4186
4187         INIT_LIST_HEAD(tmplist);
4188
4189         page = lru_to_page(page_list);
4190
4191         /*
4192          * Lock the page and put it in the cache. Since no one else
4193          * should have access to this page, we're safe to simply set
4194          * PG_locked without checking it first.
4195          */
4196         __SetPageLocked(page);
4197         rc = add_to_page_cache_locked(page, mapping,
4198                                       page->index, gfp);
4199
4200         /* give up if we can't stick it in the cache */
4201         if (rc) {
4202                 __ClearPageLocked(page);
4203                 return rc;
4204         }
4205
4206         /* move first page to the tmplist */
4207         *offset = (loff_t)page->index << PAGE_SHIFT;
4208         *bytes = PAGE_SIZE;
4209         *nr_pages = 1;
4210         list_move_tail(&page->lru, tmplist);
4211
4212         /* now try and add more pages onto the request */
4213         expected_index = page->index + 1;
4214         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4215                 /* discontinuity? */
4216                 if (page->index != expected_index)
4217                         break;
4218
4219                 /* would this page push the read over the rsize? */
4220                 if (*bytes + PAGE_SIZE > rsize)
4221                         break;
4222
4223                 __SetPageLocked(page);
4224                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4225                         __ClearPageLocked(page);
4226                         break;
4227                 }
4228                 list_move_tail(&page->lru, tmplist);
4229                 (*bytes) += PAGE_SIZE;
4230                 expected_index++;
4231                 (*nr_pages)++;
4232         }
4233         return rc;
4234 }
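
/*
 * For example (illustrative values only): with 4K pages and an rsize
 * of 16K, a page_list covering indexes 2, 4, 5, 6 and 7 produces a
 * request of a single page (offset = 2 << PAGE_SHIFT = 8192,
 * bytes = 4096) because of the discontinuity at index 3; the remaining
 * pages are picked up by the next readpages_get_pages() call from
 * cifs_readpages(), yielding a four-page request (offset = 16384,
 * bytes = 16384) that exactly fills the rsize budget.
 */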

static int cifs_readpages(struct file *file, struct address_space *mapping,
        struct list_head *page_list, unsigned num_pages)
{
        int rc;
        struct list_head tmplist;
        struct cifsFileInfo *open_file = file->private_data;
        struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
        struct TCP_Server_Info *server;
        pid_t pid;
        unsigned int xid;

        xid = get_xid();
        /*
         * Reads as many pages as possible from fscache. Returns -ENOBUFS
         * immediately if the cookie is negative.
         *
         * After this point, every page in the list might have PG_fscache set,
         * so we will need to clean that up off of every page we don't use.
         */
        rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
                                         &num_pages);
        if (rc == 0) {
                free_xid(xid);
                return rc;
        }

        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
                pid = open_file->pid;
        else
                pid = current->tgid;

        rc = 0;
        server = tlink_tcon(open_file->tlink)->ses->server;

        cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
                 __func__, file, mapping, num_pages);

        /*
         * Start with the page at end of list and move it to private
         * list. Do the same with any following pages until we hit
         * the rsize limit, hit an index discontinuity, or run out of
         * pages. Issue the async read and then start the loop again
         * until the list is empty.
         *
         * Note that list order is important. The page_list is in
         * the order of declining indexes. When we put the pages in
         * the rdata->pages, then we want them in increasing order.
         */
        while (!list_empty(page_list)) {
                unsigned int i, nr_pages, bytes, rsize;
                loff_t offset;
                struct page *page, *tpage;
                struct cifs_readdata *rdata;
                struct cifs_credits credits_on_stack;
                struct cifs_credits *credits = &credits_on_stack;

                if (open_file->invalidHandle) {
                        rc = cifs_reopen_file(open_file, true);
                        if (rc == -EAGAIN)
                                continue;
                        else if (rc)
                                break;
                }

                rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
                                                   &rsize, credits);
                if (rc)
                        break;

                /*
                 * Give up immediately if rsize is too small to read an entire
                 * page. The VFS will fall back to readpage. We should never
                 * reach this point however since we set ra_pages to 0 when the
                 * rsize is smaller than a cache page.
                 */
                if (unlikely(rsize < PAGE_SIZE)) {
                        add_credits_and_wake_if(server, credits, 0);
                        free_xid(xid);
                        return 0;
                }

                rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
                                         &nr_pages, &offset, &bytes);
                if (rc) {
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
                if (!rdata) {
                        /* best to give up if we're out of mem */
                        list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                                list_del(&page->lru);
                                lru_cache_add_file(page);
                                unlock_page(page);
                                put_page(page);
                        }
                        rc = -ENOMEM;
                        add_credits_and_wake_if(server, credits, 0);
                        break;
                }

                rdata->cfile = cifsFileInfo_get(open_file);
                rdata->mapping = mapping;
                rdata->offset = offset;
                rdata->bytes = bytes;
                rdata->pid = pid;
                rdata->pagesz = PAGE_SIZE;
                rdata->tailsz = PAGE_SIZE;
                rdata->read_into_pages = cifs_readpages_read_into_pages;
                rdata->copy_into_pages = cifs_readpages_copy_into_pages;
                rdata->credits = credits_on_stack;

                list_for_each_entry_safe(page, tpage, &tmplist, lru) {
                        list_del(&page->lru);
                        rdata->pages[rdata->nr_pages++] = page;
                }

                rc = adjust_credits(server, &rdata->credits, rdata->bytes);

                if (!rc) {
                        if (rdata->cfile->invalidHandle)
                                rc = -EAGAIN;
                        else
                                rc = server->ops->async_readv(rdata);
                }

                if (rc) {
                        add_credits_and_wake_if(server, &rdata->credits, 0);
                        for (i = 0; i < rdata->nr_pages; i++) {
                                page = rdata->pages[i];
                                lru_cache_add_file(page);
                                unlock_page(page);
                                put_page(page);
                        }
                        /* Fallback to the readpage in error/reconnect cases */
                        kref_put(&rdata->refcount, cifs_readdata_release);
                        break;
                }

                kref_put(&rdata->refcount, cifs_readdata_release);
        }

        /* Any pages that have been shown to fscache but didn't get added to
         * the pagecache must be uncached before they get returned to the
         * allocator.
         */
        cifs_fscache_readpages_cancel(mapping->host, page_list);
        free_xid(xid);
        return rc;
}
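
/*
 * Credit handling in the loop above follows one rule: every credit
 * obtained from wait_mtu_credits() must either travel with the rdata
 * into async_readv() (to be released when the read completes) or be
 * handed back via add_credits_and_wake_if() on each early exit;
 * otherwise the server's credit window would leak and eventually
 * stall all further I/O on this connection.
 */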

/*
 * cifs_readpage_worker must be called with the page pinned
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
        loff_t *poffset)
{
        char *read_data;
        int rc;

        /* Is the page cached? */
        rc = cifs_readpage_from_fscache(file_inode(file), page);
        if (rc == 0)
                goto read_complete;

        read_data = kmap(page);
        /* for reads over a certain size we could initiate async read ahead */

        rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

        if (rc < 0)
                goto io_error;
        else
                cifs_dbg(FYI, "Bytes read %d\n", rc);

        /* we do not want atime to be less than mtime, it broke some apps */
        file_inode(file)->i_atime = current_time(file_inode(file));
        if (timespec64_compare(&(file_inode(file)->i_atime),
                               &(file_inode(file)->i_mtime)) < 0)
                file_inode(file)->i_atime = file_inode(file)->i_mtime;

        if (PAGE_SIZE > rc)
                memset(read_data + rc, 0, PAGE_SIZE - rc);

        flush_dcache_page(page);
        SetPageUptodate(page);

        /* send this page to the cache */
        cifs_readpage_to_fscache(file_inode(file), page);

        rc = 0;

io_error:
        kunmap(page);
        unlock_page(page);

read_complete:
        return rc;
}
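
/*
 * Note the short-read handling above: cifs_read() may legitimately
 * return fewer than PAGE_SIZE bytes (for instance at end of file), so
 * the remainder of the page is zero-filled before SetPageUptodate()
 * to avoid exposing stale data through the pagecache.
 */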

static int cifs_readpage(struct file *file, struct page *page)
{
        loff_t offset = (loff_t)page->index << PAGE_SHIFT;
        int rc = -EACCES;
        unsigned int xid;

        xid = get_xid();

        if (file->private_data == NULL) {
                rc = -EBADF;
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
                 page, (int)offset, (int)offset);

        rc = cifs_readpage_worker(file, page, &offset);

        free_xid(xid);
        return rc;
}

static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
        struct cifsFileInfo *open_file;
        struct cifs_tcon *tcon =
                cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));

        spin_lock(&tcon->open_file_lock);
        list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
                if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
                        spin_unlock(&tcon->open_file_lock);
                        return 1;
                }
        }
        spin_unlock(&tcon->open_file_lock);
        return 0;
}

/*
 * We do not want to update the file size from the server for inodes
 * open for write, to avoid races with writepage extending the file.
 * In the future we could consider allowing a refresh of the inode
 * only on increases in the file size, but this is tricky to do
 * without racing with writebehind page caching in the current Linux
 * kernel design.
 */
bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
        if (!cifsInode)
                return true;

        if (is_inode_writable(cifsInode)) {
                /* This inode is open for write at least once */
                struct cifs_sb_info *cifs_sb;

                cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /* since there is no page cache to corrupt on
                           directio we can change size safely */
                        return true;
                }

                if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
                        return true;

                return false;
        } else
                return true;
}
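
/*
 * For example (illustrative values only): if an inode is open for
 * write on a cached mount, the local i_size is 8192 because dirty
 * pages have extended the file, and the server still reports an
 * end-of-file of 4096, is_size_safe_to_change() returns false so the
 * stale server size does not clobber the locally extended one. A
 * server end-of-file of 16384 (another client grew the file) would
 * be accepted.
 */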

static int cifs_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
                        struct page **pagep, void **fsdata)
{
        int oncethru = 0;
        pgoff_t index = pos >> PAGE_SHIFT;
        loff_t offset = pos & (PAGE_SIZE - 1);
        loff_t page_start = pos & PAGE_MASK;
        loff_t i_size;
        struct page *page;
        int rc = 0;

        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                rc = -ENOMEM;
                goto out;
        }

        if (PageUptodate(page))
                goto out;

        /*
         * If we write a full page it will be up to date, no need to read from
         * the server. If the write is short, we'll end up doing a sync write
         * instead.
         */
        if (len == PAGE_SIZE)
                goto out;

        /*
         * optimize away the read when we have an oplock, and we're not
         * expecting to use any of the data we'd be reading in. That
         * is, when the page lies beyond the EOF, or straddles the EOF
         * and the write will cover all of the existing data.
         */
        if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
                i_size = i_size_read(mapping->host);
                if (page_start >= i_size ||
                    (offset == 0 && (pos + len) >= i_size)) {
                        zero_user_segments(page, 0, offset,
                                           offset + len,
                                           PAGE_SIZE);
                        /*
                         * PageChecked means that the parts of the page
                         * to which we're not writing are considered up
                         * to date. Once the data is copied to the
                         * page, it can be set uptodate.
                         */
                        SetPageChecked(page);
                        goto out;
                }
        }

        if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
                /*
                 * might as well read a page, it is fast enough. If we get
                 * an error, we don't need to return it. cifs_write_end will
                 * do a sync write instead since PG_uptodate isn't set.
                 */
                cifs_readpage_worker(file, page, &page_start);
                put_page(page);
                oncethru = 1;
                goto start;
        } else {
                /* we could try using another file handle if there is one -
                   but how would we lock it to prevent a close of that handle
                   racing with this read? In any case this will be written
                   out by write_end so it is fine */
        }
out:
        *pagep = page;
        return rc;
}
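
/*
 * For example (illustrative values only): with 4K pages, a write at
 * pos = 5000 of len = 100 gives index = 1 (5000 >> 12), offset = 904
 * (5000 & 4095) and page_start = 4096 (5000 & PAGE_MASK), so only
 * bytes 904..1003 of the second page are touched and the rest of the
 * page must either be read in, zeroed, or left to a sync write in
 * cifs_write_end.
 */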

static int cifs_release_page(struct page *page, gfp_t gfp)
{
        if (PagePrivate(page))
                return 0;

        return cifs_fscache_release_page(page, gfp);
}

static void cifs_invalidate_page(struct page *page, unsigned int offset,
                                 unsigned int length)
{
        struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);

        if (offset == 0 && length == PAGE_SIZE)
                cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}

static int cifs_launder_page(struct page *page)
{
        int rc = 0;
        loff_t range_start = page_offset(page);
        loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = 0,
                .range_start = range_start,
                .range_end = range_end,
        };

        cifs_dbg(FYI, "Launder page: %p\n", page);

        if (clear_page_dirty_for_io(page))
                rc = cifs_writepage_locked(page, &wbc);

        cifs_fscache_invalidate_page(page, page->mapping->host);
        return rc;
}

void cifs_oplock_break(struct work_struct *work)
{
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = d_inode(cfile->dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;

        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
                        TASK_UNINTERRUPTIBLE);

        server->ops->downgrade_oplock(server, cinode,
                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));

        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
                         inode);
                cinode->oplock = 0;
        }

        if (inode && S_ISREG(inode->i_mode)) {
                if (CIFS_CACHE_READ(cinode))
                        break_lease(inode, O_RDONLY);
                else
                        break_lease(inode, O_WRONLY);
                rc = filemap_fdatawrite(inode->i_mapping);
                if (!CIFS_CACHE_READ(cinode)) {
                        rc = filemap_fdatawait(inode->i_mapping);
                        mapping_set_error(inode->i_mapping, rc);
                        cifs_zap_mapping(inode);
                }
                cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
        }

        rc = cifs_push_locks(cfile);
        if (rc)
                cifs_dbg(VFS, "Push locks rc = %d\n", rc);

        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session, using a now incorrect file handle, is not a data
         * integrity issue, but do not bother sending an oplock release
         * if the session to the server is still disconnected, since the
         * oplock has already been released by the server in that case.
         */
        if (!cfile->oplock_break_cancelled) {
                rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
                                                             cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        }
        _cifsFileInfo_put(cfile, false /* do not wait for ourself */);
        cifs_done_oplock_break(cinode);
}
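
/*
 * Note the ordering above: cached data is flushed (and, when the read
 * cache is also lost, invalidated) and byte-range locks are pushed to
 * the server *before* the oplock break is acknowledged, since once the
 * server sees the acknowledgement it may grant conflicting access to
 * other clients.
 */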

/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed
 * otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests before they reach this point, so this method
 * should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
        /*
         * FIXME
         * Eventually we need to support direct IO for non-forcedirectio
         * mounts.
         */
        return -EINVAL;
}

const struct address_space_operations cifs_addr_ops = {
        .readpage = cifs_readpage,
        .readpages = cifs_readpages,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .direct_IO = cifs_direct_io,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};

/*
 * cifs_readpages requires the server to support a buffer large enough to
 * contain the header plus one complete page of data.  Otherwise, we need
 * to leave cifs_readpages out of the address space operations.
 */
const struct address_space_operations cifs_addr_ops_smallbuf = {
        .readpage = cifs_readpage,
        .writepage = cifs_writepage,
        .writepages = cifs_writepages,
        .write_begin = cifs_write_begin,
        .write_end = cifs_write_end,
        .set_page_dirty = __set_page_dirty_nobuffers,
        .releasepage = cifs_release_page,
        .invalidatepage = cifs_invalidate_page,
        .launder_page = cifs_launder_page,
};