/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/mm.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"
#include "smbdirect.h"

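/*
 * Map POSIX open flags to the NT access mask requested from the server;
 * if no O_ACCMODE bit matches, fall back to an explicit set of read,
 * write and attribute rights.
 */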
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

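/*
 * Translate POSIX open flags into the SMB_O_* flags used by the CIFS
 * POSIX extensions open call.
 */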
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

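/*
 * Derive the CIFS create disposition (FILE_CREATE, FILE_OPEN, ...) from
 * the O_CREAT/O_EXCL/O_TRUNC open flags; see the mapping table in
 * cifs_nt_open() below.
 */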
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

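/*
 * Open a file using the CIFS POSIX extensions. On success, if the
 * caller passed a non-NULL pinode, instantiate or update the inode from
 * the FILE_UNIX_BASIC_INFO returned by the server.
 */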
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

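/*
 * Open a file with a traditional NT-style create call and then refresh
 * the inode metadata from the server.
 */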
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the FILE_SUPERSEDE
 *      disposition (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates an existing file
 *      rather than recreating it as FILE_SUPERSEDE does (which uses the
 *      attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag, and
 *      the read/write flags match reasonably.  O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

        if (rc) {
                server->ops->close(xid, tcon, fid);
                if (rc == -ESTALE)
                        rc = -EOPENSTALE;
        }

out:
        kfree(buf);
        return rc;
}

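/*
 * Return true if any open instance of this inode holds cached mandatory
 * byte-range locks.
 */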
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

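/*
 * Acquire lock_sem for writing by polling: retry down_write_trylock()
 * with a 10ms sleep between attempts rather than blocking in
 * down_write().
 */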
void
cifs_down_write(struct rw_semaphore *sem)
{
        while (!down_write_trylock(sem))
                msleep(10);
}

static void cifsFileInfo_put_work(struct work_struct *work);

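/*
 * Allocate and initialize the per-open private data (cifsFileInfo),
 * link it into the inode and tcon open-file lists, and apply the oplock
 * granted by the server (or recorded in the pending open).
 */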
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        INIT_WORK(&cfile->put, cifsFileInfo_put_work);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        cifs_down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        atomic_inc(&tcon->num_local_opens);

        /* if this is a readable file instance, put it first in the list */
        spin_lock(&cinode->open_file_lock);
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cinode->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}

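/*
 * Take an additional reference on the file private data; paired with
 * cifsFileInfo_put().
 */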
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        struct super_block *sb = inode->i_sb;

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        cifs_down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

static void cifsFileInfo_put_work(struct work_struct *work)
{
        struct cifsFileInfo *cifs_file = container_of(work,
                        struct cifsFileInfo, put);

        cifsFileInfo_put_final(cifs_file);
}

/**
 * cifsFileInfo_put - release a reference to file private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        _cifsFileInfo_put(cifs_file, true, true);
}

/**
 * _cifsFileInfo_put - release a reference to file private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one. When calling this function from the
 * oplock break handler, you need to pass false.
 */
void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
                       bool wait_oplock_handler, bool offload)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);
        spin_lock(&cifsi->open_file_lock);
        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&cifsi->open_file_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);
        atomic_dec(&tcon->num_local_opens);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&cifsi->open_file_lock);
        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = wait_oplock_handler ?
                cancel_work_sync(&cifs_file->oplock_break) : false;

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close_getattr)
                        server->ops->close_getattr(xid, tcon, cifs_file);
                else if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        if (offload)
                queue_work(fileinfo_put_wq, &cifs_file->put);
        else
                cifsFileInfo_put_final(cifs_file);
}

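/*
 * VFS ->open() entry point: try a POSIX-extensions open first when the
 * server supports it, otherwise fall back to an NT-style open.
 */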
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

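/*
 * Reopen a file whose handle was invalidated, e.g. after a reconnect.
 * If @can_flush is true, write back dirty pages and refresh the inode
 * from the server before the handle is reused.
 */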
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here, because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called. If the server was down, that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        /* O_SYNC also has bit for O_DSYNC so following check picks up either */
        if (cfile->f_flags & O_SYNC)
                create_options |= CREATE_WRITE_THROUGH;

        if (cfile->f_flags & O_DIRECT)
                create_options |= CREATE_NO_BUFFER;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = cifs_create_options(cifs_sb, create_options);
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size
         * can be stale. If we knew for sure that the inode was not dirty
         * locally we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                if (!is_interrupt_error(rc))
                        mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if
         * we have data that would invalidate the current end of file on
         * the server, we can not go to the server to get the new inode
         * info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

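/*
 * VFS ->release() for regular files: drop our reference to the per-open
 * private data.
 */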
int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                _cifsFileInfo_put(file->private_data, true, false);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

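/*
 * After a reconnect, walk the tcon's open file list and reopen any
 * invalidated handles so that persistent handles are reestablished.
 */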
void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

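/*
 * VFS ->release() for directories: close the search handle on the
 * server and free any cached search response buffer.
 */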
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

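/* Allocate and initialize a byte-range lock record. */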
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        lock->flags = flags;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

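/* Wake up and unlink all lock requests blocked on @lock. */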
void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, __u16 flags,
                            struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (rw_check == CIFS_LOCK_OP &&
                    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
                    server->ops->compare_fids(cfile, cur_cfile))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

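/*
 * Check every open instance of the inode for a conflict with the given
 * range; the conflict rules are in cifs_find_fid_lock_conflict().
 */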
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, __u16 flags,
                        struct cifsLockInfo **conf_lock, int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 flags, cfile, conf_lock,
                                                 rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we
 * can cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        flock->fl_flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

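/* Append a lock record to this open file's lock list. */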
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        cifs_down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        cifs_down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, lock->flags, &conf_lock,
                                        CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                cifs_down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we
 * don't need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker);
                if (!rc)
                        goto try_again;
                locks_delete_block(flock);
        }
        return rc;
}

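/*
 * Send all cached mandatory byte-range locks of this open file to the
 * server, batching as many LOCKING_ANDX_RANGE entries per request as
 * the server's buffer size allows.
 */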
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        static const int types[] = {
                LOCKING_ANDX_LARGE_FILES,
                LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
        };
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
                free_xid(xid);
                return -EINVAL;
        }

        BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
                     PAGE_SIZE);
        max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
                        PAGE_SIZE);
        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

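/*
 * Derive a stable on-the-wire lock owner id from the local fl_owner
 * pointer mixed with a global secret.
 */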
static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

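/*
 * Send all cached POSIX byte-range locks on the inode to the server.
 * The lock records are preallocated because flc_lock cannot be held
 * across the blocking CIFSSMBPosixLock() calls.
 */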
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                el = el->next;
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

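/*
 * Push all cached byte-range locks to the server, choosing POSIX or
 * mandatory semantics based on the negotiated capabilities and mount
 * options, and stop caching locks locally afterwards.
 */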
static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        cifs_down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

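/*
 * Decode a struct file_lock into the lock type bits and the
 * lock/unlock/wait indications used by the lower-level locking calls.
 */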
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

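/*
 * Handle F_GETLK: test for a conflicting lock, first against locally
 * cached locks and then, if needed, by probing the server with a
 * temporary lock/unlock pair.
 */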
1434 static int
1435 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1436            bool wait_flag, bool posix_lck, unsigned int xid)
1437 {
1438         int rc = 0;
1439         __u64 length = 1 + flock->fl_end - flock->fl_start;
1440         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1441         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1442         struct TCP_Server_Info *server = tcon->ses->server;
1443         __u16 netfid = cfile->fid.netfid;
1444
1445         if (posix_lck) {
1446                 int posix_lock_type;
1447
1448                 rc = cifs_posix_lock_test(file, flock);
1449                 if (!rc)
1450                         return rc;
1451
1452                 if (type & server->vals->shared_lock_type)
1453                         posix_lock_type = CIFS_RDLCK;
1454                 else
1455                         posix_lock_type = CIFS_WRLCK;
1456                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1457                                       hash_lockowner(flock->fl_owner),
1458                                       flock->fl_start, length, flock,
1459                                       posix_lock_type, wait_flag);
1460                 return rc;
1461         }
1462
1463         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1464         if (!rc)
1465                 return rc;
1466
1467         /* BB we could chain these into one lock request BB */
1468         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1469                                     1, 0, false);
1470         if (rc == 0) {
1471                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1472                                             type, 0, 1, false);
1473                 flock->fl_type = F_UNLCK;
1474                 if (rc != 0)
1475                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1476                                  rc);
1477                 return 0;
1478         }
1479
1480         if (type & server->vals->shared_lock_type) {
1481                 flock->fl_type = F_WRLCK;
1482                 return 0;
1483         }
1484
1485         type &= ~server->vals->exclusive_lock_type;
1486
1487         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1488                                     type | server->vals->shared_lock_type,
1489                                     1, 0, false);
1490         if (rc == 0) {
1491                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1492                         type | server->vals->shared_lock_type, 0, 1, false);
1493                 flock->fl_type = F_RDLCK;
1494                 if (rc != 0)
1495                         cifs_dbg(VFS, "Error %d unlocking previously locked range during lock test\n",
1496                                  rc);
1497         } else
1498                 flock->fl_type = F_WRLCK;
1499
1500         return 0;
1501 }
1502
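/*
 * List helpers for cached byte-range locks: cifs_move_llist() shuttles
 * every entry from one list to another, cifs_free_llist() wakes any
 * waiters and frees each entry.
 */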
1503 void
1504 cifs_move_llist(struct list_head *source, struct list_head *dest)
1505 {
1506         struct list_head *li, *tmp;
1507         list_for_each_safe(li, tmp, source)
1508                 list_move(li, dest);
1509 }
1510
1511 void
1512 cifs_free_llist(struct list_head *llist)
1513 {
1514         struct cifsLockInfo *li, *tmp;
1515         list_for_each_entry_safe(li, tmp, llist, llist) {
1516                 cifs_del_lock_waiters(li);
1517                 list_del(&li->llist);
1518                 kfree(li);
1519         }
1520 }
1521
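/*
 * Release every cached lock that falls inside the flock range. Matching
 * entries are batched into LOCKING_ANDX_RANGE arrays (one pass per lock
 * type) so many ranges go out in a single SMB call; if a request fails,
 * the affected locks are moved back onto the file's list.
 */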
1522 int
1523 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1524                   unsigned int xid)
1525 {
1526         int rc = 0, stored_rc;
1527         static const int types[] = {
1528                 LOCKING_ANDX_LARGE_FILES,
1529                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1530         };
1531         unsigned int i;
1532         unsigned int max_num, num, max_buf;
1533         LOCKING_ANDX_RANGE *buf, *cur;
1534         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1535         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1536         struct cifsLockInfo *li, *tmp;
1537         __u64 length = 1 + flock->fl_end - flock->fl_start;
1538         struct list_head tmp_llist;
1539
1540         INIT_LIST_HEAD(&tmp_llist);
1541
1542         /*
1543          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1544          * and check it before using.
1545          */
1546         max_buf = tcon->ses->server->maxBuf;
1547         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1548                 return -EINVAL;
1549
1550         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1551                      PAGE_SIZE);
1552         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1553                         PAGE_SIZE);
1554         max_num = (max_buf - sizeof(struct smb_hdr)) /
1555                                                 sizeof(LOCKING_ANDX_RANGE);
1556         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1557         if (!buf)
1558                 return -ENOMEM;
1559
1560         cifs_down_write(&cinode->lock_sem);
1561         for (i = 0; i < 2; i++) {
1562                 cur = buf;
1563                 num = 0;
1564                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1565                         if (flock->fl_start > li->offset ||
1566                             (flock->fl_start + length) <
1567                             (li->offset + li->length))
1568                                 continue;
1569                         if (current->tgid != li->pid)
1570                                 continue;
1571                         if (types[i] != li->type)
1572                                 continue;
1573                         if (cinode->can_cache_brlcks) {
1574                                 /*
1575                                  * We can cache brlock requests - simply remove
1576                                  * a lock from the file's list.
1577                                  */
1578                                 list_del(&li->llist);
1579                                 cifs_del_lock_waiters(li);
1580                                 kfree(li);
1581                                 continue;
1582                         }
1583                         cur->Pid = cpu_to_le16(li->pid);
1584                         cur->LengthLow = cpu_to_le32((u32)li->length);
1585                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1586                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1587                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1588                         /*
1589                          * We need to save a lock here to let us add it again to
1590                          * the file's list if the unlock range request fails on
1591                          * the server.
1592                          */
1593                         list_move(&li->llist, &tmp_llist);
1594                         if (++num == max_num) {
1595                                 stored_rc = cifs_lockv(xid, tcon,
1596                                                        cfile->fid.netfid,
1597                                                        li->type, num, 0, buf);
1598                                 if (stored_rc) {
1599                                         /*
1600                                          * We failed on the unlock range
1601                                          * request - add all locks from the tmp
1602                                          * list to the head of the file's list.
1603                                          */
1604                                         cifs_move_llist(&tmp_llist,
1605                                                         &cfile->llist->locks);
1606                                         rc = stored_rc;
1607                                 } else
1608                                         /*
1609                                          * The unlock range request succeeded -
1610                                          * free the tmp list.
1611                                          */
1612                                         cifs_free_llist(&tmp_llist);
1613                                 cur = buf;
1614                                 num = 0;
1615                         } else
1616                                 cur++;
1617                 }
1618                 if (num) {
1619                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1620                                                types[i], num, 0, buf);
1621                         if (stored_rc) {
1622                                 cifs_move_llist(&tmp_llist,
1623                                                 &cfile->llist->locks);
1624                                 rc = stored_rc;
1625                         } else
1626                                 cifs_free_llist(&tmp_llist);
1627                 }
1628         }
1629
1630         up_write(&cinode->lock_sem);
1631         kfree(buf);
1632         return rc;
1633 }
1634
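/*
 * Set or clear a byte-range lock. POSIX-capable servers are sent a
 * POSIX lock request; otherwise the lock is cached locally when
 * possible or sent as a mandatory lock, dropping a read oplock first so
 * that cached pages cannot mask it. For FL_POSIX/FL_FLOCK requests the
 * resulting state is mirrored into the VFS via locks_lock_file_wait().
 */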
1635 static int
1636 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1637            bool wait_flag, bool posix_lck, int lock, int unlock,
1638            unsigned int xid)
1639 {
1640         int rc = 0;
1641         __u64 length = 1 + flock->fl_end - flock->fl_start;
1642         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1643         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1644         struct TCP_Server_Info *server = tcon->ses->server;
1645         struct inode *inode = d_inode(cfile->dentry);
1646
1647         if (posix_lck) {
1648                 int posix_lock_type;
1649
1650                 rc = cifs_posix_lock_set(file, flock);
1651                 if (rc <= 0)
1652                         return rc;
1653
1654                 if (type & server->vals->shared_lock_type)
1655                         posix_lock_type = CIFS_RDLCK;
1656                 else
1657                         posix_lock_type = CIFS_WRLCK;
1658
1659                 if (unlock == 1)
1660                         posix_lock_type = CIFS_UNLCK;
1661
1662                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1663                                       hash_lockowner(flock->fl_owner),
1664                                       flock->fl_start, length,
1665                                       NULL, posix_lock_type, wait_flag);
1666                 goto out;
1667         }
1668
1669         if (lock) {
1670                 struct cifsLockInfo *lock;
1671
1672                 lock = cifs_lock_init(flock->fl_start, length, type,
1673                                       flock->fl_flags);
1674                 if (!lock)
1675                         return -ENOMEM;
1676
1677                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1678                 if (rc < 0) {
1679                         kfree(lock);
1680                         return rc;
1681                 }
1682                 if (!rc)
1683                         goto out;
1684
1685                 /*
1686                  * Windows 7 server can delay breaking lease from read to None
1687                  * if we set a byte-range lock on a file - break it explicitly
1688                  * before sending the lock to the server to be sure the next
1689                  * read won't conflict with non-overlapping locks due to
1690                  * the page cache reading whole pages.
1691                  */
1692                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1693                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1694                         cifs_zap_mapping(inode);
1695                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1696                                  inode);
1697                         CIFS_I(inode)->oplock = 0;
1698                 }
1699
1700                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1701                                             type, 1, 0, wait_flag);
1702                 if (rc) {
1703                         kfree(lock);
1704                         return rc;
1705                 }
1706
1707                 cifs_lock_add(cfile, lock);
1708         } else if (unlock)
1709                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1710
1711 out:
1712         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1713                 /*
1714                  * If this is a request to remove all locks because we
1715                  * are closing the file, it doesn't matter if the
1716                  * unlocking failed as both cifs.ko and the SMB server
1717                  * remove the lock on file close
1718                  */
1719                 if (rc) {
1720                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1721                         if (!(flock->fl_flags & FL_CLOSE))
1722                                 return rc;
1723                 }
1724                 rc = locks_lock_file_wait(file, flock);
1725         }
1726         return rc;
1727 }
1728
1729 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1730 {
1731         int rc, xid;
1732         int lock = 0, unlock = 0;
1733         bool wait_flag = false;
1734         bool posix_lck = false;
1735         struct cifs_sb_info *cifs_sb;
1736         struct cifs_tcon *tcon;
1737         struct cifsFileInfo *cfile;
1738         __u32 type;
1739
1740         rc = -EACCES;
1741         if (!(fl->fl_flags & FL_FLOCK))
1742                 return -ENOLCK;
1743
1744         xid = get_xid();
1745
1746         cfile = (struct cifsFileInfo *)file->private_data;
1747         tcon = tlink_tcon(cfile->tlink);
1748
1749         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1750                         tcon->ses->server);
1751         cifs_sb = CIFS_FILE_SB(file);
1752
1753         if (cap_unix(tcon->ses) &&
1754             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1755             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1756                 posix_lck = true;
1757
1758         if (!lock && !unlock) {
1759                 /*
1760                  * no lock and no unlock requested - nothing to do since we
1761                  * cannot tell what operation the caller wants
1762                  */
1763                 free_xid(xid);
1764                 return -EOPNOTSUPP;
1765         }
1766
1767         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1768                         xid);
1769         free_xid(xid);
1770         return rc;
1773 }
1774
1775 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1776 {
1777         int rc, xid;
1778         int lock = 0, unlock = 0;
1779         bool wait_flag = false;
1780         bool posix_lck = false;
1781         struct cifs_sb_info *cifs_sb;
1782         struct cifs_tcon *tcon;
1783         struct cifsFileInfo *cfile;
1784         __u32 type;
1785
1786         rc = -EACCES;
1787         xid = get_xid();
1788
1789         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1790                  cmd, flock->fl_flags, flock->fl_type,
1791                  flock->fl_start, flock->fl_end);
1792
1793         cfile = (struct cifsFileInfo *)file->private_data;
1794         tcon = tlink_tcon(cfile->tlink);
1795
1796         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1797                         tcon->ses->server);
1798         cifs_sb = CIFS_FILE_SB(file);
1799
1800         if (cap_unix(tcon->ses) &&
1801             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1802             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1803                 posix_lck = true;
1804         /*
1805          * BB add code here to normalize offset and length to account for
1806          * negative length which we can not accept over the wire.
1807          */
1808         if (IS_GETLK(cmd)) {
1809                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1810                 free_xid(xid);
1811                 return rc;
1812         }
1813
1814         if (!lock && !unlock) {
1815                 /*
1816                  * no lock and no unlock requested - nothing to do since we
1817                  * cannot tell what operation the caller wants
1818                  */
1819                 free_xid(xid);
1820                 return -EOPNOTSUPP;
1821         }
1822
1823         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1824                         xid);
1825         free_xid(xid);
1826         return rc;
1827 }
1828
1829 /*
1830  * update the file size (if needed) after a write. Should be called with
1831  * the inode->i_lock held
1832  */
1833 void
1834 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1835                       unsigned int bytes_written)
1836 {
1837         loff_t end_of_write = offset + bytes_written;
1838
1839         if (end_of_write > cifsi->server_eof)
1840                 cifsi->server_eof = end_of_write;
1841 }
1842
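/*
 * Synchronous write helper: send write_size bytes at *offset in chunks
 * bounded by the server's retry size, reopening an invalidated handle
 * and retrying on -EAGAIN. Advances *offset and updates the cached EOF
 * and i_size as bytes are committed.
 */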
1843 static ssize_t
1844 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1845            size_t write_size, loff_t *offset)
1846 {
1847         int rc = 0;
1848         unsigned int bytes_written = 0;
1849         unsigned int total_written;
1850         struct cifs_tcon *tcon;
1851         struct TCP_Server_Info *server;
1852         unsigned int xid;
1853         struct dentry *dentry = open_file->dentry;
1854         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1855         struct cifs_io_parms io_parms;
1856
1857         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1858                  write_size, *offset, dentry);
1859
1860         tcon = tlink_tcon(open_file->tlink);
1861         server = tcon->ses->server;
1862
1863         if (!server->ops->sync_write)
1864                 return -ENOSYS;
1865
1866         xid = get_xid();
1867
1868         for (total_written = 0; write_size > total_written;
1869              total_written += bytes_written) {
1870                 rc = -EAGAIN;
1871                 while (rc == -EAGAIN) {
1872                         struct kvec iov[2];
1873                         unsigned int len;
1874
1875                         if (open_file->invalidHandle) {
1876                                 /* we could deadlock if we called
1877                                    filemap_fdatawait from here so tell
1878                                    reopen_file not to flush data to
1879                                    server now */
1880                                 rc = cifs_reopen_file(open_file, false);
1881                                 if (rc != 0)
1882                                         break;
1883                         }
1884
1885                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1886                                   (unsigned int)write_size - total_written);
1887                         /* iov[0] is reserved for smb header */
1888                         iov[1].iov_base = (char *)write_data + total_written;
1889                         iov[1].iov_len = len;
1890                         io_parms.pid = pid;
1891                         io_parms.tcon = tcon;
1892                         io_parms.offset = *offset;
1893                         io_parms.length = len;
1894                         rc = server->ops->sync_write(xid, &open_file->fid,
1895                                         &io_parms, &bytes_written, iov, 1);
1896                 }
1897                 if (rc || (bytes_written == 0)) {
1898                         if (total_written)
1899                                 break;
1900                         else {
1901                                 free_xid(xid);
1902                                 return rc;
1903                         }
1904                 } else {
1905                         spin_lock(&d_inode(dentry)->i_lock);
1906                         cifs_update_eof(cifsi, *offset, bytes_written);
1907                         spin_unlock(&d_inode(dentry)->i_lock);
1908                         *offset += bytes_written;
1909                 }
1910         }
1911
1912         cifs_stats_bytes_written(tcon, total_written);
1913
1914         if (total_written > 0) {
1915                 spin_lock(&d_inode(dentry)->i_lock);
1916                 if (*offset > d_inode(dentry)->i_size)
1917                         i_size_write(d_inode(dentry), *offset);
1918                 spin_unlock(&d_inode(dentry)->i_lock);
1919         }
1920         mark_inode_dirty_sync(d_inode(dentry));
1921         free_xid(xid);
1922         return total_written;
1923 }
1924
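/*
 * Find a valid, readable open handle for this inode, filtering by fsuid
 * on multiuser mounts. Returns the handle with a reference held, or
 * NULL if none is usable.
 */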
1925 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1926                                         bool fsuid_only)
1927 {
1928         struct cifsFileInfo *open_file = NULL;
1929         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1930
1931         /* only filter by fsuid on multiuser mounts */
1932         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1933                 fsuid_only = false;
1934
1935         spin_lock(&cifs_inode->open_file_lock);
1936         /* we could simply get the first_list_entry since write-only entries
1937            are always at the end of the list but since the first entry might
1938            have a close pending, we go through the whole list */
1939         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1940                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1941                         continue;
1942                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1943                         if (!open_file->invalidHandle) {
1944                                 /* found a good file */
1945                                 /* lock it so it will not be closed on us */
1946                                 cifsFileInfo_get(open_file);
1947                                 spin_unlock(&cifs_inode->open_file_lock);
1948                                 return open_file;
1949                         } /* else might as well continue, and look for
1950                              another, or simply have the caller reopen it
1951                              again rather than trying to fix this handle */
1952                 } else /* write only file */
1953                         break; /* write only files are last so must be done */
1954         }
1955         spin_unlock(&cifs_inode->open_file_lock);
1956         return NULL;
1957 }
1958
1959 /* Return -EBADF if no handle is found and general rc otherwise */
1960 int
1961 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only,
1962                        struct cifsFileInfo **ret_file)
1963 {
1964         struct cifsFileInfo *open_file, *inv_file = NULL;
1965         struct cifs_sb_info *cifs_sb;
1966         bool any_available = false;
1967         int rc = -EBADF;
1968         unsigned int refind = 0;
1969
1970         *ret_file = NULL;
1971
1972         /*
1973          * Having a null inode here (because mapping->host was set to zero by
1974          * the VFS or MM) should not happen but we had reports of an oops (due
1975          * to it being zero) during stress testcases so we need to check for it
1976          */
1978         if (cifs_inode == NULL) {
1979                 cifs_dbg(VFS, "Null inode passed to %s\n", __func__);
1980                 dump_stack();
1981                 return rc;
1982         }
1983
1984         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1985
1986         /* only filter by fsuid on multiuser mounts */
1987         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1988                 fsuid_only = false;
1989
1990         spin_lock(&cifs_inode->open_file_lock);
1991 refind_writable:
1992         if (refind > MAX_REOPEN_ATT) {
1993                 spin_unlock(&cifs_inode->open_file_lock);
1994                 return rc;
1995         }
1996         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1997                 if (!any_available && open_file->pid != current->tgid)
1998                         continue;
1999                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2000                         continue;
2001                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2002                         if (!open_file->invalidHandle) {
2003                                 /* found a good writable file */
2004                                 cifsFileInfo_get(open_file);
2005                                 spin_unlock(&cifs_inode->open_file_lock);
2006                                 *ret_file = open_file;
2007                                 return 0;
2008                         } else {
2009                                 if (!inv_file)
2010                                         inv_file = open_file;
2011                         }
2012                 }
2013         }
2014         /* couldn't find usable FH with same pid, try any available */
2015         if (!any_available) {
2016                 any_available = true;
2017                 goto refind_writable;
2018         }
2019
2020         if (inv_file) {
2021                 any_available = false;
2022                 cifsFileInfo_get(inv_file);
2023         }
2024
2025         spin_unlock(&cifs_inode->open_file_lock);
2026
2027         if (inv_file) {
2028                 rc = cifs_reopen_file(inv_file, false);
2029                 if (!rc) {
2030                         *ret_file = inv_file;
2031                         return 0;
2032                 }
2033
2034                 spin_lock(&cifs_inode->open_file_lock);
2035                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2036                 spin_unlock(&cifs_inode->open_file_lock);
2037                 cifsFileInfo_put(inv_file);
2038                 ++refind;
2039                 inv_file = NULL;
2040                 spin_lock(&cifs_inode->open_file_lock);
2041                 goto refind_writable;
2042         }
2043
2044         return rc;
2045 }
2046
2047 struct cifsFileInfo *
2048 find_writable_file(struct cifsInodeInfo *cifs_inode, bool fsuid_only)
2049 {
2050         struct cifsFileInfo *cfile;
2051         int rc;
2052
2053         rc = cifs_get_writable_file(cifs_inode, fsuid_only, &cfile);
2054         if (rc)
2055                 cifs_dbg(FYI, "couldn't find writable handle rc=%d\n", rc);
2056
2057         return cfile;
2058 }
2059
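/*
 * Find a writable handle by pathname: walk the tcon's open-file list,
 * rebuilding each handle's path from its dentry and comparing it to
 * @name.
 */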
2060 int
2061 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2062                        struct cifsFileInfo **ret_file)
2063 {
2064         struct list_head *tmp;
2065         struct cifsFileInfo *cfile;
2066         struct cifsInodeInfo *cinode;
2067         char *full_path;
2068
2069         *ret_file = NULL;
2070
2071         spin_lock(&tcon->open_file_lock);
2072         list_for_each(tmp, &tcon->openFileList) {
2073                 cfile = list_entry(tmp, struct cifsFileInfo, tlist);
2075                 full_path = build_path_from_dentry(cfile->dentry);
2076                 if (full_path == NULL) {
2077                         spin_unlock(&tcon->open_file_lock);
2078                         return -ENOMEM;
2079                 }
2080                 if (strcmp(full_path, name)) {
2081                         kfree(full_path);
2082                         continue;
2083                 }
2084
2085                 kfree(full_path);
2086                 cinode = CIFS_I(d_inode(cfile->dentry));
2087                 spin_unlock(&tcon->open_file_lock);
2088                 return cifs_get_writable_file(cinode, false, ret_file);
2089         }
2090
2091         spin_unlock(&tcon->open_file_lock);
2092         return -ENOENT;
2093 }
2094
2095 int
2096 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2097                        struct cifsFileInfo **ret_file)
2098 {
2099         struct list_head *tmp;
2100         struct cifsFileInfo *cfile;
2101         struct cifsInodeInfo *cinode;
2102         char *full_path;
2103
2104         *ret_file = NULL;
2105
2106         spin_lock(&tcon->open_file_lock);
2107         list_for_each(tmp, &tcon->openFileList) {
2108                 cfile = list_entry(tmp, struct cifsFileInfo, tlist);
2110                 full_path = build_path_from_dentry(cfile->dentry);
2111                 if (full_path == NULL) {
2112                         spin_unlock(&tcon->open_file_lock);
2113                         return -ENOMEM;
2114                 }
2115                 if (strcmp(full_path, name)) {
2116                         kfree(full_path);
2117                         continue;
2118                 }
2119
2120                 kfree(full_path);
2121                 cinode = CIFS_I(d_inode(cfile->dentry));
2122                 spin_unlock(&tcon->open_file_lock);
2123                 *ret_file = find_readable_file(cinode, false);
2124                 return *ret_file ? 0 : -ENOENT;
2125         }
2126
2127         spin_unlock(&tcon->open_file_lock);
2128         return -ENOENT;
2129 }
2130
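/*
 * Write the byte range [from, to) of a cached page to the server using
 * any available writable handle. The range is clamped so the file is
 * never extended, and writes entirely past EOF (a racing truncate) are
 * silently dropped.
 */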
2131 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2132 {
2133         struct address_space *mapping = page->mapping;
2134         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2135         char *write_data;
2136         int rc = -EFAULT;
2137         int bytes_written = 0;
2138         struct inode *inode;
2139         struct cifsFileInfo *open_file;
2140
2141         if (!mapping || !mapping->host)
2142                 return -EFAULT;
2143
2144         inode = page->mapping->host;
2145
2146         offset += (loff_t)from;
2147         write_data = kmap(page);
2148         write_data += from;
2149
2150         if ((to > PAGE_SIZE) || (from > to)) {
2151                 kunmap(page);
2152                 return -EIO;
2153         }
2154
2155         /* racing with truncate? */
2156         if (offset > mapping->host->i_size) {
2157                 kunmap(page);
2158                 return 0; /* don't care */
2159         }
2160
2161         /* check to make sure that we are not extending the file */
2162         if (mapping->host->i_size - offset < (loff_t)to)
2163                 to = (unsigned)(mapping->host->i_size - offset);
2164
2165         rc = cifs_get_writable_file(CIFS_I(mapping->host), false, &open_file);
2166         if (!rc) {
2167                 bytes_written = cifs_write(open_file, open_file->pid,
2168                                            write_data, to - from, &offset);
2169                 cifsFileInfo_put(open_file);
2170                 /* Does mm or vfs already set times? */
2171                 inode->i_atime = inode->i_mtime = current_time(inode);
2172                 if ((bytes_written > 0) && (offset))
2173                         rc = 0;
2174                 else if (bytes_written < 0)
2175                         rc = bytes_written;
2176                 else
2177                         rc = -EFAULT;
2178         } else {
2179                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2180                 if (!is_retryable_error(rc))
2181                         rc = -EIO;
2182         }
2183
2184         kunmap(page);
2185         return rc;
2186 }
2187
2188 static struct cifs_writedata *
2189 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2190                           pgoff_t end, pgoff_t *index,
2191                           unsigned int *found_pages)
2192 {
2193         struct cifs_writedata *wdata;
2194
2195         wdata = cifs_writedata_alloc((unsigned int)tofind,
2196                                      cifs_writev_complete);
2197         if (!wdata)
2198                 return NULL;
2199
2200         *found_pages = find_get_pages_range_tag(mapping, index, end,
2201                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2202         return wdata;
2203 }
2204
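/*
 * Lock and vet the pages found by the tagged lookup: stop at the first
 * page that is busy, out of range, non-consecutive or still under
 * writeback. Accepted pages are marked for writeback; the remainder are
 * released. Returns the number of pages ready to send.
 */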
2205 static unsigned int
2206 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2207                     struct address_space *mapping,
2208                     struct writeback_control *wbc,
2209                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2210 {
2211         unsigned int nr_pages = 0, i;
2212         struct page *page;
2213
2214         for (i = 0; i < found_pages; i++) {
2215                 page = wdata->pages[i];
2216                 /*
2217                  * At this point we hold neither the i_pages lock nor the
2218                  * page lock: the page may be truncated or invalidated
2219                  * (changing page->mapping to NULL), or even swizzled
2220                  * back from swapper_space to tmpfs file mapping
2221                  */
2222
2223                 if (nr_pages == 0)
2224                         lock_page(page);
2225                 else if (!trylock_page(page))
2226                         break;
2227
2228                 if (unlikely(page->mapping != mapping)) {
2229                         unlock_page(page);
2230                         break;
2231                 }
2232
2233                 if (!wbc->range_cyclic && page->index > end) {
2234                         *done = true;
2235                         unlock_page(page);
2236                         break;
2237                 }
2238
2239                 if (*next && (page->index != *next)) {
2240                         /* Not next consecutive page */
2241                         unlock_page(page);
2242                         break;
2243                 }
2244
2245                 if (wbc->sync_mode != WB_SYNC_NONE)
2246                         wait_on_page_writeback(page);
2247
2248                 if (PageWriteback(page) ||
2249                                 !clear_page_dirty_for_io(page)) {
2250                         unlock_page(page);
2251                         break;
2252                 }
2253
2254                 /*
2255                  * This actually clears the dirty bit in the radix tree.
2256                  * See cifs_writepage() for more commentary.
2257                  */
2258                 set_page_writeback(page);
2259                 if (page_offset(page) >= i_size_read(mapping->host)) {
2260                         *done = true;
2261                         unlock_page(page);
2262                         end_page_writeback(page);
2263                         break;
2264                 }
2265
2266                 wdata->pages[i] = page;
2267                 *next = page->index + 1;
2268                 ++nr_pages;
2269         }
2270
2271         /* reset index to refind any pages skipped */
2272         if (nr_pages == 0)
2273                 *index = wdata->pages[0]->index + 1;
2274
2275         /* put any pages we aren't going to use */
2276         for (i = nr_pages; i < found_pages; i++) {
2277                 put_page(wdata->pages[i]);
2278                 wdata->pages[i] = NULL;
2279         }
2280
2281         return nr_pages;
2282 }
2283
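/*
 * Fill in the I/O parameters of a prepared wdata (offset, sizes, pid),
 * adjust the reserved credits to the actual byte count and dispatch the
 * asynchronous write; -EAGAIN is returned when the handle must be
 * reopened first.
 */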
2284 static int
2285 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2286                  struct address_space *mapping, struct writeback_control *wbc)
2287 {
2288         int rc;
2289         struct TCP_Server_Info *server =
2290                                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2291
2292         wdata->sync_mode = wbc->sync_mode;
2293         wdata->nr_pages = nr_pages;
2294         wdata->offset = page_offset(wdata->pages[0]);
2295         wdata->pagesz = PAGE_SIZE;
2296         wdata->tailsz = min(i_size_read(mapping->host) -
2297                         page_offset(wdata->pages[nr_pages - 1]),
2298                         (loff_t)PAGE_SIZE);
2299         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2300         wdata->pid = wdata->cfile->pid;
2301
2302         rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2303         if (rc)
2304                 return rc;
2305
2306         if (wdata->cfile->invalidHandle)
2307                 rc = -EAGAIN;
2308         else
2309                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2310
2311         return rc;
2312 }
2313
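/*
 * Writeback entry point: gather runs of dirty pages and push them to
 * the server as wsize-sized asynchronous writes, falling back to
 * generic_writepages() when wsize is smaller than a page. Credits are
 * taken per batch and returned on any failure.
 */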
2314 static int cifs_writepages(struct address_space *mapping,
2315                            struct writeback_control *wbc)
2316 {
2317         struct inode *inode = mapping->host;
2318         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2319         struct TCP_Server_Info *server;
2320         bool done = false, scanned = false, range_whole = false;
2321         pgoff_t end, index;
2322         struct cifs_writedata *wdata;
2323         struct cifsFileInfo *cfile = NULL;
2324         int rc = 0;
2325         int saved_rc = 0;
2326         unsigned int xid;
2327
2328         /*
2329          * If wsize is smaller than the page cache size, default to writing
2330          * one page at a time via cifs_writepage
2331          */
2332         if (cifs_sb->wsize < PAGE_SIZE)
2333                 return generic_writepages(mapping, wbc);
2334
2335         xid = get_xid();
2336         if (wbc->range_cyclic) {
2337                 index = mapping->writeback_index; /* Start from prev offset */
2338                 end = -1;
2339         } else {
2340                 index = wbc->range_start >> PAGE_SHIFT;
2341                 end = wbc->range_end >> PAGE_SHIFT;
2342                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2343                         range_whole = true;
2344                 scanned = true;
2345         }
2346         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2347 retry:
2348         while (!done && index <= end) {
2349                 unsigned int i, nr_pages, found_pages, wsize;
2350                 pgoff_t next = 0, tofind, saved_index = index;
2351                 struct cifs_credits credits_on_stack;
2352                 struct cifs_credits *credits = &credits_on_stack;
2353                 int get_file_rc = 0;
2354
2355                 if (cfile)
2356                         cifsFileInfo_put(cfile);
2357
2358                 rc = cifs_get_writable_file(CIFS_I(inode), false, &cfile);
2359
2360                 /* in case of an error store it to return later */
2361                 if (rc)
2362                         get_file_rc = rc;
2363
2364                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2365                                                    &wsize, credits);
2366                 if (rc != 0) {
2367                         done = true;
2368                         break;
2369                 }
2370
2371                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2372
2373                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2374                                                   &found_pages);
2375                 if (!wdata) {
2376                         rc = -ENOMEM;
2377                         done = true;
2378                         add_credits_and_wake_if(server, credits, 0);
2379                         break;
2380                 }
2381
2382                 if (found_pages == 0) {
2383                         kref_put(&wdata->refcount, cifs_writedata_release);
2384                         add_credits_and_wake_if(server, credits, 0);
2385                         break;
2386                 }
2387
2388                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2389                                                end, &index, &next, &done);
2390
2391                 /* nothing to write? */
2392                 if (nr_pages == 0) {
2393                         kref_put(&wdata->refcount, cifs_writedata_release);
2394                         add_credits_and_wake_if(server, credits, 0);
2395                         continue;
2396                 }
2397
2398                 wdata->credits = credits_on_stack;
2399                 wdata->cfile = cfile;
2400                 cfile = NULL;
2401
2402                 if (!wdata->cfile) {
2403                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2404                                  get_file_rc);
2405                         if (is_retryable_error(get_file_rc))
2406                                 rc = get_file_rc;
2407                         else
2408                                 rc = -EBADF;
2409                 } else
2410                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2411
2412                 for (i = 0; i < nr_pages; ++i)
2413                         unlock_page(wdata->pages[i]);
2414
2415                 /* send failure -- clean up the mess */
2416                 if (rc != 0) {
2417                         add_credits_and_wake_if(server, &wdata->credits, 0);
2418                         for (i = 0; i < nr_pages; ++i) {
2419                                 if (is_retryable_error(rc))
2420                                         redirty_page_for_writepage(wbc,
2421                                                            wdata->pages[i]);
2422                                 else
2423                                         SetPageError(wdata->pages[i]);
2424                                 end_page_writeback(wdata->pages[i]);
2425                                 put_page(wdata->pages[i]);
2426                         }
2427                         if (!is_retryable_error(rc))
2428                                 mapping_set_error(mapping, rc);
2429                 }
2430                 kref_put(&wdata->refcount, cifs_writedata_release);
2431
2432                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2433                         index = saved_index;
2434                         continue;
2435                 }
2436
2437                 /* Return immediately if we received a signal during writing */
2438                 if (is_interrupt_error(rc)) {
2439                         done = true;
2440                         break;
2441                 }
2442
2443                 if (rc != 0 && saved_rc == 0)
2444                         saved_rc = rc;
2445
2446                 wbc->nr_to_write -= nr_pages;
2447                 if (wbc->nr_to_write <= 0)
2448                         done = true;
2449
2450                 index = next;
2451         }
2452
2453         if (!scanned && !done) {
2454                 /*
2455                  * We hit the last page and there is more work to be done: wrap
2456                  * back to the start of the file
2457                  */
2458                 scanned = true;
2459                 index = 0;
2460                 goto retry;
2461         }
2462
2463         if (saved_rc != 0)
2464                 rc = saved_rc;
2465
2466         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2467                 mapping->writeback_index = index;
2468
2469         if (cfile)
2470                 cifsFileInfo_put(cfile);
2471         free_xid(xid);
2472         return rc;
2473 }
2474
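/*
 * Write a single locked page synchronously. Retryable errors redirty
 * the page (looping in place for WB_SYNC_ALL); hard errors mark the
 * page and the mapping with the error.
 */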
2475 static int
2476 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2477 {
2478         int rc;
2479         unsigned int xid;
2480
2481         xid = get_xid();
2482 /* BB add check for wbc flags */
2483         get_page(page);
2484         if (!PageUptodate(page))
2485                 cifs_dbg(FYI, "ppw - page not up to date\n");
2486
2487         /*
2488          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2489          *
2490          * A writepage() implementation always needs to do either this,
2491          * or re-dirty the page with "redirty_page_for_writepage()" in
2492          * the case of a failure.
2493          *
2494          * Just unlocking the page will cause the radix tree tag-bits
2495          * to fail to update with the state of the page correctly.
2496          */
2497         set_page_writeback(page);
2498 retry_write:
2499         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2500         if (is_retryable_error(rc)) {
2501                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2502                         goto retry_write;
2503                 redirty_page_for_writepage(wbc, page);
2504         } else if (rc != 0) {
2505                 SetPageError(page);
2506                 mapping_set_error(page->mapping, rc);
2507         } else {
2508                 SetPageUptodate(page);
2509         }
2510         end_page_writeback(page);
2511         put_page(page);
2512         free_xid(xid);
2513         return rc;
2514 }
2515
2516 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2517 {
2518         int rc = cifs_writepage_locked(page, wbc);
2519         unlock_page(page);
2520         return rc;
2521 }
2522
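/*
 * Called by the VFS after data has been copied into a pagecache page.
 * A page that is not uptodate is written through synchronously via the
 * open handle; otherwise the page is simply dirtied. i_size is extended
 * when the write went past the old EOF.
 */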
2523 static int cifs_write_end(struct file *file, struct address_space *mapping,
2524                         loff_t pos, unsigned len, unsigned copied,
2525                         struct page *page, void *fsdata)
2526 {
2527         int rc;
2528         struct inode *inode = mapping->host;
2529         struct cifsFileInfo *cfile = file->private_data;
2530         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2531         __u32 pid;
2532
2533         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2534                 pid = cfile->pid;
2535         else
2536                 pid = current->tgid;
2537
2538         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2539                  page, pos, copied);
2540
2541         if (PageChecked(page)) {
2542                 if (copied == len)
2543                         SetPageUptodate(page);
2544                 ClearPageChecked(page);
2545         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2546                 SetPageUptodate(page);
2547
2548         if (!PageUptodate(page)) {
2549                 char *page_data;
2550                 unsigned offset = pos & (PAGE_SIZE - 1);
2551                 unsigned int xid;
2552
2553                 xid = get_xid();
2554                 /* this is probably better than directly calling
2555                    partialpage_write since here the file handle is
2556                    already known, so we might as well use it */
2557                 /* BB check if anything else missing out of ppw
2558                    such as updating last write time */
2559                 page_data = kmap(page);
2560                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2561                 /* if (rc < 0) should we set writebehind rc? */
2562                 kunmap(page);
2563
2564                 free_xid(xid);
2565         } else {
2566                 rc = copied;
2567                 pos += copied;
2568                 set_page_dirty(page);
2569         }
2570
2571         if (rc > 0) {
2572                 spin_lock(&inode->i_lock);
2573                 if (pos > inode->i_size)
2574                         i_size_write(inode, pos);
2575                 spin_unlock(&inode->i_lock);
2576         }
2577
2578         unlock_page(page);
2579         put_page(page);
2580
2581         return rc;
2582 }
2583
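/*
 * Strict fsync: flush and wait on dirty pagecache, zap the mapping when
 * we hold no read oplock (the cache may be stale), then ask the server
 * to flush the handle unless the mount disables server-side sync.
 */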
2584 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2585                       int datasync)
2586 {
2587         unsigned int xid;
2588         int rc = 0;
2589         struct cifs_tcon *tcon;
2590         struct TCP_Server_Info *server;
2591         struct cifsFileInfo *smbfile = file->private_data;
2592         struct inode *inode = file_inode(file);
2593         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2594
2595         rc = file_write_and_wait_range(file, start, end);
2596         if (rc) {
2597                 trace_cifs_fsync_err(inode->i_ino, rc);
2598                 return rc;
2599         }
2600
2601         xid = get_xid();
2602
2603         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2604                  file, datasync);
2605
2606         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2607                 rc = cifs_zap_mapping(inode);
2608                 if (rc) {
2609                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2610                         rc = 0; /* don't care about it in fsync */
2611                 }
2612         }
2613
2614         tcon = tlink_tcon(smbfile->tlink);
2615         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2616                 server = tcon->ses->server;
2617                 if (server->ops->flush)
2618                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2619                 else
2620                         rc = -ENOSYS;
2621         }
2622
2623         free_xid(xid);
2624         return rc;
2625 }
2626
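/*
 * Non-strict fsync: like cifs_strict_fsync() but the page cache is
 * trusted and never invalidated before the server-side flush.
 */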
2627 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2628 {
2629         unsigned int xid;
2630         int rc = 0;
2631         struct cifs_tcon *tcon;
2632         struct TCP_Server_Info *server;
2633         struct cifsFileInfo *smbfile = file->private_data;
2634         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2635
2636         rc = file_write_and_wait_range(file, start, end);
2637         if (rc) {
2638                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2639                 return rc;
2640         }
2641
2642         xid = get_xid();
2643
2644         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2645                  file, datasync);
2646
2647         tcon = tlink_tcon(smbfile->tlink);
2648         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2649                 server = tcon->ses->server;
2650                 if (server->ops->flush)
2651                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2652                 else
2653                         rc = -ENOSYS;
2654         }
2655
2656         free_xid(xid);
2657         return rc;
2658 }
2659
2660 /*
2661  * As the file closes, flush all cached write data for this inode,
2662  * checking for write-behind errors.
2663  */
2664 int cifs_flush(struct file *file, fl_owner_t id)
2665 {
2666         struct inode *inode = file_inode(file);
2667         int rc = 0;
2668
2669         if (file->f_mode & FMODE_WRITE)
2670                 rc = filemap_write_and_wait(inode->i_mapping);
2671
2672         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2673         if (rc)
2674                 trace_cifs_flush_err(inode->i_ino, rc);
2675         return rc;
2676 }
2677
2678 static int
2679 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2680 {
2681         int rc = 0;
2682         unsigned long i;
2683
2684         for (i = 0; i < num_pages; i++) {
2685                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2686                 if (!pages[i]) {
2687                         /*
2688                          * save number of pages we have already allocated and
2689                          * return with ENOMEM error
2690                          */
2691                         num_pages = i;
2692                         rc = -ENOMEM;
2693                         break;
2694                 }
2695         }
2696
2697         if (rc) {
2698                 for (i = 0; i < num_pages; i++)
2699                         put_page(pages[i]);
2700         }
2701         return rc;
2702 }
2703
2704 static inline
2705 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2706 {
2707         size_t num_pages;
2708         size_t clen;
2709
2710         clen = min_t(const size_t, len, wsize);
2711         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2712
2713         if (cur_len)
2714                 *cur_len = clen;
2715
2716         return num_pages;
2717 }
2718
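/*
 * Final put for an uncached wdata: drop the aio context reference and
 * the data pages, then fall through to the common writedata release.
 */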
2719 static void
2720 cifs_uncached_writedata_release(struct kref *refcount)
2721 {
2722         int i;
2723         struct cifs_writedata *wdata = container_of(refcount,
2724                                         struct cifs_writedata, refcount);
2725
2726         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2727         for (i = 0; i < wdata->nr_pages; i++)
2728                 put_page(wdata->pages[i]);
2729         cifs_writedata_release(refcount);
2730 }
2731
2732 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2733
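/*
 * Work-queue completion for an uncached write: push the cached EOF (and
 * i_size if needed) forward, wake any waiter and let the aio context
 * collect the result.
 */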
2734 static void
2735 cifs_uncached_writev_complete(struct work_struct *work)
2736 {
2737         struct cifs_writedata *wdata = container_of(work,
2738                                         struct cifs_writedata, work);
2739         struct inode *inode = d_inode(wdata->cfile->dentry);
2740         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2741
2742         spin_lock(&inode->i_lock);
2743         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2744         if (cifsi->server_eof > inode->i_size)
2745                 i_size_write(inode, cifsi->server_eof);
2746         spin_unlock(&inode->i_lock);
2747
2748         complete(&wdata->done);
2749         collect_uncached_write_data(wdata->ctx);
2750         /* the below call can possibly free the last ref to aio ctx */
2751         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2752 }
2753
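/*
 * Copy up to *len bytes from the iterator into wdata's preallocated
 * pages. On a short copy (e.g. an unmapped user address) *len and
 * *num_pages are trimmed to what was actually copied; -EFAULT is
 * returned if nothing could be copied at all.
 */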
2754 static int
2755 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2756                       size_t *len, unsigned long *num_pages)
2757 {
2758         size_t save_len, copied, bytes, cur_len = *len;
2759         unsigned long i, nr_pages = *num_pages;
2760
2761         save_len = cur_len;
2762         for (i = 0; i < nr_pages; i++) {
2763                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2764                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2765                 cur_len -= copied;
2766                 /*
2767                  * If we didn't copy as much as we expected, then that
2768                  * may mean we trod into an unmapped area. Stop copying
2769                  * at that point. On the next pass through the big
2770                  * loop, we'll likely end up getting a zero-length
2771                  * write and bailing out of it.
2772                  */
2773                 if (copied < bytes)
2774                         break;
2775         }
2776         cur_len = save_len - cur_len;
2777         *len = cur_len;
2778
2779         /*
2780          * If we have no data to send, then that probably means that
2781          * the copy above failed altogether. That's most likely because
2782          * the address in the iovec was bogus. Return -EFAULT and let
2783          * the caller free anything we allocated and bail out.
2784          */
2785         if (!cur_len)
2786                 return -EFAULT;
2787
2788         /*
2789          * i + 1 now represents the number of pages we actually used in
2790          * the copy phase above.
2791          */
2792         *num_pages = i + 1;
2793         return 0;
2794 }
2795
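/*
 * Retry path for an uncached write: reopen an invalidated handle, wait
 * until the granted credits cover the full wdata (it is resent whole,
 * not in segments) and reissue the async write, looping while the
 * server keeps answering -EAGAIN.
 */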
2796 static int
2797 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
2798         struct cifs_aio_ctx *ctx)
2799 {
2800         unsigned int wsize;
2801         struct cifs_credits credits;
2802         int rc;
2803         struct TCP_Server_Info *server =
2804                 tlink_tcon(wdata->cfile->tlink)->ses->server;
2805
2806         do {
2807                 if (wdata->cfile->invalidHandle) {
2808                         rc = cifs_reopen_file(wdata->cfile, false);
2809                         if (rc == -EAGAIN)
2810                                 continue;
2811                         else if (rc)
2812                                 break;
2813                 }
2814
2816                 /*
2817                  * Wait for credits to resend this wdata.
2818                  * Note: we are attempting to resend the whole wdata rather
2819                  * than in segments
2820                  */
2821                 do {
2822                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
2823                                                 &wsize, &credits);
2824                         if (rc)
2825                                 goto fail;
2826
2827                         if (wsize < wdata->bytes) {
2828                                 add_credits_and_wake_if(server, &credits, 0);
2829                                 msleep(1000);
2830                         }
2831                 } while (wsize < wdata->bytes);
2832                 wdata->credits = credits;
2833
2834                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
2835
2836                 if (!rc) {
2837                         if (wdata->cfile->invalidHandle)
2838                                 rc = -EAGAIN;
2839                         else {
2840 #ifdef CONFIG_CIFS_SMB_DIRECT
2841                                 if (wdata->mr) {
2842                                         wdata->mr->need_invalidate = true;
2843                                         smbd_deregister_mr(wdata->mr);
2844                                         wdata->mr = NULL;
2845                                 }
2846 #endif
2847                                 rc = server->ops->async_writev(wdata,
2848                                         cifs_uncached_writedata_release);
2849                         }
2850                 }
2851
2852                 /* If the write was successfully sent, we are done */
2853                 if (!rc) {
2854                         list_add_tail(&wdata->list, wdata_list);
2855                         return 0;
2856                 }
2857
2858                 /* Roll back credits and retry if needed */
2859                 add_credits_and_wake_if(server, &wdata->credits, 0);
2860         } while (rc == -EAGAIN);
2861
2862 fail:
2863         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2864         return rc;
2865 }
2866
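/*
 * Editor's summary (added for clarity, not in the original source):
 * cifs_write_from_iter() slices [offset, offset + len) into wsize-bounded
 * wdata requests. For direct I/O the user pages are pinned in place with
 * iov_iter_get_pages_alloc(); otherwise pages are allocated and the data
 * is copied in via wdata_fill_from_iovec(). Each successfully sent wdata
 * goes out through ->async_writev() and is queued on wdata_list.
 */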
2867 static int
2868 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2869                      struct cifsFileInfo *open_file,
2870                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2871                      struct cifs_aio_ctx *ctx)
2872 {
2873         int rc = 0;
2874         size_t cur_len;
2875         unsigned long nr_pages, num_pages, i;
2876         struct cifs_writedata *wdata;
2877         struct iov_iter saved_from = *from;
2878         loff_t saved_offset = offset;
2879         pid_t pid;
2880         struct TCP_Server_Info *server;
2881         struct page **pagevec;
2882         size_t start;
2883         unsigned int xid;
2884
2885         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2886                 pid = open_file->pid;
2887         else
2888                 pid = current->tgid;
2889
2890         server = tlink_tcon(open_file->tlink)->ses->server;
2891         xid = get_xid();
2892
2893         do {
2894                 unsigned int wsize;
2895                 struct cifs_credits credits_on_stack;
2896                 struct cifs_credits *credits = &credits_on_stack;
2897
2898                 if (open_file->invalidHandle) {
2899                         rc = cifs_reopen_file(open_file, false);
2900                         if (rc == -EAGAIN)
2901                                 continue;
2902                         else if (rc)
2903                                 break;
2904                 }
2905
2906                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2907                                                    &wsize, credits);
2908                 if (rc)
2909                         break;
2910
2911                 cur_len = min_t(const size_t, len, wsize);
2912
2913                 if (ctx->direct_io) {
2914                         ssize_t result;
2915
2916                         result = iov_iter_get_pages_alloc(
2917                                 from, &pagevec, cur_len, &start);
2918                         if (result < 0) {
2919                                 cifs_dbg(VFS,
2920                                         "direct_writev couldn't get user pages "
2921                                         "(rc=%zd) iter type %d iov_offset %zd "
2922                                         "count %zd\n",
2923                                         result, iov_iter_type(from),
2924                                         from->iov_offset, from->count);
2925                                 dump_stack();
2926
2927                                 rc = result;
2928                                 add_credits_and_wake_if(server, credits, 0);
2929                                 break;
2930                         }
2931                         cur_len = (size_t)result;
2932                         iov_iter_advance(from, cur_len);
2933
2934                         nr_pages =
2935                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
2936
2937                         wdata = cifs_writedata_direct_alloc(pagevec,
2938                                              cifs_uncached_writev_complete);
2939                         if (!wdata) {
2940                                 rc = -ENOMEM;
2941                                 add_credits_and_wake_if(server, credits, 0);
2942                                 break;
2943                         }
2944
2946                         wdata->page_offset = start;
2947                         wdata->tailsz =
2948                                 nr_pages > 1 ?
2949                                         cur_len - (PAGE_SIZE - start) -
2950                                         (nr_pages - 2) * PAGE_SIZE :
2951                                         cur_len;
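                        /*
                         * Editor's note, an illustrative example of the
                         * math above (not from the original source): with
                         * cur_len == 10000, start == 100 and PAGE_SIZE ==
                         * 4096, nr_pages == (10000 + 100 + 4095) / 4096 ==
                         * 3; the first page holds 4096 - 100 == 3996 bytes,
                         * the middle page 4096, and tailsz == 10000 - 3996
                         * - 4096 == 1908.
                         */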
2952                 } else {
2953                         nr_pages = get_numpages(wsize, len, &cur_len);
2954                         wdata = cifs_writedata_alloc(nr_pages,
2955                                              cifs_uncached_writev_complete);
2956                         if (!wdata) {
2957                                 rc = -ENOMEM;
2958                                 add_credits_and_wake_if(server, credits, 0);
2959                                 break;
2960                         }
2961
2962                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2963                         if (rc) {
2964                                 kvfree(wdata->pages);
2965                                 kfree(wdata);
2966                                 add_credits_and_wake_if(server, credits, 0);
2967                                 break;
2968                         }
2969
2970                         num_pages = nr_pages;
2971                         rc = wdata_fill_from_iovec(
2972                                 wdata, from, &cur_len, &num_pages);
2973                         if (rc) {
2974                                 for (i = 0; i < nr_pages; i++)
2975                                         put_page(wdata->pages[i]);
2976                                 kvfree(wdata->pages);
2977                                 kfree(wdata);
2978                                 add_credits_and_wake_if(server, credits, 0);
2979                                 break;
2980                         }
2981
2982                         /*
2983                          * Bring nr_pages down to the number of pages we
2984                          * actually used, and free any pages that we didn't use.
2985                          */
2986                         for ( ; nr_pages > num_pages; nr_pages--)
2987                                 put_page(wdata->pages[nr_pages - 1]);
2988
2989                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2990                 }
2991
2992                 wdata->sync_mode = WB_SYNC_ALL;
2993                 wdata->nr_pages = nr_pages;
2994                 wdata->offset = (__u64)offset;
2995                 wdata->cfile = cifsFileInfo_get(open_file);
2996                 wdata->pid = pid;
2997                 wdata->bytes = cur_len;
2998                 wdata->pagesz = PAGE_SIZE;
2999                 wdata->credits = credits_on_stack;
3000                 wdata->ctx = ctx;
3001                 kref_get(&ctx->refcount);
3002
3003                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3004
3005                 if (!rc) {
3006                         if (wdata->cfile->invalidHandle)
3007                                 rc = -EAGAIN;
3008                         else
3009                                 rc = server->ops->async_writev(wdata,
3010                                         cifs_uncached_writedata_release);
3011                 }
3012
3013                 if (rc) {
3014                         add_credits_and_wake_if(server, &wdata->credits, 0);
3015                         kref_put(&wdata->refcount,
3016                                  cifs_uncached_writedata_release);
3017                         if (rc == -EAGAIN) {
3018                                 *from = saved_from;
3019                                 iov_iter_advance(from, offset - saved_offset);
3020                                 continue;
3021                         }
3022                         break;
3023                 }
3024
3025                 list_add_tail(&wdata->list, wdata_list);
3026                 offset += cur_len;
3027                 len -= cur_len;
3028         } while (len > 0);
3029
3030         free_xid(xid);
3031         return rc;
3032 }
3033
3034 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3035 {
3036         struct cifs_writedata *wdata, *tmp;
3037         struct cifs_tcon *tcon;
3038         struct cifs_sb_info *cifs_sb;
3039         struct dentry *dentry = ctx->cfile->dentry;
3040         int rc;
3041
3042         tcon = tlink_tcon(ctx->cfile->tlink);
3043         cifs_sb = CIFS_SB(dentry->d_sb);
3044
3045         mutex_lock(&ctx->aio_mutex);
3046
3047         if (list_empty(&ctx->list)) {
3048                 mutex_unlock(&ctx->aio_mutex);
3049                 return;
3050         }
3051
3052         rc = ctx->rc;
3053         /*
3054          * Wait for and collect replies for any successful sends in order of
3055          * increasing offset. Once an error is hit, then return without waiting
3056          * increasing offset. Once an error is hit, return without waiting
3057          * for any more replies.
3058 restart_loop:
3059         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3060                 if (!rc) {
3061                         if (!try_wait_for_completion(&wdata->done)) {
3062                                 mutex_unlock(&ctx->aio_mutex);
3063                                 return;
3064                         }
3065
3066                         if (wdata->result)
3067                                 rc = wdata->result;
3068                         else
3069                                 ctx->total_len += wdata->bytes;
3070
3071                         /* resend call if it's a retryable error */
3072                         if (rc == -EAGAIN) {
3073                                 struct list_head tmp_list;
3074                                 struct iov_iter tmp_from = ctx->iter;
3075
3076                                 INIT_LIST_HEAD(&tmp_list);
3077                                 list_del_init(&wdata->list);
3078
3079                                 if (ctx->direct_io)
3080                                         rc = cifs_resend_wdata(
3081                                                 wdata, &tmp_list, ctx);
3082                                 else {
3083                                         iov_iter_advance(&tmp_from,
3084                                                  wdata->offset - ctx->pos);
3085
3086                                         rc = cifs_write_from_iter(wdata->offset,
3087                                                 wdata->bytes, &tmp_from,
3088                                                 ctx->cfile, cifs_sb, &tmp_list,
3089                                                 ctx);
3090
3091                                         kref_put(&wdata->refcount,
3092                                                 cifs_uncached_writedata_release);
3093                                 }
3094
3095                                 list_splice(&tmp_list, &ctx->list);
3096                                 goto restart_loop;
3097                         }
3098                 }
3099                 list_del_init(&wdata->list);
3100                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3101         }
3102
3103         cifs_stats_bytes_written(tcon, ctx->total_len);
3104         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3105
3106         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3107
3108         mutex_unlock(&ctx->aio_mutex);
3109
3110         if (ctx->iocb && ctx->iocb->ki_complete)
3111                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3112         else
3113                 complete(&ctx->done);
3114 }
3115
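/*
 * Editor's summary (added for clarity, not in the original source):
 * common body for cifs_direct_writev() and cifs_user_writev(). It builds
 * a cifs_aio_ctx, issues the writes via cifs_write_from_iter(), and for
 * async iocbs returns -EIOCBQUEUED, leaving completion to
 * collect_uncached_write_data(); for sync iocbs it waits on ctx->done.
 */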
3116 static ssize_t __cifs_writev(
3117         struct kiocb *iocb, struct iov_iter *from, bool direct)
3118 {
3119         struct file *file = iocb->ki_filp;
3120         ssize_t total_written = 0;
3121         struct cifsFileInfo *cfile;
3122         struct cifs_tcon *tcon;
3123         struct cifs_sb_info *cifs_sb;
3124         struct cifs_aio_ctx *ctx;
3125         struct iov_iter saved_from = *from;
3126         size_t len = iov_iter_count(from);
3127         int rc;
3128
3129         /*
3130          * iov_iter_get_pages_alloc doesn't work with ITER_KVEC.
3131          * In this case, fall back to the non-direct write path.
3132          * This could be improved by getting pages directly in ITER_KVEC.
3133          */
3134         if (direct && iov_iter_is_kvec(from)) {
3135                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3136                 direct = false;
3137         }
3138
3139         rc = generic_write_checks(iocb, from);
3140         if (rc <= 0)
3141                 return rc;
3142
3143         cifs_sb = CIFS_FILE_SB(file);
3144         cfile = file->private_data;
3145         tcon = tlink_tcon(cfile->tlink);
3146
3147         if (!tcon->ses->server->ops->async_writev)
3148                 return -ENOSYS;
3149
3150         ctx = cifs_aio_ctx_alloc();
3151         if (!ctx)
3152                 return -ENOMEM;
3153
3154         ctx->cfile = cifsFileInfo_get(cfile);
3155
3156         if (!is_sync_kiocb(iocb))
3157                 ctx->iocb = iocb;
3158
3159         ctx->pos = iocb->ki_pos;
3160
3161         if (direct) {
3162                 ctx->direct_io = true;
3163                 ctx->iter = *from;
3164                 ctx->len = len;
3165         } else {
3166                 rc = setup_aio_ctx_iter(ctx, from, WRITE);
3167                 if (rc) {
3168                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3169                         return rc;
3170                 }
3171         }
3172
3173         /* grab a lock here; the write response handlers can access ctx */
3174         mutex_lock(&ctx->aio_mutex);
3175
3176         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3177                                   cfile, cifs_sb, &ctx->list, ctx);
3178
3179         /*
3180          * If at least one write was successfully sent, then discard any rc
3181          * value from the later writes. If the other writes succeed, then
3182          * we'll end up returning whatever was written. If they fail, then
3183          * we'll get a new rc value from that.
3184          */
3185         if (!list_empty(&ctx->list))
3186                 rc = 0;
3187
3188         mutex_unlock(&ctx->aio_mutex);
3189
3190         if (rc) {
3191                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3192                 return rc;
3193         }
3194
3195         if (!is_sync_kiocb(iocb)) {
3196                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3197                 return -EIOCBQUEUED;
3198         }
3199
3200         rc = wait_for_completion_killable(&ctx->done);
3201         if (rc) {
3202                 mutex_lock(&ctx->aio_mutex);
3203                 ctx->rc = rc = -EINTR;
3204                 total_written = ctx->total_len;
3205                 mutex_unlock(&ctx->aio_mutex);
3206         } else {
3207                 rc = ctx->rc;
3208                 total_written = ctx->total_len;
3209         }
3210
3211         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3212
3213         if (unlikely(!total_written))
3214                 return rc;
3215
3216         iocb->ki_pos += total_written;
3217         return total_written;
3218 }
3219
3220 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3221 {
3222         return __cifs_writev(iocb, from, true);
3223 }
3224
3225 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3226 {
3227         return __cifs_writev(iocb, from, false);
3228 }
3229
3230 static ssize_t
3231 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3232 {
3233         struct file *file = iocb->ki_filp;
3234         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3235         struct inode *inode = file->f_mapping->host;
3236         struct cifsInodeInfo *cinode = CIFS_I(inode);
3237         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3238         ssize_t rc;
3239
3240         inode_lock(inode);
3241         /*
3242          * We need to hold the sem to be sure nobody modifies the lock list
3243          * with a brlock that prevents writing.
3244          */
3245         down_read(&cinode->lock_sem);
3246
3247         rc = generic_write_checks(iocb, from);
3248         if (rc <= 0)
3249                 goto out;
3250
3251         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3252                                      server->vals->exclusive_lock_type, 0,
3253                                      NULL, CIFS_WRITE_OP))
3254                 rc = __generic_file_write_iter(iocb, from);
3255         else
3256                 rc = -EACCES;
3257 out:
3258         up_read(&cinode->lock_sem);
3259         inode_unlock(inode);
3260
3261         if (rc > 0)
3262                 rc = generic_write_sync(iocb, rc);
3263         return rc;
3264 }
3265
3266 ssize_t
3267 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3268 {
3269         struct inode *inode = file_inode(iocb->ki_filp);
3270         struct cifsInodeInfo *cinode = CIFS_I(inode);
3271         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3272         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3273                                                 iocb->ki_filp->private_data;
3274         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3275         ssize_t written;
3276
3277         written = cifs_get_writer(cinode);
3278         if (written)
3279                 return written;
3280
3281         if (CIFS_CACHE_WRITE(cinode)) {
3282                 if (cap_unix(tcon->ses) &&
3283                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3284                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3285                         written = generic_file_write_iter(iocb, from);
3286                         goto out;
3287                 }
3288                 written = cifs_writev(iocb, from);
3289                 goto out;
3290         }
3291         /*
3292          * For non-oplocked files in strict cache mode we need to write the data
3293          * to the server exactly from pos to pos+len-1 rather than flush all
3294          * affected pages because it may cause an error with mandatory locks on
3295          * these pages but not on the region from pos to pos+len-1.
3296          */
3297         written = cifs_user_writev(iocb, from);
3298         if (CIFS_CACHE_READ(cinode)) {
3299                 /*
3300                  * We have read level caching and we have just sent a write
3301                  * request to the server thus making data in the cache stale.
3302                  * Zap the cache and set oplock/lease level to NONE to avoid
3303                  * reading stale data from the cache. All subsequent read
3304                  * operations will read new data from the server.
3305                  */
3306                 cifs_zap_mapping(inode);
3307                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3308                          inode);
3309                 cinode->oplock = 0;
3310         }
3311 out:
3312         cifs_put_writer(cinode);
3313         return written;
3314 }
3315
3316 static struct cifs_readdata *
3317 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3318 {
3319         struct cifs_readdata *rdata;
3320
3321         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3322         if (rdata != NULL) {
3323                 rdata->pages = pages;
3324                 kref_init(&rdata->refcount);
3325                 INIT_LIST_HEAD(&rdata->list);
3326                 init_completion(&rdata->done);
3327                 INIT_WORK(&rdata->work, complete);
3328         }
3329
3330         return rdata;
3331 }
3332
3333 static struct cifs_readdata *
3334 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3335 {
3336         struct page **pages =
3337                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3338         struct cifs_readdata *ret = NULL;
3339
3340         if (pages) {
3341                 ret = cifs_readdata_direct_alloc(pages, complete);
3342                 if (!ret)
3343                         kfree(pages);
3344         }
3345
3346         return ret;
3347 }
3348
3349 void
3350 cifs_readdata_release(struct kref *refcount)
3351 {
3352         struct cifs_readdata *rdata = container_of(refcount,
3353                                         struct cifs_readdata, refcount);
3354 #ifdef CONFIG_CIFS_SMB_DIRECT
3355         if (rdata->mr) {
3356                 smbd_deregister_mr(rdata->mr);
3357                 rdata->mr = NULL;
3358         }
3359 #endif
3360         if (rdata->cfile)
3361                 cifsFileInfo_put(rdata->cfile);
3362
3363         kvfree(rdata->pages);
3364         kfree(rdata);
3365 }
3366
3367 static int
3368 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3369 {
3370         int rc = 0;
3371         struct page *page;
3372         unsigned int i;
3373
3374         for (i = 0; i < nr_pages; i++) {
3375                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3376                 if (!page) {
3377                         rc = -ENOMEM;
3378                         break;
3379                 }
3380                 rdata->pages[i] = page;
3381         }
3382
3383         if (rc) {
3384                 unsigned int nr_page_failed = i;
3385
3386                 for (i = 0; i < nr_page_failed; i++) {
3387                         put_page(rdata->pages[i]);
3388                         rdata->pages[i] = NULL;
3389                 }
3390         }
3391         return rc;
3392 }
3393
3394 static void
3395 cifs_uncached_readdata_release(struct kref *refcount)
3396 {
3397         struct cifs_readdata *rdata = container_of(refcount,
3398                                         struct cifs_readdata, refcount);
3399         unsigned int i;
3400
3401         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3402         for (i = 0; i < rdata->nr_pages; i++) {
3403                 put_page(rdata->pages[i]);
3404         }
3405         cifs_readdata_release(refcount);
3406 }
3407
3408 /**
3409  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3410  * @rdata:      the readdata response with list of pages holding data
3411  * @iter:       destination for our data
3412  *
3413  * This function copies data from a list of pages in a readdata response into
3414  * an array of iovecs. It will first calculate where the data should go
3415  * based on the info in the readdata and then copy the data into that spot.
3416  */
3417 static int
3418 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3419 {
3420         size_t remaining = rdata->got_bytes;
3421         unsigned int i;
3422
3423         for (i = 0; i < rdata->nr_pages; i++) {
3424                 struct page *page = rdata->pages[i];
3425                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3426                 size_t written;
3427
3428                 if (unlikely(iov_iter_is_pipe(iter))) {
3429                         void *addr = kmap_atomic(page);
3430
3431                         written = copy_to_iter(addr, copy, iter);
3432                         kunmap_atomic(addr);
3433                 } else
3434                         written = copy_page_to_iter(page, 0, copy, iter);
3435                 remaining -= written;
3436                 if (written < copy && iov_iter_count(iter) > 0)
3437                         break;
3438         }
3439         return remaining ? -EFAULT : 0;
3440 }
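/*
 * Editor's note -- illustrative example, not from the original source:
 * with got_bytes == 5000 and PAGE_SIZE == 4096, 4096 bytes are copied
 * out of page 0 and 904 out of page 1. If the destination iterator
 * faults part-way (written < copy while it still has room), the loop
 * stops and the leftover "remaining" count turns into -EFAULT.
 */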
3441
3442 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3443
3444 static void
3445 cifs_uncached_readv_complete(struct work_struct *work)
3446 {
3447         struct cifs_readdata *rdata = container_of(work,
3448                                                 struct cifs_readdata, work);
3449
3450         complete(&rdata->done);
3451         collect_uncached_read_data(rdata->ctx);
3452         /* the below call can possibly free the last ref to aio ctx */
3453         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3454 }
3455
3456 static int
3457 uncached_fill_pages(struct TCP_Server_Info *server,
3458                     struct cifs_readdata *rdata, struct iov_iter *iter,
3459                     unsigned int len)
3460 {
3461         int result = 0;
3462         unsigned int i;
3463         unsigned int nr_pages = rdata->nr_pages;
3464         unsigned int page_offset = rdata->page_offset;
3465
3466         rdata->got_bytes = 0;
3467         rdata->tailsz = PAGE_SIZE;
3468         for (i = 0; i < nr_pages; i++) {
3469                 struct page *page = rdata->pages[i];
3470                 size_t n;
3471                 unsigned int segment_size = rdata->pagesz;
3472
3473                 if (i == 0)
3474                         segment_size -= page_offset;
3475                 else
3476                         page_offset = 0;
3477
3479                 if (len <= 0) {
3480                         /* no need to hold page hostage */
3481                         rdata->pages[i] = NULL;
3482                         rdata->nr_pages--;
3483                         put_page(page);
3484                         continue;
3485                 }
3486
3487                 n = len;
3488                 if (len >= segment_size)
3489                         /* enough data to fill the page */
3490                         n = segment_size;
3491                 else
3492                         rdata->tailsz = len;
3493                 len -= n;
3494
3495                 if (iter)
3496                         result = copy_page_from_iter(
3497                                         page, page_offset, n, iter);
3498 #ifdef CONFIG_CIFS_SMB_DIRECT
3499                 else if (rdata->mr)
3500                         result = n;
3501 #endif
3502                 else
3503                         result = cifs_read_page_from_socket(
3504                                         server, page, page_offset, n);
3505                 if (result < 0)
3506                         break;
3507
3508                 rdata->got_bytes += result;
3509         }
3510
3511         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3512                                                 rdata->got_bytes : result;
3513 }
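/*
 * Editor's note -- illustrative example, not from the original source:
 * for a response of len == 5000 with two 4096-byte pages and
 * page_offset == 0, page 0 is filled completely and page 1 receives
 * 904 bytes with tailsz set to 904; any remaining pages would be
 * released rather than held empty. A partial receive still returns
 * got_bytes unless the connection aborted (-ECONNABORTED).
 */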
3514
3515 static int
3516 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3517                               struct cifs_readdata *rdata, unsigned int len)
3518 {
3519         return uncached_fill_pages(server, rdata, NULL, len);
3520 }
3521
3522 static int
3523 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3524                               struct cifs_readdata *rdata,
3525                               struct iov_iter *iter)
3526 {
3527         return uncached_fill_pages(server, rdata, iter, iter->count);
3528 }
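/*
 * Editor's note (added for clarity): ->read_into_pages pulls the payload
 * straight off the socket, while ->copy_into_pages is used when the
 * payload already sits in an in-memory iterator -- for instance after
 * SMB3 decryption -- and only needs to be copied into rdata's pages.
 */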
3529
3530 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3531                         struct list_head *rdata_list,
3532                         struct cifs_aio_ctx *ctx)
3533 {
3534         unsigned int rsize;
3535         struct cifs_credits credits;
3536         int rc;
3537         struct TCP_Server_Info *server =
3538                 tlink_tcon(rdata->cfile->tlink)->ses->server;
3539
3540         do {
3541                 if (rdata->cfile->invalidHandle) {
3542                         rc = cifs_reopen_file(rdata->cfile, true);
3543                         if (rc == -EAGAIN)
3544                                 continue;
3545                         else if (rc)
3546                                 break;
3547                 }
3548
3549                 /*
3550                  * Wait for credits to resend this rdata.
3551                  * Note: we are attempting to resend the whole rdata,
3552                  * not in segments.
3553                  */
3554                 do {
3555                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3556                                                 &rsize, &credits);
3557
3558                         if (rc)
3559                                 goto fail;
3560
3561                         if (rsize < rdata->bytes) {
3562                                 add_credits_and_wake_if(server, &credits, 0);
3563                                 msleep(1000);
3564                         }
3565                 } while (rsize < rdata->bytes);
3566                 rdata->credits = credits;
3567
3568                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3569                 if (!rc) {
3570                         if (rdata->cfile->invalidHandle)
3571                                 rc = -EAGAIN;
3572                         else {
3573 #ifdef CONFIG_CIFS_SMB_DIRECT
3574                                 if (rdata->mr) {
3575                                         rdata->mr->need_invalidate = true;
3576                                         smbd_deregister_mr(rdata->mr);
3577                                         rdata->mr = NULL;
3578                                 }
3579 #endif
3580                                 rc = server->ops->async_readv(rdata);
3581                         }
3582                 }
3583
3584                 /* If the read was successfully sent, we are done */
3585                 if (!rc) {
3586                         /* Add to aio pending list */
3587                         list_add_tail(&rdata->list, rdata_list);
3588                         return 0;
3589                 }
3590
3591                 /* Roll back credits and retry if needed */
3592                 add_credits_and_wake_if(server, &rdata->credits, 0);
3593         } while (rc == -EAGAIN);
3594
3595 fail:
3596         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3597         return rc;
3598 }
3599
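/*
 * Editor's summary (added for clarity, not in the original source):
 * cifs_send_async_read() slices [offset, offset + len) into rsize-bounded
 * rdata requests, mirroring cifs_write_from_iter() on the write side:
 * direct I/O pins the user pages, the uncached path allocates them, and
 * each rdata is sent with ->async_readv() and queued on rdata_list.
 */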
3600 static int
3601 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3602                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3603                      struct cifs_aio_ctx *ctx)
3604 {
3605         struct cifs_readdata *rdata;
3606         unsigned int npages, rsize;
3607         struct cifs_credits credits_on_stack;
3608         struct cifs_credits *credits = &credits_on_stack;
3609         size_t cur_len;
3610         int rc;
3611         pid_t pid;
3612         struct TCP_Server_Info *server;
3613         struct page **pagevec;
3614         size_t start;
3615         struct iov_iter direct_iov = ctx->iter;
3616
3617         server = tlink_tcon(open_file->tlink)->ses->server;
3618
3619         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3620                 pid = open_file->pid;
3621         else
3622                 pid = current->tgid;
3623
3624         if (ctx->direct_io)
3625                 iov_iter_advance(&direct_iov, offset - ctx->pos);
3626
3627         do {
3628                 if (open_file->invalidHandle) {
3629                         rc = cifs_reopen_file(open_file, true);
3630                         if (rc == -EAGAIN)
3631                                 continue;
3632                         else if (rc)
3633                                 break;
3634                 }
3635
3636                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3637                                                    &rsize, credits);
3638                 if (rc)
3639                         break;
3640
3641                 cur_len = min_t(const size_t, len, rsize);
3642
3643                 if (ctx->direct_io) {
3644                         ssize_t result;
3645
3646                         result = iov_iter_get_pages_alloc(
3647                                         &direct_iov, &pagevec,
3648                                         cur_len, &start);
3649                         if (result < 0) {
3650                                 cifs_dbg(VFS,
3651                                         "couldn't get user pages (rc=%zd)"
3652                                         " iter type %d"
3653                                         " iov_offset %zd count %zd\n",
3654                                         result, iov_iter_type(&direct_iov),
3655                                         direct_iov.iov_offset,
3656                                         direct_iov.count);
3657                                 dump_stack();
3658
3659                                 rc = result;
3660                                 add_credits_and_wake_if(server, credits, 0);
3661                                 break;
3662                         }
3663                         cur_len = (size_t)result;
3664                         iov_iter_advance(&direct_iov, cur_len);
3665
3666                         rdata = cifs_readdata_direct_alloc(
3667                                         pagevec, cifs_uncached_readv_complete);
3668                         if (!rdata) {
3669                                 add_credits_and_wake_if(server, credits, 0);
3670                                 rc = -ENOMEM;
3671                                 break;
3672                         }
3673
3674                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
3675                         rdata->page_offset = start;
3676                         rdata->tailsz = npages > 1 ?
3677                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
3678                                 cur_len;
3680                 } else {
3682                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3683                         /* allocate a readdata struct */
3684                         rdata = cifs_readdata_alloc(npages,
3685                                             cifs_uncached_readv_complete);
3686                         if (!rdata) {
3687                                 add_credits_and_wake_if(server, credits, 0);
3688                                 rc = -ENOMEM;
3689                                 break;
3690                         }
3691
3692                         rc = cifs_read_allocate_pages(rdata, npages);
3693                         if (rc) {
3694                                 kvfree(rdata->pages);
3695                                 kfree(rdata);
3696                                 add_credits_and_wake_if(server, credits, 0);
3697                                 break;
3698                         }
3699
3700                         rdata->tailsz = PAGE_SIZE;
3701                 }
3702
3703                 rdata->cfile = cifsFileInfo_get(open_file);
3704                 rdata->nr_pages = npages;
3705                 rdata->offset = offset;
3706                 rdata->bytes = cur_len;
3707                 rdata->pid = pid;
3708                 rdata->pagesz = PAGE_SIZE;
3709                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3710                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3711                 rdata->credits = credits_on_stack;
3712                 rdata->ctx = ctx;
3713                 kref_get(&ctx->refcount);
3714
3715                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3716
3717                 if (!rc) {
3718                         if (rdata->cfile->invalidHandle)
3719                                 rc = -EAGAIN;
3720                         else
3721                                 rc = server->ops->async_readv(rdata);
3722                 }
3723
3724                 if (rc) {
3725                         add_credits_and_wake_if(server, &rdata->credits, 0);
3726                         kref_put(&rdata->refcount,
3727                                 cifs_uncached_readdata_release);
3728                         if (rc == -EAGAIN) {
3729                                 iov_iter_revert(&direct_iov, cur_len);
3730                                 continue;
3731                         }
3732                         break;
3733                 }
3734
3735                 list_add_tail(&rdata->list, rdata_list);
3736                 offset += cur_len;
3737                 len -= cur_len;
3738         } while (len > 0);
3739
3740         return rc;
3741 }
3742
3743 static void
3744 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3745 {
3746         struct cifs_readdata *rdata, *tmp;
3747         struct iov_iter *to = &ctx->iter;
3748         struct cifs_sb_info *cifs_sb;
3749         int rc;
3750
3751         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3752
3753         mutex_lock(&ctx->aio_mutex);
3754
3755         if (list_empty(&ctx->list)) {
3756                 mutex_unlock(&ctx->aio_mutex);
3757                 return;
3758         }
3759
3760         rc = ctx->rc;
3761         /* the loop below should proceed in the order of increasing offsets */
3762 again:
3763         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3764                 if (!rc) {
3765                         if (!try_wait_for_completion(&rdata->done)) {
3766                                 mutex_unlock(&ctx->aio_mutex);
3767                                 return;
3768                         }
3769
3770                         if (rdata->result == -EAGAIN) {
3771                                 /* resend call if it's a retryable error */
3772                                 struct list_head tmp_list;
3773                                 unsigned int got_bytes = rdata->got_bytes;
3774
3775                                 list_del_init(&rdata->list);
3776                                 INIT_LIST_HEAD(&tmp_list);
3777
3778                                 /*
3779                                  * We got part of the data and then a reconnect
3780                                  * happened -- fill the buffer and continue
3781                                  * reading.
3782                                  */
3783                                 if (got_bytes && got_bytes < rdata->bytes) {
3784                                         rc = 0;
3785                                         if (!ctx->direct_io)
3786                                                 rc = cifs_readdata_to_iov(rdata, to);
3787                                         if (rc) {
3788                                                 kref_put(&rdata->refcount,
3789                                                         cifs_uncached_readdata_release);
3790                                                 continue;
3791                                         }
3792                                 }
3793
3794                                 if (ctx->direct_io) {
3795                                         /*
3796                                          * Re-use rdata as this is
3797                                          * direct I/O.
3798                                          */
3799                                         rc = cifs_resend_rdata(
3800                                                 rdata,
3801                                                 &tmp_list, ctx);
3802                                 } else {
3803                                         rc = cifs_send_async_read(
3804                                                 rdata->offset + got_bytes,
3805                                                 rdata->bytes - got_bytes,
3806                                                 rdata->cfile, cifs_sb,
3807                                                 &tmp_list, ctx);
3808
3809                                         kref_put(&rdata->refcount,
3810                                                 cifs_uncached_readdata_release);
3811                                 }
3812
3813                                 list_splice(&tmp_list, &ctx->list);
3814
3815                                 goto again;
3816                         } else if (rdata->result)
3817                                 rc = rdata->result;
3818                         else if (!ctx->direct_io)
3819                                 rc = cifs_readdata_to_iov(rdata, to);
3820
3821                         /* if there was a short read -- discard anything left */
3822                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3823                                 rc = -ENODATA;
3824
3825                         ctx->total_len += rdata->got_bytes;
3826                 }
3827                 list_del_init(&rdata->list);
3828                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3829         }
3830
3831         if (!ctx->direct_io)
3832                 ctx->total_len = ctx->len - iov_iter_count(to);
3833
3834         /* mask nodata case */
3835         if (rc == -ENODATA)
3836                 rc = 0;
3837
3838         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3839
3840         mutex_unlock(&ctx->aio_mutex);
3841
3842         if (ctx->iocb && ctx->iocb->ki_complete)
3843                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3844         else
3845                 complete(&ctx->done);
3846 }
3847
3848 static ssize_t __cifs_readv(
3849         struct kiocb *iocb, struct iov_iter *to, bool direct)
3850 {
3851         size_t len;
3852         struct file *file = iocb->ki_filp;
3853         struct cifs_sb_info *cifs_sb;
3854         struct cifsFileInfo *cfile;
3855         struct cifs_tcon *tcon;
3856         ssize_t rc, total_read = 0;
3857         loff_t offset = iocb->ki_pos;
3858         struct cifs_aio_ctx *ctx;
3859
3860         /*
3861          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC.
3862          * In that case, fall back to the data-copy read path.
3863          * This could be improved by getting pages directly in ITER_KVEC.
3864          */
3865         if (direct && iov_iter_is_kvec(to)) {
3866                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
3867                 direct = false;
3868         }
3869
3870         len = iov_iter_count(to);
3871         if (!len)
3872                 return 0;
3873
3874         cifs_sb = CIFS_FILE_SB(file);
3875         cfile = file->private_data;
3876         tcon = tlink_tcon(cfile->tlink);
3877
3878         if (!tcon->ses->server->ops->async_readv)
3879                 return -ENOSYS;
3880
3881         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3882                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3883
3884         ctx = cifs_aio_ctx_alloc();
3885         if (!ctx)
3886                 return -ENOMEM;
3887
3888         ctx->cfile = cifsFileInfo_get(cfile);
3889
3890         if (!is_sync_kiocb(iocb))
3891                 ctx->iocb = iocb;
3892
3893         if (iter_is_iovec(to))
3894                 ctx->should_dirty = true;
3895
3896         if (direct) {
3897                 ctx->pos = offset;
3898                 ctx->direct_io = true;
3899                 ctx->iter = *to;
3900                 ctx->len = len;
3901         } else {
3902                 rc = setup_aio_ctx_iter(ctx, to, READ);
3903                 if (rc) {
3904                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3905                         return rc;
3906                 }
3907                 len = ctx->len;
3908         }
3909
3910         /* grab a lock here due to read response handlers can access ctx */
3911         mutex_lock(&ctx->aio_mutex);
3912
3913         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3914
3915         /* if at least one read request send succeeded, then reset rc */
3916         if (!list_empty(&ctx->list))
3917                 rc = 0;
3918
3919         mutex_unlock(&ctx->aio_mutex);
3920
3921         if (rc) {
3922                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3923                 return rc;
3924         }
3925
3926         if (!is_sync_kiocb(iocb)) {
3927                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3928                 return -EIOCBQUEUED;
3929         }
3930
3931         rc = wait_for_completion_killable(&ctx->done);
3932         if (rc) {
3933                 mutex_lock(&ctx->aio_mutex);
3934                 ctx->rc = rc = -EINTR;
3935                 total_read = ctx->total_len;
3936                 mutex_unlock(&ctx->aio_mutex);
3937         } else {
3938                 rc = ctx->rc;
3939                 total_read = ctx->total_len;
3940         }
3941
3942         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3943
3944         if (total_read) {
3945                 iocb->ki_pos += total_read;
3946                 return total_read;
3947         }
3948         return rc;
3949 }
3950
3951 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
3952 {
3953         return __cifs_readv(iocb, to, true);
3954 }
3955
3956 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3957 {
3958         return __cifs_readv(iocb, to, false);
3959 }
3960
3961 ssize_t
3962 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3963 {
3964         struct inode *inode = file_inode(iocb->ki_filp);
3965         struct cifsInodeInfo *cinode = CIFS_I(inode);
3966         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3967         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3968                                                 iocb->ki_filp->private_data;
3969         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3970         int rc = -EACCES;
3971
3972         /*
3973          * In strict cache mode we need to read from the server all the time
3974          * if we don't have level II oplock because the server can delay mtime
3975          * change - so we can't make a decision about invalidating the inode.
3976          * We can also fail reading pages if there are mandatory locks
3977          * on pages affected by this read but not on the region from pos to
3978          * pos+len-1.
3979          */
3980         if (!CIFS_CACHE_READ(cinode))
3981                 return cifs_user_readv(iocb, to);
3982
3983         if (cap_unix(tcon->ses) &&
3984             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3985             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3986                 return generic_file_read_iter(iocb, to);
3987
3988         /*
3989          * We need to hold the sem to be sure nobody modifies the lock list
3990          * with a brlock that prevents reading.
3991          */
3992         down_read(&cinode->lock_sem);
3993         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3994                                      tcon->ses->server->vals->shared_lock_type,
3995                                      0, NULL, CIFS_READ_OP))
3996                 rc = generic_file_read_iter(iocb, to);
3997         up_read(&cinode->lock_sem);
3998         return rc;
3999 }
4000
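/*
 * Editor's summary (added for clarity, not in the original source):
 * legacy synchronous read path. It loops issuing ->sync_read() calls of
 * at most rsize bytes, retrying on -EAGAIN, and advances *offset by the
 * bytes actually read until read_size is satisfied or the read stops
 * short.
 */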
4001 static ssize_t
4002 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4003 {
4004         int rc = -EACCES;
4005         unsigned int bytes_read = 0;
4006         unsigned int total_read;
4007         unsigned int current_read_size;
4008         unsigned int rsize;
4009         struct cifs_sb_info *cifs_sb;
4010         struct cifs_tcon *tcon;
4011         struct TCP_Server_Info *server;
4012         unsigned int xid;
4013         char *cur_offset;
4014         struct cifsFileInfo *open_file;
4015         struct cifs_io_parms io_parms;
4016         int buf_type = CIFS_NO_BUFFER;
4017         __u32 pid;
4018
4019         xid = get_xid();
4020         cifs_sb = CIFS_FILE_SB(file);
4021
4022         /* FIXME: set up handlers for larger reads and/or convert to async */
4023         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
4024
4025         if (file->private_data == NULL) {
4026                 rc = -EBADF;
4027                 free_xid(xid);
4028                 return rc;
4029         }
4030         open_file = file->private_data;
4031         tcon = tlink_tcon(open_file->tlink);
4032         server = tcon->ses->server;
4033
4034         if (!server->ops->sync_read) {
4035                 free_xid(xid);
4036                 return -ENOSYS;
4037         }
4038
4039         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4040                 pid = open_file->pid;
4041         else
4042                 pid = current->tgid;
4043
4044         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4045                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4046
4047         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4048              total_read += bytes_read, cur_offset += bytes_read) {
4049                 do {
4050                         current_read_size = min_t(uint, read_size - total_read,
4051                                                   rsize);
4052                         /*
4053                          * For Windows ME and 9x we do not want to request more
4054                          * than was negotiated, since the server will refuse
4055                          * the read otherwise.
4056                          */
4057                         if ((tcon->ses) && !(tcon->ses->capabilities &
4058                                 tcon->ses->server->vals->cap_large_files)) {
4059                                 current_read_size = min_t(uint,
4060                                         current_read_size, CIFSMaxBufSize);
4061                         }
4062                         if (open_file->invalidHandle) {
4063                                 rc = cifs_reopen_file(open_file, true);
4064                                 if (rc != 0)
4065                                         break;
4066                         }
4067                         io_parms.pid = pid;
4068                         io_parms.tcon = tcon;
4069                         io_parms.offset = *offset;
4070                         io_parms.length = current_read_size;
4071                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4072                                                     &bytes_read, &cur_offset,
4073                                                     &buf_type);
4074                 } while (rc == -EAGAIN);
4075
4076                 if (rc || (bytes_read == 0)) {
4077                         if (total_read) {
4078                                 break;
4079                         } else {
4080                                 free_xid(xid);
4081                                 return rc;
4082                         }
4083                 } else {
4084                         cifs_stats_bytes_read(tcon, total_read);
4085                         *offset += bytes_read;
4086                 }
4087         }
4088         free_xid(xid);
4089         return total_read;
4090 }
4091
4092 /*
4093  * If the page is mmap'ed into a process' page tables, then we need to make
4094  * sure that it doesn't change while being written back.
4095  */
4096 static vm_fault_t
4097 cifs_page_mkwrite(struct vm_fault *vmf)
4098 {
4099         struct page *page = vmf->page;
4100
4101         lock_page(page);
4102         return VM_FAULT_LOCKED;
4103 }
4104
4105 static const struct vm_operations_struct cifs_file_vm_ops = {
4106         .fault = filemap_fault,
4107         .map_pages = filemap_map_pages,
4108         .page_mkwrite = cifs_page_mkwrite,
4109 };
4110
4111 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4112 {
4113         int xid, rc = 0;
4114         struct inode *inode = file_inode(file);
4115
4116         xid = get_xid();
4117
4118         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4119                 rc = cifs_zap_mapping(inode);
4120         if (!rc)
4121                 rc = generic_file_mmap(file, vma);
4122         if (!rc)
4123                 vma->vm_ops = &cifs_file_vm_ops;
4124
4125         free_xid(xid);
4126         return rc;
4127 }
4128
4129 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4130 {
4131         int rc, xid;
4132
4133         xid = get_xid();
4134
4135         rc = cifs_revalidate_file(file);
4136         if (rc)
4137                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4138                          rc);
4139         if (!rc)
4140                 rc = generic_file_mmap(file, vma);
4141         if (!rc)
4142                 vma->vm_ops = &cifs_file_vm_ops;
4143
4144         free_xid(xid);
4145         return rc;
4146 }
4147
4148 static void
4149 cifs_readv_complete(struct work_struct *work)
4150 {
4151         unsigned int i, got_bytes;
4152         struct cifs_readdata *rdata = container_of(work,
4153                                                 struct cifs_readdata, work);
4154
4155         got_bytes = rdata->got_bytes;
4156         for (i = 0; i < rdata->nr_pages; i++) {
4157                 struct page *page = rdata->pages[i];
4158
4159                 lru_cache_add_file(page);
4160
4161                 if (rdata->result == 0 ||
4162                     (rdata->result == -EAGAIN && got_bytes)) {
4163                         flush_dcache_page(page);
4164                         SetPageUptodate(page);
4165                 }
4166
4167                 unlock_page(page);
4168
4169                 if (rdata->result == 0 ||
4170                     (rdata->result == -EAGAIN && got_bytes))
4171                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4172
4173                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4174
4175                 put_page(page);
4176                 rdata->pages[i] = NULL;
4177         }
4178         kref_put(&rdata->refcount, cifs_readdata_release);
4179 }
4180
4181 static int
4182 readpages_fill_pages(struct TCP_Server_Info *server,
4183                      struct cifs_readdata *rdata, struct iov_iter *iter,
4184                      unsigned int len)
4185 {
4186         int result = 0;
4187         unsigned int i;
4188         u64 eof;
4189         pgoff_t eof_index;
4190         unsigned int nr_pages = rdata->nr_pages;
4191         unsigned int page_offset = rdata->page_offset;
4192
4193         /* determine the eof that the server (probably) has */
4194         eof = CIFS_I(rdata->mapping->host)->server_eof;
4195         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4196         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
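        /*
         * Editor's note, illustrative: with eof == 10000 and a 4096-byte
         * PAGE_SIZE, eof_index == (10000 - 1) >> PAGE_SHIFT == 2, so pages
         * with index > 2 lie wholly beyond the server's EOF and are zeroed
         * below instead of being read.
         */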
4197
4198         rdata->got_bytes = 0;
4199         rdata->tailsz = PAGE_SIZE;
4200         for (i = 0; i < nr_pages; i++) {
4201                 struct page *page = rdata->pages[i];
4202                 unsigned int to_read = rdata->pagesz;
4203                 size_t n;
4204
4205                 if (i == 0)
4206                         to_read -= page_offset;
4207                 else
4208                         page_offset = 0;
4209
4210                 n = to_read;
4211
4212                 if (len >= to_read) {
4213                         len -= to_read;
4214                 } else if (len > 0) {
4215                         /* enough for partial page, fill and zero the rest */
4216                         zero_user(page, len + page_offset, to_read - len);
4217                         n = rdata->tailsz = len;
4218                         len = 0;
4219                 } else if (page->index > eof_index) {
4220                         /*
4221                          * The VFS will not try to do readahead past the
4222                          * i_size, but it's possible that we have outstanding
4223                          * writes with gaps in the middle and the i_size hasn't
4224                          * caught up yet. Populate those with zeroed out pages
4225                          * to prevent the VFS from repeatedly attempting to
4226                          * fill them until the writes are flushed.
4227                          */
4228                         zero_user(page, 0, PAGE_SIZE);
4229                         lru_cache_add_file(page);
4230                         flush_dcache_page(page);
4231                         SetPageUptodate(page);
4232                         unlock_page(page);
4233                         put_page(page);
4234                         rdata->pages[i] = NULL;
4235                         rdata->nr_pages--;
4236                         continue;
4237                 } else {
4238                         /* no need to hold page hostage */
4239                         lru_cache_add_file(page);
4240                         unlock_page(page);
4241                         put_page(page);
4242                         rdata->pages[i] = NULL;
4243                         rdata->nr_pages--;
4244                         continue;
4245                 }
4246
4247                 if (iter)
4248                         result = copy_page_from_iter(
4249                                         page, page_offset, n, iter);
4250 #ifdef CONFIG_CIFS_SMB_DIRECT
4251                 else if (rdata->mr)
4252                         result = n;
4253 #endif
4254                 else
4255                         result = cifs_read_page_from_socket(
4256                                         server, page, page_offset, n);
4257                 if (result < 0)
4258                         break;
4259
4260                 rdata->got_bytes += result;
4261         }
4262
4263         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4264                                                 rdata->got_bytes : result;
4265 }
4266
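/*
 * Thin wrappers giving the transport both ways to fill the pages: reading
 * directly from the socket, or copying from a buffer that was already
 * received and transformed (e.g. decrypted) and is presented as an
 * iov_iter.
 */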
4267 static int
4268 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4269                                struct cifs_readdata *rdata, unsigned int len)
4270 {
4271         return readpages_fill_pages(server, rdata, NULL, len);
4272 }
4273
4274 static int
4275 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4276                                struct cifs_readdata *rdata,
4277                                struct iov_iter *iter)
4278 {
4279         return readpages_fill_pages(server, rdata, iter, iter->count);
4280 }
4281
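/*
 * Peel pages with consecutive indexes off @page_list into @tmplist, adding
 * each to the page cache, until the indexes become discontiguous or adding
 * another page would push the read past @rsize.  On success, *@nr_pages,
 * *@offset and *@bytes describe the resulting contiguous run.
 */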
4282 static int
4283 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
4284                     unsigned int rsize, struct list_head *tmplist,
4285                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
4286 {
4287         struct page *page, *tpage;
4288         unsigned int expected_index;
4289         int rc;
4290         gfp_t gfp = readahead_gfp_mask(mapping);
4291
4292         INIT_LIST_HEAD(tmplist);
4293
4294         page = lru_to_page(page_list);
4295
4296         /*
4297          * Lock the page and put it in the cache. Since no one else
4298          * should have access to this page, we're safe to simply set
4299          * PG_locked without checking it first.
4300          */
4301         __SetPageLocked(page);
4302         rc = add_to_page_cache_locked(page, mapping,
4303                                       page->index, gfp);
4304
4305         /* give up if we can't stick it in the cache */
4306         if (rc) {
4307                 __ClearPageLocked(page);
4308                 return rc;
4309         }
4310
4311         /* move first page to the tmplist */
4312         *offset = (loff_t)page->index << PAGE_SHIFT;
4313         *bytes = PAGE_SIZE;
4314         *nr_pages = 1;
4315         list_move_tail(&page->lru, tmplist);
4316
4317         /* now try and add more pages onto the request */
4318         expected_index = page->index + 1;
4319         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
4320                 /* discontinuity? */
4321                 if (page->index != expected_index)
4322                         break;
4323
4324                 /* would this page push the read over the rsize? */
4325                 if (*bytes + PAGE_SIZE > rsize)
4326                         break;
4327
4328                 __SetPageLocked(page);
4329                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
4330                         __ClearPageLocked(page);
4331                         break;
4332                 }
4333                 list_move_tail(&page->lru, tmplist);
4334                 (*bytes) += PAGE_SIZE;
4335                 expected_index++;
4336                 (*nr_pages)++;
4337         }
4338         return rc;
4339 }
4340
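/*
 * ->readpages() for cifs: try fscache first, then repeatedly carve a run of
 * contiguous pages off @page_list and issue an async read for it, sized by
 * the negotiated rsize and the credits currently available.  Completion is
 * handled by cifs_readv_complete() above.
 */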
4341 static int cifs_readpages(struct file *file, struct address_space *mapping,
4342         struct list_head *page_list, unsigned num_pages)
4343 {
4344         int rc;
4345         struct list_head tmplist;
4346         struct cifsFileInfo *open_file = file->private_data;
4347         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
4348         struct TCP_Server_Info *server;
4349         pid_t pid;
4350         unsigned int xid;
4351
4352         xid = get_xid();
4353         /*
4354          * Reads as many pages as possible from fscache. Returns -ENOBUFS
4355          * immediately if the cookie is negative.
4356          *
4357          * After this point, every page in the list might have PG_fscache set,
4358          * so we will need to clean that up on every page we don't use.
4359          */
4360         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
4361                                          &num_pages);
4362         if (rc == 0) {
4363                 free_xid(xid);
4364                 return rc;
4365         }
4366
4367         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4368                 pid = open_file->pid;
4369         else
4370                 pid = current->tgid;
4371
4372         rc = 0;
4373         server = tlink_tcon(open_file->tlink)->ses->server;
4374
4375         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4376                  __func__, file, mapping, num_pages);
4377
4378         /*
4379          * Start with the page at end of list and move it to private
4380          * list. Do the same with any following pages until we hit
4381          * the rsize limit, hit an index discontinuity, or run out of
4382          * pages. Issue the async read and then start the loop again
4383          * until the list is empty.
4384          *
4385          * Note that list order is important. The page_list is in
4386          * the order of declining indexes. When we put the pages in
4387          * the rdata->pages, then we want them in increasing order.
4388          */
4389         while (!list_empty(page_list)) {
4390                 unsigned int i, nr_pages, bytes, rsize;
4391                 loff_t offset;
4392                 struct page *page, *tpage;
4393                 struct cifs_readdata *rdata;
4394                 struct cifs_credits credits_on_stack;
4395                 struct cifs_credits *credits = &credits_on_stack;
4396
4397                 if (open_file->invalidHandle) {
4398                         rc = cifs_reopen_file(open_file, true);
4399                         if (rc == -EAGAIN)
4400                                 continue;
4401                         else if (rc)
4402                                 break;
4403                 }
4404
4405                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
4406                                                    &rsize, credits);
4407                 if (rc)
4408                         break;
4409
4410                 /*
4411                  * Give up immediately if rsize is too small to read an entire
4412                  * page. The VFS will fall back to readpage. We should never
4413                  * reach this point, however, since we set ra_pages to 0 when
4414                  * the rsize is smaller than a cache page.
4415                  */
4416                 if (unlikely(rsize < PAGE_SIZE)) {
4417                         add_credits_and_wake_if(server, credits, 0);
4418                         free_xid(xid);
4419                         return 0;
4420                 }
4421
4422                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
4423                                          &nr_pages, &offset, &bytes);
4424                 if (rc) {
4425                         add_credits_and_wake_if(server, credits, 0);
4426                         break;
4427                 }
4428
4429                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4430                 if (!rdata) {
4431                         /* best to give up if we're out of mem */
4432                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4433                                 list_del(&page->lru);
4434                                 lru_cache_add_file(page);
4435                                 unlock_page(page);
4436                                 put_page(page);
4437                         }
4438                         rc = -ENOMEM;
4439                         add_credits_and_wake_if(server, credits, 0);
4440                         break;
4441                 }
4442
4443                 rdata->cfile = cifsFileInfo_get(open_file);
4444                 rdata->mapping = mapping;
4445                 rdata->offset = offset;
4446                 rdata->bytes = bytes;
4447                 rdata->pid = pid;
4448                 rdata->pagesz = PAGE_SIZE;
4449                 rdata->tailsz = PAGE_SIZE;
4450                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4451                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4452                 rdata->credits = credits_on_stack;
4453
4454                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
4455                         list_del(&page->lru);
4456                         rdata->pages[rdata->nr_pages++] = page;
4457                 }
4458
4459                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4460
4461                 if (!rc) {
4462                         if (rdata->cfile->invalidHandle)
4463                                 rc = -EAGAIN;
4464                         else
4465                                 rc = server->ops->async_readv(rdata);
4466                 }
4467
4468                 if (rc) {
4469                         add_credits_and_wake_if(server, &rdata->credits, 0);
4470                         for (i = 0; i < rdata->nr_pages; i++) {
4471                                 page = rdata->pages[i];
4472                                 lru_cache_add_file(page);
4473                                 unlock_page(page);
4474                                 put_page(page);
4475                         }
4476                         /* Fallback to the readpage in error/reconnect cases */
4477                         kref_put(&rdata->refcount, cifs_readdata_release);
4478                         break;
4479                 }
4480
4481                 kref_put(&rdata->refcount, cifs_readdata_release);
4482         }
4483
4484         /* Any pages that have been shown to fscache but didn't get added to
4485          * the pagecache must be uncached before they get returned to the
4486          * allocator.
4487          */
4488         cifs_fscache_readpages_cancel(mapping->host, page_list);
4489         free_xid(xid);
4490         return rc;
4491 }
4492
4493 /*
4494  * cifs_readpage_worker must be called with the page pinned
4495  */
4496 static int cifs_readpage_worker(struct file *file, struct page *page,
4497         loff_t *poffset)
4498 {
4499         char *read_data;
4500         int rc;
4501
4502         /* Is the page cached? */
4503         rc = cifs_readpage_from_fscache(file_inode(file), page);
4504         if (rc == 0)
4505                 goto read_complete;
4506
4507         read_data = kmap(page);
4508         /* for reads over a certain size could initiate async read ahead */
4509
4510         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4511
4512         if (rc < 0)
4513                 goto io_error;
4514         else
4515                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4516
4517         /* we do not want atime to be less than mtime, it broke some apps */
4518         file_inode(file)->i_atime = current_time(file_inode(file));
4519         /* only clamp atime back when it would otherwise lag behind mtime */
4520         if (timespec64_compare(&(file_inode(file)->i_atime),
4521                                &(file_inode(file)->i_mtime)) < 0)
4522                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4523
4524         if (PAGE_SIZE > rc)
4525                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4526
4527         flush_dcache_page(page);
4528         SetPageUptodate(page);
4529
4530         /* send this page to the cache */
4531         cifs_readpage_to_fscache(file_inode(file), page);
4532
4533         rc = 0;
4534
4535 io_error:
4536         kunmap(page);
4537         unlock_page(page);
4538
4539 read_complete:
4540         return rc;
4541 }
4542
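/* ->readpage() for cifs: synchronously read a single page */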
4543 static int cifs_readpage(struct file *file, struct page *page)
4544 {
4545         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
4546         int rc = -EACCES;
4547         unsigned int xid;
4548
4549         xid = get_xid();
4550
4551         if (file->private_data == NULL) {
4552                 rc = -EBADF;
4553                 free_xid(xid);
4554                 return rc;
4555         }
4556
4557         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
4558                  page, (int)offset, (int)offset);
4559
4560         rc = cifs_readpage_worker(file, page, &offset);
4561
4562         free_xid(xid);
4563         return rc;
4564 }
4565
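/* Return 1 if any handle open against this inode allows writing. */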
4566 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4567 {
4568         struct cifsFileInfo *open_file;
4569
4570         spin_lock(&cifs_inode->open_file_lock);
4571         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4572                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4573                         spin_unlock(&cifs_inode->open_file_lock);
4574                         return 1;
4575                 }
4576         }
4577         spin_unlock(&cifs_inode->open_file_lock);
4578         return 0;
4579 }
4580
4581 /* We do not want to update the file size from the server for inodes
4582    open for write, to avoid races with writepage extending the file.
4583    In the future we could consider allowing a refresh of the inode
4584    only on increases in the file size, but this is tricky to do
4585    without racing with writebehind page caching in the current
4586    Linux kernel design. */
4587 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4588 {
4589         if (!cifsInode)
4590                 return true;
4591
4592         if (is_inode_writable(cifsInode)) {
4593                 /* This inode is open for write at least once */
4594                 struct cifs_sb_info *cifs_sb;
4595
4596                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4597                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4598                         /* since no page cache to corrupt on directio
4599                            we can change size safely */
4600                         return true;
4601                 }
4602
4603                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4604                         return true;
4605
4606                 return false;
4607         } else
4608                 return true;
4609 }
4610
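/*
 * ->write_begin() for cifs: grab and lock the target page and decide
 * whether it must first be read from the server.  The read is skipped
 * when the write covers the whole page, when (under a read oplock) the
 * page lies beyond the EOF or the write covers all of the existing data,
 * or when the file was opened write-only; in the short-write cases
 * cifs_write_end() will fall back to a sync write.
 */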
4611 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4612                         loff_t pos, unsigned len, unsigned flags,
4613                         struct page **pagep, void **fsdata)
4614 {
4615         int oncethru = 0;
4616         pgoff_t index = pos >> PAGE_SHIFT;
4617         loff_t offset = pos & (PAGE_SIZE - 1);
4618         loff_t page_start = pos & PAGE_MASK;
4619         loff_t i_size;
4620         struct page *page;
4621         int rc = 0;
4622
4623         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4624
4625 start:
4626         page = grab_cache_page_write_begin(mapping, index, flags);
4627         if (!page) {
4628                 rc = -ENOMEM;
4629                 goto out;
4630         }
4631
4632         if (PageUptodate(page))
4633                 goto out;
4634
4635         /*
4636          * If we write a full page it will be up to date, no need to read from
4637          * the server. If the write is short, we'll end up doing a sync write
4638          * instead.
4639          */
4640         if (len == PAGE_SIZE)
4641                 goto out;
4642
4643         /*
4644          * optimize away the read when we have an oplock, and we're not
4645          * expecting to use any of the data we'd be reading in. That
4646          * is, when the page lies beyond the EOF, or straddles the EOF
4647          * and the write will cover all of the existing data.
4648          */
4649         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4650                 i_size = i_size_read(mapping->host);
4651                 if (page_start >= i_size ||
4652                     (offset == 0 && (pos + len) >= i_size)) {
4653                         zero_user_segments(page, 0, offset,
4654                                            offset + len,
4655                                            PAGE_SIZE);
4656                         /*
4657                          * PageChecked means that the parts of the page
4658                          * to which we're not writing are considered up
4659                          * to date. Once the data is copied to the
4660                          * page, it can be set uptodate.
4661                          */
4662                         SetPageChecked(page);
4663                         goto out;
4664                 }
4665         }
4666
4667         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4668                 /*
4669                  * might as well read a page, it is fast enough. If we get
4670                  * an error, we don't need to return it. cifs_write_end will
4671                  * do a sync write instead since PG_uptodate isn't set.
4672                  */
4673                 cifs_readpage_worker(file, page, &page_start);
4674                 put_page(page);
4675                 oncethru = 1;
4676                 goto start;
4677         } else {
4678                 /* we could try using another file handle if there is one -
4679                    but how would we lock it to prevent a close of that handle
4680                    racing with this read? In any case this will be written
4681                    out by write_end so it is fine */
4682         }
4683 out:
4684         *pagep = page;
4685         return rc;
4686 }
4687
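/*
 * ->releasepage() for cifs: refuse while the page still has private data
 * attached, otherwise let fscache decide whether it can be released.
 */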
4688 static int cifs_release_page(struct page *page, gfp_t gfp)
4689 {
4690         if (PagePrivate(page))
4691                 return 0;
4692
4693         return cifs_fscache_release_page(page, gfp);
4694 }
4695
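/*
 * ->invalidatepage() for cifs: only a whole-page invalidation needs to be
 * propagated to fscache; partial invalidations leave the cached copy as is.
 */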
4696 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4697                                  unsigned int length)
4698 {
4699         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4700
4701         if (offset == 0 && length == PAGE_SIZE)
4702                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4703 }
4704
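/*
 * ->launder_page() for cifs: synchronously write a dirty page back before
 * it is invalidated, then drop any fscache copy of it.
 */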
4705 static int cifs_launder_page(struct page *page)
4706 {
4707         int rc = 0;
4708         loff_t range_start = page_offset(page);
4709         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4710         struct writeback_control wbc = {
4711                 .sync_mode = WB_SYNC_ALL,
4712                 .nr_to_write = 0,
4713                 .range_start = range_start,
4714                 .range_end = range_end,
4715         };
4716
4717         cifs_dbg(FYI, "Launder page: %p\n", page);
4718
4719         if (clear_page_dirty_for_io(page))
4720                 rc = cifs_writepage_locked(page, &wbc);
4721
4722         cifs_fscache_invalidate_page(page, page->mapping->host);
4723         return rc;
4724 }
4725
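/*
 * Worker handling an oplock (or lease) break from the server: wait for any
 * in-flight writers, downgrade the cached oplock state, flush and possibly
 * zap the page cache, push cached byte-range locks to the server, and
 * finally acknowledge the break unless it was cancelled (e.g. on reconnect).
 */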
4726 void cifs_oplock_break(struct work_struct *work)
4727 {
4728         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4729                                                   oplock_break);
4730         struct inode *inode = d_inode(cfile->dentry);
4731         struct cifsInodeInfo *cinode = CIFS_I(inode);
4732         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4733         struct TCP_Server_Info *server = tcon->ses->server;
4734         int rc = 0;
4735         bool purge_cache = false;
4736
4737         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4738                         TASK_UNINTERRUPTIBLE);
4739
4740         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
4741                                       cfile->oplock_epoch, &purge_cache);
4742
4743         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4744                                                 cifs_has_mand_locks(cinode)) {
4745                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4746                          inode);
4747                 cinode->oplock = 0;
4748         }
4749
4750         if (inode && S_ISREG(inode->i_mode)) {
4751                 if (CIFS_CACHE_READ(cinode))
4752                         break_lease(inode, O_RDONLY);
4753                 else
4754                         break_lease(inode, O_WRONLY);
4755                 rc = filemap_fdatawrite(inode->i_mapping);
4756                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
4757                         rc = filemap_fdatawait(inode->i_mapping);
4758                         mapping_set_error(inode->i_mapping, rc);
4759                         cifs_zap_mapping(inode);
4760                 }
4761                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4762                 if (CIFS_CACHE_WRITE(cinode))
4763                         goto oplock_break_ack;
4764         }
4765
4766         rc = cifs_push_locks(cfile);
4767         if (rc)
4768                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4769
4770 oplock_break_ack:
4771         /*
4772          * Releasing a stale oplock after a recent reconnect of the SMB
4773          * session using a now incorrect file handle is not a data integrity
4774          * issue, but do not bother sending an oplock release if the session
4775          * is still disconnected, since the server has already released it.
4776          */
4777         if (!cfile->oplock_break_cancelled) {
4778                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4779                                                              cinode);
4780                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4781         }
4782         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
4783         cifs_done_oplock_break(cinode);
4784 }
4785
4786 /*
4787  * The presence of cifs_direct_io() in the address space ops vector
4788  * allows open() O_DIRECT flags which would have failed otherwise.
4789  *
4790  * In the non-cached mode (mount with cache=none), we shunt off direct
4791  * read and write requests, so this method should never be called.
4792  *
4793  * Direct IO is not yet supported in the cached mode.
4794  */
4795 static ssize_t
4796 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4797 {
4798         /*
4799          * FIXME
4800          * Eventually need to support direct IO for non forcedirectio mounts
4801          */
4802         return -EINVAL;
4803 }
4804
4805
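/*
 * Address space operations used when the server's buffer can hold the
 * header plus at least one full page of data (see cifs_addr_ops_smallbuf
 * below for the variant without ->readpages).
 */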
4806 const struct address_space_operations cifs_addr_ops = {
4807         .readpage = cifs_readpage,
4808         .readpages = cifs_readpages,
4809         .writepage = cifs_writepage,
4810         .writepages = cifs_writepages,
4811         .write_begin = cifs_write_begin,
4812         .write_end = cifs_write_end,
4813         .set_page_dirty = __set_page_dirty_nobuffers,
4814         .releasepage = cifs_release_page,
4815         .direct_IO = cifs_direct_io,
4816         .invalidatepage = cifs_invalidate_page,
4817         .launder_page = cifs_launder_page,
4818 };
4819
4820 /*
4821  * cifs_readpages requires the server to support a buffer large enough to
4822  * contain the header plus one complete page of data.  Otherwise, we need
4823  * to leave cifs_readpages out of the address space operations.
4824  */
4825 const struct address_space_operations cifs_addr_ops_smallbuf = {
4826         .readpage = cifs_readpage,
4827         .writepage = cifs_writepage,
4828         .writepages = cifs_writepages,
4829         .write_begin = cifs_write_begin,
4830         .write_end = cifs_write_end,
4831         .set_page_dirty = __set_page_dirty_nobuffers,
4832         .releasepage = cifs_release_page,
4833         .invalidatepage = cifs_invalidate_page,
4834         .launder_page = cifs_launder_page,
4835 };