/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"


static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause an unnecessary access-denied error on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}
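
/*
 * Illustrative note (editorial addition, not part of the original file):
 * with the mapping above, an O_RDWR open asks the server for
 * (GENERIC_READ | GENERIC_WRITE) instead of GENERIC_ALL, and any flags
 * value outside the three O_ACCMODE cases falls through to the explicit
 * FILE_* attribute/data access bits in the final return.
 */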

static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}
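
/*
 * Worked example (editorial addition): the precedence in
 * cifs_get_disposition() gives, for instance:
 *
 *   cifs_get_disposition(O_CREAT | O_EXCL)  == FILE_CREATE
 *   cifs_get_disposition(O_CREAT | O_TRUNC) == FILE_OVERWRITE_IF
 *   cifs_get_disposition(O_CREAT)           == FILE_OPEN_IF
 *   cifs_get_disposition(O_TRUNC)           == FILE_OVERWRITE
 *   cifs_get_disposition(O_RDWR)            == FILE_OPEN
 */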

int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_remap(cifs_sb));
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}
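
/*
 * Editorial note on the contract above: a zero return from
 * cifs_posix_open() means the open/create succeeded on the wire; *pinode
 * is only filled in when the caller passed a non-NULL pinode and the
 * server returned a valid file type (otherwise the caller is expected to
 * do the qpathinfo itself, as the inline comments note).
 */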

static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct POSIX match for the disposition
 *      FILE_SUPERSEDE (i.e. create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar, but it truncates the existing
 *      file rather than creating a new one (using the attributes /
 *      metadata passed in on the open call) as FILE_SUPERSEDE does.
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag
 *      and the read/write flags match reasonably.  O_LARGEFILE
 *      is irrelevant because largefile support is always used
 *      by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *      O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = fid;
        oparms.reconnect = false;

        rc = server->ops->open(xid, &oparms, oplock, buf);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, fid);

out:
        kfree(buf);
        return rc;
}

static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file_dentry(file);
        struct inode *inode = d_inode(dentry);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);
        spin_lock_init(&cfile->file_info_lock);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&tcon->open_file_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);

        /* if this is a readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&tcon->open_file_lock);

        if (fid->purge_cache)
                cifs_zap_mapping(inode);

        file->private_data = cfile;
        return cfile;
}
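
/*
 * Editorial note: the cifsFileInfo returned above starts with a
 * reference count of 1 held by the open itself; cifsFileInfo_get() and
 * cifsFileInfo_put() below take and drop further references, and the
 * handle is only closed on the server once the count drops to zero.
 */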

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file->file_info_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file->file_info_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * tcon->open_file_lock and cifs_file->file_info_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = d_inode(cifs_file->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;
        bool oplock_break_cancelled;

        spin_lock(&tcon->open_file_lock);

        spin_lock(&cifs_file->file_info_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file->file_info_lock);
                spin_unlock(&tcon->open_file_lock);
                return;
        }
        spin_unlock(&cifs_file->file_info_lock);

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         d_inode(cifs_file->dentry));
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
                cifs_set_oplock_level(cifsi, 0);
        }

        spin_unlock(&tcon->open_file_lock);

        oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        if (oplock_break_cancelled)
                cifs_done_oplock_break(cifsi);

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}
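
/*
 * Editorial note on the teardown order above: a pending open is recorded
 * and the file is unhooked from the open-file lists before the handle is
 * closed on the server, so a lease break that races with the close is
 * not missed; the cached byte-range lock records are freed last since
 * they are meaningless once the handle is gone.
 */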

int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file_dentry(file));
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (file->f_flags & O_DIRECT &&
            cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
                        file->f_op = &cifs_file_direct_nobrl_ops;
                else
                        file->f_op = &cifs_file_direct_ops;
        }

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we cannot set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}
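
/*
 * Editorial summary of cifs_open(): when the server advertises
 * CIFS_UNIX_POSIX_PATH_OPS_CAP, a POSIX open is attempted first and
 * cifs_nt_open() is used as the fallback; in both paths a pending open
 * is registered before the handle is hooked up via cifs_new_fileinfo(),
 * so a lease break arriving in between is not lost.
 */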

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to relock */
                up_read(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_read(&cinode->lock_sem);
        return rc;
}

static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_open_parms oparms;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = d_inode(cfile->dentry);
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Cannot grab the rename sem here, because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to retry
                 * hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &cfile->fid);

        oparms.tcon = tcon;
        oparms.cifs_sb = cifs_sb;
        oparms.desired_access = desired_access;
        oparms.create_options = create_options;
        oparms.disposition = disposition;
        oparms.path = full_path;
        oparms.fid = &cfile->fid;
        oparms.reconnect = true;

        /*
         * Cannot refresh the inode by passing in a file_info buf to be
         * returned by ops->open and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally,
         * we could do this.
         */
        rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc == -ENOENT && oparms.reconnect == false) {
                /* durable handle timeout is expired - open the file again */
                rc = server->ops->open(xid, &oparms, &oplock, NULL);
                /* indicate that we need to relock the file */
                oparms.reconnect = true;
        }

        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if we
         * have data that would invalidate the current end of file on the
         * server, we cannot go to the server to get the new inode info.
         */

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        server->ops->set_fid(cfile, &cfile->fid, oplock);
        if (oparms.reconnect)
                cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}
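
/*
 * Editorial note (hedged): per the inline comment, the -ENOENT retry in
 * cifs_reopen_file() handles a durable handle whose timeout has expired
 * on the server; the retry condition can only fire if ops->open cleared
 * oparms.reconnect, presumably in the protocol-specific open path when
 * the durable reconnect fails. After the retry, oparms.reconnect is set
 * again so that cifs_relock_file() reacquires the byte-range locks.
 */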

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

void
cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
{
        struct cifsFileInfo *open_file;
        struct list_head *tmp;
        struct list_head *tmp1;
        struct list_head tmp_list;

        if (!tcon->use_persistent || !tcon->need_reopen_files)
                return;

        tcon->need_reopen_files = false;

        cifs_dbg(FYI, "Reopen persistent handles\n");
        INIT_LIST_HEAD(&tmp_list);

        /* list all files open on tree connection, reopen resilient handles */
        spin_lock(&tcon->open_file_lock);
        list_for_each(tmp, &tcon->openFileList) {
                open_file = list_entry(tmp, struct cifsFileInfo, tlist);
                if (!open_file->invalidHandle)
                        continue;
                cifsFileInfo_get(open_file);
                list_add_tail(&open_file->rlist, &tmp_list);
        }
        spin_unlock(&tcon->open_file_lock);

        list_for_each_safe(tmp, tmp1, &tmp_list) {
                open_file = list_entry(tmp, struct cifsFileInfo, rlist);
                if (cifs_reopen_file(open_file, false /* do not flush */))
                        tcon->need_reopen_files = true;
                list_del_init(&open_file->rlist);
                cifsFileInfo_put(open_file);
        }
}

int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cfile->file_info_lock);
        if (server->ops->dir_needs_close(cfile)) {
                cfile->invalidHandle = true;
                spin_unlock(&cfile->file_info_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cfile->file_info_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check: CIFS_LOCK_OP - no op, CIFS_READ_OP - read, CIFS_WRITE_OP - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
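
/*
 * Editorial gloss on the conflict rules above: an existing lock only
 * conflicts if its byte range overlaps the requested one, and even an
 * overlap is tolerated when (a) the check is for a read/write operation
 * by the same process on the same file handle, unless a write is being
 * checked against a shared (read) lock, or (b) the requested lock is
 * shared and either comes from the same process and handle or matches
 * the existing lock's type exactly.
 */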

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
 * request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}
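
/*
 * Editorial note: max_num above is the number of LOCKING_ANDX_RANGE
 * entries that fit in one SMB buffer after the header, so the cached
 * locks are pushed in batches of at most max_num per cifs_lockv() call,
 * one pass per lock type (exclusive, then shared).
 */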

static __u32
hash_lockowner(fl_owner_t owner)
{
        return cifs_lock_secret ^ hash32_ptr((const void *)owner);
}

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = d_inode(cfile->dentry);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock;
        struct file_lock_context *flctx = inode->i_flctx;
        unsigned int count = 0, i;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        if (!flctx)
                goto out;

        spin_lock(&flctx->flc_lock);
        list_for_each(el, &flctx->flc_posix) {
                count++;
        }
        spin_unlock(&flctx->flc_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (i = 0; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&flctx->flc_lock);
        list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = hash_lockowner(flock->fl_owner);
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
        }
        spin_unlock(&flctx->flc_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need a write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid,
                                      hash_lockowner(flock->fl_owner),
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
                  unsigned int xid)
{
        int rc = 0, stored_rc;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        unsigned int i;
        unsigned int max_num, num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
        struct cifsLockInfo *li, *tmp;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct list_head tmp_llist;

        INIT_LIST_HEAD(&tmp_llist);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf)
                return -EINVAL;

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        down_write(&cinode->lock_sem);
        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (flock->fl_start > li->offset ||
                            (flock->fl_start + length) <
                            (li->offset + li->length))
                                continue;
                        if (current->tgid != li->pid)
                                continue;
                        if (types[i] != li->type)
                                continue;
                        if (cinode->can_cache_brlcks) {
                                /*
                                 * We can cache brlock requests - simply remove
                                 * a lock from the file's list.
                                 */
                                list_del(&li->llist);
                                cifs_del_lock_waiters(li);
                                kfree(li);
                                continue;
                        }
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        /*
                         * We need to save a lock here to let us add it again to
                         * the file's list if the unlock range request fails on
                         * the server.
                         */
                        list_move(&li->llist, &tmp_llist);
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       li->type, num, 0, buf);
                                if (stored_rc) {
                                        /*
                                         * We failed on the unlock range
                                         * request - add all locks from the tmp
                                         * list to the head of the file's list.
                                         */
                                        cifs_move_llist(&tmp_llist,
                                                        &cfile->llist->locks);
1510                                         rc = stored_rc;
1511                                 } else
1512                                         /*
1513                                          * The unlock range request succeeded -
1514                                          * free the tmp list.
1515                                          */
1516                                         cifs_free_llist(&tmp_llist);
1517                                 cur = buf;
1518                                 num = 0;
1519                         } else
1520                                 cur++;
1521                 }
1522                 if (num) {
1523                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1524                                                types[i], num, 0, buf);
1525                         if (stored_rc) {
1526                                 cifs_move_llist(&tmp_llist,
1527                                                 &cfile->llist->locks);
1528                                 rc = stored_rc;
1529                         } else
1530                                 cifs_free_llist(&tmp_llist);
1531                 }
1532         }
1533
1534         up_write(&cinode->lock_sem);
1535         kfree(buf);
1536         return rc;
1537 }
1538
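/*
 * Set or clear a byte-range lock.  POSIX locks go out via
 * CIFSSMBPosixLock; otherwise the lock is checked against the cached
 * lock list first, and a positive return from cifs_lock_add_if means it
 * still has to be sent to the server with the mand_lock op.
 */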
1539 static int
1540 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1541            bool wait_flag, bool posix_lck, int lock, int unlock,
1542            unsigned int xid)
1543 {
1544         int rc = 0;
1545         __u64 length = 1 + flock->fl_end - flock->fl_start;
1546         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1547         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1548         struct TCP_Server_Info *server = tcon->ses->server;
1549         struct inode *inode = d_inode(cfile->dentry);
1550
1551         if (posix_lck) {
1552                 int posix_lock_type;
1553
1554                 rc = cifs_posix_lock_set(file, flock);
1555                 if (rc <= 0)
1556                         return rc;
1557
1558                 if (type & server->vals->shared_lock_type)
1559                         posix_lock_type = CIFS_RDLCK;
1560                 else
1561                         posix_lock_type = CIFS_WRLCK;
1562
1563                 if (unlock == 1)
1564                         posix_lock_type = CIFS_UNLCK;
1565
1566                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1567                                       hash_lockowner(flock->fl_owner),
1568                                       flock->fl_start, length,
1569                                       NULL, posix_lock_type, wait_flag);
1570                 goto out;
1571         }
1572
1573         if (lock) {
1574                 struct cifsLockInfo *lock;
1575
1576                 lock = cifs_lock_init(flock->fl_start, length, type);
1577                 if (!lock)
1578                         return -ENOMEM;
1579
1580                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1581                 if (rc < 0) {
1582                         kfree(lock);
1583                         return rc;
1584                 }
1585                 if (!rc)
1586                         goto out;
1587
1588                 /*
1589                  * Windows 7 server can delay breaking lease from read to None
1590                  * if we set a byte-range lock on a file - break it explicitly
1591                  * before sending the lock to the server to be sure the next
1592                  * read won't conflict with non-overlapping locks due to
1593                  * whole-page reads.
1594                  */
1595                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1596                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1597                         cifs_zap_mapping(inode);
1598                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1599                                  inode);
1600                         CIFS_I(inode)->oplock = 0;
1601                 }
1602
1603                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1604                                             type, 1, 0, wait_flag);
1605                 if (rc) {
1606                         kfree(lock);
1607                         return rc;
1608                 }
1609
1610                 cifs_lock_add(cfile, lock);
1611         } else if (unlock)
1612                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1613
1614 out:
1615         if (flock->fl_flags & FL_POSIX && !rc)
1616                 rc = locks_lock_file_wait(file, flock);
1617         return rc;
1618 }
1619
1620 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1621 {
1622         int rc, xid;
1623         int lock = 0, unlock = 0;
1624         bool wait_flag = false;
1625         bool posix_lck = false;
1626         struct cifs_sb_info *cifs_sb;
1627         struct cifs_tcon *tcon;
1628         struct cifsInodeInfo *cinode;
1629         struct cifsFileInfo *cfile;
1630         __u16 netfid;
1631         __u32 type;
1632
1633         rc = -EACCES;
1634         xid = get_xid();
1635
1636         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1637                  cmd, flock->fl_flags, flock->fl_type,
1638                  flock->fl_start, flock->fl_end);
1639
1640         cfile = (struct cifsFileInfo *)file->private_data;
1641         tcon = tlink_tcon(cfile->tlink);
1642
1643         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1644                         tcon->ses->server);
1645
1646         cifs_sb = CIFS_FILE_SB(file);
1647         netfid = cfile->fid.netfid;
1648         cinode = CIFS_I(file_inode(file));
1649
1650         if (cap_unix(tcon->ses) &&
1651             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1652             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1653                 posix_lck = true;
1654         /*
1655          * BB add code here to normalize offset and length to account for
1656          * negative length which we cannot accept over the wire.
1657          */
1658         if (IS_GETLK(cmd)) {
1659                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1660                 free_xid(xid);
1661                 return rc;
1662         }
1663
1664         if (!lock && !unlock) {
1665                 /*
1666                  * if this is neither a lock nor an unlock request, there is
1667                  * nothing to do since we do not know how to handle it
1668                  */
1669                 free_xid(xid);
1670                 return -EOPNOTSUPP;
1671         }
1672
1673         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1674                         xid);
1675         free_xid(xid);
1676         return rc;
1677 }
1678
1679 /*
1680  * update the file size (if needed) after a write. Should be called with
1681  * the inode->i_lock held
1682  */
1683 void
1684 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1685                       unsigned int bytes_written)
1686 {
1687         loff_t end_of_write = offset + bytes_written;
1688
1689         if (end_of_write > cifsi->server_eof)
1690                 cifsi->server_eof = end_of_write;
1691 }
1692
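/*
 * Write @write_size bytes from @write_data to the server at *@offset,
 * retrying each chunk on -EAGAIN (reopening an invalidated handle if
 * needed) and updating the cached EOF and inode size as data goes out.
 * Returns the number of bytes written, or an error if nothing was sent.
 */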
1693 static ssize_t
1694 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1695            size_t write_size, loff_t *offset)
1696 {
1697         int rc = 0;
1698         unsigned int bytes_written = 0;
1699         unsigned int total_written;
1700         struct cifs_sb_info *cifs_sb;
1701         struct cifs_tcon *tcon;
1702         struct TCP_Server_Info *server;
1703         unsigned int xid;
1704         struct dentry *dentry = open_file->dentry;
1705         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
1706         struct cifs_io_parms io_parms;
1707
1708         cifs_sb = CIFS_SB(dentry->d_sb);
1709
1710         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
1711                  write_size, *offset, dentry);
1712
1713         tcon = tlink_tcon(open_file->tlink);
1714         server = tcon->ses->server;
1715
1716         if (!server->ops->sync_write)
1717                 return -ENOSYS;
1718
1719         xid = get_xid();
1720
1721         for (total_written = 0; write_size > total_written;
1722              total_written += bytes_written) {
1723                 rc = -EAGAIN;
1724                 while (rc == -EAGAIN) {
1725                         struct kvec iov[2];
1726                         unsigned int len;
1727
1728                         if (open_file->invalidHandle) {
1729                                 /* we could deadlock if we called
1730                                    filemap_fdatawait from here, so tell
1731                                    cifs_reopen_file not to flush data to
1732                                    the server now */
1733                                 rc = cifs_reopen_file(open_file, false);
1734                                 if (rc != 0)
1735                                         break;
1736                         }
1737
1738                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
1739                                   (unsigned int)write_size - total_written);
1740                         /* iov[0] is reserved for smb header */
1741                         iov[1].iov_base = (char *)write_data + total_written;
1742                         iov[1].iov_len = len;
1743                         io_parms.pid = pid;
1744                         io_parms.tcon = tcon;
1745                         io_parms.offset = *offset;
1746                         io_parms.length = len;
1747                         rc = server->ops->sync_write(xid, &open_file->fid,
1748                                         &io_parms, &bytes_written, iov, 1);
1749                 }
1750                 if (rc || (bytes_written == 0)) {
1751                         if (total_written)
1752                                 break;
1753                         else {
1754                                 free_xid(xid);
1755                                 return rc;
1756                         }
1757                 } else {
1758                         spin_lock(&d_inode(dentry)->i_lock);
1759                         cifs_update_eof(cifsi, *offset, bytes_written);
1760                         spin_unlock(&d_inode(dentry)->i_lock);
1761                         *offset += bytes_written;
1762                 }
1763         }
1764
1765         cifs_stats_bytes_written(tcon, total_written);
1766
1767         if (total_written > 0) {
1768                 spin_lock(&d_inode(dentry)->i_lock);
1769                 if (*offset > d_inode(dentry)->i_size)
1770                         i_size_write(d_inode(dentry), *offset);
1771                 spin_unlock(&d_inode(dentry)->i_lock);
1772         }
1773         mark_inode_dirty_sync(d_inode(dentry));
1774         free_xid(xid);
1775         return total_written;
1776 }
1777
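/*
 * Return a referenced open handle suitable for reading from the inode's
 * open file list, or NULL if none is currently valid.  On multiuser
 * mounts the search can be restricted to handles owned by the caller's
 * fsuid.
 */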
1778 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1779                                         bool fsuid_only)
1780 {
1781         struct cifsFileInfo *open_file = NULL;
1782         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1783         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1784
1785         /* only filter by fsuid on multiuser mounts */
1786         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1787                 fsuid_only = false;
1788
1789         spin_lock(&tcon->open_file_lock);
1790         /* we could simply take the first list entry since write-only entries
1791            are always at the end of the list but since the first entry might
1792            have a close pending, we go through the whole list */
1793         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1794                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1795                         continue;
1796                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1797                         if (!open_file->invalidHandle) {
1798                                 /* found a good file - take a reference
1799                                    so it will not be closed on us */
1800                                 cifsFileInfo_get(open_file);
1801                                 spin_unlock(&tcon->open_file_lock);
1802                                 return open_file;
1803                         } /* else might as well continue, and look for
1804                              another, or simply have the caller reopen it
1805                              again rather than trying to fix this handle */
1806                 } else /* write only file */
1807                         break; /* write only files are last so must be done */
1808         }
1809         spin_unlock(&tcon->open_file_lock);
1810         return NULL;
1811 }
1812
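/*
 * Walk the inode's open file list looking for a handle opened for write,
 * preferring one owned by the calling process.  An invalidated handle is
 * remembered and reopened as a last resort; after MAX_REOPEN_ATT failed
 * reopen attempts we give up and return NULL.
 */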
1813 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1814                                         bool fsuid_only)
1815 {
1816         struct cifsFileInfo *open_file, *inv_file = NULL;
1817         struct cifs_sb_info *cifs_sb;
1818         struct cifs_tcon *tcon;
1819         bool any_available = false;
1820         int rc;
1821         unsigned int refind = 0;
1822
1823         /* Having a null inode here (because mapping->host was set to zero by
1824         the VFS or MM) should not happen, but we had reports of an oops (due to
1825         it being zero) during stress test cases, so we need to check for it */
1826
1827         if (cifs_inode == NULL) {
1828                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1829                 dump_stack();
1830                 return NULL;
1831         }
1832
1833         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1834         tcon = cifs_sb_master_tcon(cifs_sb);
1835
1836         /* only filter by fsuid on multiuser mounts */
1837         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1838                 fsuid_only = false;
1839
1840         spin_lock(&tcon->open_file_lock);
1841 refind_writable:
1842         if (refind > MAX_REOPEN_ATT) {
1843                 spin_unlock(&tcon->open_file_lock);
1844                 return NULL;
1845         }
1846         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1847                 if (!any_available && open_file->pid != current->tgid)
1848                         continue;
1849                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1850                         continue;
1851                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1852                         if (!open_file->invalidHandle) {
1853                                 /* found a good writable file */
1854                                 cifsFileInfo_get(open_file);
1855                                 spin_unlock(&tcon->open_file_lock);
1856                                 return open_file;
1857                         } else {
1858                                 if (!inv_file)
1859                                         inv_file = open_file;
1860                         }
1861                 }
1862         }
1863         /* couldn't find a usable FH with the same pid, try any available */
1864         if (!any_available) {
1865                 any_available = true;
1866                 goto refind_writable;
1867         }
1868
1869         if (inv_file) {
1870                 any_available = false;
1871                 cifsFileInfo_get(inv_file);
1872         }
1873
1874         spin_unlock(&tcon->open_file_lock);
1875
1876         if (inv_file) {
1877                 rc = cifs_reopen_file(inv_file, false);
1878                 if (!rc)
1879                         return inv_file;
1880                 else {
1881                         spin_lock(&tcon->open_file_lock);
1882                         list_move_tail(&inv_file->flist,
1883                                         &cifs_inode->openFileList);
1884                         spin_unlock(&tcon->open_file_lock);
1885                         cifsFileInfo_put(inv_file);
1886                         ++refind;
1887                         inv_file = NULL;
1888                         spin_lock(&tcon->open_file_lock);
1889                         goto refind_writable;
1890                 }
1891         }
1892
1893         return NULL;
1894 }
1895
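/*
 * Write the byte range [from, to) of a single page back to the server
 * using any writable handle for the inode.  The range is clamped against
 * i_size so that a racing truncate cannot make us extend the file.
 */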
1896 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1897 {
1898         struct address_space *mapping = page->mapping;
1899         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1900         char *write_data;
1901         int rc = -EFAULT;
1902         int bytes_written = 0;
1903         struct inode *inode;
1904         struct cifsFileInfo *open_file;
1905
1906         if (!mapping || !mapping->host)
1907                 return -EFAULT;
1908
1909         inode = page->mapping->host;
1910
1911         offset += (loff_t)from;
1912         write_data = kmap(page);
1913         write_data += from;
1914
1915         if ((to > PAGE_SIZE) || (from > to)) {
1916                 kunmap(page);
1917                 return -EIO;
1918         }
1919
1920         /* racing with truncate? */
1921         if (offset > mapping->host->i_size) {
1922                 kunmap(page);
1923                 return 0; /* don't care */
1924         }
1925
1926         /* check to make sure that we are not extending the file */
1927         if (mapping->host->i_size - offset < (loff_t)to)
1928                 to = (unsigned)(mapping->host->i_size - offset);
1929
1930         open_file = find_writable_file(CIFS_I(mapping->host), false);
1931         if (open_file) {
1932                 bytes_written = cifs_write(open_file, open_file->pid,
1933                                            write_data, to - from, &offset);
1934                 cifsFileInfo_put(open_file);
1935                 /* Does mm or vfs already set times? */
1936                 inode->i_atime = inode->i_mtime = current_time(inode);
1937                 if ((bytes_written > 0) && (offset))
1938                         rc = 0;
1939                 else if (bytes_written < 0)
1940                         rc = bytes_written;
1941         } else {
1942                 cifs_dbg(FYI, "No writable file handles for inode\n");
1943                 rc = -EIO;
1944         }
1945
1946         kunmap(page);
1947         return rc;
1948 }
1949
1950 static struct cifs_writedata *
1951 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
1952                           pgoff_t end, pgoff_t *index,
1953                           unsigned int *found_pages)
1954 {
1955         unsigned int nr_pages;
1956         struct page **pages;
1957         struct cifs_writedata *wdata;
1958
1959         wdata = cifs_writedata_alloc((unsigned int)tofind,
1960                                      cifs_writev_complete);
1961         if (!wdata)
1962                 return NULL;
1963
1964         /*
1965          * find_get_pages_tag seems to return a max of 256 on each
1966          * iteration, so we must call it several times in order to
1967          * fill the array or the wsize is effectively limited to
1968          * 256 * PAGE_SIZE.
1969          */
1970         *found_pages = 0;
1971         pages = wdata->pages;
1972         do {
1973                 nr_pages = find_get_pages_tag(mapping, index,
1974                                               PAGECACHE_TAG_DIRTY, tofind,
1975                                               pages);
1976                 *found_pages += nr_pages;
1977                 tofind -= nr_pages;
1978                 pages += nr_pages;
1979         } while (nr_pages && tofind && *index <= end);
1980
1981         return wdata;
1982 }
1983
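/*
 * Lock and tag for writeback as many consecutive dirty pages from
 * wdata->pages as possible, stopping at the first page that was
 * truncated, redirtied, already under writeback, or not contiguous.
 * Unused trailing pages are released before returning the count of
 * pages actually prepared.
 */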
1984 static unsigned int
1985 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
1986                     struct address_space *mapping,
1987                     struct writeback_control *wbc,
1988                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
1989 {
1990         unsigned int nr_pages = 0, i;
1991         struct page *page;
1992
1993         for (i = 0; i < found_pages; i++) {
1994                 page = wdata->pages[i];
1995                 /*
1996                  * At this point we hold neither mapping->tree_lock nor
1997                  * lock on the page itself: the page may be truncated or
1998                  * invalidated (changing page->mapping to NULL), or even
1999                  * swizzled back from swapper_space to tmpfs file
2000                  * mapping
2001                  */
2002
2003                 if (nr_pages == 0)
2004                         lock_page(page);
2005                 else if (!trylock_page(page))
2006                         break;
2007
2008                 if (unlikely(page->mapping != mapping)) {
2009                         unlock_page(page);
2010                         break;
2011                 }
2012
2013                 if (!wbc->range_cyclic && page->index > end) {
2014                         *done = true;
2015                         unlock_page(page);
2016                         break;
2017                 }
2018
2019                 if (*next && (page->index != *next)) {
2020                         /* Not next consecutive page */
2021                         unlock_page(page);
2022                         break;
2023                 }
2024
2025                 if (wbc->sync_mode != WB_SYNC_NONE)
2026                         wait_on_page_writeback(page);
2027
2028                 if (PageWriteback(page) ||
2029                                 !clear_page_dirty_for_io(page)) {
2030                         unlock_page(page);
2031                         break;
2032                 }
2033
2034                 /*
2035                  * This actually clears the dirty bit in the radix tree.
2036                  * See cifs_writepage() for more commentary.
2037                  */
2038                 set_page_writeback(page);
2039                 if (page_offset(page) >= i_size_read(mapping->host)) {
2040                         *done = true;
2041                         unlock_page(page);
2042                         end_page_writeback(page);
2043                         break;
2044                 }
2045
2046                 wdata->pages[i] = page;
2047                 *next = page->index + 1;
2048                 ++nr_pages;
2049         }
2050
2051         /* reset index to refind any pages skipped */
2052         if (nr_pages == 0)
2053                 *index = wdata->pages[0]->index + 1;
2054
2055         /* put any pages we aren't going to use */
2056         for (i = nr_pages; i < found_pages; i++) {
2057                 put_page(wdata->pages[i]);
2058                 wdata->pages[i] = NULL;
2059         }
2060
2061         return nr_pages;
2062 }
2063
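/*
 * Fill in the remaining wdata fields (offset, byte count, tail size),
 * find a writable handle for the inode, and hand the request to the
 * server's async_writev op.  All prepared pages are unlocked before
 * returning, whether or not the send succeeded.
 */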
2064 static int
2065 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2066                  struct address_space *mapping, struct writeback_control *wbc)
2067 {
2068         int rc = 0;
2069         struct TCP_Server_Info *server;
2070         unsigned int i;
2071
2072         wdata->sync_mode = wbc->sync_mode;
2073         wdata->nr_pages = nr_pages;
2074         wdata->offset = page_offset(wdata->pages[0]);
2075         wdata->pagesz = PAGE_SIZE;
2076         wdata->tailsz = min(i_size_read(mapping->host) -
2077                         page_offset(wdata->pages[nr_pages - 1]),
2078                         (loff_t)PAGE_SIZE);
2079         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2080
2081         if (wdata->cfile != NULL)
2082                 cifsFileInfo_put(wdata->cfile);
2083         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2084         if (!wdata->cfile) {
2085                 cifs_dbg(VFS, "No writable handles for inode\n");
2086                 rc = -EBADF;
2087         } else {
2088                 wdata->pid = wdata->cfile->pid;
2089                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2090                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2091         }
2092
2093         for (i = 0; i < nr_pages; ++i)
2094                 unlock_page(wdata->pages[i]);
2095
2096         return rc;
2097 }
2098
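/*
 * Writeback entry point: repeatedly reserve write credits, gather up to
 * wsize worth of contiguous dirty pages, and send them as one async
 * write.  -EAGAIN from a send redirties the pages (and is retried from
 * the saved index for WB_SYNC_ALL); other errors are reported through
 * mapping_set_error.
 */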
2099 static int cifs_writepages(struct address_space *mapping,
2100                            struct writeback_control *wbc)
2101 {
2102         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
2103         struct TCP_Server_Info *server;
2104         bool done = false, scanned = false, range_whole = false;
2105         pgoff_t end, index;
2106         struct cifs_writedata *wdata;
2107         int rc = 0;
2108
2109         /*
2110          * If wsize is smaller than the page cache size, default to writing
2111          * one page at a time via cifs_writepage
2112          */
2113         if (cifs_sb->wsize < PAGE_SIZE)
2114                 return generic_writepages(mapping, wbc);
2115
2116         if (wbc->range_cyclic) {
2117                 index = mapping->writeback_index; /* Start from prev offset */
2118                 end = -1;
2119         } else {
2120                 index = wbc->range_start >> PAGE_SHIFT;
2121                 end = wbc->range_end >> PAGE_SHIFT;
2122                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2123                         range_whole = true;
2124                 scanned = true;
2125         }
2126         server = cifs_sb_master_tcon(cifs_sb)->ses->server;
2127 retry:
2128         while (!done && index <= end) {
2129                 unsigned int i, nr_pages, found_pages, wsize, credits;
2130                 pgoff_t next = 0, tofind, saved_index = index;
2131
2132                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2133                                                    &wsize, &credits);
2134                 if (rc)
2135                         break;
2136
2137                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2138
2139                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2140                                                   &found_pages);
2141                 if (!wdata) {
2142                         rc = -ENOMEM;
2143                         add_credits_and_wake_if(server, credits, 0);
2144                         break;
2145                 }
2146
2147                 if (found_pages == 0) {
2148                         kref_put(&wdata->refcount, cifs_writedata_release);
2149                         add_credits_and_wake_if(server, credits, 0);
2150                         break;
2151                 }
2152
2153                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2154                                                end, &index, &next, &done);
2155
2156                 /* nothing to write? */
2157                 if (nr_pages == 0) {
2158                         kref_put(&wdata->refcount, cifs_writedata_release);
2159                         add_credits_and_wake_if(server, credits, 0);
2160                         continue;
2161                 }
2162
2163                 wdata->credits = credits;
2164
2165                 rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2166
2167                 /* send failure -- clean up the mess */
2168                 if (rc != 0) {
2169                         add_credits_and_wake_if(server, wdata->credits, 0);
2170                         for (i = 0; i < nr_pages; ++i) {
2171                                 if (rc == -EAGAIN)
2172                                         redirty_page_for_writepage(wbc,
2173                                                            wdata->pages[i]);
2174                                 else
2175                                         SetPageError(wdata->pages[i]);
2176                                 end_page_writeback(wdata->pages[i]);
2177                                 put_page(wdata->pages[i]);
2178                         }
2179                         if (rc != -EAGAIN)
2180                                 mapping_set_error(mapping, rc);
2181                 }
2182                 kref_put(&wdata->refcount, cifs_writedata_release);
2183
2184                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2185                         index = saved_index;
2186                         continue;
2187                 }
2188
2189                 wbc->nr_to_write -= nr_pages;
2190                 if (wbc->nr_to_write <= 0)
2191                         done = true;
2192
2193                 index = next;
2194         }
2195
2196         if (!scanned && !done) {
2197                 /*
2198                  * We hit the last page and there is more work to be done: wrap
2199                  * back to the start of the file
2200                  */
2201                 scanned = true;
2202                 index = 0;
2203                 goto retry;
2204         }
2205
2206         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2207                 mapping->writeback_index = index;
2208
2209         return rc;
2210 }
2211
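/*
 * Write a single locked page via cifs_partialpagewrite, looping on
 * -EAGAIN for WB_SYNC_ALL and redirtying the page for other sync modes.
 */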
2212 static int
2213 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2214 {
2215         int rc;
2216         unsigned int xid;
2217
2218         xid = get_xid();
2219 /* BB add check for wbc flags */
2220         get_page(page);
2221         if (!PageUptodate(page))
2222                 cifs_dbg(FYI, "ppw - page not up to date\n");
2223
2224         /*
2225          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2226          *
2227          * A writepage() implementation always needs to do either this,
2228          * or re-dirty the page with "redirty_page_for_writepage()" in
2229          * the case of a failure.
2230          *
2231          * Just unlocking the page will cause the radix tree tag-bits
2232          * to fail to update with the state of the page correctly.
2233          */
2234         set_page_writeback(page);
2235 retry_write:
2236         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2237         if (rc == -EAGAIN) {
2238                 if (wbc->sync_mode == WB_SYNC_ALL)
2239                         goto retry_write;
2240                 redirty_page_for_writepage(wbc, page);
2241         } else if (rc != 0) {
2242                 SetPageError(page);
2243                 mapping_set_error(page->mapping, rc);
2244         } else {
2245                 SetPageUptodate(page);
2246         }
2247         end_page_writeback(page);
2248         put_page(page);
2249         free_xid(xid);
2250         return rc;
2251 }
2252
2253 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2254 {
2255         int rc = cifs_writepage_locked(page, wbc);
2256         unlock_page(page);
2257         return rc;
2258 }
2259
2260 static int cifs_write_end(struct file *file, struct address_space *mapping,
2261                         loff_t pos, unsigned len, unsigned copied,
2262                         struct page *page, void *fsdata)
2263 {
2264         int rc;
2265         struct inode *inode = mapping->host;
2266         struct cifsFileInfo *cfile = file->private_data;
2267         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2268         __u32 pid;
2269
2270         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2271                 pid = cfile->pid;
2272         else
2273                 pid = current->tgid;
2274
2275         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2276                  page, pos, copied);
2277
2278         if (PageChecked(page)) {
2279                 if (copied == len)
2280                         SetPageUptodate(page);
2281                 ClearPageChecked(page);
2282         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2283                 SetPageUptodate(page);
2284
2285         if (!PageUptodate(page)) {
2286                 char *page_data;
2287                 unsigned offset = pos & (PAGE_SIZE - 1);
2288                 unsigned int xid;
2289
2290                 xid = get_xid();
2291                 /* this is probably better than calling
2292                    cifs_partialpagewrite directly, since here the file
2293                    handle is already known and we might as well use it */
2294                 /* BB check if anything else is missing from ppw,
2295                    such as updating the last write time */
2296                 page_data = kmap(page);
2297                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2298                 /* if (rc < 0) should we set writebehind rc? */
2299                 kunmap(page);
2300
2301                 free_xid(xid);
2302         } else {
2303                 rc = copied;
2304                 pos += copied;
2305                 set_page_dirty(page);
2306         }
2307
2308         if (rc > 0) {
2309                 spin_lock(&inode->i_lock);
2310                 if (pos > inode->i_size)
2311                         i_size_write(inode, pos);
2312                 spin_unlock(&inode->i_lock);
2313         }
2314
2315         unlock_page(page);
2316         put_page(page);
2317
2318         return rc;
2319 }
2320
2321 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2322                       int datasync)
2323 {
2324         unsigned int xid;
2325         int rc = 0;
2326         struct cifs_tcon *tcon;
2327         struct TCP_Server_Info *server;
2328         struct cifsFileInfo *smbfile = file->private_data;
2329         struct inode *inode = file_inode(file);
2330         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2331
2332         rc = file_write_and_wait_range(file, start, end);
2333         if (rc)
2334                 return rc;
2335         inode_lock(inode);
2336
2337         xid = get_xid();
2338
2339         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2340                  file, datasync);
2341
2342         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2343                 rc = cifs_zap_mapping(inode);
2344                 if (rc) {
2345                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2346                         rc = 0; /* don't care about it in fsync */
2347                 }
2348         }
2349
2350         tcon = tlink_tcon(smbfile->tlink);
2351         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2352                 server = tcon->ses->server;
2353                 if (server->ops->flush)
2354                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2355                 else
2356                         rc = -ENOSYS;
2357         }
2358
2359         free_xid(xid);
2360         inode_unlock(inode);
2361         return rc;
2362 }
2363
2364 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2365 {
2366         unsigned int xid;
2367         int rc = 0;
2368         struct cifs_tcon *tcon;
2369         struct TCP_Server_Info *server;
2370         struct cifsFileInfo *smbfile = file->private_data;
2371         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2372         struct inode *inode = file->f_mapping->host;
2373
2374         rc = file_write_and_wait_range(file, start, end);
2375         if (rc)
2376                 return rc;
2377         inode_lock(inode);
2378
2379         xid = get_xid();
2380
2381         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2382                  file, datasync);
2383
2384         tcon = tlink_tcon(smbfile->tlink);
2385         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2386                 server = tcon->ses->server;
2387                 if (server->ops->flush)
2388                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2389                 else
2390                         rc = -ENOSYS;
2391         }
2392
2393         free_xid(xid);
2394         inode_unlock(inode);
2395         return rc;
2396 }
2397
2398 /*
2399  * As file closes, flush all cached write data for this inode checking
2400  * for write behind errors.
2401  */
2402 int cifs_flush(struct file *file, fl_owner_t id)
2403 {
2404         struct inode *inode = file_inode(file);
2405         int rc = 0;
2406
2407         if (file->f_mode & FMODE_WRITE)
2408                 rc = filemap_write_and_wait(inode->i_mapping);
2409
2410         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2411
2412         return rc;
2413 }
2414
2415 static int
2416 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2417 {
2418         int rc = 0;
2419         unsigned long i;
2420
2421         for (i = 0; i < num_pages; i++) {
2422                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2423                 if (!pages[i]) {
2424                         /*
2425                          * save number of pages we have already allocated and
2426                          * return with ENOMEM error
2427                          */
2428                         num_pages = i;
2429                         rc = -ENOMEM;
2430                         break;
2431                 }
2432         }
2433
2434         if (rc) {
2435                 for (i = 0; i < num_pages; i++)
2436                         put_page(pages[i]);
2437         }
2438         return rc;
2439 }
2440
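/* Clamp @len to @wsize and return the number of pages needed to hold the
   resulting chunk; the clamped length is passed back through @cur_len. */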
2441 static inline
2442 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2443 {
2444         size_t num_pages;
2445         size_t clen;
2446
2447         clen = min_t(const size_t, len, wsize);
2448         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2449
2450         if (cur_len)
2451                 *cur_len = clen;
2452
2453         return num_pages;
2454 }
2455
2456 static void
2457 cifs_uncached_writedata_release(struct kref *refcount)
2458 {
2459         int i;
2460         struct cifs_writedata *wdata = container_of(refcount,
2461                                         struct cifs_writedata, refcount);
2462
2463         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
2464         for (i = 0; i < wdata->nr_pages; i++)
2465                 put_page(wdata->pages[i]);
2466         cifs_writedata_release(refcount);
2467 }
2468
2469 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2470
2471 static void
2472 cifs_uncached_writev_complete(struct work_struct *work)
2473 {
2474         struct cifs_writedata *wdata = container_of(work,
2475                                         struct cifs_writedata, work);
2476         struct inode *inode = d_inode(wdata->cfile->dentry);
2477         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2478
2479         spin_lock(&inode->i_lock);
2480         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2481         if (cifsi->server_eof > inode->i_size)
2482                 i_size_write(inode, cifsi->server_eof);
2483         spin_unlock(&inode->i_lock);
2484
2485         complete(&wdata->done);
2486         collect_uncached_write_data(wdata->ctx);
2487         /* the call below can free the last reference to the aio ctx */
2488         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2489 }
2490
2491 static int
2492 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2493                       size_t *len, unsigned long *num_pages)
2494 {
2495         size_t save_len, copied, bytes, cur_len = *len;
2496         unsigned long i, nr_pages = *num_pages;
2497
2498         save_len = cur_len;
2499         for (i = 0; i < nr_pages; i++) {
2500                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2501                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2502                 cur_len -= copied;
2503                 /*
2504                  * If we didn't copy as much as we expected, then that
2505                  * may mean we trod into an unmapped area. Stop copying
2506                  * at that point. On the next pass through the big
2507                  * loop, we'll likely end up getting a zero-length
2508                  * write and bailing out of it.
2509                  */
2510                 if (copied < bytes)
2511                         break;
2512         }
2513         cur_len = save_len - cur_len;
2514         *len = cur_len;
2515
2516         /*
2517          * If we have no data to send, then that probably means that
2518          * the copy above failed altogether. That's most likely because
2519          * the address in the iovec was bogus. Return -EFAULT and let
2520          * the caller free anything we allocated and bail out.
2521          */
2522         if (!cur_len)
2523                 return -EFAULT;
2524
2525         /*
2526          * i + 1 now represents the number of pages we actually used in
2527          * the copy phase above.
2528          */
2529         *num_pages = i + 1;
2530         return 0;
2531 }
2532
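/*
 * Chop an uncached write into wsize-bounded chunks: for each chunk,
 * reserve credits, allocate a wdata with freshly allocated pages, copy
 * the user data in, and issue an async write.  On -EAGAIN the iterator
 * is rewound to the failed offset and the chunk is resent; each
 * successful wdata is queued on @wdata_list for later collection.
 */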
2533 static int
2534 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
2535                      struct cifsFileInfo *open_file,
2536                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
2537                      struct cifs_aio_ctx *ctx)
2538 {
2539         int rc = 0;
2540         size_t cur_len;
2541         unsigned long nr_pages, num_pages, i;
2542         struct cifs_writedata *wdata;
2543         struct iov_iter saved_from = *from;
2544         loff_t saved_offset = offset;
2545         pid_t pid;
2546         struct TCP_Server_Info *server;
2547
2548         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2549                 pid = open_file->pid;
2550         else
2551                 pid = current->tgid;
2552
2553         server = tlink_tcon(open_file->tlink)->ses->server;
2554
2555         do {
2556                 unsigned int wsize, credits;
2557
2558                 rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
2559                                                    &wsize, &credits);
2560                 if (rc)
2561                         break;
2562
2563                 nr_pages = get_numpages(wsize, len, &cur_len);
2564                 wdata = cifs_writedata_alloc(nr_pages,
2565                                              cifs_uncached_writev_complete);
2566                 if (!wdata) {
2567                         rc = -ENOMEM;
2568                         add_credits_and_wake_if(server, credits, 0);
2569                         break;
2570                 }
2571
2572                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2573                 if (rc) {
2574                         kfree(wdata);
2575                         add_credits_and_wake_if(server, credits, 0);
2576                         break;
2577                 }
2578
2579                 num_pages = nr_pages;
2580                 rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
2581                 if (rc) {
2582                         for (i = 0; i < nr_pages; i++)
2583                                 put_page(wdata->pages[i]);
2584                         kfree(wdata);
2585                         add_credits_and_wake_if(server, credits, 0);
2586                         break;
2587                 }
2588
2589                 /*
2590                  * Bring nr_pages down to the number of pages we actually used,
2591                  * and free any pages that we didn't use.
2592                  */
2593                 for ( ; nr_pages > num_pages; nr_pages--)
2594                         put_page(wdata->pages[nr_pages - 1]);
2595
2596                 wdata->sync_mode = WB_SYNC_ALL;
2597                 wdata->nr_pages = nr_pages;
2598                 wdata->offset = (__u64)offset;
2599                 wdata->cfile = cifsFileInfo_get(open_file);
2600                 wdata->pid = pid;
2601                 wdata->bytes = cur_len;
2602                 wdata->pagesz = PAGE_SIZE;
2603                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2604                 wdata->credits = credits;
2605                 wdata->ctx = ctx;
2606                 kref_get(&ctx->refcount);
2607
2608                 if (!wdata->cfile->invalidHandle ||
2609                     !(rc = cifs_reopen_file(wdata->cfile, false)))
2610                         rc = server->ops->async_writev(wdata,
2611                                         cifs_uncached_writedata_release);
2612                 if (rc) {
2613                         add_credits_and_wake_if(server, wdata->credits, 0);
2614                         kref_put(&wdata->refcount,
2615                                  cifs_uncached_writedata_release);
2616                         if (rc == -EAGAIN) {
2617                                 *from = saved_from;
2618                                 iov_iter_advance(from, offset - saved_offset);
2619                                 continue;
2620                         }
2621                         break;
2622                 }
2623
2624                 list_add_tail(&wdata->list, wdata_list);
2625                 offset += cur_len;
2626                 len -= cur_len;
2627         } while (len > 0);
2628
2629         return rc;
2630 }
2631
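/*
 * Reap completed uncached writes for @ctx in offset order, accumulating
 * the byte count, resending any wdata that failed with -EAGAIN, and
 * stopping at the first hard error.  When everything is accounted for,
 * the result is reported via ki_complete for async callers or by
 * completing ctx->done for synchronous ones.
 */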
2632 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
2633 {
2634         struct cifs_writedata *wdata, *tmp;
2635         struct cifs_tcon *tcon;
2636         struct cifs_sb_info *cifs_sb;
2637         struct dentry *dentry = ctx->cfile->dentry;
2638         unsigned int i;
2639         int rc;
2640
2641         tcon = tlink_tcon(ctx->cfile->tlink);
2642         cifs_sb = CIFS_SB(dentry->d_sb);
2643
2644         mutex_lock(&ctx->aio_mutex);
2645
2646         if (list_empty(&ctx->list)) {
2647                 mutex_unlock(&ctx->aio_mutex);
2648                 return;
2649         }
2650
2651         rc = ctx->rc;
2652         /*
2653          * Wait for and collect replies for any successful sends in order of
2654          * increasing offset. Once an error is hit, then return without waiting
2655          * for any more replies.
2656          */
2657 restart_loop:
2658         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
2659                 if (!rc) {
2660                         if (!try_wait_for_completion(&wdata->done)) {
2661                                 mutex_unlock(&ctx->aio_mutex);
2662                                 return;
2663                         }
2664
2665                         if (wdata->result)
2666                                 rc = wdata->result;
2667                         else
2668                                 ctx->total_len += wdata->bytes;
2669
2670                         /* resend call if it's a retryable error */
2671                         if (rc == -EAGAIN) {
2672                                 struct list_head tmp_list;
2673                                 struct iov_iter tmp_from = ctx->iter;
2674
2675                                 INIT_LIST_HEAD(&tmp_list);
2676                                 list_del_init(&wdata->list);
2677
2678                                 iov_iter_advance(&tmp_from,
2679                                                  wdata->offset - ctx->pos);
2680
2681                                 rc = cifs_write_from_iter(wdata->offset,
2682                                                 wdata->bytes, &tmp_from,
2683                                                 ctx->cfile, cifs_sb, &tmp_list,
2684                                                 ctx);
2685
2686                                 list_splice(&tmp_list, &ctx->list);
2687
2688                                 kref_put(&wdata->refcount,
2689                                          cifs_uncached_writedata_release);
2690                                 goto restart_loop;
2691                         }
2692                 }
2693                 list_del_init(&wdata->list);
2694                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
2695         }
2696
2697         for (i = 0; i < ctx->npages; i++)
2698                 put_page(ctx->bv[i].bv_page);
2699
2700         cifs_stats_bytes_written(tcon, ctx->total_len);
2701         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
2702
2703         ctx->rc = (rc == 0) ? ctx->total_len : rc;
2704
2705         mutex_unlock(&ctx->aio_mutex);
2706
2707         if (ctx->iocb && ctx->iocb->ki_complete)
2708                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
2709         else
2710                 complete(&ctx->done);
2711 }
2712
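/*
 * Uncached write path: the iovec is captured into an aio context, split
 * into async writes by cifs_write_from_iter, and for synchronous callers
 * the function waits for all of them to complete before advancing
 * ki_pos.
 */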
2713 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
2714 {
2715         struct file *file = iocb->ki_filp;
2716         ssize_t total_written = 0;
2717         struct cifsFileInfo *cfile;
2718         struct cifs_tcon *tcon;
2719         struct cifs_sb_info *cifs_sb;
2720         struct cifs_aio_ctx *ctx;
2721         struct iov_iter saved_from = *from;
2722         int rc;
2723
2724         /*
2725          * BB - optimize this path for the case when signing is disabled: we
2726          * could drop the extra memory-to-memory copy and construct the write
2727          * request directly from the iovec buffers.
2728          */
2729
2730         rc = generic_write_checks(iocb, from);
2731         if (rc <= 0)
2732                 return rc;
2733
2734         cifs_sb = CIFS_FILE_SB(file);
2735         cfile = file->private_data;
2736         tcon = tlink_tcon(cfile->tlink);
2737
2738         if (!tcon->ses->server->ops->async_writev)
2739                 return -ENOSYS;
2740
2741         ctx = cifs_aio_ctx_alloc();
2742         if (!ctx)
2743                 return -ENOMEM;
2744
2745         ctx->cfile = cifsFileInfo_get(cfile);
2746
2747         if (!is_sync_kiocb(iocb))
2748                 ctx->iocb = iocb;
2749
2750         ctx->pos = iocb->ki_pos;
2751
2752         rc = setup_aio_ctx_iter(ctx, from, WRITE);
2753         if (rc) {
2754                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2755                 return rc;
2756         }
2757
2758         /* grab the mutex here because write response handlers can access ctx */
2759         mutex_lock(&ctx->aio_mutex);
2760
2761         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
2762                                   cfile, cifs_sb, &ctx->list, ctx);
2763
2764         /*
2765          * If at least one write was successfully sent, then discard any rc
2766          * value from the later writes. If those writes succeed, then
2767          * we'll end up returning whatever was written. If it fails, then
2768          * we'll get a new rc value from that.
2769          */
2770         if (!list_empty(&ctx->list))
2771                 rc = 0;
2772
2773         mutex_unlock(&ctx->aio_mutex);
2774
2775         if (rc) {
2776                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2777                 return rc;
2778         }
2779
2780         if (!is_sync_kiocb(iocb)) {
2781                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
2782                 return -EIOCBQUEUED;
2783         }
2784
2785         rc = wait_for_completion_killable(&ctx->done);
2786         if (rc) {
2787                 mutex_lock(&ctx->aio_mutex);
2788                 ctx->rc = rc = -EINTR;
2789                 total_written = ctx->total_len;
2790                 mutex_unlock(&ctx->aio_mutex);
2791         } else {
2792                 rc = ctx->rc;
2793                 total_written = ctx->total_len;
2794         }
2795
2796         kref_put(&ctx->refcount, cifs_aio_ctx_release);
2797
2798         if (unlikely(!total_written))
2799                 return rc;
2800
2801         iocb->ki_pos += total_written;
2802         return total_written;
2803 }
2804
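/*
 * Cached write path used when we hold a write oplock: take lock_sem to
 * keep the brlock list stable, refuse the write if it conflicts with a
 * mandatory lock, and otherwise go through the generic page cache write.
 */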
2805 static ssize_t
2806 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
2807 {
2808         struct file *file = iocb->ki_filp;
2809         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2810         struct inode *inode = file->f_mapping->host;
2811         struct cifsInodeInfo *cinode = CIFS_I(inode);
2812         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2813         ssize_t rc;
2814
2815         inode_lock(inode);
2816         /*
2817          * We need to hold the sem to be sure nobody modifies lock list
2818          * with a brlock that prevents writing.
2819          */
2820         down_read(&cinode->lock_sem);
2821
2822         rc = generic_write_checks(iocb, from);
2823         if (rc <= 0)
2824                 goto out;
2825
2826         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
2827                                      server->vals->exclusive_lock_type, NULL,
2828                                      CIFS_WRITE_OP))
2829                 rc = __generic_file_write_iter(iocb, from);
2830         else
2831                 rc = -EACCES;
2832 out:
2833         up_read(&cinode->lock_sem);
2834         inode_unlock(inode);
2835
2836         if (rc > 0)
2837                 rc = generic_write_sync(iocb, rc);
2838         return rc;
2839 }
2840
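/*
 * Strict cache mode write entry point: with a write oplock the data can go
 * through the page cache; otherwise write through to the server and, if we
 * still cache reads, zap the mapping and drop the oplock so that cached
 * pages cannot go stale behind the server's back.
 */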
2841 ssize_t
2842 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
2843 {
2844         struct inode *inode = file_inode(iocb->ki_filp);
2845         struct cifsInodeInfo *cinode = CIFS_I(inode);
2846         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2847         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2848                                                 iocb->ki_filp->private_data;
2849         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2850         ssize_t written;
2851
2852         written = cifs_get_writer(cinode);
2853         if (written)
2854                 return written;
2855
2856         if (CIFS_CACHE_WRITE(cinode)) {
2857                 if (cap_unix(tcon->ses) &&
2858                     (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2859                     ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
2860                         written = generic_file_write_iter(iocb, from);
2861                         goto out;
2862                 }
2863                 written = cifs_writev(iocb, from);
2864                 goto out;
2865         }
2866         /*
2867          * For non-oplocked files in strict cache mode we need to write the
2868          * data to the server exactly from pos to pos+len-1 rather than flush
2869          * all affected pages, because flushing may cause an error with
2870          * mandatory locks on these pages but not on the region pos to pos+len-1.
2871          */
2872         written = cifs_user_writev(iocb, from);
2873         if (written > 0 && CIFS_CACHE_READ(cinode)) {
2874                 /*
2875                  * A Windows 7 server can delay breaking a level2 oplock when
2876                  * a write request comes in - break it on the client to
2877                  * prevent reading stale data.
2878                  */
2879                 cifs_zap_mapping(inode);
2880                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2881                          inode);
2882                 cinode->oplock = 0;
2883         }
2884 out:
2885         cifs_put_writer(cinode);
2886         return written;
2887 }
2888
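/*
 * Allocate a readdata structure with room for nr_pages page pointers and
 * arrange for @complete to run when the read finishes.
 */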
2889 static struct cifs_readdata *
2890 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2891 {
2892         struct cifs_readdata *rdata;
2893
2894         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2895                         GFP_KERNEL);
2896         if (rdata != NULL) {
2897                 kref_init(&rdata->refcount);
2898                 INIT_LIST_HEAD(&rdata->list);
2899                 init_completion(&rdata->done);
2900                 INIT_WORK(&rdata->work, complete);
2901         }
2902
2903         return rdata;
2904 }
2905
2906 void
2907 cifs_readdata_release(struct kref *refcount)
2908 {
2909         struct cifs_readdata *rdata = container_of(refcount,
2910                                         struct cifs_readdata, refcount);
2911
2912         if (rdata->cfile)
2913                 cifsFileInfo_put(rdata->cfile);
2914
2915         kfree(rdata);
2916 }
2917
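/*
 * Populate rdata->pages with nr_pages freshly allocated pages, releasing
 * any pages already allocated if an allocation fails.
 */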
2918 static int
2919 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2920 {
2921         int rc = 0;
2922         struct page *page;
2923         unsigned int i;
2924
2925         for (i = 0; i < nr_pages; i++) {
2926                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2927                 if (!page) {
2928                         rc = -ENOMEM;
2929                         break;
2930                 }
2931                 rdata->pages[i] = page;
2932         }
2933
2934         if (rc) {
2935                 while (i > 0) {
2936                         put_page(rdata->pages[--i]);
2937                         rdata->pages[i] = NULL;
2938                 }
2939         }
2940         return rc;
2941 }
2942
2943 static void
2944 cifs_uncached_readdata_release(struct kref *refcount)
2945 {
2946         struct cifs_readdata *rdata = container_of(refcount,
2947                                         struct cifs_readdata, refcount);
2948         unsigned int i;
2949
2950         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
2951         for (i = 0; i < rdata->nr_pages; i++) {
2952                 put_page(rdata->pages[i]);
2953                 rdata->pages[i] = NULL;
2954         }
2955         cifs_readdata_release(refcount);
2956 }
2957
2958 /**
2959  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2960  * @rdata:      the readdata response with list of pages holding data
2961  * @iter:       destination for our data
2962  *
2963  * This function copies data from a list of pages in a readdata response into
2964  * an array of iovecs. It will first calculate where the data should go
2965  * based on the info in the readdata and then copy the data into that spot.
2966  */
2967 static int
2968 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
2969 {
2970         size_t remaining = rdata->got_bytes;
2971         unsigned int i;
2972
2973         for (i = 0; i < rdata->nr_pages; i++) {
2974                 struct page *page = rdata->pages[i];
2975                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
2976                 size_t written;
2977
2978                 if (unlikely(iter->type & ITER_PIPE)) {
2979                         void *addr = kmap_atomic(page);
2980
2981                         written = copy_to_iter(addr, copy, iter);
2982                         kunmap_atomic(addr);
2983                 } else
2984                         written = copy_page_to_iter(page, 0, copy, iter);
2985                 remaining -= written;
2986                 if (written < copy && iov_iter_count(iter) > 0)
2987                         break;
2988         }
2989         return remaining ? -EFAULT : 0;
2990 }
2991
2992 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
2993
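/*
 * Work item run when an uncached read completes: signal any waiter and
 * kick the collection routine; the final kref_put may free the aio ctx.
 */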
2994 static void
2995 cifs_uncached_readv_complete(struct work_struct *work)
2996 {
2997         struct cifs_readdata *rdata = container_of(work,
2998                                                 struct cifs_readdata, work);
2999
3000         complete(&rdata->done);
3001         collect_uncached_read_data(rdata->ctx);
3002         /* the call below can possibly free the last ref to the aio ctx */
3003         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3004 }
3005
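/*
 * Fill rdata->pages with up to @len bytes, copied from @iter when one is
 * supplied or read directly from the socket otherwise. A partial tail
 * page is zero-filled and pages past the end of the data are released.
 */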
3006 static int
3007 uncached_fill_pages(struct TCP_Server_Info *server,
3008                     struct cifs_readdata *rdata, struct iov_iter *iter,
3009                     unsigned int len)
3010 {
3011         int result = 0;
3012         unsigned int i;
3013         unsigned int nr_pages = rdata->nr_pages;
3014
3015         rdata->got_bytes = 0;
3016         rdata->tailsz = PAGE_SIZE;
3017         for (i = 0; i < nr_pages; i++) {
3018                 struct page *page = rdata->pages[i];
3019                 size_t n;
3020
3021                 if (len == 0) {
3022                         /* no need to hold page hostage */
3023                         rdata->pages[i] = NULL;
3024                         rdata->nr_pages--;
3025                         put_page(page);
3026                         continue;
3027                 }
3028                 n = len;
3029                 if (len >= PAGE_SIZE) {
3030                         /* enough data to fill the page */
3031                         n = PAGE_SIZE;
3032                         len -= n;
3033                 } else {
3034                         zero_user(page, len, PAGE_SIZE - len);
3035                         rdata->tailsz = len;
3036                         len = 0;
3037                 }
3038                 if (iter)
3039                         result = copy_page_from_iter(page, 0, n, iter);
3040                 else
3041                         result = cifs_read_page_from_socket(server, page, n);
3042                 if (result < 0)
3043                         break;
3044
3045                 rdata->got_bytes += result;
3046         }
3047
3048         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3049                                                 rdata->got_bytes : result;
3050 }
3051
3052 static int
3053 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3054                               struct cifs_readdata *rdata, unsigned int len)
3055 {
3056         return uncached_fill_pages(server, rdata, NULL, len);
3057 }
3058
3059 static int
3060 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3061                               struct cifs_readdata *rdata,
3062                               struct iov_iter *iter)
3063 {
3064         return uncached_fill_pages(server, rdata, iter, iter->count);
3065 }
3066
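/*
 * Split [offset, offset + len) into rsize-bounded chunks, allocate a
 * readdata for each chunk and issue the async reads, queueing every
 * readdata on @rdata_list for later collection.
 */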
3067 static int
3068 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3069                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3070                      struct cifs_aio_ctx *ctx)
3071 {
3072         struct cifs_readdata *rdata;
3073         unsigned int npages, rsize, credits;
3074         size_t cur_len;
3075         int rc;
3076         pid_t pid;
3077         struct TCP_Server_Info *server;
3078
3079         server = tlink_tcon(open_file->tlink)->ses->server;
3080
3081         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3082                 pid = open_file->pid;
3083         else
3084                 pid = current->tgid;
3085
3086         do {
3087                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3088                                                    &rsize, &credits);
3089                 if (rc)
3090                         break;
3091
3092                 cur_len = min_t(const size_t, len, rsize);
3093                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
3094
3095                 /* allocate a readdata struct */
3096                 rdata = cifs_readdata_alloc(npages,
3097                                             cifs_uncached_readv_complete);
3098                 if (!rdata) {
3099                         add_credits_and_wake_if(server, credits, 0);
3100                         rc = -ENOMEM;
3101                         break;
3102                 }
3103
3104                 rdata->credits = credits; /* error path returns these */
3105                 rc = cifs_read_allocate_pages(rdata, npages);
3106                 if (rc)
3107                         goto error;
3108
3109                 rdata->cfile = cifsFileInfo_get(open_file);
3110                 rdata->nr_pages = npages;
3111                 rdata->offset = offset;
3112                 rdata->bytes = cur_len;
3113                 rdata->pid = pid;
3114                 rdata->pagesz = PAGE_SIZE;
3115                 rdata->read_into_pages = cifs_uncached_read_into_pages;
3116                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
3117                 rdata->ctx = ctx;
3118                 kref_get(&ctx->refcount);
3119
3120                 if (!rdata->cfile->invalidHandle ||
3121                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3122                         rc = server->ops->async_readv(rdata);
3123 error:
3124                 if (rc) {
3125                         add_credits_and_wake_if(server, rdata->credits, 0);
3126                         kref_put(&rdata->refcount,
3127                                  cifs_uncached_readdata_release);
3128                         if (rc == -EAGAIN)
3129                                 continue;
3130                         break;
3131                 }
3132
3133                 list_add_tail(&rdata->list, rdata_list);
3134                 offset += cur_len;
3135                 len -= cur_len;
3136         } while (len > 0);
3137
3138         return rc;
3139 }
3140
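/*
 * Walk the aio context's readdata list in order of increasing offsets,
 * copying completed reads into the destination iterator and resending
 * chunks that failed with -EAGAIN, then complete the iocb (or wake the
 * synchronous waiter) once all requests have been consumed.
 */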
3141 static void
3142 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
3143 {
3144         struct cifs_readdata *rdata, *tmp;
3145         struct iov_iter *to = &ctx->iter;
3146         struct cifs_sb_info *cifs_sb;
3147         struct cifs_tcon *tcon;
3148         unsigned int i;
3149         int rc;
3150
3151         tcon = tlink_tcon(ctx->cfile->tlink);
3152         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
3153
3154         mutex_lock(&ctx->aio_mutex);
3155
3156         if (list_empty(&ctx->list)) {
3157                 mutex_unlock(&ctx->aio_mutex);
3158                 return;
3159         }
3160
3161         rc = ctx->rc;
3162         /* the loop below should proceed in the order of increasing offsets */
3163 again:
3164         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
3165                 if (!rc) {
3166                         if (!try_wait_for_completion(&rdata->done)) {
3167                                 mutex_unlock(&ctx->aio_mutex);
3168                                 return;
3169                         }
3170
3171                         if (rdata->result == -EAGAIN) {
3172                                 /* resend call if it's a retryable error */
3173                                 struct list_head tmp_list;
3174                                 unsigned int got_bytes = rdata->got_bytes;
3175
3176                                 list_del_init(&rdata->list);
3177                                 INIT_LIST_HEAD(&tmp_list);
3178
3179                                 /*
3180                                  * Got a part of data and then reconnect has
3181                                  * happened -- fill the buffer and continue
3182                                  * reading.
3183                                  */
3184                                 if (got_bytes && got_bytes < rdata->bytes) {
3185                                         rc = cifs_readdata_to_iov(rdata, to);
3186                                         if (rc) {
3187                                                 kref_put(&rdata->refcount,
3188                                                 cifs_uncached_readdata_release);
3189                                                 continue;
3190                                         }
3191                                 }
3192
3193                                 rc = cifs_send_async_read(
3194                                                 rdata->offset + got_bytes,
3195                                                 rdata->bytes - got_bytes,
3196                                                 rdata->cfile, cifs_sb,
3197                                                 &tmp_list, ctx);
3198
3199                                 list_splice(&tmp_list, &ctx->list);
3200
3201                                 kref_put(&rdata->refcount,
3202                                          cifs_uncached_readdata_release);
3203                                 goto again;
3204                         } else if (rdata->result)
3205                                 rc = rdata->result;
3206                         else
3207                                 rc = cifs_readdata_to_iov(rdata, to);
3208
3209                         /* if there was a short read -- discard anything left */
3210                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
3211                                 rc = -ENODATA;
3212                 }
3213                 list_del_init(&rdata->list);
3214                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3215         }
3216
3217         for (i = 0; i < ctx->npages; i++) {
3218                 if (ctx->should_dirty)
3219                         set_page_dirty(ctx->bv[i].bv_page);
3220                 put_page(ctx->bv[i].bv_page);
3221         }
3222
3223         ctx->total_len = ctx->len - iov_iter_count(to);
3224
3225         cifs_stats_bytes_read(tcon, ctx->total_len);
3226
3227         /* mask nodata case */
3228         if (rc == -ENODATA)
3229                 rc = 0;
3230
3231         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3232
3233         mutex_unlock(&ctx->aio_mutex);
3234
3235         if (ctx->iocb && ctx->iocb->ki_complete)
3236                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
3237         else
3238                 complete(&ctx->done);
3239 }
3240
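/*
 * Uncached read entry point: set up an aio context for the request and
 * issue the async reads, then either return -EIOCBQUEUED (async iocb) or
 * wait for collection to finish and return the number of bytes read.
 */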
3241 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
3242 {
3243         struct file *file = iocb->ki_filp;
3244         ssize_t rc;
3245         size_t len;
3246         ssize_t total_read = 0;
3247         loff_t offset = iocb->ki_pos;
3248         struct cifs_sb_info *cifs_sb;
3249         struct cifs_tcon *tcon;
3250         struct cifsFileInfo *cfile;
3251         struct cifs_aio_ctx *ctx;
3252
3253         len = iov_iter_count(to);
3254         if (!len)
3255                 return 0;
3256
3257         cifs_sb = CIFS_FILE_SB(file);
3258         cfile = file->private_data;
3259         tcon = tlink_tcon(cfile->tlink);
3260
3261         if (!tcon->ses->server->ops->async_readv)
3262                 return -ENOSYS;
3263
3264         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3265                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3266
3267         ctx = cifs_aio_ctx_alloc();
3268         if (!ctx)
3269                 return -ENOMEM;
3270
3271         ctx->cfile = cifsFileInfo_get(cfile);
3272
3273         if (!is_sync_kiocb(iocb))
3274                 ctx->iocb = iocb;
3275
3276         if (to->type == ITER_IOVEC)
3277                 ctx->should_dirty = true;
3278
3279         rc = setup_aio_ctx_iter(ctx, to, READ);
3280         if (rc) {
3281                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3282                 return rc;
3283         }
3284
3285         len = ctx->len;
3286
3287         /* grab a lock here because read response handlers can access ctx */
3288         mutex_lock(&ctx->aio_mutex);
3289
3290         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
3291
3292         /* if at least one read request was successfully sent, then reset rc */
3293         if (!list_empty(&ctx->list))
3294                 rc = 0;
3295
3296         mutex_unlock(&ctx->aio_mutex);
3297
3298         if (rc) {
3299                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3300                 return rc;
3301         }
3302
3303         if (!is_sync_kiocb(iocb)) {
3304                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3305                 return -EIOCBQUEUED;
3306         }
3307
3308         rc = wait_for_completion_killable(&ctx->done);
3309         if (rc) {
3310                 mutex_lock(&ctx->aio_mutex);
3311                 ctx->rc = rc = -EINTR;
3312                 total_read = ctx->total_len;
3313                 mutex_unlock(&ctx->aio_mutex);
3314         } else {
3315                 rc = ctx->rc;
3316                 total_read = ctx->total_len;
3317         }
3318
3319         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3320
3321         if (total_read) {
3322                 iocb->ki_pos += total_read;
3323                 return total_read;
3324         }
3325         return rc;
3326 }
3327
3328 ssize_t
3329 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
3330 {
3331         struct inode *inode = file_inode(iocb->ki_filp);
3332         struct cifsInodeInfo *cinode = CIFS_I(inode);
3333         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3334         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3335                                                 iocb->ki_filp->private_data;
3336         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3337         int rc = -EACCES;
3338
3339         /*
3340          * In strict cache mode we always need to read from the server if we
3341          * don't have a level II oplock, because the server can delay the
3342          * mtime change and so we can't decide whether to invalidate the
3343          * inode. Reading through the page cache can also fail if there are
3344          * mandatory locks on pages affected by this read but not on the
3345          * region from pos to pos+len-1.
3346          */
3347         if (!CIFS_CACHE_READ(cinode))
3348                 return cifs_user_readv(iocb, to);
3349
3350         if (cap_unix(tcon->ses) &&
3351             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
3352             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
3353                 return generic_file_read_iter(iocb, to);
3354
3355         /*
3356          * We need to hold the sem to be sure nobody modifies the lock list
3357          * with a brlock that prevents reading.
3358          */
3359         down_read(&cinode->lock_sem);
3360         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
3361                                      tcon->ses->server->vals->shared_lock_type,
3362                                      NULL, CIFS_READ_OP))
3363                 rc = generic_file_read_iter(iocb, to);
3364         up_read(&cinode->lock_sem);
3365         return rc;
3366 }
3367
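/*
 * Synchronous read helper used by the readpage path: pull up to read_size
 * bytes at *offset from the server in rsize-bounded chunks, retrying each
 * chunk on -EAGAIN.
 */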
3368 static ssize_t
3369 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
3370 {
3371         int rc = -EACCES;
3372         unsigned int bytes_read = 0;
3373         unsigned int total_read;
3374         unsigned int current_read_size;
3375         unsigned int rsize;
3376         struct cifs_sb_info *cifs_sb;
3377         struct cifs_tcon *tcon;
3378         struct TCP_Server_Info *server;
3379         unsigned int xid;
3380         char *cur_offset;
3381         struct cifsFileInfo *open_file;
3382         struct cifs_io_parms io_parms;
3383         int buf_type = CIFS_NO_BUFFER;
3384         __u32 pid;
3385
3386         xid = get_xid();
3387         cifs_sb = CIFS_FILE_SB(file);
3388
3389         /* FIXME: set up handlers for larger reads and/or convert to async */
3390         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
3391
3392         if (file->private_data == NULL) {
3393                 rc = -EBADF;
3394                 free_xid(xid);
3395                 return rc;
3396         }
3397         open_file = file->private_data;
3398         tcon = tlink_tcon(open_file->tlink);
3399         server = tcon->ses->server;
3400
3401         if (!server->ops->sync_read) {
3402                 free_xid(xid);
3403                 return -ENOSYS;
3404         }
3405
3406         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3407                 pid = open_file->pid;
3408         else
3409                 pid = current->tgid;
3410
3411         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3412                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3413
3414         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3415              total_read += bytes_read, cur_offset += bytes_read) {
3416                 do {
3417                         current_read_size = min_t(uint, read_size - total_read,
3418                                                   rsize);
3419                         /*
3420                          * For Windows ME and 9x we do not want to request
3421                          * more than the server negotiated, since it will
3422                          * refuse the read otherwise.
3423                          */
3424                         if ((tcon->ses) && !(tcon->ses->capabilities &
3425                                 tcon->ses->server->vals->cap_large_files)) {
3426                                 current_read_size = min_t(uint,
3427                                         current_read_size, CIFSMaxBufSize);
3428                         }
3429                         if (open_file->invalidHandle) {
3430                                 rc = cifs_reopen_file(open_file, true);
3431                                 if (rc != 0)
3432                                         break;
3433                         }
3434                         io_parms.pid = pid;
3435                         io_parms.tcon = tcon;
3436                         io_parms.offset = *offset;
3437                         io_parms.length = current_read_size;
3438                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
3439                                                     &bytes_read, &cur_offset,
3440                                                     &buf_type);
3441                 } while (rc == -EAGAIN);
3442
3443                 if (rc || (bytes_read == 0)) {
3444                         if (total_read) {
3445                                 break;
3446                         } else {
3447                                 free_xid(xid);
3448                                 return rc;
3449                         }
3450                 } else {
3451                         cifs_stats_bytes_read(tcon, total_read);
3452                         *offset += bytes_read;
3453                 }
3454         }
3455         free_xid(xid);
3456         return total_read;
3457 }
3458
3459 /*
3460  * If the page is mmap'ed into a process' page tables, then we need to make
3461  * sure that it doesn't change while being written back.
3462  */
3463 static int
3464 cifs_page_mkwrite(struct vm_fault *vmf)
3465 {
3466         struct page *page = vmf->page;
3467
3468         lock_page(page);
3469         return VM_FAULT_LOCKED;
3470 }
3471
3472 static const struct vm_operations_struct cifs_file_vm_ops = {
3473         .fault = filemap_fault,
3474         .map_pages = filemap_map_pages,
3475         .page_mkwrite = cifs_page_mkwrite,
3476 };
3477
3478 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3479 {
3480         int rc, xid;
3481         struct inode *inode = file_inode(file);
3482
3483         xid = get_xid();
3484
3485         rc = 0;
3486         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3487                 rc = cifs_zap_mapping(inode);
3488
3489         /* fall through so that free_xid() runs on the error path too */
3490         if (!rc)
3491                 rc = generic_file_mmap(file, vma);
3492         if (rc == 0)
3493                 vma->vm_ops = &cifs_file_vm_ops;
3494         free_xid(xid);
3495         return rc;
3496 }
3497
3498 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3499 {
3500         int rc, xid;
3501
3502         xid = get_xid();
3503         rc = cifs_revalidate_file(file);
3504         if (rc) {
3505                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3506                          rc);
3507                 free_xid(xid);
3508                 return rc;
3509         }
3510         rc = generic_file_mmap(file, vma);
3511         if (rc == 0)
3512                 vma->vm_ops = &cifs_file_vm_ops;
3513         free_xid(xid);
3514         return rc;
3515 }
3516
3517 static void
3518 cifs_readv_complete(struct work_struct *work)
3519 {
3520         unsigned int i, got_bytes;
3521         struct cifs_readdata *rdata = container_of(work,
3522                                                 struct cifs_readdata, work);
3523
3524         got_bytes = rdata->got_bytes;
3525         for (i = 0; i < rdata->nr_pages; i++) {
3526                 struct page *page = rdata->pages[i];
3527
3528                 lru_cache_add_file(page);
3529
3530                 if (rdata->result == 0 ||
3531                     (rdata->result == -EAGAIN && got_bytes)) {
3532                         flush_dcache_page(page);
3533                         SetPageUptodate(page);
3534                 }
3535
3536                 unlock_page(page);
3537
3538                 if (rdata->result == 0 ||
3539                     (rdata->result == -EAGAIN && got_bytes))
3540                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3541
3542                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
3543
3544                 put_page(page);
3545                 rdata->pages[i] = NULL;
3546         }
3547         kref_put(&rdata->refcount, cifs_readdata_release);
3548 }
3549
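/*
 * Readpages variant of the page-fill helper: pages beyond the requested
 * data are zeroed and marked uptodate if they lie past the server's EOF,
 * or dropped from the request otherwise; the remaining pages are filled
 * from @iter or straight from the socket.
 */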
3550 static int
3551 readpages_fill_pages(struct TCP_Server_Info *server,
3552                      struct cifs_readdata *rdata, struct iov_iter *iter,
3553                      unsigned int len)
3554 {
3555         int result = 0;
3556         unsigned int i;
3557         u64 eof;
3558         pgoff_t eof_index;
3559         unsigned int nr_pages = rdata->nr_pages;
3560
3561         /* determine the eof that the server (probably) has */
3562         eof = CIFS_I(rdata->mapping->host)->server_eof;
3563         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
3564         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3565
3566         rdata->got_bytes = 0;
3567         rdata->tailsz = PAGE_SIZE;
3568         for (i = 0; i < nr_pages; i++) {
3569                 struct page *page = rdata->pages[i];
3570                 size_t n = PAGE_SIZE;
3571
3572                 if (len >= PAGE_SIZE) {
3573                         len -= PAGE_SIZE;
3574                 } else if (len > 0) {
3575                         /* enough for a partial page, fill and zero the rest */
3576                         zero_user(page, len, PAGE_SIZE - len);
3577                         n = rdata->tailsz = len;
3578                         len = 0;
3579                 } else if (page->index > eof_index) {
3580                         /*
3581                          * The VFS will not try to do readahead past the
3582                          * i_size, but it's possible that we have outstanding
3583                          * writes with gaps in the middle and the i_size hasn't
3584                          * caught up yet. Populate those with zeroed out pages
3585                          * to prevent the VFS from repeatedly attempting to
3586                          * fill them until the writes are flushed.
3587                          */
3588                         zero_user(page, 0, PAGE_SIZE);
3589                         lru_cache_add_file(page);
3590                         flush_dcache_page(page);
3591                         SetPageUptodate(page);
3592                         unlock_page(page);
3593                         put_page(page);
3594                         rdata->pages[i] = NULL;
3595                         rdata->nr_pages--;
3596                         continue;
3597                 } else {
3598                         /* no need to hold page hostage */
3599                         lru_cache_add_file(page);
3600                         unlock_page(page);
3601                         put_page(page);
3602                         rdata->pages[i] = NULL;
3603                         rdata->nr_pages--;
3604                         continue;
3605                 }
3606
3607                 if (iter)
3608                         result = copy_page_from_iter(page, 0, n, iter);
3609                 else
3610                         result = cifs_read_page_from_socket(server, page, n);
3611                 if (result < 0)
3612                         break;
3613
3614                 rdata->got_bytes += result;
3615         }
3616
3617         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3618                                                 rdata->got_bytes : result;
3619 }
3620
3621 static int
3622 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3623                                struct cifs_readdata *rdata, unsigned int len)
3624 {
3625         return readpages_fill_pages(server, rdata, NULL, len);
3626 }
3627
3628 static int
3629 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
3630                                struct cifs_readdata *rdata,
3631                                struct iov_iter *iter)
3632 {
3633         return readpages_fill_pages(server, rdata, iter, iter->count);
3634 }
3635
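/*
 * Peel a run of index-contiguous pages (bounded by rsize) off the tail of
 * @page_list, add them to the page cache and move them to @tmplist,
 * returning the resulting read offset, byte count and page count.
 */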
3636 static int
3637 readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
3638                     unsigned int rsize, struct list_head *tmplist,
3639                     unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
3640 {
3641         struct page *page, *tpage;
3642         unsigned int expected_index;
3643         int rc;
3644         gfp_t gfp = readahead_gfp_mask(mapping);
3645
3646         INIT_LIST_HEAD(tmplist);
3647
3648         page = list_entry(page_list->prev, struct page, lru);
3649
3650         /*
3651          * Lock the page and put it in the cache. Since no one else
3652          * should have access to this page, we're safe to simply set
3653          * PG_locked without checking it first.
3654          */
3655         __SetPageLocked(page);
3656         rc = add_to_page_cache_locked(page, mapping,
3657                                       page->index, gfp);
3658
3659         /* give up if we can't stick it in the cache */
3660         if (rc) {
3661                 __ClearPageLocked(page);
3662                 return rc;
3663         }
3664
3665         /* move first page to the tmplist */
3666         *offset = (loff_t)page->index << PAGE_SHIFT;
3667         *bytes = PAGE_SIZE;
3668         *nr_pages = 1;
3669         list_move_tail(&page->lru, tmplist);
3670
3671         /* now try and add more pages onto the request */
3672         expected_index = page->index + 1;
3673         list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3674                 /* discontinuity? */
3675                 if (page->index != expected_index)
3676                         break;
3677
3678                 /* would this page push the read over the rsize? */
3679                 if (*bytes + PAGE_SIZE > rsize)
3680                         break;
3681
3682                 __SetPageLocked(page);
3683                 if (add_to_page_cache_locked(page, mapping, page->index, gfp)) {
3684                         __ClearPageLocked(page);
3685                         break;
3686                 }
3687                 list_move_tail(&page->lru, tmplist);
3688                 (*bytes) += PAGE_SIZE;
3689                 expected_index++;
3690                 (*nr_pages)++;
3691         }
3692         return rc;
3693 }
3694
3695 static int cifs_readpages(struct file *file, struct address_space *mapping,
3696         struct list_head *page_list, unsigned num_pages)
3697 {
3698         int rc;
3699         struct list_head tmplist;
3700         struct cifsFileInfo *open_file = file->private_data;
3701         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3702         struct TCP_Server_Info *server;
3703         pid_t pid;
3704
3705         /*
3706          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3707          * immediately if the cookie is negative
3708          *
3709          * After this point, every page in the list might have PG_fscache set,
3710          * so we will need to clean that up off of every page we don't use.
3711          */
3712         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3713                                          &num_pages);
3714         if (rc == 0)
3715                 return rc;
3716
3717         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3718                 pid = open_file->pid;
3719         else
3720                 pid = current->tgid;
3721
3722         rc = 0;
3723         server = tlink_tcon(open_file->tlink)->ses->server;
3724
3725         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3726                  __func__, file, mapping, num_pages);
3727
3728         /*
3729          * Start with the page at end of list and move it to private
3730          * list. Do the same with any following pages until we hit
3731          * the rsize limit, hit an index discontinuity, or run out of
3732          * pages. Issue the async read and then start the loop again
3733          * until the list is empty.
3734          *
3735          * Note that list order is important. The page_list is in
3736          * the order of declining indexes. When we put the pages in
3737          * the rdata->pages, then we want them in increasing order.
3738          */
3739         while (!list_empty(page_list)) {
3740                 unsigned int i, nr_pages, bytes, rsize;
3741                 loff_t offset;
3742                 struct page *page, *tpage;
3743                 struct cifs_readdata *rdata;
3744                 unsigned credits;
3745
3746                 rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
3747                                                    &rsize, &credits);
3748                 if (rc)
3749                         break;
3750
3751                 /*
3752                  * Give up immediately if rsize is too small to read an entire
3753                  * page. The VFS will fall back to readpage. We should never
3754                  * reach this point however since we set ra_pages to 0 when the
3755                  * rsize is smaller than a cache page.
3756                  */
3757                 if (unlikely(rsize < PAGE_SIZE)) {
3758                         add_credits_and_wake_if(server, credits, 0);
3759                         return 0;
3760                 }
3761
3762                 rc = readpages_get_pages(mapping, page_list, rsize, &tmplist,
3763                                          &nr_pages, &offset, &bytes);
3764                 if (rc) {
3765                         add_credits_and_wake_if(server, credits, 0);
3766                         break;
3767                 }
3768
3769                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3770                 if (!rdata) {
3771                         /* best to give up if we're out of mem */
3772                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3773                                 list_del(&page->lru);
3774                                 lru_cache_add_file(page);
3775                                 unlock_page(page);
3776                                 put_page(page);
3777                         }
3778                         rc = -ENOMEM;
3779                         add_credits_and_wake_if(server, credits, 0);
3780                         break;
3781                 }
3782
3783                 rdata->cfile = cifsFileInfo_get(open_file);
3784                 rdata->mapping = mapping;
3785                 rdata->offset = offset;
3786                 rdata->bytes = bytes;
3787                 rdata->pid = pid;
3788                 rdata->pagesz = PAGE_SIZE;
3789                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3790                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
3791                 rdata->credits = credits;
3792
3793                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3794                         list_del(&page->lru);
3795                         rdata->pages[rdata->nr_pages++] = page;
3796                 }
3797
3798                 if (!rdata->cfile->invalidHandle ||
3799                     !(rc = cifs_reopen_file(rdata->cfile, true)))
3800                         rc = server->ops->async_readv(rdata);
3801                 if (rc) {
3802                         add_credits_and_wake_if(server, rdata->credits, 0);
3803                         for (i = 0; i < rdata->nr_pages; i++) {
3804                                 page = rdata->pages[i];
3805                                 lru_cache_add_file(page);
3806                                 unlock_page(page);
3807                                 put_page(page);
3808                         }
3809                         /* Fallback to the readpage in error/reconnect cases */
3810                         kref_put(&rdata->refcount, cifs_readdata_release);
3811                         break;
3812                 }
3813
3814                 kref_put(&rdata->refcount, cifs_readdata_release);
3815         }
3816
3817         /* Any pages that have been shown to fscache but didn't get added to
3818          * the pagecache must be uncached before they get returned to the
3819          * allocator.
3820          */
3821         cifs_fscache_readpages_cancel(mapping->host, page_list);
3822         return rc;
3823 }
3824
3825 /*
3826  * cifs_readpage_worker must be called with the page pinned
3827  */
3828 static int cifs_readpage_worker(struct file *file, struct page *page,
3829         loff_t *poffset)
3830 {
3831         char *read_data;
3832         int rc;
3833
3834         /* Is the page cached? */
3835         rc = cifs_readpage_from_fscache(file_inode(file), page);
3836         if (rc == 0)
3837                 goto read_complete;
3838
3839         read_data = kmap(page);
3840         /* for reads over a certain size we could initiate async read-ahead */
3841
3842         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
3843
3844         if (rc < 0)
3845                 goto io_error;
3846         else
3847                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3848
3849         file_inode(file)->i_atime =
3850                 current_time(file_inode(file));
3851
3852         if (PAGE_SIZE > rc)
3853                 memset(read_data + rc, 0, PAGE_SIZE - rc);
3854
3855         flush_dcache_page(page);
3856         SetPageUptodate(page);
3857
3858         /* send this page to the cache */
3859         cifs_readpage_to_fscache(file_inode(file), page);
3860
3861         rc = 0;
3862
3863 io_error:
3864         kunmap(page);
3865         unlock_page(page);
3866
3867 read_complete:
3868         return rc;
3869 }
3870
3871 static int cifs_readpage(struct file *file, struct page *page)
3872 {
3873         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3874         int rc = -EACCES;
3875         unsigned int xid;
3876
3877         xid = get_xid();
3878
3879         if (file->private_data == NULL) {
3880                 rc = -EBADF;
3881                 free_xid(xid);
3882                 return rc;
3883         }
3884
3885         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3886                  page, (int)offset, (int)offset);
3887
3888         rc = cifs_readpage_worker(file, page, &offset);
3889
3890         free_xid(xid);
3891         return rc;
3892 }
3893
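/* Return 1 if any open file on this inode has write access. */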
3894 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3895 {
3896         struct cifsFileInfo *open_file;
3897         struct cifs_tcon *tcon =
3898                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3899
3900         spin_lock(&tcon->open_file_lock);
3901         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3902                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3903                         spin_unlock(&tcon->open_file_lock);
3904                         return 1;
3905                 }
3906         }
3907         spin_unlock(&tcon->open_file_lock);
3908         return 0;
3909 }
3910
3911 /* We do not want to update the file size from the server for inodes
3912    open for write, to avoid races with writepage extending the file.
3913    In the future we could consider allowing a refresh of the inode
3914    only on increases in the file size, but this is tricky to do
3915    without racing with writebehind page caching in the current
3916    Linux kernel design. */
3917 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3918 {
3919         if (!cifsInode)
3920                 return true;
3921
3922         if (is_inode_writable(cifsInode)) {
3923                 /* This inode is open for write at least once */
3924                 struct cifs_sb_info *cifs_sb;
3925
3926                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3927                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3928                         /* no page cache to corrupt on direct I/O, so
3929                         we can change the size safely */
3930                         return true;
3931                 }
3932
3933                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3934                         return true;
3935
3936                 return false;
3937         } else
3938                 return true;
3939 }
3940
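/*
 * Prepare a page cache page for a write: grab the page and, unless the
 * write covers the whole page or the needed contents are already cached
 * or about to be overwritten, read the existing data in first.
 */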
3941 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3942                         loff_t pos, unsigned len, unsigned flags,
3943                         struct page **pagep, void **fsdata)
3944 {
3945         int oncethru = 0;
3946         pgoff_t index = pos >> PAGE_SHIFT;
3947         loff_t offset = pos & (PAGE_SIZE - 1);
3948         loff_t page_start = pos & PAGE_MASK;
3949         loff_t i_size;
3950         struct page *page;
3951         int rc = 0;
3952
3953         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3954
3955 start:
3956         page = grab_cache_page_write_begin(mapping, index, flags);
3957         if (!page) {
3958                 rc = -ENOMEM;
3959                 goto out;
3960         }
3961
3962         if (PageUptodate(page))
3963                 goto out;
3964
3965         /*
3966          * If we write a full page it will be up to date, no need to read from
3967          * the server. If the write is short, we'll end up doing a sync write
3968          * instead.
3969          */
3970         if (len == PAGE_SIZE)
3971                 goto out;
3972
3973         /*
3974          * optimize away the read when we have an oplock, and we're not
3975          * expecting to use any of the data we'd be reading in. That
3976          * is, when the page lies beyond the EOF, or straddles the EOF
3977          * and the write will cover all of the existing data.
3978          */
3979         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
3980                 i_size = i_size_read(mapping->host);
3981                 if (page_start >= i_size ||
3982                     (offset == 0 && (pos + len) >= i_size)) {
3983                         zero_user_segments(page, 0, offset,
3984                                            offset + len,
3985                                            PAGE_SIZE);
3986                         /*
3987                          * PageChecked means that the parts of the page
3988                          * to which we're not writing are considered up
3989                          * to date. Once the data is copied to the
3990                          * page, it can be set uptodate.
3991                          */
3992                         SetPageChecked(page);
3993                         goto out;
3994                 }
3995         }
3996
3997         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
3998                 /*
3999                  * might as well read a page, it is fast enough. If we get
4000                  * an error, we don't need to return it. cifs_write_end will
4001                  * do a sync write instead since PG_uptodate isn't set.
4002                  */
4003                 cifs_readpage_worker(file, page, &page_start);
4004                 put_page(page);
4005                 oncethru = 1;
4006                 goto start;
4007         } else {
4008                 /* we could try using another file handle if there is one -
4009                    but how would we lock it to prevent a close of that handle
4010                    racing with this read? In any case this will be written
4011                    out by write_end so it is fine */
4012         }
4013 out:
4014         *pagep = page;
4015         return rc;
4016 }
4017
4018 static int cifs_release_page(struct page *page, gfp_t gfp)
4019 {
4020         if (PagePrivate(page))
4021                 return 0;
4022
4023         return cifs_fscache_release_page(page, gfp);
4024 }
4025
4026 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4027                                  unsigned int length)
4028 {
4029         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4030
4031         if (offset == 0 && length == PAGE_SIZE)
4032                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4033 }
4034
4035 static int cifs_launder_page(struct page *page)
4036 {
4037         int rc = 0;
4038         loff_t range_start = page_offset(page);
4039         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4040         struct writeback_control wbc = {
4041                 .sync_mode = WB_SYNC_ALL,
4042                 .nr_to_write = 0,
4043                 .range_start = range_start,
4044                 .range_end = range_end,
4045         };
4046
4047         cifs_dbg(FYI, "Launder page: %p\n", page);
4048
4049         if (clear_page_dirty_for_io(page))
4050                 rc = cifs_writepage_locked(page, &wbc);
4051
4052         cifs_fscache_invalidate_page(page, page->mapping->host);
4053         return rc;
4054 }
4055
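/*
 * Work item run when the server breaks our oplock: downgrade the cached
 * state, flush (and possibly zap) the mapping, re-push byte-range locks
 * and then acknowledge the break to the server if it is still needed.
 */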
4056 void cifs_oplock_break(struct work_struct *work)
4057 {
4058         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
4059                                                   oplock_break);
4060         struct inode *inode = d_inode(cfile->dentry);
4061         struct cifsInodeInfo *cinode = CIFS_I(inode);
4062         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4063         struct TCP_Server_Info *server = tcon->ses->server;
4064         int rc = 0;
4065
4066         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
4067                         TASK_UNINTERRUPTIBLE);
4068
4069         server->ops->downgrade_oplock(server, cinode,
4070                 test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
4071
4072         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
4073                                                 cifs_has_mand_locks(cinode)) {
4074                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
4075                          inode);
4076                 cinode->oplock = 0;
4077         }
4078
4079         if (inode && S_ISREG(inode->i_mode)) {
4080                 if (CIFS_CACHE_READ(cinode))
4081                         break_lease(inode, O_RDONLY);
4082                 else
4083                         break_lease(inode, O_WRONLY);
4084                 rc = filemap_fdatawrite(inode->i_mapping);
4085                 if (!CIFS_CACHE_READ(cinode)) {
4086                         rc = filemap_fdatawait(inode->i_mapping);
4087                         mapping_set_error(inode->i_mapping, rc);
4088                         cifs_zap_mapping(inode);
4089                 }
4090                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
4091         }
4092
4093         rc = cifs_push_locks(cfile);
4094         if (rc)
4095                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
4096
4097         /*
4098          * Releasing a stale oplock after a recent reconnect of the SMB session
4099          * using a now-invalid file handle is not a data integrity issue, but
4100          * don't bother sending an oplock release if the session is still
4101          * disconnected, since the server has already released the oplock.
4102          */
4103         if (!cfile->oplock_break_cancelled) {
4104                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
4105                                                              cinode);
4106                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
4107         }
4108         cifs_done_oplock_break(cinode);
4109 }
4110
4111 /*
4112  * The presence of cifs_direct_io() in the address space ops vector
4113  * allows open() O_DIRECT flags which would have failed otherwise.
4114  *
4115  * In the non-cached mode (mount with cache=none), we shunt off direct
4116  * read and write requests so this method should never be called.
4117  *
4118  * Direct IO is not yet supported in the cached mode.
4119  */
4120 static ssize_t
4121 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
4122 {
4123         /*
4124          * FIXME
4125          * Eventually need to support direct IO for non forcedirectio mounts
4126          */
4127         return -EINVAL;
4128 }
4129
4130
4131 const struct address_space_operations cifs_addr_ops = {
4132         .readpage = cifs_readpage,
4133         .readpages = cifs_readpages,
4134         .writepage = cifs_writepage,
4135         .writepages = cifs_writepages,
4136         .write_begin = cifs_write_begin,
4137         .write_end = cifs_write_end,
4138         .set_page_dirty = __set_page_dirty_nobuffers,
4139         .releasepage = cifs_release_page,
4140         .direct_IO = cifs_direct_io,
4141         .invalidatepage = cifs_invalidate_page,
4142         .launder_page = cifs_launder_page,
4143 };
4144
4145 /*
4146  * cifs_readpages requires the server to support a buffer large enough to
4147  * contain the header plus one complete page of data.  Otherwise, we need
4148  * to leave cifs_readpages out of the address space operations.
4149  */
4150 const struct address_space_operations cifs_addr_ops_smallbuf = {
4151         .readpage = cifs_readpage,
4152         .writepage = cifs_writepage,
4153         .writepages = cifs_writepages,
4154         .write_begin = cifs_write_begin,
4155         .write_end = cifs_write_end,
4156         .set_page_dirty = __set_page_dirty_nobuffers,
4157         .releasepage = cifs_release_page,
4158         .invalidatepage = cifs_invalidate_page,
4159         .launder_page = cifs_launder_page,
4160 };