use less confusing names for iov_iter direction initializers
fs/cifs/file.c (linux-block.git)
1 // SPDX-License-Identifier: LGPL-2.1
2 /*
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  */
11 #include <linux/fs.h>
12 #include <linux/backing-dev.h>
13 #include <linux/stat.h>
14 #include <linux/fcntl.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/writeback.h>
18 #include <linux/task_io_accounting_ops.h>
19 #include <linux/delay.h>
20 #include <linux/mount.h>
21 #include <linux/slab.h>
22 #include <linux/swap.h>
23 #include <linux/mm.h>
24 #include <asm/div64.h>
25 #include "cifsfs.h"
26 #include "cifspdu.h"
27 #include "cifsglob.h"
28 #include "cifsproto.h"
29 #include "smb2proto.h"
30 #include "cifs_unicode.h"
31 #include "cifs_debug.h"
32 #include "cifs_fs_sb.h"
33 #include "fscache.h"
34 #include "smbdirect.h"
35 #include "fs_context.h"
36 #include "cifs_ioctl.h"
37 #include "cached_dir.h"
38
39 /*
40  * Mark all open files on the tree connection as invalid, since they
41  * were closed when the session to the server was lost.
42  */
43 void
44 cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
45 {
46         struct cifsFileInfo *open_file = NULL;
47         struct list_head *tmp;
48         struct list_head *tmp1;
49
50         /* only send once per connect */
51         spin_lock(&tcon->ses->ses_lock);
52         if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) {
53                 spin_unlock(&tcon->ses->ses_lock);
54                 return;
55         }
56         tcon->status = TID_IN_FILES_INVALIDATE;
57         spin_unlock(&tcon->ses->ses_lock);
58
59         /* list all files open on tree connection and mark them invalid */
60         spin_lock(&tcon->open_file_lock);
61         list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
62                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
63                 open_file->invalidHandle = true;
64                 open_file->oplock_break_cancelled = true;
65         }
66         spin_unlock(&tcon->open_file_lock);
67
68         invalidate_all_cached_dirs(tcon);
69         spin_lock(&tcon->tc_lock);
70         if (tcon->status == TID_IN_FILES_INVALIDATE)
71                 tcon->status = TID_NEED_TCON;
72         spin_unlock(&tcon->tc_lock);
73
74         /*
75          * BB Add call to invalidate_inodes(sb) for all superblocks mounted
76          * to this tcon.
77          */
78 }
79
80 static inline int cifs_convert_flags(unsigned int flags)
81 {
82         if ((flags & O_ACCMODE) == O_RDONLY)
83                 return GENERIC_READ;
84         else if ((flags & O_ACCMODE) == O_WRONLY)
85                 return GENERIC_WRITE;
86         else if ((flags & O_ACCMODE) == O_RDWR) {
87         /* GENERIC_ALL is too much permission to request; it can
88            cause an unnecessary access-denied error on create */
89                 /* return GENERIC_ALL; */
90                 return (GENERIC_READ | GENERIC_WRITE);
91         }
92
93         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
94                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
95                 FILE_READ_DATA);
96 }
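/*
 * For example, an open(2) with O_WRONLY maps to GENERIC_WRITE above,
 * while O_RDWR maps to GENERIC_READ | GENERIC_WRITE.
 */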
97
98 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
99 static u32 cifs_posix_convert_flags(unsigned int flags)
100 {
101         u32 posix_flags = 0;
102
103         if ((flags & O_ACCMODE) == O_RDONLY)
104                 posix_flags = SMB_O_RDONLY;
105         else if ((flags & O_ACCMODE) == O_WRONLY)
106                 posix_flags = SMB_O_WRONLY;
107         else if ((flags & O_ACCMODE) == O_RDWR)
108                 posix_flags = SMB_O_RDWR;
109
110         if (flags & O_CREAT) {
111                 posix_flags |= SMB_O_CREAT;
112                 if (flags & O_EXCL)
113                         posix_flags |= SMB_O_EXCL;
114         } else if (flags & O_EXCL)
115                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
116                          current->comm, current->tgid);
117
118         if (flags & O_TRUNC)
119                 posix_flags |= SMB_O_TRUNC;
120         /* be safe and imply O_SYNC for O_DSYNC */
121         if (flags & O_DSYNC)
122                 posix_flags |= SMB_O_SYNC;
123         if (flags & O_DIRECTORY)
124                 posix_flags |= SMB_O_DIRECTORY;
125         if (flags & O_NOFOLLOW)
126                 posix_flags |= SMB_O_NOFOLLOW;
127         if (flags & O_DIRECT)
128                 posix_flags |= SMB_O_DIRECT;
129
130         return posix_flags;
131 }
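/*
 * For example, f_flags of O_WRONLY | O_CREAT | O_EXCL map to
 * SMB_O_WRONLY | SMB_O_CREAT | SMB_O_EXCL above.
 */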
132 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
133
134 static inline int cifs_get_disposition(unsigned int flags)
135 {
136         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
137                 return FILE_CREATE;
138         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
139                 return FILE_OVERWRITE_IF;
140         else if ((flags & O_CREAT) == O_CREAT)
141                 return FILE_OPEN_IF;
142         else if ((flags & O_TRUNC) == O_TRUNC)
143                 return FILE_OVERWRITE;
144         else
145                 return FILE_OPEN;
146 }
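/*
 * For example, O_CREAT | O_TRUNC yields FILE_OVERWRITE_IF above, while
 * an open with neither O_CREAT nor O_TRUNC yields FILE_OPEN.
 */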
147
148 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
149 int cifs_posix_open(const char *full_path, struct inode **pinode,
150                         struct super_block *sb, int mode, unsigned int f_flags,
151                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
152 {
153         int rc;
154         FILE_UNIX_BASIC_INFO *presp_data;
155         __u32 posix_flags = 0;
156         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
157         struct cifs_fattr fattr;
158         struct tcon_link *tlink;
159         struct cifs_tcon *tcon;
160
161         cifs_dbg(FYI, "posix open %s\n", full_path);
162
163         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
164         if (presp_data == NULL)
165                 return -ENOMEM;
166
167         tlink = cifs_sb_tlink(cifs_sb);
168         if (IS_ERR(tlink)) {
169                 rc = PTR_ERR(tlink);
170                 goto posix_open_ret;
171         }
172
173         tcon = tlink_tcon(tlink);
174         mode &= ~current_umask();
175
176         posix_flags = cifs_posix_convert_flags(f_flags);
177         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
178                              poplock, full_path, cifs_sb->local_nls,
179                              cifs_remap(cifs_sb));
180         cifs_put_tlink(tlink);
181
182         if (rc)
183                 goto posix_open_ret;
184
185         if (presp_data->Type == cpu_to_le32(-1))
186                 goto posix_open_ret; /* open ok, caller does qpathinfo */
187
188         if (!pinode)
189                 goto posix_open_ret; /* caller does not need info */
190
191         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
192
193         /* get new inode and set it up */
194         if (*pinode == NULL) {
195                 cifs_fill_uniqueid(sb, &fattr);
196                 *pinode = cifs_iget(sb, &fattr);
197                 if (!*pinode) {
198                         rc = -ENOMEM;
199                         goto posix_open_ret;
200                 }
201         } else {
202                 cifs_revalidate_mapping(*pinode);
203                 rc = cifs_fattr_to_inode(*pinode, &fattr);
204         }
205
206 posix_open_ret:
207         kfree(presp_data);
208         return rc;
209 }
210 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
211
212 static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
213                         struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
214                         struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
215 {
216         int rc;
217         int desired_access;
218         int disposition;
219         int create_options = CREATE_NOT_DIR;
220         struct TCP_Server_Info *server = tcon->ses->server;
221         struct cifs_open_parms oparms;
222
223         if (!server->ops->open)
224                 return -ENOSYS;
225
226         desired_access = cifs_convert_flags(f_flags);
227
228 /*********************************************************************
229  *  open flag mapping table:
230  *
231  *      POSIX Flag            CIFS Disposition
232  *      ----------            ----------------
233  *      O_CREAT               FILE_OPEN_IF
234  *      O_CREAT | O_EXCL      FILE_CREATE
235  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
236  *      O_TRUNC               FILE_OVERWRITE
237  *      none of the above     FILE_OPEN
238  *
239  *      Note that there is no direct match for the disposition
240  *      FILE_SUPERSEDE (ie create whether or not the file exists);
241  *      O_CREAT | O_TRUNC is similar, but it truncates the existing
242  *      file rather than replacing it as FILE_SUPERSEDE does (which
243  *      also applies the attributes / metadata passed in on open).
244  *
245  *      O_SYNC is a reasonable match to the CIFS writethrough flag,
246  *      and the read/write flags match reasonably.  O_LARGEFILE is
247  *      irrelevant because largefile support is always used by this
248  *      client. Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
249  *      O_NOFOLLOW and O_NONBLOCK need further investigation.
250  *********************************************************************/
251
252         disposition = cifs_get_disposition(f_flags);
253
254         /* BB pass O_SYNC flag through on file attributes .. BB */
255
256         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
257         if (f_flags & O_SYNC)
258                 create_options |= CREATE_WRITE_THROUGH;
259
260         if (f_flags & O_DIRECT)
261                 create_options |= CREATE_NO_BUFFER;
262
263         oparms.tcon = tcon;
264         oparms.cifs_sb = cifs_sb;
265         oparms.desired_access = desired_access;
266         oparms.create_options = cifs_create_options(cifs_sb, create_options);
267         oparms.disposition = disposition;
268         oparms.path = full_path;
269         oparms.fid = fid;
270         oparms.reconnect = false;
271
272         rc = server->ops->open(xid, &oparms, oplock, buf);
273         if (rc)
274                 return rc;
275
276         /* TODO: Add support for calling posix query info, passing in the fid */
277         if (tcon->unix_ext)
278                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
279                                               xid);
280         else
281                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
282                                          xid, fid);
283
284         if (rc) {
285                 server->ops->close(xid, tcon, fid);
286                 if (rc == -ESTALE)
287                         rc = -EOPENSTALE;
288         }
289
290         return rc;
291 }
292
293 static bool
294 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
295 {
296         struct cifs_fid_locks *cur;
297         bool has_locks = false;
298
299         down_read(&cinode->lock_sem);
300         list_for_each_entry(cur, &cinode->llist, llist) {
301                 if (!list_empty(&cur->locks)) {
302                         has_locks = true;
303                         break;
304                 }
305         }
306         up_read(&cinode->lock_sem);
307         return has_locks;
308 }
309
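/*
 * Acquire the write lock by polling down_write_trylock() in 10ms steps
 * rather than blocking unconditionally in down_write().
 */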
310 void
311 cifs_down_write(struct rw_semaphore *sem)
312 {
313         while (!down_write_trylock(sem))
314                 msleep(10);
315 }
316
317 static void cifsFileInfo_put_work(struct work_struct *work);
318
319 struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
320                                        struct tcon_link *tlink, __u32 oplock,
321                                        const char *symlink_target)
322 {
323         struct dentry *dentry = file_dentry(file);
324         struct inode *inode = d_inode(dentry);
325         struct cifsInodeInfo *cinode = CIFS_I(inode);
326         struct cifsFileInfo *cfile;
327         struct cifs_fid_locks *fdlocks;
328         struct cifs_tcon *tcon = tlink_tcon(tlink);
329         struct TCP_Server_Info *server = tcon->ses->server;
330
331         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
332         if (cfile == NULL)
333                 return cfile;
334
335         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
336         if (!fdlocks) {
337                 kfree(cfile);
338                 return NULL;
339         }
340
341         if (symlink_target) {
342                 cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
343                 if (!cfile->symlink_target) {
344                         kfree(fdlocks);
345                         kfree(cfile);
346                         return NULL;
347                 }
348         }
349
350         INIT_LIST_HEAD(&fdlocks->locks);
351         fdlocks->cfile = cfile;
352         cfile->llist = fdlocks;
353
354         cfile->count = 1;
355         cfile->pid = current->tgid;
356         cfile->uid = current_fsuid();
357         cfile->dentry = dget(dentry);
358         cfile->f_flags = file->f_flags;
359         cfile->invalidHandle = false;
360         cfile->deferred_close_scheduled = false;
361         cfile->tlink = cifs_get_tlink(tlink);
362         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
363         INIT_WORK(&cfile->put, cifsFileInfo_put_work);
364         INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
365         mutex_init(&cfile->fh_mutex);
366         spin_lock_init(&cfile->file_info_lock);
367
368         cifs_sb_active(inode->i_sb);
369
370         /*
371          * If the server returned a read oplock and we have mandatory brlocks,
372          * set oplock level to None.
373          */
374         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
375                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
376                 oplock = 0;
377         }
378
379         cifs_down_write(&cinode->lock_sem);
380         list_add(&fdlocks->llist, &cinode->llist);
381         up_write(&cinode->lock_sem);
382
383         spin_lock(&tcon->open_file_lock);
384         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
385                 oplock = fid->pending_open->oplock;
386         list_del(&fid->pending_open->olist);
387
388         fid->purge_cache = false;
389         server->ops->set_fid(cfile, fid, oplock);
390
391         list_add(&cfile->tlist, &tcon->openFileList);
392         atomic_inc(&tcon->num_local_opens);
393
394         /* If this is a readable file instance, put it first in the list */
395         spin_lock(&cinode->open_file_lock);
396         if (file->f_mode & FMODE_READ)
397                 list_add(&cfile->flist, &cinode->openFileList);
398         else
399                 list_add_tail(&cfile->flist, &cinode->openFileList);
400         spin_unlock(&cinode->open_file_lock);
401         spin_unlock(&tcon->open_file_lock);
402
403         if (fid->purge_cache)
404                 cifs_zap_mapping(inode);
405
406         file->private_data = cfile;
407         return cfile;
408 }
409
410 struct cifsFileInfo *
411 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
412 {
413         spin_lock(&cifs_file->file_info_lock);
414         cifsFileInfo_get_locked(cifs_file);
415         spin_unlock(&cifs_file->file_info_lock);
416         return cifs_file;
417 }
418
419 static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
420 {
421         struct inode *inode = d_inode(cifs_file->dentry);
422         struct cifsInodeInfo *cifsi = CIFS_I(inode);
423         struct cifsLockInfo *li, *tmp;
424         struct super_block *sb = inode->i_sb;
425
426         /*
427          * Delete any outstanding lock records. We'll lose them when the file
428          * is closed anyway.
429          */
430         cifs_down_write(&cifsi->lock_sem);
431         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
432                 list_del(&li->llist);
433                 cifs_del_lock_waiters(li);
434                 kfree(li);
435         }
436         list_del(&cifs_file->llist->llist);
437         kfree(cifs_file->llist);
438         up_write(&cifsi->lock_sem);
439
440         cifs_put_tlink(cifs_file->tlink);
441         dput(cifs_file->dentry);
442         cifs_sb_deactive(sb);
443         kfree(cifs_file->symlink_target);
444         kfree(cifs_file);
445 }
446
447 static void cifsFileInfo_put_work(struct work_struct *work)
448 {
449         struct cifsFileInfo *cifs_file = container_of(work,
450                         struct cifsFileInfo, put);
451
452         cifsFileInfo_put_final(cifs_file);
453 }
454
455 /**
456  * cifsFileInfo_put - release a reference to file private data
457  *
458  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
459  *
460  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
461  */
462 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
463 {
464         _cifsFileInfo_put(cifs_file, true, true);
465 }
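/*
 * Usage sketch (hypothetical caller): pair each cifsFileInfo_get() with
 * a matching put once the extra reference is no longer needed:
 *
 *	cfile = cifsFileInfo_get(file->private_data);
 *	... use cfile ...
 *	cifsFileInfo_put(cfile);
 */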
466
467 /**
468  * _cifsFileInfo_put - release a reference to file private data
469  *
470  * This may involve closing the filehandle @cifs_file out on the
471  * server. Must be called without holding tcon->open_file_lock,
472  * cinode->open_file_lock and cifs_file->file_info_lock.
473  *
474  * If @wait_oplock_handler is true and we are releasing the last
475  * reference, wait for any running oplock break handler of the file
476  * and cancel any pending one.
477  *
478  * @cifs_file:  cifs/smb3 specific info (eg refcounts) for an open file
479  * @wait_oplock_handler: must be false if called from oplock_break_handler
480  * @offload:    if true, defer the final release to a worker thread
481  *
482  */
483 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
484                        bool wait_oplock_handler, bool offload)
485 {
486         struct inode *inode = d_inode(cifs_file->dentry);
487         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
488         struct TCP_Server_Info *server = tcon->ses->server;
489         struct cifsInodeInfo *cifsi = CIFS_I(inode);
490         struct super_block *sb = inode->i_sb;
491         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
492         struct cifs_fid fid = {};
493         struct cifs_pending_open open;
494         bool oplock_break_cancelled;
495
496         spin_lock(&tcon->open_file_lock);
497         spin_lock(&cifsi->open_file_lock);
498         spin_lock(&cifs_file->file_info_lock);
499         if (--cifs_file->count > 0) {
500                 spin_unlock(&cifs_file->file_info_lock);
501                 spin_unlock(&cifsi->open_file_lock);
502                 spin_unlock(&tcon->open_file_lock);
503                 return;
504         }
505         spin_unlock(&cifs_file->file_info_lock);
506
507         if (server->ops->get_lease_key)
508                 server->ops->get_lease_key(inode, &fid);
509
510         /* store open in pending opens to make sure we don't miss lease break */
511         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
512
513         /* remove it from the lists */
514         list_del(&cifs_file->flist);
515         list_del(&cifs_file->tlist);
516         atomic_dec(&tcon->num_local_opens);
517
518         if (list_empty(&cifsi->openFileList)) {
519                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
520                          d_inode(cifs_file->dentry));
521                 /*
522                  * In strict cache mode we need to invalidate the mapping on the
523                  * last close, because it may cause an error when we open this
524                  * file again and get at least a level II oplock.
525                  */
526                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
527                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
528                 cifs_set_oplock_level(cifsi, 0);
529         }
530
531         spin_unlock(&cifsi->open_file_lock);
532         spin_unlock(&tcon->open_file_lock);
533
534         oplock_break_cancelled = wait_oplock_handler ?
535                 cancel_work_sync(&cifs_file->oplock_break) : false;
536
537         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
538                 struct TCP_Server_Info *server = tcon->ses->server;
539                 unsigned int xid;
540
541                 xid = get_xid();
542                 if (server->ops->close_getattr)
543                         server->ops->close_getattr(xid, tcon, cifs_file);
544                 else if (server->ops->close)
545                         server->ops->close(xid, tcon, &cifs_file->fid);
546                 _free_xid(xid);
547         }
548
549         if (oplock_break_cancelled)
550                 cifs_done_oplock_break(cifsi);
551
552         cifs_del_pending_open(&open);
553
554         if (offload)
555                 queue_work(fileinfo_put_wq, &cifs_file->put);
556         else
557                 cifsFileInfo_put_final(cifs_file);
558 }
559
560 int cifs_open(struct inode *inode, struct file *file)
561
562 {
563         int rc = -EACCES;
564         unsigned int xid;
565         __u32 oplock;
566         struct cifs_sb_info *cifs_sb;
567         struct TCP_Server_Info *server;
568         struct cifs_tcon *tcon;
569         struct tcon_link *tlink;
570         struct cifsFileInfo *cfile = NULL;
571         void *page;
572         const char *full_path;
573         bool posix_open_ok = false;
574         struct cifs_fid fid = {};
575         struct cifs_pending_open open;
576         struct cifs_open_info_data data = {};
577
578         xid = get_xid();
579
580         cifs_sb = CIFS_SB(inode->i_sb);
581         if (unlikely(cifs_forced_shutdown(cifs_sb))) {
582                 free_xid(xid);
583                 return -EIO;
584         }
585
586         tlink = cifs_sb_tlink(cifs_sb);
587         if (IS_ERR(tlink)) {
588                 free_xid(xid);
589                 return PTR_ERR(tlink);
590         }
591         tcon = tlink_tcon(tlink);
592         server = tcon->ses->server;
593
594         page = alloc_dentry_path();
595         full_path = build_path_from_dentry(file_dentry(file), page);
596         if (IS_ERR(full_path)) {
597                 rc = PTR_ERR(full_path);
598                 goto out;
599         }
600
601         cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
602                  inode, file->f_flags, full_path);
603
604         if (file->f_flags & O_DIRECT &&
605             cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
606                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
607                         file->f_op = &cifs_file_direct_nobrl_ops;
608                 else
609                         file->f_op = &cifs_file_direct_ops;
610         }
611
612         /* Get the cached handle as SMB2 close is deferred */
613         rc = cifs_get_readable_path(tcon, full_path, &cfile);
614         if (rc == 0) {
615                 if (file->f_flags == cfile->f_flags) {
616                         file->private_data = cfile;
617                         spin_lock(&CIFS_I(inode)->deferred_lock);
618                         cifs_del_deferred_close(cfile);
619                         spin_unlock(&CIFS_I(inode)->deferred_lock);
620                         goto use_cache;
621                 } else {
622                         _cifsFileInfo_put(cfile, true, false);
623                 }
624         }
625
626         if (server->oplocks)
627                 oplock = REQ_OPLOCK;
628         else
629                 oplock = 0;
630
631 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
632         if (!tcon->broken_posix_open && tcon->unix_ext &&
633             cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
634                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
635                 /* can not refresh inode info since size could be stale */
636                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
637                                 cifs_sb->ctx->file_mode /* ignored */,
638                                 file->f_flags, &oplock, &fid.netfid, xid);
639                 if (rc == 0) {
640                         cifs_dbg(FYI, "posix open succeeded\n");
641                         posix_open_ok = true;
642                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
643                         if (tcon->ses->serverNOS)
644                                 cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
645                                          tcon->ses->ip_addr,
646                                          tcon->ses->serverNOS);
647                         tcon->broken_posix_open = true;
648                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
649                          (rc != -EOPNOTSUPP)) /* path not found or net err */
650                         goto out;
651                 /*
652                  * Otherwise fall through and retry the open the old way on
653                  * network i/o or DFS errors.
654                  */
655         }
656 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
657
658         if (server->ops->get_lease_key)
659                 server->ops->get_lease_key(inode, &fid);
660
661         cifs_add_pending_open(&fid, tlink, &open);
662
663         if (!posix_open_ok) {
664                 if (server->ops->get_lease_key)
665                         server->ops->get_lease_key(inode, &fid);
666
667                 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
668                                   xid, &data);
669                 if (rc) {
670                         cifs_del_pending_open(&open);
671                         goto out;
672                 }
673         }
674
675         cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
676         if (cfile == NULL) {
677                 if (server->ops->close)
678                         server->ops->close(xid, tcon, &fid);
679                 cifs_del_pending_open(&open);
680                 rc = -ENOMEM;
681                 goto out;
682         }
683
684 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
685         if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
686                 /*
687                  * Time to set the mode, which we could not set earlier due
688                  * to problems creating new read-only files.
689                  */
690                 struct cifs_unix_set_info_args args = {
691                         .mode   = inode->i_mode,
692                         .uid    = INVALID_UID, /* no change */
693                         .gid    = INVALID_GID, /* no change */
694                         .ctime  = NO_CHANGE_64,
695                         .atime  = NO_CHANGE_64,
696                         .mtime  = NO_CHANGE_64,
697                         .device = 0,
698                 };
699                 CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
700                                        cfile->pid);
701         }
702 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
703
704 use_cache:
705         fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
706                            file->f_mode & FMODE_WRITE);
707         if (file->f_flags & O_DIRECT &&
708             (!((file->f_flags & O_ACCMODE) != O_RDONLY) ||
709              file->f_flags & O_APPEND))
710                 cifs_invalidate_cache(file_inode(file),
711                                       FSCACHE_INVAL_DIO_WRITE);
712
713 out:
714         free_dentry_path(page);
715         free_xid(xid);
716         cifs_put_tlink(tlink);
717         cifs_free_open_info(&data);
718         return rc;
719 }
720
721 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
722 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
723 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
724
725 /*
726  * Try to reacquire byte range locks that were released when session
727  * to server was lost.
728  */
729 static int
730 cifs_relock_file(struct cifsFileInfo *cfile)
731 {
732         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
733         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
734         int rc = 0;
735 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
736         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
737 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
738
739         down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
740         if (cinode->can_cache_brlcks) {
741                 /* can cache locks - no need to relock */
742                 up_read(&cinode->lock_sem);
743                 return rc;
744         }
745
746 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
747         if (cap_unix(tcon->ses) &&
748             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
749             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
750                 rc = cifs_push_posix_locks(cfile);
751         else
752 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
753                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
754
755         up_read(&cinode->lock_sem);
756         return rc;
757 }
758
759 static int
760 cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
761 {
762         int rc = -EACCES;
763         unsigned int xid;
764         __u32 oplock;
765         struct cifs_sb_info *cifs_sb;
766         struct cifs_tcon *tcon;
767         struct TCP_Server_Info *server;
768         struct cifsInodeInfo *cinode;
769         struct inode *inode;
770         void *page;
771         const char *full_path;
772         int desired_access;
773         int disposition = FILE_OPEN;
774         int create_options = CREATE_NOT_DIR;
775         struct cifs_open_parms oparms;
776
777         xid = get_xid();
778         mutex_lock(&cfile->fh_mutex);
779         if (!cfile->invalidHandle) {
780                 mutex_unlock(&cfile->fh_mutex);
781                 free_xid(xid);
782                 return 0;
783         }
784
785         inode = d_inode(cfile->dentry);
786         cifs_sb = CIFS_SB(inode->i_sb);
787         tcon = tlink_tcon(cfile->tlink);
788         server = tcon->ses->server;
789
790         /*
791          * Cannot grab the rename sem here because various ops, including
792          * ones that already hold it, can end up causing writepage to be
793          * called; if the server was down that means we end up here, and we
794          * can never tell whether the caller already holds the rename_sem.
795          */
796         page = alloc_dentry_path();
797         full_path = build_path_from_dentry(cfile->dentry, page);
798         if (IS_ERR(full_path)) {
799                 mutex_unlock(&cfile->fh_mutex);
800                 free_dentry_path(page);
801                 free_xid(xid);
802                 return PTR_ERR(full_path);
803         }
804
805         cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
806                  inode, cfile->f_flags, full_path);
807
808         if (tcon->ses->server->oplocks)
809                 oplock = REQ_OPLOCK;
810         else
811                 oplock = 0;
812
813 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
814         if (tcon->unix_ext && cap_unix(tcon->ses) &&
815             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
816                                 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
817                 /*
818                  * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
819                  * original open. Must mask them off for a reopen.
820                  */
821                 unsigned int oflags = cfile->f_flags &
822                                                 ~(O_CREAT | O_EXCL | O_TRUNC);
823
824                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
825                                      cifs_sb->ctx->file_mode /* ignored */,
826                                      oflags, &oplock, &cfile->fid.netfid, xid);
827                 if (rc == 0) {
828                         cifs_dbg(FYI, "posix reopen succeeded\n");
829                         oparms.reconnect = true;
830                         goto reopen_success;
831                 }
832                 /*
833                  * Fall through and retry the open the old way on errors; in
834                  * the reconnect path especially it is important to retry hard.
835                  */
836         }
837 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
838
839         desired_access = cifs_convert_flags(cfile->f_flags);
840
841         /* O_SYNC also has bit for O_DSYNC so following check picks up either */
842         if (cfile->f_flags & O_SYNC)
843                 create_options |= CREATE_WRITE_THROUGH;
844
845         if (cfile->f_flags & O_DIRECT)
846                 create_options |= CREATE_NO_BUFFER;
847
848         if (server->ops->get_lease_key)
849                 server->ops->get_lease_key(inode, &cfile->fid);
850
851         oparms.tcon = tcon;
852         oparms.cifs_sb = cifs_sb;
853         oparms.desired_access = desired_access;
854         oparms.create_options = cifs_create_options(cifs_sb, create_options);
855         oparms.disposition = disposition;
856         oparms.path = full_path;
857         oparms.fid = &cfile->fid;
858         oparms.reconnect = true;
859
860         /*
861          * Cannot refresh the inode by passing in a file_info buf returned
862          * by ops->open and then calling get_inode_info with that buf, since
863          * the file might have write-behind data that needs to be flushed and
864          * the server version of the file size can be stale. If we knew for
865          * sure that the inode was not dirty locally we could do this.
866          */
867         rc = server->ops->open(xid, &oparms, &oplock, NULL);
868         if (rc == -ENOENT && oparms.reconnect == false) {
869                 /* the durable handle timeout has expired - open the file again */
870                 rc = server->ops->open(xid, &oparms, &oplock, NULL);
871                 /* indicate that we need to relock the file */
872                 oparms.reconnect = true;
873         }
874
875         if (rc) {
876                 mutex_unlock(&cfile->fh_mutex);
877                 cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
878                 cifs_dbg(FYI, "oplock: %d\n", oplock);
879                 goto reopen_error_exit;
880         }
881
882 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
883 reopen_success:
884 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
885         cfile->invalidHandle = false;
886         mutex_unlock(&cfile->fh_mutex);
887         cinode = CIFS_I(inode);
888
889         if (can_flush) {
890                 rc = filemap_write_and_wait(inode->i_mapping);
891                 if (!is_interrupt_error(rc))
892                         mapping_set_error(inode->i_mapping, rc);
893
894                 if (tcon->posix_extensions)
895                         rc = smb311_posix_get_inode_info(&inode, full_path, inode->i_sb, xid);
896                 else if (tcon->unix_ext)
897                         rc = cifs_get_inode_info_unix(&inode, full_path,
898                                                       inode->i_sb, xid);
899                 else
900                         rc = cifs_get_inode_info(&inode, full_path, NULL,
901                                                  inode->i_sb, xid, NULL);
902         }
903         /*
904          * Otherwise we are already writing data out to the server and could
905          * deadlock if we tried to flush it. Since we do not know whether we
906          * have data that would invalidate the current end of file on the
907          * server, we cannot go to the server to get the new inode info.
908          */
909
910         /*
911          * If the server returned a read oplock and we have mandatory brlocks,
912          * set oplock level to None.
913          */
914         if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
915                 cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
916                 oplock = 0;
917         }
918
919         server->ops->set_fid(cfile, &cfile->fid, oplock);
920         if (oparms.reconnect)
921                 cifs_relock_file(cfile);
922
923 reopen_error_exit:
924         free_dentry_path(page);
925         free_xid(xid);
926         return rc;
927 }
928
929 void smb2_deferred_work_close(struct work_struct *work)
930 {
931         struct cifsFileInfo *cfile = container_of(work,
932                         struct cifsFileInfo, deferred.work);
933
934         spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
935         cifs_del_deferred_close(cfile);
936         cfile->deferred_close_scheduled = false;
937         spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
938         _cifsFileInfo_put(cfile, true, false);
939 }
940
941 int cifs_close(struct inode *inode, struct file *file)
942 {
943         struct cifsFileInfo *cfile;
944         struct cifsInodeInfo *cinode = CIFS_I(inode);
945         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
946         struct cifs_deferred_close *dclose;
947
948         cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
949
950         if (file->private_data != NULL) {
951                 cfile = file->private_data;
952                 file->private_data = NULL;
953                 dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
954                 if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
955                     cinode->lease_granted &&
956                     !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
957                     dclose) {
958                         if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
959                                 inode->i_ctime = inode->i_mtime = current_time(inode);
960                         }
961                         spin_lock(&cinode->deferred_lock);
962                         cifs_add_deferred_close(cfile, dclose);
963                         if (cfile->deferred_close_scheduled &&
964                             delayed_work_pending(&cfile->deferred)) {
965                                 /*
966                                  * If there is no pending work, mod_delayed_work queues new
967                                  * work, so increase the ref count to avoid a use-after-free.
968                                  */
969                                 if (!mod_delayed_work(deferredclose_wq,
970                                                 &cfile->deferred, cifs_sb->ctx->closetimeo))
971                                         cifsFileInfo_get(cfile);
972                         } else {
973                                 /* Deferred close for files */
974                                 queue_delayed_work(deferredclose_wq,
975                                                 &cfile->deferred, cifs_sb->ctx->closetimeo);
976                                 cfile->deferred_close_scheduled = true;
977                                 spin_unlock(&cinode->deferred_lock);
978                                 return 0;
979                         }
980                         spin_unlock(&cinode->deferred_lock);
981                         _cifsFileInfo_put(cfile, true, false);
982                 } else {
983                         _cifsFileInfo_put(cfile, true, false);
984                         kfree(dclose);
985                 }
986         }
987
988         /* return code from the ->release op is always ignored */
989         return 0;
990 }
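/*
 * Note: with a granted read-write-handle lease the final put above is
 * deferred for ctx->closetimeo; a quick re-open of the same path can
 * then reuse the still-open handle via cifs_get_readable_path().
 */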
991
992 void
993 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
994 {
995         struct cifsFileInfo *open_file, *tmp;
996         struct list_head tmp_list;
997
998         if (!tcon->use_persistent || !tcon->need_reopen_files)
999                 return;
1000
1001         tcon->need_reopen_files = false;
1002
1003         cifs_dbg(FYI, "Reopen persistent handles\n");
1004         INIT_LIST_HEAD(&tmp_list);
1005
1006         /* list all files open on tree connection, reopen persistent handles */
1007         spin_lock(&tcon->open_file_lock);
1008         list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1009                 if (!open_file->invalidHandle)
1010                         continue;
1011                 cifsFileInfo_get(open_file);
1012                 list_add_tail(&open_file->rlist, &tmp_list);
1013         }
1014         spin_unlock(&tcon->open_file_lock);
1015
1016         list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1017                 if (cifs_reopen_file(open_file, false /* do not flush */))
1018                         tcon->need_reopen_files = true;
1019                 list_del_init(&open_file->rlist);
1020                 cifsFileInfo_put(open_file);
1021         }
1022 }
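/*
 * The reopen above runs outside tcon->open_file_lock because
 * cifs_reopen_file() can block on the network; the temporary rlist
 * plus cifsFileInfo_get() keeps each file pinned in the meantime.
 */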
1023
1024 int cifs_closedir(struct inode *inode, struct file *file)
1025 {
1026         int rc = 0;
1027         unsigned int xid;
1028         struct cifsFileInfo *cfile = file->private_data;
1029         struct cifs_tcon *tcon;
1030         struct TCP_Server_Info *server;
1031         char *buf;
1032
1033         cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1034
1035         if (cfile == NULL)
1036                 return rc;
1037
1038         xid = get_xid();
1039         tcon = tlink_tcon(cfile->tlink);
1040         server = tcon->ses->server;
1041
1042         cifs_dbg(FYI, "Freeing private data in close dir\n");
1043         spin_lock(&cfile->file_info_lock);
1044         if (server->ops->dir_needs_close(cfile)) {
1045                 cfile->invalidHandle = true;
1046                 spin_unlock(&cfile->file_info_lock);
1047                 if (server->ops->close_dir)
1048                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1049                 else
1050                         rc = -ENOSYS;
1051                 cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1052                 /* not much we can do if it fails anyway, ignore rc */
1053                 rc = 0;
1054         } else
1055                 spin_unlock(&cfile->file_info_lock);
1056
1057         buf = cfile->srch_inf.ntwrk_buf_start;
1058         if (buf) {
1059                 cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1060                 cfile->srch_inf.ntwrk_buf_start = NULL;
1061                 if (cfile->srch_inf.smallBuf)
1062                         cifs_small_buf_release(buf);
1063                 else
1064                         cifs_buf_release(buf);
1065         }
1066
1067         cifs_put_tlink(cfile->tlink);
1068         kfree(file->private_data);
1069         file->private_data = NULL;
1070         /* BB can we lock the filestruct while this is going on? */
1071         free_xid(xid);
1072         return rc;
1073 }
1074
1075 static struct cifsLockInfo *
1076 cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1077 {
1078         struct cifsLockInfo *lock =
1079                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1080         if (!lock)
1081                 return lock;
1082         lock->offset = offset;
1083         lock->length = length;
1084         lock->type = type;
1085         lock->pid = current->tgid;
1086         lock->flags = flags;
1087         INIT_LIST_HEAD(&lock->blist);
1088         init_waitqueue_head(&lock->block_q);
1089         return lock;
1090 }
1091
1092 void
1093 cifs_del_lock_waiters(struct cifsLockInfo *lock)
1094 {
1095         struct cifsLockInfo *li, *tmp;
1096         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1097                 list_del_init(&li->blist);
1098                 wake_up(&li->block_q);
1099         }
1100 }
1101
1102 #define CIFS_LOCK_OP    0
1103 #define CIFS_READ_OP    1
1104 #define CIFS_WRITE_OP   2
1105
1106 /* @rw_check: CIFS_LOCK_OP (no op), CIFS_READ_OP (read) or CIFS_WRITE_OP (write) */
1107 static bool
1108 cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1109                             __u64 length, __u8 type, __u16 flags,
1110                             struct cifsFileInfo *cfile,
1111                             struct cifsLockInfo **conf_lock, int rw_check)
1112 {
1113         struct cifsLockInfo *li;
1114         struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1115         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1116
1117         list_for_each_entry(li, &fdlocks->locks, llist) {
1118                 if (offset + length <= li->offset ||
1119                     offset >= li->offset + li->length)
1120                         continue;
1121                 if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1122                     server->ops->compare_fids(cfile, cur_cfile)) {
1123                         /* shared lock prevents write op through the same fid */
1124                         if (!(li->type & server->vals->shared_lock_type) ||
1125                             rw_check != CIFS_WRITE_OP)
1126                                 continue;
1127                 }
1128                 if ((type & server->vals->shared_lock_type) &&
1129                     ((server->ops->compare_fids(cfile, cur_cfile) &&
1130                      current->tgid == li->pid) || type == li->type))
1131                         continue;
1132                 if (rw_check == CIFS_LOCK_OP &&
1133                     (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1134                     server->ops->compare_fids(cfile, cur_cfile))
1135                         continue;
1136                 if (conf_lock)
1137                         *conf_lock = li;
1138                 return true;
1139         }
1140         return false;
1141 }
1142
1143 bool
1144 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1145                         __u8 type, __u16 flags,
1146                         struct cifsLockInfo **conf_lock, int rw_check)
1147 {
1148         bool rc = false;
1149         struct cifs_fid_locks *cur;
1150         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1151
1152         list_for_each_entry(cur, &cinode->llist, llist) {
1153                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1154                                                  flags, cfile, conf_lock,
1155                                                  rw_check);
1156                 if (rc)
1157                         break;
1158         }
1159
1160         return rc;
1161 }
1162
1163 /*
1164  * Check if there is another lock that prevents us from setting the lock
1165  * (mandatory style). If such a lock exists, update the flock structure
1166  * with its properties. Otherwise, set the flock type to F_UNLCK if we can
1167  * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
1168  * need to send a request to the server, or 1 otherwise.
1169  */
1170 static int
1171 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1172                __u8 type, struct file_lock *flock)
1173 {
1174         int rc = 0;
1175         struct cifsLockInfo *conf_lock;
1176         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1177         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1178         bool exist;
1179
1180         down_read(&cinode->lock_sem);
1181
1182         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1183                                         flock->fl_flags, &conf_lock,
1184                                         CIFS_LOCK_OP);
1185         if (exist) {
1186                 flock->fl_start = conf_lock->offset;
1187                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1188                 flock->fl_pid = conf_lock->pid;
1189                 if (conf_lock->type & server->vals->shared_lock_type)
1190                         flock->fl_type = F_RDLCK;
1191                 else
1192                         flock->fl_type = F_WRLCK;
1193         } else if (!cinode->can_cache_brlcks)
1194                 rc = 1;
1195         else
1196                 flock->fl_type = F_UNLCK;
1197
1198         up_read(&cinode->lock_sem);
1199         return rc;
1200 }
1201
1202 static void
1203 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1204 {
1205         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1206         cifs_down_write(&cinode->lock_sem);
1207         list_add_tail(&lock->llist, &cfile->llist->locks);
1208         up_write(&cinode->lock_sem);
1209 }
1210
1211 /*
1212  * Set the byte-range lock (mandatory style). Returns:
1213  * 1) 0, if we set the lock and don't need to send a request to the server;
1214  * 2) 1, if no locks prevent us but we need to send a request to the server;
1215  * 3) -EACCES, if there is a lock that prevents us and wait is false.
1216  */
1217 static int
1218 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1219                  bool wait)
1220 {
1221         struct cifsLockInfo *conf_lock;
1222         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1223         bool exist;
1224         int rc = 0;
1225
1226 try_again:
1227         exist = false;
1228         cifs_down_write(&cinode->lock_sem);
1229
1230         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1231                                         lock->type, lock->flags, &conf_lock,
1232                                         CIFS_LOCK_OP);
1233         if (!exist && cinode->can_cache_brlcks) {
1234                 list_add_tail(&lock->llist, &cfile->llist->locks);
1235                 up_write(&cinode->lock_sem);
1236                 return rc;
1237         }
1238
1239         if (!exist)
1240                 rc = 1;
1241         else if (!wait)
1242                 rc = -EACCES;
1243         else {
1244                 list_add_tail(&lock->blist, &conf_lock->blist);
1245                 up_write(&cinode->lock_sem);
1246                 rc = wait_event_interruptible(lock->block_q,
1247                                         (lock->blist.prev == &lock->blist) &&
1248                                         (lock->blist.next == &lock->blist));
1249                 if (!rc)
1250                         goto try_again;
1251                 cifs_down_write(&cinode->lock_sem);
1252                 list_del_init(&lock->blist);
1253         }
1254
1255         up_write(&cinode->lock_sem);
1256         return rc;
1257 }
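/*
 * Caller sketch (hypothetical): a return of 1 means the caller still
 * has to send the lock request to the server; -EACCES fails fast while
 * leaving the conflicting lock in place:
 *
 *	rc = cifs_lock_add_if(cfile, lock, wait);
 *	if (rc == 1)
 *		rc = send_lock_to_server(...);	// hypothetical helper
 */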
1258
1259 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1260 /*
1261  * Check if there is another lock that prevents us from setting the lock
1262  * (posix style). If such a lock exists, update the flock structure with
1263  * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
1264  * brlocks, or leave it the same if we can't. Returns 0 if we don't need to
1265  * send a request to the server, or 1 otherwise.
1266  */
1267 static int
1268 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1269 {
1270         int rc = 0;
1271         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1272         unsigned char saved_type = flock->fl_type;
1273
1274         if ((flock->fl_flags & FL_POSIX) == 0)
1275                 return 1;
1276
1277         down_read(&cinode->lock_sem);
1278         posix_test_lock(file, flock);
1279
1280         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1281                 flock->fl_type = saved_type;
1282                 rc = 1;
1283         }
1284
1285         up_read(&cinode->lock_sem);
1286         return rc;
1287 }
1288
1289 /*
1290  * Set the byte-range lock (posix style). Returns:
1291  * 1) <0, if an error occurs while setting the lock;
1292  * 2) 0, if we set the lock and don't need to request to the server;
1293  * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
1294  * 4) FILE_LOCK_DEFERRED + 1, if we need to request to the server.
1295  */
1296 static int
1297 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1298 {
1299         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1300         int rc = FILE_LOCK_DEFERRED + 1;
1301
1302         if ((flock->fl_flags & FL_POSIX) == 0)
1303                 return rc;
1304
1305         cifs_down_write(&cinode->lock_sem);
1306         if (!cinode->can_cache_brlcks) {
1307                 up_write(&cinode->lock_sem);
1308                 return rc;
1309         }
1310
1311         rc = posix_lock_file(file, flock, NULL);
1312         up_write(&cinode->lock_sem);
1313         return rc;
1314 }
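/*
 * Caller sketch (hypothetical): FILE_LOCK_DEFERRED + 1 signals that the
 * lock was not handled locally and must go to the server:
 *
 *	rc = cifs_posix_lock_set(file, flock);
 *	if (rc <= FILE_LOCK_DEFERRED)
 *		return rc;
 *	// else issue the server lock request
 */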
1315
1316 int
1317 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1318 {
1319         unsigned int xid;
1320         int rc = 0, stored_rc;
1321         struct cifsLockInfo *li, *tmp;
1322         struct cifs_tcon *tcon;
1323         unsigned int num, max_num, max_buf;
1324         LOCKING_ANDX_RANGE *buf, *cur;
1325         static const int types[] = {
1326                 LOCKING_ANDX_LARGE_FILES,
1327                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1328         };
1329         int i;
1330
1331         xid = get_xid();
1332         tcon = tlink_tcon(cfile->tlink);
1333
1334         /*
1335          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1336          * and check it before using.
1337          */
1338         max_buf = tcon->ses->server->maxBuf;
1339         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1340                 free_xid(xid);
1341                 return -EINVAL;
1342         }
1343
1344         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1345                      PAGE_SIZE);
1346         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1347                         PAGE_SIZE);
1348         max_num = (max_buf - sizeof(struct smb_hdr)) /
1349                                                 sizeof(LOCKING_ANDX_RANGE);
1350         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1351         if (!buf) {
1352                 free_xid(xid);
1353                 return -ENOMEM;
1354         }
1355
1356         for (i = 0; i < 2; i++) {
1357                 cur = buf;
1358                 num = 0;
1359                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1360                         if (li->type != types[i])
1361                                 continue;
1362                         cur->Pid = cpu_to_le16(li->pid);
1363                         cur->LengthLow = cpu_to_le32((u32)li->length);
1364                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1365                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1366                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1367                         if (++num == max_num) {
1368                                 stored_rc = cifs_lockv(xid, tcon,
1369                                                        cfile->fid.netfid,
1370                                                        (__u8)li->type, 0, num,
1371                                                        buf);
1372                                 if (stored_rc)
1373                                         rc = stored_rc;
1374                                 cur = buf;
1375                                 num = 0;
1376                         } else
1377                                 cur++;
1378                 }
1379
1380                 if (num) {
1381                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1382                                                (__u8)types[i], 0, num, buf);
1383                         if (stored_rc)
1384                                 rc = stored_rc;
1385                 }
1386         }
1387
1388         kfree(buf);
1389         free_xid(xid);
1390         return rc;
1391 }
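/*
 * Note the batching above: lock ranges are packed into a page-bounded
 * buffer and flushed with cifs_lockv() whenever max_num entries
 * accumulate, once per lock type in types[].
 */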
1392
1393 static __u32
1394 hash_lockowner(fl_owner_t owner)
1395 {
1396         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1397 }
1398 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1399
1400 struct lock_to_push {
1401         struct list_head llist;
1402         __u64 offset;
1403         __u64 length;
1404         __u32 pid;
1405         __u16 netfid;
1406         __u8 type;
1407 };
1408
1409 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1410 static int
1411 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1412 {
1413         struct inode *inode = d_inode(cfile->dentry);
1414         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1415         struct file_lock *flock;
1416         struct file_lock_context *flctx = inode->i_flctx;
1417         unsigned int count = 0, i;
1418         int rc = 0, xid, type;
1419         struct list_head locks_to_send, *el;
1420         struct lock_to_push *lck, *tmp;
1421         __u64 length;
1422
1423         xid = get_xid();
1424
1425         if (!flctx)
1426                 goto out;
1427
1428         spin_lock(&flctx->flc_lock);
1429         list_for_each(el, &flctx->flc_posix) {
1430                 count++;
1431         }
1432         spin_unlock(&flctx->flc_lock);
1433
1434         INIT_LIST_HEAD(&locks_to_send);
1435
1436         /*
1437          * Allocating count locks is enough because no FL_POSIX locks can be
1438          * added to the list while we are holding cinode->lock_sem that
1439          * protects locking operations of this inode.
1440          */
1441         for (i = 0; i < count; i++) {
1442                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1443                 if (!lck) {
1444                         rc = -ENOMEM;
1445                         goto err_out;
1446                 }
1447                 list_add_tail(&lck->llist, &locks_to_send);
1448         }
1449
1450         el = locks_to_send.next;
1451         spin_lock(&flctx->flc_lock);
1452         list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
1453                 if (el == &locks_to_send) {
1454                         /*
1455                          * The list ended. We don't have enough allocated
1456                          * structures - something is really wrong.
1457                          */
1458                         cifs_dbg(VFS, "Can't push all brlocks!\n");
1459                         break;
1460                 }
1461                 length = cifs_flock_len(flock);
1462                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1463                         type = CIFS_RDLCK;
1464                 else
1465                         type = CIFS_WRLCK;
1466                 lck = list_entry(el, struct lock_to_push, llist);
1467                 lck->pid = hash_lockowner(flock->fl_owner);
1468                 lck->netfid = cfile->fid.netfid;
1469                 lck->length = length;
1470                 lck->type = type;
1471                 lck->offset = flock->fl_start;
1472         }
1473         spin_unlock(&flctx->flc_lock);
1474
1475         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1476                 int stored_rc;
1477
1478                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1479                                              lck->offset, lck->length, NULL,
1480                                              lck->type, 0);
1481                 if (stored_rc)
1482                         rc = stored_rc;
1483                 list_del(&lck->llist);
1484                 kfree(lck);
1485         }
1486
1487 out:
1488         free_xid(xid);
1489         return rc;
1490 err_out:
1491         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1492                 list_del(&lck->llist);
1493                 kfree(lck);
1494         }
1495         goto out;
1496 }
1497 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1498
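/*
 * Push locks that were cached while we held an oplock out to the server,
 * using the POSIX variant when the server supports it and the mount
 * allows it, and mandatory byte-range locks otherwise. Clears
 * can_cache_brlcks so subsequent lock requests go straight to the server.
 */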
1499 static int
1500 cifs_push_locks(struct cifsFileInfo *cfile)
1501 {
1502         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1503         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1504         int rc = 0;
1505 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1506         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1507 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1508
1509         /* we are going to update can_cache_brlcks here - need write access */
1510         cifs_down_write(&cinode->lock_sem);
1511         if (!cinode->can_cache_brlcks) {
1512                 up_write(&cinode->lock_sem);
1513                 return rc;
1514         }
1515
1516 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1517         if (cap_unix(tcon->ses) &&
1518             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1519             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1520                 rc = cifs_push_posix_locks(cfile);
1521         else
1522 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1523                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1524
1525         cinode->can_cache_brlcks = false;
1526         up_write(&cinode->lock_sem);
1527         return rc;
1528 }
1529
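/*
 * Decode a VFS file_lock into the wire lock type for this server and
 * tell the caller, via *lock/*unlock/*wait_flag, what kind of operation
 * was requested.
 */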
1530 static void
1531 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1532                 bool *wait_flag, struct TCP_Server_Info *server)
1533 {
1534         if (flock->fl_flags & FL_POSIX)
1535                 cifs_dbg(FYI, "Posix\n");
1536         if (flock->fl_flags & FL_FLOCK)
1537                 cifs_dbg(FYI, "Flock\n");
1538         if (flock->fl_flags & FL_SLEEP) {
1539                 cifs_dbg(FYI, "Blocking lock\n");
1540                 *wait_flag = true;
1541         }
1542         if (flock->fl_flags & FL_ACCESS)
1543                 cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1544         if (flock->fl_flags & FL_LEASE)
1545                 cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1546         if (flock->fl_flags &
1547             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1548                FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1549                 cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
1550
1551         *type = server->vals->large_lock_type;
1552         if (flock->fl_type == F_WRLCK) {
1553                 cifs_dbg(FYI, "F_WRLCK\n");
1554                 *type |= server->vals->exclusive_lock_type;
1555                 *lock = 1;
1556         } else if (flock->fl_type == F_UNLCK) {
1557                 cifs_dbg(FYI, "F_UNLCK\n");
1558                 *type |= server->vals->unlock_lock_type;
1559                 *unlock = 1;
1560                 /* Check if unlock includes more than one lock range */
1561         } else if (flock->fl_type == F_RDLCK) {
1562                 cifs_dbg(FYI, "F_RDLCK\n");
1563                 *type |= server->vals->shared_lock_type;
1564                 *lock = 1;
1565         } else if (flock->fl_type == F_EXLCK) {
1566                 cifs_dbg(FYI, "F_EXLCK\n");
1567                 *type |= server->vals->exclusive_lock_type;
1568                 *lock = 1;
1569         } else if (flock->fl_type == F_SHLCK) {
1570                 cifs_dbg(FYI, "F_SHLCK\n");
1571                 *type |= server->vals->shared_lock_type;
1572                 *lock = 1;
1573         } else
1574                 cifs_dbg(FYI, "Unknown type of lock\n");
1575 }
1576
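/*
 * Handle a lock query (F_GETLK): test the requested range first against
 * the locally cached locks and then against the server, probing with a
 * temporary lock/unlock pair, and report the result by rewriting
 * flock->fl_type.
 */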
1577 static int
1578 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1579            bool wait_flag, bool posix_lck, unsigned int xid)
1580 {
1581         int rc = 0;
1582         __u64 length = cifs_flock_len(flock);
1583         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1584         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1585         struct TCP_Server_Info *server = tcon->ses->server;
1586 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1587         __u16 netfid = cfile->fid.netfid;
1588
1589         if (posix_lck) {
1590                 int posix_lock_type;
1591
1592                 rc = cifs_posix_lock_test(file, flock);
1593                 if (!rc)
1594                         return rc;
1595
1596                 if (type & server->vals->shared_lock_type)
1597                         posix_lock_type = CIFS_RDLCK;
1598                 else
1599                         posix_lock_type = CIFS_WRLCK;
1600                 rc = CIFSSMBPosixLock(xid, tcon, netfid,
1601                                       hash_lockowner(flock->fl_owner),
1602                                       flock->fl_start, length, flock,
1603                                       posix_lock_type, wait_flag);
1604                 return rc;
1605         }
1606 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1607
1608         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1609         if (!rc)
1610                 return rc;
1611
1612         /* BB we could chain these into one lock request BB */
1613         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1614                                     1, 0, false);
1615         if (rc == 0) {
1616                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1617                                             type, 0, 1, false);
1618                 flock->fl_type = F_UNLCK;
1619                 if (rc != 0)
1620                         cifs_dbg(VFS, "Error unlocking previously locked range during lock test, rc=%d\n",
1621                                  rc);
1622                 return 0;
1623         }
1624
1625         if (type & server->vals->shared_lock_type) {
1626                 flock->fl_type = F_WRLCK;
1627                 return 0;
1628         }
1629
1630         type &= ~server->vals->exclusive_lock_type;
1631
1632         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1633                                     type | server->vals->shared_lock_type,
1634                                     1, 0, false);
1635         if (rc == 0) {
1636                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1637                         type | server->vals->shared_lock_type, 0, 1, false);
1638                 flock->fl_type = F_RDLCK;
1639                 if (rc != 0)
1640                         cifs_dbg(VFS, "Error unlocking previously locked range during lock test, rc=%d\n",
1641                                  rc);
1642         } else
1643                 flock->fl_type = F_WRLCK;
1644
1645         return 0;
1646 }
1647
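/* Move every entry of one lock list onto another. */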
1648 void
1649 cifs_move_llist(struct list_head *source, struct list_head *dest)
1650 {
1651         struct list_head *li, *tmp;
1652         list_for_each_safe(li, tmp, source)
1653                 list_move(li, dest);
1654 }
1655
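/* Wake up any waiters and free every lock on the given list. */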
1656 void
1657 cifs_free_llist(struct list_head *llist)
1658 {
1659         struct cifsLockInfo *li, *tmp;
1660         list_for_each_entry_safe(li, tmp, llist, llist) {
1661                 cifs_del_lock_waiters(li);
1662                 list_del(&li->llist);
1663                 kfree(li);
1664         }
1665 }
1666
1667 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
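/*
 * Remove every cached byte-range lock that falls entirely inside the
 * range being unlocked and, unless the locks are still cacheable, send
 * the unlocks to the server batched into as few LOCKING_ANDX requests as
 * maxBuf allows. Unlocked entries are parked on a temporary list so they
 * can be restored should the server reject a request.
 */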
1668 int
1669 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1670                   unsigned int xid)
1671 {
1672         int rc = 0, stored_rc;
1673         static const int types[] = {
1674                 LOCKING_ANDX_LARGE_FILES,
1675                 LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1676         };
1677         unsigned int i;
1678         unsigned int max_num, num, max_buf;
1679         LOCKING_ANDX_RANGE *buf, *cur;
1680         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1681         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1682         struct cifsLockInfo *li, *tmp;
1683         __u64 length = cifs_flock_len(flock);
1684         struct list_head tmp_llist;
1685
1686         INIT_LIST_HEAD(&tmp_llist);
1687
1688         /*
1689          * Accessing maxBuf is racy with cifs_reconnect - need to store the
1690          * value and check it before use.
1691          */
1692         max_buf = tcon->ses->server->maxBuf;
1693         if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1694                 return -EINVAL;
1695
1696         BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1697                      PAGE_SIZE);
1698         max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1699                         PAGE_SIZE);
1700         max_num = (max_buf - sizeof(struct smb_hdr)) /
1701                                                 sizeof(LOCKING_ANDX_RANGE);
1702         buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1703         if (!buf)
1704                 return -ENOMEM;
1705
1706         cifs_down_write(&cinode->lock_sem);
1707         for (i = 0; i < 2; i++) {
1708                 cur = buf;
1709                 num = 0;
1710                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1711                         if (flock->fl_start > li->offset ||
1712                             (flock->fl_start + length) <
1713                             (li->offset + li->length))
1714                                 continue;
1715                         if (current->tgid != li->pid)
1716                                 continue;
1717                         if (types[i] != li->type)
1718                                 continue;
1719                         if (cinode->can_cache_brlcks) {
1720                                 /*
1721                                  * We can cache brlock requests - simply remove
1722                                  * a lock from the file's list.
1723                                  */
1724                                 list_del(&li->llist);
1725                                 cifs_del_lock_waiters(li);
1726                                 kfree(li);
1727                                 continue;
1728                         }
1729                         cur->Pid = cpu_to_le16(li->pid);
1730                         cur->LengthLow = cpu_to_le32((u32)li->length);
1731                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1732                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1733                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1734                         /*
1735                          * We need to save the lock here so that we can add it
1736                          * back to the file's list if the unlock range request
1737                          * fails on the server.
1738                          */
1739                         list_move(&li->llist, &tmp_llist);
1740                         if (++num == max_num) {
1741                                 stored_rc = cifs_lockv(xid, tcon,
1742                                                        cfile->fid.netfid,
1743                                                        li->type, num, 0, buf);
1744                                 if (stored_rc) {
1745                                         /*
1746                                          * We failed on the unlock range
1747                                          * request - add all locks from the tmp
1748                                          * list to the head of the file's list.
1749                                          */
1750                                         cifs_move_llist(&tmp_llist,
1751                                                         &cfile->llist->locks);
1752                                         rc = stored_rc;
1753                                 } else
1754                                         /*
1755                                          * The unlock range request succeeded -
1756                                          * free the tmp list.
1757                                          */
1758                                         cifs_free_llist(&tmp_llist);
1759                                 cur = buf;
1760                                 num = 0;
1761                         } else
1762                                 cur++;
1763                 }
1764                 if (num) {
1765                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1766                                                types[i], num, 0, buf);
1767                         if (stored_rc) {
1768                                 cifs_move_llist(&tmp_llist,
1769                                                 &cfile->llist->locks);
1770                                 rc = stored_rc;
1771                         } else
1772                                 cifs_free_llist(&tmp_llist);
1773                 }
1774         }
1775
1776         up_write(&cinode->lock_sem);
1777         kfree(buf);
1778         return rc;
1779 }
1780 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1781
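/*
 * Handle a lock or unlock request (F_SETLK/F_SETLKW and flock): use
 * POSIX locks when the protocol allows it, otherwise check the request
 * against the locally cached locks, send a mandatory lock or unlock to
 * the server, and finally let the VFS record FL_POSIX/FL_FLOCK locks
 * locally too.
 */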
1782 static int
1783 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1784            bool wait_flag, bool posix_lck, int lock, int unlock,
1785            unsigned int xid)
1786 {
1787         int rc = 0;
1788         __u64 length = cifs_flock_len(flock);
1789         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1790         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1791         struct TCP_Server_Info *server = tcon->ses->server;
1792         struct inode *inode = d_inode(cfile->dentry);
1793
1794 #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1795         if (posix_lck) {
1796                 int posix_lock_type;
1797
1798                 rc = cifs_posix_lock_set(file, flock);
1799                 if (rc <= FILE_LOCK_DEFERRED)
1800                         return rc;
1801
1802                 if (type & server->vals->shared_lock_type)
1803                         posix_lock_type = CIFS_RDLCK;
1804                 else
1805                         posix_lock_type = CIFS_WRLCK;
1806
1807                 if (unlock == 1)
1808                         posix_lock_type = CIFS_UNLCK;
1809
1810                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1811                                       hash_lockowner(flock->fl_owner),
1812                                       flock->fl_start, length,
1813                                       NULL, posix_lock_type, wait_flag);
1814                 goto out;
1815         }
1816 #endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1817         if (lock) {
1818                 struct cifsLockInfo *lock;
1819
1820                 lock = cifs_lock_init(flock->fl_start, length, type,
1821                                       flock->fl_flags);
1822                 if (!lock)
1823                         return -ENOMEM;
1824
1825                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1826                 if (rc < 0) {
1827                         kfree(lock);
1828                         return rc;
1829                 }
1830                 if (!rc)
1831                         goto out;
1832
1833                 /*
1834                  * A Windows 7 server can delay breaking a lease from read to
1835                  * None if we set a byte-range lock on a file - break it
1836                  * explicitly before sending the lock to the server to be sure
1837                  * the next read won't conflict with non-overlapping locks due
1838                  * to page reading.
1839                  */
1840                 if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
1841                                         CIFS_CACHE_READ(CIFS_I(inode))) {
1842                         cifs_zap_mapping(inode);
1843                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1844                                  inode);
1845                         CIFS_I(inode)->oplock = 0;
1846                 }
1847
1848                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1849                                             type, 1, 0, wait_flag);
1850                 if (rc) {
1851                         kfree(lock);
1852                         return rc;
1853                 }
1854
1855                 cifs_lock_add(cfile, lock);
1856         } else if (unlock)
1857                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1858
1859 out:
1860         if ((flock->fl_flags & FL_POSIX) || (flock->fl_flags & FL_FLOCK)) {
1861                 /*
1862                  * If this is a request to remove all locks because we
1863                  * are closing the file, it doesn't matter if the
1864                  * unlocking failed as both cifs.ko and the SMB server
1865                  * remove the lock on file close
1866                  */
1867                 if (rc) {
1868                         cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
1869                         if (!(flock->fl_flags & FL_CLOSE))
1870                                 return rc;
1871                 }
1872                 rc = locks_lock_file_wait(file, flock);
1873         }
1874         return rc;
1875 }
1876
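/* The flock(2) entry point; only FL_FLOCK requests are accepted here. */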
1877 int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
1878 {
1879         int rc, xid;
1880         int lock = 0, unlock = 0;
1881         bool wait_flag = false;
1882         bool posix_lck = false;
1883         struct cifs_sb_info *cifs_sb;
1884         struct cifs_tcon *tcon;
1885         struct cifsFileInfo *cfile;
1886         __u32 type;
1887
1888         xid = get_xid();
1889
1890         if (!(fl->fl_flags & FL_FLOCK)) {
1891                 rc = -ENOLCK;
1892                 free_xid(xid);
1893                 return rc;
1894         }
1895
1896         cfile = (struct cifsFileInfo *)file->private_data;
1897         tcon = tlink_tcon(cfile->tlink);
1898
1899         cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
1900                         tcon->ses->server);
1901         cifs_sb = CIFS_FILE_SB(file);
1902
1903         if (cap_unix(tcon->ses) &&
1904             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1905             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1906                 posix_lck = true;
1907
1908         if (!lock && !unlock) {
1909                 /*
1910                  * if this is neither a lock nor an unlock request then there
1911                  * is nothing to do since we do not know what it is
1912                  */
1913                 rc = -EOPNOTSUPP;
1914                 free_xid(xid);
1915                 return rc;
1916         }
1917
1918         rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
1919                         xid);
1920         free_xid(xid);
1921         return rc;
1924 }
1925
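/* The fcntl(2) byte-range locking entry point (F_GETLK/F_SETLK/F_SETLKW). */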
1926 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1927 {
1928         int rc, xid;
1929         int lock = 0, unlock = 0;
1930         bool wait_flag = false;
1931         bool posix_lck = false;
1932         struct cifs_sb_info *cifs_sb;
1933         struct cifs_tcon *tcon;
1934         struct cifsFileInfo *cfile;
1935         __u32 type;
1936
1937         rc = -EACCES;
1938         xid = get_xid();
1939
1940         cifs_dbg(FYI, "%s: %pD2 cmd=0x%x flags=0x%x type=0x%x r=%lld:%lld\n", __func__, file, cmd,
1941                  flock->fl_flags, flock->fl_type, (long long)flock->fl_start,
1942                  (long long)flock->fl_end);
1943
1944         cfile = (struct cifsFileInfo *)file->private_data;
1945         tcon = tlink_tcon(cfile->tlink);
1946
1947         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1948                         tcon->ses->server);
1949         cifs_sb = CIFS_FILE_SB(file);
1950         set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
1951
1952         if (cap_unix(tcon->ses) &&
1953             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1954             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1955                 posix_lck = true;
1956         /*
1957          * BB add code here to normalize offset and length to account for
1958          * negative length which we can not accept over the wire.
1959          */
1960         if (IS_GETLK(cmd)) {
1961                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1962                 free_xid(xid);
1963                 return rc;
1964         }
1965
1966         if (!lock && !unlock) {
1967                 /*
1968                  * if this is neither a lock nor an unlock request then there
1969                  * is nothing to do since we do not know what it is
1970                  */
1971                 free_xid(xid);
1972                 return -EOPNOTSUPP;
1973         }
1974
1975         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1976                         xid);
1977         free_xid(xid);
1978         return rc;
1979 }
1980
1981 /*
1982  * Update the file size (if needed) after a write. Should be called with
1983  * the inode->i_lock held.
1984  */
1985 void
1986 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1987                       unsigned int bytes_written)
1988 {
1989         loff_t end_of_write = offset + bytes_written;
1990
1991         if (end_of_write > cifsi->server_eof)
1992                 cifsi->server_eof = end_of_write;
1993 }
1994
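/*
 * Synchronously write out a buffer using the given open handle, looping
 * in chunks of at most wp_retry_size() bytes and transparently reopening
 * an invalidated handle. On success updates the cached EOF and i_size
 * and returns the number of bytes written; *offset is advanced as data
 * goes out.
 */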
1995 static ssize_t
1996 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1997            size_t write_size, loff_t *offset)
1998 {
1999         int rc = 0;
2000         unsigned int bytes_written = 0;
2001         unsigned int total_written;
2002         struct cifs_tcon *tcon;
2003         struct TCP_Server_Info *server;
2004         unsigned int xid;
2005         struct dentry *dentry = open_file->dentry;
2006         struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2007         struct cifs_io_parms io_parms = {0};
2008
2009         cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2010                  write_size, *offset, dentry);
2011
2012         tcon = tlink_tcon(open_file->tlink);
2013         server = tcon->ses->server;
2014
2015         if (!server->ops->sync_write)
2016                 return -ENOSYS;
2017
2018         xid = get_xid();
2019
2020         for (total_written = 0; write_size > total_written;
2021              total_written += bytes_written) {
2022                 rc = -EAGAIN;
2023                 while (rc == -EAGAIN) {
2024                         struct kvec iov[2];
2025                         unsigned int len;
2026
2027                         if (open_file->invalidHandle) {
2028                                 /* we could deadlock if we called
2029                                    filemap_fdatawait from here so tell
2030                                    reopen_file not to flush data to the
2031                                    server now */
2032                                 rc = cifs_reopen_file(open_file, false);
2033                                 if (rc != 0)
2034                                         break;
2035                         }
2036
2037                         len = min(server->ops->wp_retry_size(d_inode(dentry)),
2038                                   (unsigned int)write_size - total_written);
2039                         /* iov[0] is reserved for smb header */
2040                         iov[1].iov_base = (char *)write_data + total_written;
2041                         iov[1].iov_len = len;
2042                         io_parms.pid = pid;
2043                         io_parms.tcon = tcon;
2044                         io_parms.offset = *offset;
2045                         io_parms.length = len;
2046                         rc = server->ops->sync_write(xid, &open_file->fid,
2047                                         &io_parms, &bytes_written, iov, 1);
2048                 }
2049                 if (rc || (bytes_written == 0)) {
2050                         if (total_written)
2051                                 break;
2052                         else {
2053                                 free_xid(xid);
2054                                 return rc;
2055                         }
2056                 } else {
2057                         spin_lock(&d_inode(dentry)->i_lock);
2058                         cifs_update_eof(cifsi, *offset, bytes_written);
2059                         spin_unlock(&d_inode(dentry)->i_lock);
2060                         *offset += bytes_written;
2061                 }
2062         }
2063
2064         cifs_stats_bytes_written(tcon, total_written);
2065
2066         if (total_written > 0) {
2067                 spin_lock(&d_inode(dentry)->i_lock);
2068                 if (*offset > d_inode(dentry)->i_size) {
2069                         i_size_write(d_inode(dentry), *offset);
2070                         d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2071                 }
2072                 spin_unlock(&d_inode(dentry)->i_lock);
2073         }
2074         mark_inode_dirty_sync(d_inode(dentry));
2075         free_xid(xid);
2076         return total_written;
2077 }
2078
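/*
 * Find an open handle on this inode that is usable for reading,
 * optionally restricted to the current fsuid on multiuser mounts, and
 * return it with a reference held, or NULL if there is none.
 */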
2079 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2080                                         bool fsuid_only)
2081 {
2082         struct cifsFileInfo *open_file = NULL;
2083         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2084
2085         /* only filter by fsuid on multiuser mounts */
2086         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2087                 fsuid_only = false;
2088
2089         spin_lock(&cifs_inode->open_file_lock);
2090         /* we could simply get the first list entry since write-only entries
2091            are always at the end of the list but since the first entry might
2092            have a close pending, we go through the whole list */
2093         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2094                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2095                         continue;
2096                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2097                         if (!open_file->invalidHandle) {
2098                                 /* found a good file */
2099                                 /* lock it so it will not be closed on us */
2100                                 cifsFileInfo_get(open_file);
2101                                 spin_unlock(&cifs_inode->open_file_lock);
2102                                 return open_file;
2103                         } /* else might as well continue, and look for
2104                              another, or simply have the caller reopen it
2105                              again rather than trying to fix this handle */
2106                 } else /* write only file */
2107                         break; /* write only files are last so must be done */
2108         }
2109         spin_unlock(&cifs_inode->open_file_lock);
2110         return NULL;
2111 }
2112
2113 /* Return -EBADF if no handle is found and general rc otherwise */
2114 int
2115 cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2116                        struct cifsFileInfo **ret_file)
2117 {
2118         struct cifsFileInfo *open_file, *inv_file = NULL;
2119         struct cifs_sb_info *cifs_sb;
2120         bool any_available = false;
2121         int rc = -EBADF;
2122         unsigned int refind = 0;
2123         bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2124         bool with_delete = flags & FIND_WR_WITH_DELETE;
2125         *ret_file = NULL;
2126
2127         /*
2128          * Having a null inode here (because mapping->host was set to zero by
2129          * the VFS or MM) should not happen but we had reports of an oops (due
2130          * to it being zero) during stress test cases so we need to check for it.
2131          */
2132
2133         if (cifs_inode == NULL) {
2134                 cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2135                 dump_stack();
2136                 return rc;
2137         }
2138
2139         cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2140
2141         /* only filter by fsuid on multiuser mounts */
2142         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2143                 fsuid_only = false;
2144
2145         spin_lock(&cifs_inode->open_file_lock);
2146 refind_writable:
2147         if (refind > MAX_REOPEN_ATT) {
2148                 spin_unlock(&cifs_inode->open_file_lock);
2149                 return rc;
2150         }
2151         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2152                 if (!any_available && open_file->pid != current->tgid)
2153                         continue;
2154                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2155                         continue;
2156                 if (with_delete && !(open_file->fid.access & DELETE))
2157                         continue;
2158                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2159                         if (!open_file->invalidHandle) {
2160                                 /* found a good writable file */
2161                                 cifsFileInfo_get(open_file);
2162                                 spin_unlock(&cifs_inode->open_file_lock);
2163                                 *ret_file = open_file;
2164                                 return 0;
2165                         } else {
2166                                 if (!inv_file)
2167                                         inv_file = open_file;
2168                         }
2169                 }
2170         }
2171         /* couldn't find a usable FH with the same pid, try any available */
2172         if (!any_available) {
2173                 any_available = true;
2174                 goto refind_writable;
2175         }
2176
2177         if (inv_file) {
2178                 any_available = false;
2179                 cifsFileInfo_get(inv_file);
2180         }
2181
2182         spin_unlock(&cifs_inode->open_file_lock);
2183
2184         if (inv_file) {
2185                 rc = cifs_reopen_file(inv_file, false);
2186                 if (!rc) {
2187                         *ret_file = inv_file;
2188                         return 0;
2189                 }
2190
2191                 spin_lock(&cifs_inode->open_file_lock);
2192                 list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2193                 spin_unlock(&cifs_inode->open_file_lock);
2194                 cifsFileInfo_put(inv_file);
2195                 ++refind;
2196                 inv_file = NULL;
2197                 spin_lock(&cifs_inode->open_file_lock);
2198                 goto refind_writable;
2199         }
2200
2201         return rc;
2202 }
2203
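/*
 * Convenience wrapper around cifs_get_writable_file() that returns the
 * handle directly (or NULL) instead of an error code.
 */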
2204 struct cifsFileInfo *
2205 find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2206 {
2207         struct cifsFileInfo *cfile;
2208         int rc;
2209
2210         rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2211         if (rc)
2212                 cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2213
2214         return cfile;
2215 }
2216
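/*
 * Look up an open file on this tree connection by pathname and return a
 * referenced writable handle for it, or -ENOENT if that path is not
 * currently open.
 */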
2217 int
2218 cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2219                        int flags,
2220                        struct cifsFileInfo **ret_file)
2221 {
2222         struct cifsFileInfo *cfile;
2223         void *page = alloc_dentry_path();
2224
2225         *ret_file = NULL;
2226
2227         spin_lock(&tcon->open_file_lock);
2228         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2229                 struct cifsInodeInfo *cinode;
2230                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2231                 if (IS_ERR(full_path)) {
2232                         spin_unlock(&tcon->open_file_lock);
2233                         free_dentry_path(page);
2234                         return PTR_ERR(full_path);
2235                 }
2236                 if (strcmp(full_path, name))
2237                         continue;
2238
2239                 cinode = CIFS_I(d_inode(cfile->dentry));
2240                 spin_unlock(&tcon->open_file_lock);
2241                 free_dentry_path(page);
2242                 return cifs_get_writable_file(cinode, flags, ret_file);
2243         }
2244
2245         spin_unlock(&tcon->open_file_lock);
2246         free_dentry_path(page);
2247         return -ENOENT;
2248 }
2249
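/* Pathname-based counterpart of find_readable_file(). */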
2250 int
2251 cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2252                        struct cifsFileInfo **ret_file)
2253 {
2254         struct cifsFileInfo *cfile;
2255         void *page = alloc_dentry_path();
2256
2257         *ret_file = NULL;
2258
2259         spin_lock(&tcon->open_file_lock);
2260         list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2261                 struct cifsInodeInfo *cinode;
2262                 const char *full_path = build_path_from_dentry(cfile->dentry, page);
2263                 if (IS_ERR(full_path)) {
2264                         spin_unlock(&tcon->open_file_lock);
2265                         free_dentry_path(page);
2266                         return PTR_ERR(full_path);
2267                 }
2268                 if (strcmp(full_path, name))
2269                         continue;
2270
2271                 cinode = CIFS_I(d_inode(cfile->dentry));
2272                 spin_unlock(&tcon->open_file_lock);
2273                 free_dentry_path(page);
2274                 *ret_file = find_readable_file(cinode, 0);
2275                 return *ret_file ? 0 : -ENOENT;
2276         }
2277
2278         spin_unlock(&tcon->open_file_lock);
2279         free_dentry_path(page);
2280         return -ENOENT;
2281 }
2282
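/*
 * Final kref release for a cifs_writedata: deregister any smbdirect
 * memory registration, drop the file reference and free the page array
 * and the structure itself.
 */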
2283 void
2284 cifs_writedata_release(struct kref *refcount)
2285 {
2286         struct cifs_writedata *wdata = container_of(refcount,
2287                                         struct cifs_writedata, refcount);
2288 #ifdef CONFIG_CIFS_SMB_DIRECT
2289         if (wdata->mr) {
2290                 smbd_deregister_mr(wdata->mr);
2291                 wdata->mr = NULL;
2292         }
2293 #endif
2294
2295         if (wdata->cfile)
2296                 cifsFileInfo_put(wdata->cfile);
2297
2298         kvfree(wdata->pages);
2299         kfree(wdata);
2300 }
2301
2302 /*
2303  * Write failed with a retryable error. Resend the write request. It's also
2304  * possible that the page was redirtied so re-clean the page.
2305  */
2306 static void
2307 cifs_writev_requeue(struct cifs_writedata *wdata)
2308 {
2309         int i, rc = 0;
2310         struct inode *inode = d_inode(wdata->cfile->dentry);
2311         struct TCP_Server_Info *server;
2312         unsigned int rest_len;
2313
2314         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2315         i = 0;
2316         rest_len = wdata->bytes;
2317         do {
2318                 struct cifs_writedata *wdata2;
2319                 unsigned int j, nr_pages, wsize, tailsz, cur_len;
2320
2321                 wsize = server->ops->wp_retry_size(inode);
2322                 if (wsize < rest_len) {
2323                         nr_pages = wsize / PAGE_SIZE;
2324                         if (!nr_pages) {
2325                                 rc = -EOPNOTSUPP;
2326                                 break;
2327                         }
2328                         cur_len = nr_pages * PAGE_SIZE;
2329                         tailsz = PAGE_SIZE;
2330                 } else {
2331                         nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
2332                         cur_len = rest_len;
2333                         tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
2334                 }
2335
2336                 wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
2337                 if (!wdata2) {
2338                         rc = -ENOMEM;
2339                         break;
2340                 }
2341
2342                 for (j = 0; j < nr_pages; j++) {
2343                         wdata2->pages[j] = wdata->pages[i + j];
2344                         lock_page(wdata2->pages[j]);
2345                         clear_page_dirty_for_io(wdata2->pages[j]);
2346                 }
2347
2348                 wdata2->sync_mode = wdata->sync_mode;
2349                 wdata2->nr_pages = nr_pages;
2350                 wdata2->offset = page_offset(wdata2->pages[0]);
2351                 wdata2->pagesz = PAGE_SIZE;
2352                 wdata2->tailsz = tailsz;
2353                 wdata2->bytes = cur_len;
2354
2355                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2356                                             &wdata2->cfile);
2357                 if (!wdata2->cfile) {
2358                         cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2359                                  rc);
2360                         if (!is_retryable_error(rc))
2361                                 rc = -EBADF;
2362                 } else {
2363                         wdata2->pid = wdata2->cfile->pid;
2364                         rc = server->ops->async_writev(wdata2,
2365                                                        cifs_writedata_release);
2366                 }
2367
2368                 for (j = 0; j < nr_pages; j++) {
2369                         unlock_page(wdata2->pages[j]);
2370                         if (rc != 0 && !is_retryable_error(rc)) {
2371                                 SetPageError(wdata2->pages[j]);
2372                                 end_page_writeback(wdata2->pages[j]);
2373                                 put_page(wdata2->pages[j]);
2374                         }
2375                 }
2376
2377                 kref_put(&wdata2->refcount, cifs_writedata_release);
2378                 if (rc) {
2379                         if (is_retryable_error(rc))
2380                                 continue;
2381                         i += nr_pages;
2382                         break;
2383                 }
2384
2385                 rest_len -= cur_len;
2386                 i += nr_pages;
2387         } while (i < wdata->nr_pages);
2388
2389         /* cleanup remaining pages from the original wdata */
2390         for (; i < wdata->nr_pages; i++) {
2391                 SetPageError(wdata->pages[i]);
2392                 end_page_writeback(wdata->pages[i]);
2393                 put_page(wdata->pages[i]);
2394         }
2395
2396         if (rc != 0 && !is_retryable_error(rc))
2397                 mapping_set_error(inode->i_mapping, rc);
2398         kref_put(&wdata->refcount, cifs_writedata_release);
2399 }
2400
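/*
 * Work item run when an asynchronous write finishes: on success update
 * the cached EOF and the write statistics; on -EAGAIN under WB_SYNC_ALL
 * requeue the write; otherwise end writeback on each page, redirtying or
 * marking it in error as appropriate.
 */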
2401 void
2402 cifs_writev_complete(struct work_struct *work)
2403 {
2404         struct cifs_writedata *wdata = container_of(work,
2405                                                 struct cifs_writedata, work);
2406         struct inode *inode = d_inode(wdata->cfile->dentry);
2407         int i = 0;
2408
2409         if (wdata->result == 0) {
2410                 spin_lock(&inode->i_lock);
2411                 cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2412                 spin_unlock(&inode->i_lock);
2413                 cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2414                                          wdata->bytes);
2415         } else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2416                 return cifs_writev_requeue(wdata);
2417
2418         for (i = 0; i < wdata->nr_pages; i++) {
2419                 struct page *page = wdata->pages[i];
2420
2421                 if (wdata->result == -EAGAIN)
2422                         __set_page_dirty_nobuffers(page);
2423                 else if (wdata->result < 0)
2424                         SetPageError(page);
2425                 end_page_writeback(page);
2426                 cifs_readpage_to_fscache(inode, page);
2427                 put_page(page);
2428         }
2429         if (wdata->result != -EAGAIN)
2430                 mapping_set_error(inode->i_mapping, wdata->result);
2431         kref_put(&wdata->refcount, cifs_writedata_release);
2432 }
2433
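/*
 * Allocate a cifs_writedata together with a freshly allocated array of
 * nr_pages page pointers. A typical (hypothetical) call site pairs the
 * allocation with a kref_put of the final reference:
 *
 *	wdata = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
 *	...
 *	kref_put(&wdata->refcount, cifs_writedata_release);
 */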
2434 struct cifs_writedata *
2435 cifs_writedata_alloc(unsigned int nr_pages, work_func_t complete)
2436 {
2437         struct cifs_writedata *writedata = NULL;
2438         struct page **pages =
2439                 kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
2440         if (pages) {
2441                 writedata = cifs_writedata_direct_alloc(pages, complete);
2442                 if (!writedata)
2443                         kvfree(pages);
2444         }
2445
2446         return writedata;
2447 }
2448
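/*
 * Allocate a cifs_writedata that wraps a caller-provided page pointer
 * array; the completion routine runs from the work item.
 */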
2449 struct cifs_writedata *
2450 cifs_writedata_direct_alloc(struct page **pages, work_func_t complete)
2451 {
2452         struct cifs_writedata *wdata;
2453
2454         wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2455         if (wdata != NULL) {
2456                 wdata->pages = pages;
2457                 kref_init(&wdata->refcount);
2458                 INIT_LIST_HEAD(&wdata->list);
2459                 init_completion(&wdata->done);
2460                 INIT_WORK(&wdata->work, complete);
2461         }
2462         return wdata;
2463 }
2464
2465
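/*
 * Write the byte range [from, to) of a page cache page back to the
 * server through any available writable handle, clamping the range so
 * that the write never extends the file.
 */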
2466 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2467 {
2468         struct address_space *mapping = page->mapping;
2469         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2470         char *write_data;
2471         int rc = -EFAULT;
2472         int bytes_written = 0;
2473         struct inode *inode;
2474         struct cifsFileInfo *open_file;
2475
2476         if (!mapping || !mapping->host)
2477                 return -EFAULT;
2478
2479         inode = page->mapping->host;
2480
2481         offset += (loff_t)from;
2482         write_data = kmap(page);
2483         write_data += from;
2484
2485         if ((to > PAGE_SIZE) || (from > to)) {
2486                 kunmap(page);
2487                 return -EIO;
2488         }
2489
2490         /* racing with truncate? */
2491         if (offset > mapping->host->i_size) {
2492                 kunmap(page);
2493                 return 0; /* don't care */
2494         }
2495
2496         /* check to make sure that we are not extending the file */
2497         if (mapping->host->i_size - offset < (loff_t)to)
2498                 to = (unsigned)(mapping->host->i_size - offset);
2499
2500         rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2501                                     &open_file);
2502         if (!rc) {
2503                 bytes_written = cifs_write(open_file, open_file->pid,
2504                                            write_data, to - from, &offset);
2505                 cifsFileInfo_put(open_file);
2506                 /* Does mm or vfs already set times? */
2507                 inode->i_atime = inode->i_mtime = current_time(inode);
2508                 if ((bytes_written > 0) && (offset))
2509                         rc = 0;
2510                 else if (bytes_written < 0)
2511                         rc = bytes_written;
2512                 else
2513                         rc = -EFAULT;
2514         } else {
2515                 cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2516                 if (!is_retryable_error(rc))
2517                         rc = -EIO;
2518         }
2519
2520         kunmap(page);
2521         return rc;
2522 }
2523
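/*
 * Allocate a cifs_writedata and fill its page array with up to tofind
 * dirty pages, starting the search at *index.
 */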
2524 static struct cifs_writedata *
2525 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2526                           pgoff_t end, pgoff_t *index,
2527                           unsigned int *found_pages)
2528 {
2529         struct cifs_writedata *wdata;
2530
2531         wdata = cifs_writedata_alloc((unsigned int)tofind,
2532                                      cifs_writev_complete);
2533         if (!wdata)
2534                 return NULL;
2535
2536         *found_pages = find_get_pages_range_tag(mapping, index, end,
2537                                 PAGECACHE_TAG_DIRTY, tofind, wdata->pages);
2538         return wdata;
2539 }
2540
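/*
 * From the pages just found, lock and mark for writeback the leading run
 * of consecutive dirty pages that can actually be written, dropping the
 * references on any surplus pages. Returns the number of pages to send.
 */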
2541 static unsigned int
2542 wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
2543                     struct address_space *mapping,
2544                     struct writeback_control *wbc,
2545                     pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
2546 {
2547         unsigned int nr_pages = 0, i;
2548         struct page *page;
2549
2550         for (i = 0; i < found_pages; i++) {
2551                 page = wdata->pages[i];
2552                 /*
2553                  * At this point we hold neither the i_pages lock nor the
2554                  * page lock: the page may be truncated or invalidated
2555                  * (changing page->mapping to NULL), or even swizzled
2556                  * back from swapper_space to tmpfs file mapping
2557                  */
2558
2559                 if (nr_pages == 0)
2560                         lock_page(page);
2561                 else if (!trylock_page(page))
2562                         break;
2563
2564                 if (unlikely(page->mapping != mapping)) {
2565                         unlock_page(page);
2566                         break;
2567                 }
2568
2569                 if (!wbc->range_cyclic && page->index > end) {
2570                         *done = true;
2571                         unlock_page(page);
2572                         break;
2573                 }
2574
2575                 if (*next && (page->index != *next)) {
2576                         /* Not next consecutive page */
2577                         unlock_page(page);
2578                         break;
2579                 }
2580
2581                 if (wbc->sync_mode != WB_SYNC_NONE)
2582                         wait_on_page_writeback(page);
2583
2584                 if (PageWriteback(page) ||
2585                                 !clear_page_dirty_for_io(page)) {
2586                         unlock_page(page);
2587                         break;
2588                 }
2589
2590                 /*
2591                  * This actually clears the dirty bit in the radix tree.
2592                  * See cifs_writepage() for more commentary.
2593                  */
2594                 set_page_writeback(page);
2595                 if (page_offset(page) >= i_size_read(mapping->host)) {
2596                         *done = true;
2597                         unlock_page(page);
2598                         end_page_writeback(page);
2599                         break;
2600                 }
2601
2602                 wdata->pages[i] = page;
2603                 *next = page->index + 1;
2604                 ++nr_pages;
2605         }
2606
2607         /* reset index to refind any pages skipped */
2608         if (nr_pages == 0)
2609                 *index = wdata->pages[0]->index + 1;
2610
2611         /* put any pages we aren't going to use */
2612         for (i = nr_pages; i < found_pages; i++) {
2613                 put_page(wdata->pages[i]);
2614                 wdata->pages[i] = NULL;
2615         }
2616
2617         return nr_pages;
2618 }
2619
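/*
 * Fill in the remaining cifs_writedata fields for a run of nr_pages
 * pages and kick off the asynchronous write.
 */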
2620 static int
2621 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2622                  struct address_space *mapping, struct writeback_control *wbc)
2623 {
2624         int rc;
2625
2626         wdata->sync_mode = wbc->sync_mode;
2627         wdata->nr_pages = nr_pages;
2628         wdata->offset = page_offset(wdata->pages[0]);
2629         wdata->pagesz = PAGE_SIZE;
2630         wdata->tailsz = min(i_size_read(mapping->host) -
2631                         page_offset(wdata->pages[nr_pages - 1]),
2632                         (loff_t)PAGE_SIZE);
2633         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2634         wdata->pid = wdata->cfile->pid;
2635
2636         rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2637         if (rc)
2638                 return rc;
2639
2640         if (wdata->cfile->invalidHandle)
2641                 rc = -EAGAIN;
2642         else
2643                 rc = wdata->server->ops->async_writev(wdata,
2644                                                       cifs_writedata_release);
2645
2646         return rc;
2647 }
2648
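/*
 * Our ->writepages: repeatedly gather runs of contiguous dirty pages, up
 * to the negotiated wsize (and the credits available on the chosen
 * channel), and send them as asynchronous writes; WB_SYNC_ALL retries
 * from the saved index on -EAGAIN.
 */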
2649 static int cifs_writepages(struct address_space *mapping,
2650                            struct writeback_control *wbc)
2651 {
2652         struct inode *inode = mapping->host;
2653         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2654         struct TCP_Server_Info *server;
2655         bool done = false, scanned = false, range_whole = false;
2656         pgoff_t end, index;
2657         struct cifs_writedata *wdata;
2658         struct cifsFileInfo *cfile = NULL;
2659         int rc = 0;
2660         int saved_rc = 0;
2661         unsigned int xid;
2662
2663         /*
2664          * If wsize is smaller than the page cache size, default to writing
2665          * one page at a time via cifs_writepage
2666          */
2667         if (cifs_sb->ctx->wsize < PAGE_SIZE)
2668                 return generic_writepages(mapping, wbc);
2669
2670         xid = get_xid();
2671         if (wbc->range_cyclic) {
2672                 index = mapping->writeback_index; /* Start from prev offset */
2673                 end = -1;
2674         } else {
2675                 index = wbc->range_start >> PAGE_SHIFT;
2676                 end = wbc->range_end >> PAGE_SHIFT;
2677                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2678                         range_whole = true;
2679                 scanned = true;
2680         }
2681         server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2682
2683 retry:
2684         while (!done && index <= end) {
2685                 unsigned int i, nr_pages, found_pages, wsize;
2686                 pgoff_t next = 0, tofind, saved_index = index;
2687                 struct cifs_credits credits_on_stack;
2688                 struct cifs_credits *credits = &credits_on_stack;
2689                 int get_file_rc = 0;
2690
2691                 if (cfile)
2692                         cifsFileInfo_put(cfile);
2693
2694                 rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2695
2696                 /* in case of an error store it to return later */
2697                 if (rc)
2698                         get_file_rc = rc;
2699
2700                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2701                                                    &wsize, credits);
2702                 if (rc != 0) {
2703                         done = true;
2704                         break;
2705                 }
2706
2707                 tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
2708
2709                 wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
2710                                                   &found_pages);
2711                 if (!wdata) {
2712                         rc = -ENOMEM;
2713                         done = true;
2714                         add_credits_and_wake_if(server, credits, 0);
2715                         break;
2716                 }
2717
2718                 if (found_pages == 0) {
2719                         kref_put(&wdata->refcount, cifs_writedata_release);
2720                         add_credits_and_wake_if(server, credits, 0);
2721                         break;
2722                 }
2723
2724                 nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
2725                                                end, &index, &next, &done);
2726
2727                 /* nothing to write? */
2728                 if (nr_pages == 0) {
2729                         kref_put(&wdata->refcount, cifs_writedata_release);
2730                         add_credits_and_wake_if(server, credits, 0);
2731                         continue;
2732                 }
2733
2734                 wdata->credits = credits_on_stack;
2735                 wdata->cfile = cfile;
2736                 wdata->server = server;
2737                 cfile = NULL;
2738
2739                 if (!wdata->cfile) {
2740                         cifs_dbg(VFS, "No writable handle in writepages rc=%d\n",
2741                                  get_file_rc);
2742                         if (is_retryable_error(get_file_rc))
2743                                 rc = get_file_rc;
2744                         else
2745                                 rc = -EBADF;
2746                 } else
2747                         rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);
2748
2749                 for (i = 0; i < nr_pages; ++i)
2750                         unlock_page(wdata->pages[i]);
2751
2752                 /* send failure -- clean up the mess */
2753                 if (rc != 0) {
2754                         add_credits_and_wake_if(server, &wdata->credits, 0);
2755                         for (i = 0; i < nr_pages; ++i) {
2756                                 if (is_retryable_error(rc))
2757                                         redirty_page_for_writepage(wbc,
2758                                                            wdata->pages[i]);
2759                                 else
2760                                         SetPageError(wdata->pages[i]);
2761                                 end_page_writeback(wdata->pages[i]);
2762                                 put_page(wdata->pages[i]);
2763                         }
2764                         if (!is_retryable_error(rc))
2765                                 mapping_set_error(mapping, rc);
2766                 }
2767                 kref_put(&wdata->refcount, cifs_writedata_release);
2768
2769                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
2770                         index = saved_index;
2771                         continue;
2772                 }
2773
2774                 /* Return immediately if we received a signal during writing */
2775                 if (is_interrupt_error(rc)) {
2776                         done = true;
2777                         break;
2778                 }
2779
2780                 if (rc != 0 && saved_rc == 0)
2781                         saved_rc = rc;
2782
2783                 wbc->nr_to_write -= nr_pages;
2784                 if (wbc->nr_to_write <= 0)
2785                         done = true;
2786
2787                 index = next;
2788         }
2789
2790         if (!scanned && !done) {
2791                 /*
2792                  * We hit the last page and there is more work to be done: wrap
2793                  * back to the start of the file
2794                  */
2795                 scanned = true;
2796                 index = 0;
2797                 goto retry;
2798         }
2799
2800         if (saved_rc != 0)
2801                 rc = saved_rc;
2802
2803         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2804                 mapping->writeback_index = index;
2805
2806         if (cfile)
2807                 cifsFileInfo_put(cfile);
2808         free_xid(xid);
2809         /* Indicate that ctime and mtime must be updated as close is deferred */
2810         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2811         return rc;
2812 }
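
/*
 * Illustrative wrap-around for the retry logic above: a range_cyclic call
 * that starts at writeback_index = 100 first scans from page 100 towards
 * the end of the file; if that pass completes with work left to do
 * (!scanned && !done), index is reset to 0 and the scan resumes from the
 * start of the file before writeback_index is saved for the next cycle.
 */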
2813
2814 static int
2815 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2816 {
2817         int rc;
2818         unsigned int xid;
2819
2820         xid = get_xid();
2821         /* BB add check for wbc flags */
2822         get_page(page);
2823         if (!PageUptodate(page))
2824                 cifs_dbg(FYI, "ppw - page not up to date\n");
2825
2826         /*
2827          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2828          *
2829          * A writepage() implementation always needs to do either this,
2830          * or re-dirty the page with "redirty_page_for_writepage()" in
2831          * the case of a failure.
2832          *
2833          * Just unlocking the page would leave the radix tree tag bits
2834          * out of sync with the true state of the page.
2835          */
2836         set_page_writeback(page);
2837 retry_write:
2838         rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
2839         if (is_retryable_error(rc)) {
2840                 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
2841                         goto retry_write;
2842                 redirty_page_for_writepage(wbc, page);
2843         } else if (rc != 0) {
2844                 SetPageError(page);
2845                 mapping_set_error(page->mapping, rc);
2846         } else {
2847                 SetPageUptodate(page);
2848         }
2849         end_page_writeback(page);
2850         put_page(page);
2851         free_xid(xid);
2852         return rc;
2853 }
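
/*
 * A minimal sketch of the writepage contract described in the comment
 * above (illustrative only; write_back_the_data() is a stand-in for the
 * filesystem-specific I/O, and a real implementation must also handle
 * partial pages and retryable errors):
 *
 *	set_page_writeback(page);		// tag while the page is locked
 *	rc = write_back_the_data(page);		// issue the I/O
 *	if (rc)
 *		redirty_page_for_writepage(wbc, page);	// retry on a later pass
 *	end_page_writeback(page);		// clear the tag, wake waiters
 */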
2854
2855 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2856 {
2857         int rc = cifs_writepage_locked(page, wbc);
2858         unlock_page(page);
2859         return rc;
2860 }
2861
2862 static int cifs_write_end(struct file *file, struct address_space *mapping,
2863                         loff_t pos, unsigned len, unsigned copied,
2864                         struct page *page, void *fsdata)
2865 {
2866         int rc;
2867         struct inode *inode = mapping->host;
2868         struct cifsFileInfo *cfile = file->private_data;
2869         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2870         __u32 pid;
2871
2872         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2873                 pid = cfile->pid;
2874         else
2875                 pid = current->tgid;
2876
2877         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2878                  page, pos, copied);
2879
2880         if (PageChecked(page)) {
2881                 if (copied == len)
2882                         SetPageUptodate(page);
2883                 ClearPageChecked(page);
2884         } else if (!PageUptodate(page) && copied == PAGE_SIZE)
2885                 SetPageUptodate(page);
2886
2887         if (!PageUptodate(page)) {
2888                 char *page_data;
2889                 unsigned offset = pos & (PAGE_SIZE - 1);
2890                 unsigned int xid;
2891
2892                 xid = get_xid();
2893                 /* this is probably better than calling
2894                    cifs_partialpagewrite directly, since here the file
2895                    handle is already known and can be reused */
2896                 /* BB check if anything else missing out of ppw
2897                    such as updating last write time */
2898                 page_data = kmap(page);
2899                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2900                 /* if (rc < 0) should we set writebehind rc? */
2901                 kunmap(page);
2902
2903                 free_xid(xid);
2904         } else {
2905                 rc = copied;
2906                 pos += copied;
2907                 set_page_dirty(page);
2908         }
2909
2910         if (rc > 0) {
2911                 spin_lock(&inode->i_lock);
2912                 if (pos > inode->i_size) {
2913                         i_size_write(inode, pos);
2914                         inode->i_blocks = (512 - 1 + pos) >> 9;
2915                 }
2916                 spin_unlock(&inode->i_lock);
2917         }
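        /*
         * Illustrative arithmetic for the i_blocks update above: i_blocks
         * counts 512-byte units rounded up, so pos = 1000 yields
         * (512 - 1 + 1000) >> 9 = 1511 >> 9 = 2 blocks.
         */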
2918
2919         unlock_page(page);
2920         put_page(page);
2921         /* Indicate that ctime and mtime must be updated as close is deferred */
2922         set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2923
2924         return rc;
2925 }
2926
2927 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2928                       int datasync)
2929 {
2930         unsigned int xid;
2931         int rc = 0;
2932         struct cifs_tcon *tcon;
2933         struct TCP_Server_Info *server;
2934         struct cifsFileInfo *smbfile = file->private_data;
2935         struct inode *inode = file_inode(file);
2936         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2937
2938         rc = file_write_and_wait_range(file, start, end);
2939         if (rc) {
2940                 trace_cifs_fsync_err(inode->i_ino, rc);
2941                 return rc;
2942         }
2943
2944         xid = get_xid();
2945
2946         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2947                  file, datasync);
2948
2949         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2950                 rc = cifs_zap_mapping(inode);
2951                 if (rc) {
2952                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2953                         rc = 0; /* don't care about it in fsync */
2954                 }
2955         }
2956
2957         tcon = tlink_tcon(smbfile->tlink);
2958         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2959                 server = tcon->ses->server;
2960                 if (server->ops->flush == NULL) {
2961                         rc = -ENOSYS;
2962                         goto strict_fsync_exit;
2963                 }
2964
2965                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
2966                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
2967                         if (smbfile) {
2968                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
2969                                 cifsFileInfo_put(smbfile);
2970                         } else
2971                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
2972                 } else
2973                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2974         }
2975
2976 strict_fsync_exit:
2977         free_xid(xid);
2978         return rc;
2979 }
2980
2981 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2982 {
2983         unsigned int xid;
2984         int rc = 0;
2985         struct cifs_tcon *tcon;
2986         struct TCP_Server_Info *server;
2987         struct cifsFileInfo *smbfile = file->private_data;
2988         struct inode *inode = file_inode(file);
2989         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2990
2991         rc = file_write_and_wait_range(file, start, end);
2992         if (rc) {
2993                 trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
2994                 return rc;
2995         }
2996
2997         xid = get_xid();
2998
2999         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3000                  file, datasync);
3001
3002         tcon = tlink_tcon(smbfile->tlink);
3003         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3004                 server = tcon->ses->server;
3005                 if (server->ops->flush == NULL) {
3006                         rc = -ENOSYS;
3007                         goto fsync_exit;
3008                 }
3009
3010                 if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3011                         smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3012                         if (smbfile) {
3013                                 rc = server->ops->flush(xid, tcon, &smbfile->fid);
3014                                 cifsFileInfo_put(smbfile);
3015                         } else
3016                                 cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3017                 } else
3018                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
3019         }
3020
3021 fsync_exit:
3022         free_xid(xid);
3023         return rc;
3024 }
3025
3026 /*
3027  * As the file closes, flush all cached write data for this inode and
3028  * check for write-behind errors.
3029  */
3030 int cifs_flush(struct file *file, fl_owner_t id)
3031 {
3032         struct inode *inode = file_inode(file);
3033         int rc = 0;
3034
3035         if (file->f_mode & FMODE_WRITE)
3036                 rc = filemap_write_and_wait(inode->i_mapping);
3037
3038         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3039         if (rc) {
3040                 /* get more nuanced writeback errors */
3041                 rc = filemap_check_wb_err(file->f_mapping, 0);
3042                 trace_cifs_flush_err(inode->i_ino, rc);
3043         }
3044         return rc;
3045 }
3046
3047 static int
3048 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
3049 {
3050         int rc = 0;
3051         unsigned long i;
3052
3053         for (i = 0; i < num_pages; i++) {
3054                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3055                 if (!pages[i]) {
3056                         /*
3057                          * save number of pages we have already allocated and
3058                          * return with ENOMEM error
3059                          */
3060                         num_pages = i;
3061                         rc = -ENOMEM;
3062                         break;
3063                 }
3064         }
3065
3066         if (rc) {
3067                 for (i = 0; i < num_pages; i++)
3068                         put_page(pages[i]);
3069         }
3070         return rc;
3071 }
3072
3073 static inline
3074 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
3075 {
3076         size_t num_pages;
3077         size_t clen;
3078
3079         clen = min_t(const size_t, len, wsize);
3080         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
3081
3082         if (cur_len)
3083                 *cur_len = clen;
3084
3085         return num_pages;
3086 }
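
/*
 * Illustrative use of get_numpages() (assuming PAGE_SIZE = 4096): for
 * wsize = 65536 and len = 200000, clen = min(200000, 65536) = 65536 and
 * num_pages = DIV_ROUND_UP(65536, 4096) = 16; the caller then keeps
 * issuing 16-page writes until len is exhausted.
 */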
3087
3088 static void
3089 cifs_uncached_writedata_release(struct kref *refcount)
3090 {
3091         int i;
3092         struct cifs_writedata *wdata = container_of(refcount,
3093                                         struct cifs_writedata, refcount);
3094
3095         kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3096         for (i = 0; i < wdata->nr_pages; i++)
3097                 put_page(wdata->pages[i]);
3098         cifs_writedata_release(refcount);
3099 }
3100
3101 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3102
3103 static void
3104 cifs_uncached_writev_complete(struct work_struct *work)
3105 {
3106         struct cifs_writedata *wdata = container_of(work,
3107                                         struct cifs_writedata, work);
3108         struct inode *inode = d_inode(wdata->cfile->dentry);
3109         struct cifsInodeInfo *cifsi = CIFS_I(inode);
3110
3111         spin_lock(&inode->i_lock);
3112         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3113         if (cifsi->server_eof > inode->i_size)
3114                 i_size_write(inode, cifsi->server_eof);
3115         spin_unlock(&inode->i_lock);
3116
3117         complete(&wdata->done);
3118         collect_uncached_write_data(wdata->ctx);
3119         /* the call below may free the last reference to the aio ctx */
3120         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3121 }
3122
3123 static int
3124 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
3125                       size_t *len, unsigned long *num_pages)
3126 {
3127         size_t save_len, copied, bytes, cur_len = *len;
3128         unsigned long i, nr_pages = *num_pages;
3129
3130         save_len = cur_len;
3131         for (i = 0; i < nr_pages; i++) {
3132                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
3133                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
3134                 cur_len -= copied;
3135                 /*
3136                  * If we didn't copy as much as we expected, then that
3137                  * may mean we trod into an unmapped area. Stop copying
3138                  * at that point. On the next pass through the big
3139                  * loop, we'll likely end up getting a zero-length
3140                  * write and bailing out of it.
3141                  */
3142                 if (copied < bytes)
3143                         break;
3144         }
3145         cur_len = save_len - cur_len;
3146         *len = cur_len;
3147
3148         /*
3149          * If we have no data to send, then that probably means that
3150          * the copy above failed altogether. That's most likely because
3151          * the address in the iovec was bogus. Return -EFAULT and let
3152          * the caller free anything we allocated and bail out.
3153          */
3154         if (!cur_len)
3155                 return -EFAULT;
3156
3157         /*
3158          * i + 1 now represents the number of pages we actually used in
3159          * the copy phase above.
3160          */
3161         *num_pages = i + 1;
3162         return 0;
3163 }
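
/*
 * Illustrative short-copy case for wdata_fill_from_iovec() (assuming
 * PAGE_SIZE = 4096): with nr_pages = 4 and cur_len = 10000 but only 5000
 * bytes of the iovec mapped, page 0 copies 4096 bytes and page 1 copies
 * 904 of the 4096 requested, so the loop stops at i = 1 and the function
 * returns *len = 5000 and *num_pages = 2; the caller releases the two
 * untouched pages.
 */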
3164
3165 static int
3166 cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3167         struct cifs_aio_ctx *ctx)
3168 {
3169         unsigned int wsize;
3170         struct cifs_credits credits;
3171         int rc;
3172         struct TCP_Server_Info *server = wdata->server;
3173
3174         do {
3175                 if (wdata->cfile->invalidHandle) {
3176                         rc = cifs_reopen_file(wdata->cfile, false);
3177                         if (rc == -EAGAIN)
3178                                 continue;
3179                         else if (rc)
3180                                 break;
3181                 }
3182
3184                 /*
3185                  * Wait for credits to resend this wdata.
3186                  * Note: we attempt to resend the whole wdata rather than
3187                  * in segments
3188                  */
3189                 do {
3190                         rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3191                                                 &wsize, &credits);
3192                         if (rc)
3193                                 goto fail;
3194
3195                         if (wsize < wdata->bytes) {
3196                                 add_credits_and_wake_if(server, &credits, 0);
3197                                 msleep(1000);
3198                         }
3199                 } while (wsize < wdata->bytes);
3200                 wdata->credits = credits;
3201
3202                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3203
3204                 if (!rc) {
3205                         if (wdata->cfile->invalidHandle)
3206                                 rc = -EAGAIN;
3207                         else {
3208 #ifdef CONFIG_CIFS_SMB_DIRECT
3209                                 if (wdata->mr) {
3210                                         wdata->mr->need_invalidate = true;
3211                                         smbd_deregister_mr(wdata->mr);
3212                                         wdata->mr = NULL;
3213                                 }
3214 #endif
3215                                 rc = server->ops->async_writev(wdata,
3216                                         cifs_uncached_writedata_release);
3217                         }
3218                 }
3219
3220                 /* If the write was successfully sent, we are done */
3221                 if (!rc) {
3222                         list_add_tail(&wdata->list, wdata_list);
3223                         return 0;
3224                 }
3225
3226                 /* Roll back credits and retry if needed */
3227                 add_credits_and_wake_if(server, &wdata->credits, 0);
3228         } while (rc == -EAGAIN);
3229
3230 fail:
3231         kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3232         return rc;
3233 }
3234
3235 static int
3236 cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
3237                      struct cifsFileInfo *open_file,
3238                      struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3239                      struct cifs_aio_ctx *ctx)
3240 {
3241         int rc = 0;
3242         size_t cur_len;
3243         unsigned long nr_pages, num_pages, i;
3244         struct cifs_writedata *wdata;
3245         struct iov_iter saved_from = *from;
3246         loff_t saved_offset = offset;
3247         pid_t pid;
3248         struct TCP_Server_Info *server;
3249         struct page **pagevec;
3250         size_t start;
3251         unsigned int xid;
3252
3253         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3254                 pid = open_file->pid;
3255         else
3256                 pid = current->tgid;
3257
3258         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3259         xid = get_xid();
3260
3261         do {
3262                 unsigned int wsize;
3263                 struct cifs_credits credits_on_stack;
3264                 struct cifs_credits *credits = &credits_on_stack;
3265
3266                 if (open_file->invalidHandle) {
3267                         rc = cifs_reopen_file(open_file, false);
3268                         if (rc == -EAGAIN)
3269                                 continue;
3270                         else if (rc)
3271                                 break;
3272                 }
3273
3274                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3275                                                    &wsize, credits);
3276                 if (rc)
3277                         break;
3278
3279                 cur_len = min_t(const size_t, len, wsize);
3280
3281                 if (ctx->direct_io) {
3282                         ssize_t result;
3283
3284                         result = iov_iter_get_pages_alloc2(
3285                                 from, &pagevec, cur_len, &start);
3286                         if (result < 0) {
3287                                 cifs_dbg(VFS,
3288                                          "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
3289                                          result, iov_iter_type(from),
3290                                          from->iov_offset, from->count);
3291                                 dump_stack();
3292
3293                                 rc = result;
3294                                 add_credits_and_wake_if(server, credits, 0);
3295                                 break;
3296                         }
3297                         cur_len = (size_t)result;
3298
3299                         nr_pages =
3300                                 (cur_len + start + PAGE_SIZE - 1) / PAGE_SIZE;
3301
3302                         wdata = cifs_writedata_direct_alloc(pagevec,
3303                                              cifs_uncached_writev_complete);
3304                         if (!wdata) {
3305                                 rc = -ENOMEM;
3306                                 for (i = 0; i < nr_pages; i++)
3307                                         put_page(pagevec[i]);
3308                                 kvfree(pagevec);
3309                                 add_credits_and_wake_if(server, credits, 0);
3310                                 break;
3311                         }
3312
3314                         wdata->page_offset = start;
3315                         wdata->tailsz =
3316                                 nr_pages > 1 ?
3317                                         cur_len - (PAGE_SIZE - start) -
3318                                         (nr_pages - 2) * PAGE_SIZE :
3319                                         cur_len;
3320                 } else {
3321                         nr_pages = get_numpages(wsize, len, &cur_len);
3322                         wdata = cifs_writedata_alloc(nr_pages,
3323                                              cifs_uncached_writev_complete);
3324                         if (!wdata) {
3325                                 rc = -ENOMEM;
3326                                 add_credits_and_wake_if(server, credits, 0);
3327                                 break;
3328                         }
3329
3330                         rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
3331                         if (rc) {
3332                                 kvfree(wdata->pages);
3333                                 kfree(wdata);
3334                                 add_credits_and_wake_if(server, credits, 0);
3335                                 break;
3336                         }
3337
3338                         num_pages = nr_pages;
3339                         rc = wdata_fill_from_iovec(
3340                                 wdata, from, &cur_len, &num_pages);
3341                         if (rc) {
3342                                 for (i = 0; i < nr_pages; i++)
3343                                         put_page(wdata->pages[i]);
3344                                 kvfree(wdata->pages);
3345                                 kfree(wdata);
3346                                 add_credits_and_wake_if(server, credits, 0);
3347                                 break;
3348                         }
3349
3350                         /*
3351                          * Bring nr_pages down to the number of pages we
3352                          * actually used, and free any pages that we didn't use.
3353                          */
3354                         for ( ; nr_pages > num_pages; nr_pages--)
3355                                 put_page(wdata->pages[nr_pages - 1]);
3356
3357                         wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
3358                 }
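                /*
                 * Illustrative tailsz arithmetic for the direct-I/O branch
                 * above (assuming PAGE_SIZE = 4096): a 5000-byte request
                 * whose user buffer begins at page offset start = 100 pins
                 * nr_pages = (5000 + 100 + 4095) / 4096 = 2 pages; the first
                 * page holds 4096 - 100 = 3996 bytes, leaving
                 * tailsz = 5000 - 3996 - 0 * 4096 = 1004 bytes in the last.
                 */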
3359
3360                 wdata->sync_mode = WB_SYNC_ALL;
3361                 wdata->nr_pages = nr_pages;
3362                 wdata->offset = (__u64)offset;
3363                 wdata->cfile = cifsFileInfo_get(open_file);
3364                 wdata->server = server;
3365                 wdata->pid = pid;
3366                 wdata->bytes = cur_len;
3367                 wdata->pagesz = PAGE_SIZE;
3368                 wdata->credits = credits_on_stack;
3369                 wdata->ctx = ctx;
3370                 kref_get(&ctx->refcount);
3371
3372                 rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3373
3374                 if (!rc) {
3375                         if (wdata->cfile->invalidHandle)
3376                                 rc = -EAGAIN;
3377                         else
3378                                 rc = server->ops->async_writev(wdata,
3379                                         cifs_uncached_writedata_release);
3380                 }
3381
3382                 if (rc) {
3383                         add_credits_and_wake_if(server, &wdata->credits, 0);
3384                         kref_put(&wdata->refcount,
3385                                  cifs_uncached_writedata_release);
3386                         if (rc == -EAGAIN) {
3387                                 *from = saved_from;
3388                                 iov_iter_advance(from, offset - saved_offset);
3389                                 continue;
3390                         }
3391                         break;
3392                 }
3393
3394                 list_add_tail(&wdata->list, wdata_list);
3395                 offset += cur_len;
3396                 len -= cur_len;
3397         } while (len > 0);
3398
3399         free_xid(xid);
3400         return rc;
3401 }
3402
3403 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3404 {
3405         struct cifs_writedata *wdata, *tmp;
3406         struct cifs_tcon *tcon;
3407         struct cifs_sb_info *cifs_sb;
3408         struct dentry *dentry = ctx->cfile->dentry;
3409         ssize_t rc;
3410
3411         tcon = tlink_tcon(ctx->cfile->tlink);
3412         cifs_sb = CIFS_SB(dentry->d_sb);
3413
3414         mutex_lock(&ctx->aio_mutex);
3415
3416         if (list_empty(&ctx->list)) {
3417                 mutex_unlock(&ctx->aio_mutex);
3418                 return;
3419         }
3420
3421         rc = ctx->rc;
3422         /*
3423          * Wait for and collect replies for any successful sends in order of
3424          * increasing offset. Once an error is hit, then return without waiting
3425          * for any more replies.
3426          */
3427 restart_loop:
3428         list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3429                 if (!rc) {
3430                         if (!try_wait_for_completion(&wdata->done)) {
3431                                 mutex_unlock(&ctx->aio_mutex);
3432                                 return;
3433                         }
3434
3435                         if (wdata->result)
3436                                 rc = wdata->result;
3437                         else
3438                                 ctx->total_len += wdata->bytes;
3439
3440                         /* resend call if it's a retryable error */
3441                         if (rc == -EAGAIN) {
3442                                 struct list_head tmp_list;
3443                                 struct iov_iter tmp_from = ctx->iter;
3444
3445                                 INIT_LIST_HEAD(&tmp_list);
3446                                 list_del_init(&wdata->list);
3447
3448                                 if (ctx->direct_io)
3449                                         rc = cifs_resend_wdata(
3450                                                 wdata, &tmp_list, ctx);
3451                                 else {
3452                                         iov_iter_advance(&tmp_from,
3453                                                  wdata->offset - ctx->pos);
3454
3455                                         rc = cifs_write_from_iter(wdata->offset,
3456                                                 wdata->bytes, &tmp_from,
3457                                                 ctx->cfile, cifs_sb, &tmp_list,
3458                                                 ctx);
3459
3460                                         kref_put(&wdata->refcount,
3461                                                 cifs_uncached_writedata_release);
3462                                 }
3463
3464                                 list_splice(&tmp_list, &ctx->list);
3465                                 goto restart_loop;
3466                         }
3467                 }
3468                 list_del_init(&wdata->list);
3469                 kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3470         }
3471
3472         cifs_stats_bytes_written(tcon, ctx->total_len);
3473         set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3474
3475         ctx->rc = (rc == 0) ? ctx->total_len : rc;
3476
3477         mutex_unlock(&ctx->aio_mutex);
3478
3479         if (ctx->iocb && ctx->iocb->ki_complete)
3480                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3481         else
3482                 complete(&ctx->done);
3483 }
3484
3485 static ssize_t __cifs_writev(
3486         struct kiocb *iocb, struct iov_iter *from, bool direct)
3487 {
3488         struct file *file = iocb->ki_filp;
3489         ssize_t total_written = 0;
3490         struct cifsFileInfo *cfile;
3491         struct cifs_tcon *tcon;
3492         struct cifs_sb_info *cifs_sb;
3493         struct cifs_aio_ctx *ctx;
3494         struct iov_iter saved_from = *from;
3495         size_t len = iov_iter_count(from);
3496         int rc;
3497
3498         /*
3499          * iov_iter_get_pages_alloc2 doesn't work with ITER_KVEC.
3500          * In this case, fall back to the non-direct write function.
3501          * This could be improved by getting pages directly in ITER_KVEC.
3502          */
3503         if (direct && iov_iter_is_kvec(from)) {
3504                 cifs_dbg(FYI, "use non-direct cifs_writev for kvec I/O\n");
3505                 direct = false;
3506         }
3507
3508         rc = generic_write_checks(iocb, from);
3509         if (rc <= 0)
3510                 return rc;
3511
3512         cifs_sb = CIFS_FILE_SB(file);
3513         cfile = file->private_data;
3514         tcon = tlink_tcon(cfile->tlink);
3515
3516         if (!tcon->ses->server->ops->async_writev)
3517                 return -ENOSYS;
3518
3519         ctx = cifs_aio_ctx_alloc();
3520         if (!ctx)
3521                 return -ENOMEM;
3522
3523         ctx->cfile = cifsFileInfo_get(cfile);
3524
3525         if (!is_sync_kiocb(iocb))
3526                 ctx->iocb = iocb;
3527
3528         ctx->pos = iocb->ki_pos;
3529
3530         if (direct) {
3531                 ctx->direct_io = true;
3532                 ctx->iter = *from;
3533                 ctx->len = len;
3534         } else {
3535                 rc = setup_aio_ctx_iter(ctx, from, ITER_SOURCE);
3536                 if (rc) {
3537                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3538                         return rc;
3539                 }
3540         }
3541
3542         /* grab a lock here because write response handlers can access ctx */
3543         mutex_lock(&ctx->aio_mutex);
3544
3545         rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
3546                                   cfile, cifs_sb, &ctx->list, ctx);
3547
3548         /*
3549          * If at least one write was successfully sent, then discard any rc
3550          * value from the later writes. If the remaining writes succeed, then
3551          * we'll end up returning whatever was written. If they fail, then
3552          * we'll get a new rc value from that.
3553          */
3554         if (!list_empty(&ctx->list))
3555                 rc = 0;
3556
3557         mutex_unlock(&ctx->aio_mutex);
3558
3559         if (rc) {
3560                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3561                 return rc;
3562         }
3563
3564         if (!is_sync_kiocb(iocb)) {
3565                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
3566                 return -EIOCBQUEUED;
3567         }
3568
3569         rc = wait_for_completion_killable(&ctx->done);
3570         if (rc) {
3571                 mutex_lock(&ctx->aio_mutex);
3572                 ctx->rc = rc = -EINTR;
3573                 total_written = ctx->total_len;
3574                 mutex_unlock(&ctx->aio_mutex);
3575         } else {
3576                 rc = ctx->rc;
3577                 total_written = ctx->total_len;
3578         }
3579
3580         kref_put(&ctx->refcount, cifs_aio_ctx_release);
3581
3582         if (unlikely(!total_written))
3583                 return rc;
3584
3585         iocb->ki_pos += total_written;
3586         return total_written;
3587 }
3588
3589 ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3590 {
3591         struct file *file = iocb->ki_filp;
3592
3593         cifs_revalidate_mapping(file->f_inode);
3594         return __cifs_writev(iocb, from, true);
3595 }
3596
3597 ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3598 {
3599         return __cifs_writev(iocb, from, false);
3600 }
3601
3602 static ssize_t
3603 cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3604 {
3605         struct file *file = iocb->ki_filp;
3606         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3607         struct inode *inode = file->f_mapping->host;
3608         struct cifsInodeInfo *cinode = CIFS_I(inode);
3609         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3610         ssize_t rc;
3611
3612         inode_lock(inode);
3613         /*
3614          * We need to hold the semaphore to be sure nobody modifies the lock
3615          * list with a brlock that prevents writing.
3616          */
3617         down_read(&cinode->lock_sem);
3618
3619         rc = generic_write_checks(iocb, from);
3620         if (rc <= 0)
3621                 goto out;
3622
3623         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3624                                      server->vals->exclusive_lock_type, 0,
3625                                      NULL, CIFS_WRITE_OP))
3626                 rc = __generic_file_write_iter(iocb, from);
3627         else
3628                 rc = -EACCES;
3629 out:
3630         up_read(&cinode->lock_sem);
3631         inode_unlock(inode);
3632
3633         if (rc > 0)
3634                 rc = generic_write_sync(iocb, rc);
3635         return rc;
3636 }
3637
3638 ssize_t
3639 cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3640 {
3641         struct inode *inode = file_inode(iocb->ki_filp);
3642         struct cifsInodeInfo *cinode = CIFS_I(inode);
3643         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3644         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3645                                                 iocb->ki_filp->private_data;
3646         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3647         ssize_t written;
3648
3649         written = cifs_get_writer(cinode);
3650         if (written)
3651                 return written;
3652
3653         if (CIFS_CACHE_WRITE(cinode)) {
3654                 if (cap_unix(tcon->ses) &&
3655                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3656                   && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3657                         written = generic_file_write_iter(iocb, from);
3658                         goto out;
3659                 }
3660                 written = cifs_writev(iocb, from);
3661                 goto out;
3662         }
3663         /*
3664          * For non-oplocked files in strict cache mode we need to write the data
3665          * to the server exactly from the pos to pos+len-1 rather than flush all
3666          * affected pages because it may cause an error with mandatory locks on
3667          * these pages but not on the region from pos to pos+len-1.
3668          */
3669         written = cifs_user_writev(iocb, from);
3670         if (CIFS_CACHE_READ(cinode)) {
3671                 /*
3672                  * We have read level caching and we have just sent a write
3673                  * request to the server thus making data in the cache stale.
3674                  * Zap the cache and set oplock/lease level to NONE to avoid
3675                  * reading stale data from the cache. All subsequent read
3676                  * operations will read new data from the server.
3677                  */
3678                 cifs_zap_mapping(inode);
3679                 cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3680                          inode);
3681                 cinode->oplock = 0;
3682         }
3683 out:
3684         cifs_put_writer(cinode);
3685         return written;
3686 }
3687
3688 static struct cifs_readdata *
3689 cifs_readdata_direct_alloc(struct page **pages, work_func_t complete)
3690 {
3691         struct cifs_readdata *rdata;
3692
3693         rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3694         if (rdata != NULL) {
3695                 rdata->pages = pages;
3696                 kref_init(&rdata->refcount);
3697                 INIT_LIST_HEAD(&rdata->list);
3698                 init_completion(&rdata->done);
3699                 INIT_WORK(&rdata->work, complete);
3700         }
3701
3702         return rdata;
3703 }
3704
3705 static struct cifs_readdata *
3706 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
3707 {
3708         struct page **pages =
3709                 kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
3710         struct cifs_readdata *ret = NULL;
3711
3712         if (pages) {
3713                 ret = cifs_readdata_direct_alloc(pages, complete);
3714                 if (!ret)
3715                         kfree(pages);
3716         }
3717
3718         return ret;
3719 }
3720
3721 void
3722 cifs_readdata_release(struct kref *refcount)
3723 {
3724         struct cifs_readdata *rdata = container_of(refcount,
3725                                         struct cifs_readdata, refcount);
3726 #ifdef CONFIG_CIFS_SMB_DIRECT
3727         if (rdata->mr) {
3728                 smbd_deregister_mr(rdata->mr);
3729                 rdata->mr = NULL;
3730         }
3731 #endif
3732         if (rdata->cfile)
3733                 cifsFileInfo_put(rdata->cfile);
3734
3735         kvfree(rdata->pages);
3736         kfree(rdata);
3737 }
3738
3739 static int
3740 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
3741 {
3742         int rc = 0;
3743         struct page *page;
3744         unsigned int i;
3745
3746         for (i = 0; i < nr_pages; i++) {
3747                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3748                 if (!page) {
3749                         rc = -ENOMEM;
3750                         break;
3751                 }
3752                 rdata->pages[i] = page;
3753         }
3754
3755         if (rc) {
3756                 unsigned int nr_page_failed = i;
3757
3758                 for (i = 0; i < nr_page_failed; i++) {
3759                         put_page(rdata->pages[i]);
3760                         rdata->pages[i] = NULL;
3761                 }
3762         }
3763         return rc;
3764 }
3765
3766 static void
3767 cifs_uncached_readdata_release(struct kref *refcount)
3768 {
3769         struct cifs_readdata *rdata = container_of(refcount,
3770                                         struct cifs_readdata, refcount);
3771         unsigned int i;
3772
3773         kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3774         for (i = 0; i < rdata->nr_pages; i++) {
3775                 put_page(rdata->pages[i]);
3776         }
3777         cifs_readdata_release(refcount);
3778 }
3779
3780 /**
3781  * cifs_readdata_to_iov - copy data from pages in response to an iovec
3782  * @rdata:      the readdata response with list of pages holding data
3783  * @iter:       destination for our data
3784  *
3785  * This function copies data from a list of pages in a readdata response into
3786  * an array of iovecs. It will first calculate where the data should go
3787  * based on the info in the readdata and then copy the data into that spot.
3788  */
3789 static int
3790 cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
3791 {
3792         size_t remaining = rdata->got_bytes;
3793         unsigned int i;
3794
3795         for (i = 0; i < rdata->nr_pages; i++) {
3796                 struct page *page = rdata->pages[i];
3797                 size_t copy = min_t(size_t, remaining, PAGE_SIZE);
3798                 size_t written;
3799
3800                 if (unlikely(iov_iter_is_pipe(iter))) {
3801                         void *addr = kmap_atomic(page);
3802
3803                         written = copy_to_iter(addr, copy, iter);
3804                         kunmap_atomic(addr);
3805                 } else
3806                         written = copy_page_to_iter(page, 0, copy, iter);
3807                 remaining -= written;
3808                 if (written < copy && iov_iter_count(iter) > 0)
3809                         break;
3810         }
3811         return remaining ? -EFAULT : 0;
3812 }
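
/*
 * Illustrative copy-out for cifs_readdata_to_iov() (assuming PAGE_SIZE =
 * 4096): with got_bytes = 6000, page 0 contributes 4096 bytes and page 1
 * the remaining 1904, remaining drops to 0 and the function returns 0; a
 * copy that falls short while the iterator still has room returns -EFAULT.
 */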
3813
3814 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3815
3816 static void
3817 cifs_uncached_readv_complete(struct work_struct *work)
3818 {
3819         struct cifs_readdata *rdata = container_of(work,
3820                                                 struct cifs_readdata, work);
3821
3822         complete(&rdata->done);
3823         collect_uncached_read_data(rdata->ctx);
3824         /* the call below may free the last reference to the aio ctx */
3825         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3826 }
3827
3828 static int
3829 uncached_fill_pages(struct TCP_Server_Info *server,
3830                     struct cifs_readdata *rdata, struct iov_iter *iter,
3831                     unsigned int len)
3832 {
3833         int result = 0;
3834         unsigned int i;
3835         unsigned int nr_pages = rdata->nr_pages;
3836         unsigned int page_offset = rdata->page_offset;
3837
3838         rdata->got_bytes = 0;
3839         rdata->tailsz = PAGE_SIZE;
3840         for (i = 0; i < nr_pages; i++) {
3841                 struct page *page = rdata->pages[i];
3842                 size_t n;
3843                 unsigned int segment_size = rdata->pagesz;
3844
3845                 if (i == 0)
3846                         segment_size -= page_offset;
3847                 else
3848                         page_offset = 0;
3849
3851                 if (len <= 0) {
3852                         /* no need to hold page hostage */
3853                         rdata->pages[i] = NULL;
3854                         rdata->nr_pages--;
3855                         put_page(page);
3856                         continue;
3857                 }
3858
3859                 n = len;
3860                 if (len >= segment_size)
3861                         /* enough data to fill the page */
3862                         n = segment_size;
3863                 else
3864                         rdata->tailsz = len;
3865                 len -= n;
3866
3867                 if (iter)
3868                         result = copy_page_from_iter(
3869                                         page, page_offset, n, iter);
3870 #ifdef CONFIG_CIFS_SMB_DIRECT
3871                 else if (rdata->mr)
3872                         result = n;
3873 #endif
3874                 else
3875                         result = cifs_read_page_from_socket(
3876                                         server, page, page_offset, n);
3877                 if (result < 0)
3878                         break;
3879
3880                 rdata->got_bytes += result;
3881         }
3882
3883         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3884                                                 rdata->got_bytes : result;
3885 }
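
/*
 * Illustrative fill for uncached_fill_pages() (assuming pagesz = 4096):
 * with page_offset = 100 and len = 5000, page 0 takes segment_size =
 * 4096 - 100 = 3996 bytes, page 1 takes the remaining 1004 (recorded as
 * tailsz), and any further pages are dropped once len reaches 0.
 */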
3886
3887 static int
3888 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3889                               struct cifs_readdata *rdata, unsigned int len)
3890 {
3891         return uncached_fill_pages(server, rdata, NULL, len);
3892 }
3893
3894 static int
3895 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3896                               struct cifs_readdata *rdata,
3897                               struct iov_iter *iter)
3898 {
3899         return uncached_fill_pages(server, rdata, iter, iter->count);
3900 }
3901
3902 static int cifs_resend_rdata(struct cifs_readdata *rdata,
3903                         struct list_head *rdata_list,
3904                         struct cifs_aio_ctx *ctx)
3905 {
3906         unsigned int rsize;
3907         struct cifs_credits credits;
3908         int rc;
3909         struct TCP_Server_Info *server;
3910
3911         /* XXX: should we pick a new channel here? */
3912         server = rdata->server;
3913
3914         do {
3915                 if (rdata->cfile->invalidHandle) {
3916                         rc = cifs_reopen_file(rdata->cfile, true);
3917                         if (rc == -EAGAIN)
3918                                 continue;
3919                         else if (rc)
3920                                 break;
3921                 }
3922
3923                 /*
3924                  * Wait for credits to resend this rdata.
3925                  * Note: we attempt to resend the whole rdata rather than
3926                  * in segments
3927                  */
3928                 do {
3929                         rc = server->ops->wait_mtu_credits(server, rdata->bytes,
3930                                                 &rsize, &credits);
3931
3932                         if (rc)
3933                                 goto fail;
3934
3935                         if (rsize < rdata->bytes) {
3936                                 add_credits_and_wake_if(server, &credits, 0);
3937                                 msleep(1000);
3938                         }
3939                 } while (rsize < rdata->bytes);
3940                 rdata->credits = credits;
3941
3942                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
3943                 if (!rc) {
3944                         if (rdata->cfile->invalidHandle)
3945                                 rc = -EAGAIN;
3946                         else {
3947 #ifdef CONFIG_CIFS_SMB_DIRECT
3948                                 if (rdata->mr) {
3949                                         rdata->mr->need_invalidate = true;
3950                                         smbd_deregister_mr(rdata->mr);
3951                                         rdata->mr = NULL;
3952                                 }
3953 #endif
3954                                 rc = server->ops->async_readv(rdata);
3955                         }
3956                 }
3957
3958                 /* If the read was successfully sent, we are done */
3959                 if (!rc) {
3960                         /* Add to aio pending list */
3961                         list_add_tail(&rdata->list, rdata_list);
3962                         return 0;
3963                 }
3964
3965                 /* Roll back credits and retry if needed */
3966                 add_credits_and_wake_if(server, &rdata->credits, 0);
3967         } while (rc == -EAGAIN);
3968
3969 fail:
3970         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
3971         return rc;
3972 }
3973
3974 static int
3975 cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
3976                      struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
3977                      struct cifs_aio_ctx *ctx)
3978 {
3979         struct cifs_readdata *rdata;
3980         unsigned int npages, rsize;
3981         struct cifs_credits credits_on_stack;
3982         struct cifs_credits *credits = &credits_on_stack;
3983         size_t cur_len;
3984         int rc;
3985         pid_t pid;
3986         struct TCP_Server_Info *server;
3987         struct page **pagevec;
3988         size_t start;
3989         struct iov_iter direct_iov = ctx->iter;
3990
3991         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3992
3993         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3994                 pid = open_file->pid;
3995         else
3996                 pid = current->tgid;
3997
3998         if (ctx->direct_io)
3999                 iov_iter_advance(&direct_iov, offset - ctx->pos);
4000
4001         do {
4002                 if (open_file->invalidHandle) {
4003                         rc = cifs_reopen_file(open_file, true);
4004                         if (rc == -EAGAIN)
4005                                 continue;
4006                         else if (rc)
4007                                 break;
4008                 }
4009
4010                 if (cifs_sb->ctx->rsize == 0)
4011                         cifs_sb->ctx->rsize =
4012                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4013                                                              cifs_sb->ctx);
4014
4015                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4016                                                    &rsize, credits);
4017                 if (rc)
4018                         break;
4019
4020                 cur_len = min_t(const size_t, len, rsize);
4021
4022                 if (ctx->direct_io) {
4023                         ssize_t result;
4024
4025                         result = iov_iter_get_pages_alloc2(
4026                                         &direct_iov, &pagevec,
4027                                         cur_len, &start);
4028                         if (result < 0) {
4029                                 cifs_dbg(VFS,
4030                                          "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
4031                                          result, iov_iter_type(&direct_iov),
4032                                          direct_iov.iov_offset,
4033                                          direct_iov.count);
4034                                 dump_stack();
4035
4036                                 rc = result;
4037                                 add_credits_and_wake_if(server, credits, 0);
4038                                 break;
4039                         }
4040                         cur_len = (size_t)result;
4041
4042                         rdata = cifs_readdata_direct_alloc(
4043                                         pagevec, cifs_uncached_readv_complete);
4044                         if (!rdata) {
4045                                 add_credits_and_wake_if(server, credits, 0);
4046                                 rc = -ENOMEM;
4047                                 break;
4048                         }
4049
4050                         npages = (cur_len + start + PAGE_SIZE-1) / PAGE_SIZE;
4051                         rdata->page_offset = start;
4052                         rdata->tailsz = npages > 1 ?
4053                                 cur_len-(PAGE_SIZE-start)-(npages-2)*PAGE_SIZE :
4054                                 cur_len;
4055
4056                 } else {
4058                         npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
4059                         /* allocate a readdata struct */
4060                         rdata = cifs_readdata_alloc(npages,
4061                                             cifs_uncached_readv_complete);
4062                         if (!rdata) {
4063                                 add_credits_and_wake_if(server, credits, 0);
4064                                 rc = -ENOMEM;
4065                                 break;
4066                         }
4067
4068                         rc = cifs_read_allocate_pages(rdata, npages);
4069                         if (rc) {
4070                                 kvfree(rdata->pages);
4071                                 kfree(rdata);
4072                                 add_credits_and_wake_if(server, credits, 0);
4073                                 break;
4074                         }
4075
4076                         rdata->tailsz = PAGE_SIZE;
4077                 }
4078
4079                 rdata->server = server;
4080                 rdata->cfile = cifsFileInfo_get(open_file);
4081                 rdata->nr_pages = npages;
4082                 rdata->offset = offset;
4083                 rdata->bytes = cur_len;
4084                 rdata->pid = pid;
4085                 rdata->pagesz = PAGE_SIZE;
4086                 rdata->read_into_pages = cifs_uncached_read_into_pages;
4087                 rdata->copy_into_pages = cifs_uncached_copy_into_pages;
4088                 rdata->credits = credits_on_stack;
4089                 rdata->ctx = ctx;
4090                 kref_get(&ctx->refcount);
4091
4092                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4093
4094                 if (!rc) {
4095                         if (rdata->cfile->invalidHandle)
4096                                 rc = -EAGAIN;
4097                         else
4098                                 rc = server->ops->async_readv(rdata);
4099                 }
4100
4101                 if (rc) {
4102                         add_credits_and_wake_if(server, &rdata->credits, 0);
4103                         kref_put(&rdata->refcount,
4104                                 cifs_uncached_readdata_release);
4105                         if (rc == -EAGAIN) {
4106                                 iov_iter_revert(&direct_iov, cur_len);
4107                                 continue;
4108                         }
4109                         break;
4110                 }
4111
4112                 list_add_tail(&rdata->list, rdata_list);
4113                 offset += cur_len;
4114                 len -= cur_len;
4115         } while (len > 0);
4116
4117         return rc;
4118 }
4119
4120 static void
4121 collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4122 {
4123         struct cifs_readdata *rdata, *tmp;
4124         struct iov_iter *to = &ctx->iter;
4125         struct cifs_sb_info *cifs_sb;
4126         int rc;
4127
4128         cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4129
4130         mutex_lock(&ctx->aio_mutex);
4131
4132         if (list_empty(&ctx->list)) {
4133                 mutex_unlock(&ctx->aio_mutex);
4134                 return;
4135         }
4136
4137         rc = ctx->rc;
4138         /* the loop below should proceed in the order of increasing offsets */
4139 again:
4140         list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4141                 if (!rc) {
4142                         if (!try_wait_for_completion(&rdata->done)) {
4143                                 mutex_unlock(&ctx->aio_mutex);
4144                                 return;
4145                         }
4146
4147                         if (rdata->result == -EAGAIN) {
4148                                 /* resend call if it's a retryable error */
4149                                 struct list_head tmp_list;
4150                                 unsigned int got_bytes = rdata->got_bytes;
4151
4152                                 list_del_init(&rdata->list);
4153                                 INIT_LIST_HEAD(&tmp_list);
4154
4155                                 /*
4156                                  * Got part of the data and then a reconnect
4157                                  * happened -- fill the buffer and continue
4158                                  * reading.
4159                                  */
4160                                 if (got_bytes && got_bytes < rdata->bytes) {
4161                                         rc = 0;
4162                                         if (!ctx->direct_io)
4163                                                 rc = cifs_readdata_to_iov(rdata, to);
4164                                         if (rc) {
4165                                                 kref_put(&rdata->refcount,
4166                                                         cifs_uncached_readdata_release);
4167                                                 continue;
4168                                         }
4169                                 }
4170
4171                                 if (ctx->direct_io) {
4172                                         /*
4173                                          * Re-use rdata as this is a
4174                                          * direct I/O
4175                                          */
4176                                         rc = cifs_resend_rdata(
4177                                                 rdata,
4178                                                 &tmp_list, ctx);
4179                                 } else {
4180                                         rc = cifs_send_async_read(
4181                                                 rdata->offset + got_bytes,
4182                                                 rdata->bytes - got_bytes,
4183                                                 rdata->cfile, cifs_sb,
4184                                                 &tmp_list, ctx);
4185
4186                                         kref_put(&rdata->refcount,
4187                                                 cifs_uncached_readdata_release);
4188                                 }
4189
4190                                 list_splice(&tmp_list, &ctx->list);
4191
4192                                 goto again;
4193                         } else if (rdata->result)
4194                                 rc = rdata->result;
4195                         else if (!ctx->direct_io)
4196                                 rc = cifs_readdata_to_iov(rdata, to);
4197
4198                         /* if there was a short read -- discard anything left */
4199                         if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4200                                 rc = -ENODATA;
4201
4202                         ctx->total_len += rdata->got_bytes;
4203                 }
4204                 list_del_init(&rdata->list);
4205                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
4206         }
4207
4208         if (!ctx->direct_io)
4209                 ctx->total_len = ctx->len - iov_iter_count(to);
4210
4211         /* mask nodata case */
4212         if (rc == -ENODATA)
4213                 rc = 0;
4214
4215         ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4216
4217         mutex_unlock(&ctx->aio_mutex);
4218
4219         if (ctx->iocb && ctx->iocb->ki_complete)
4220                 ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4221         else
4222                 complete(&ctx->done);
4223 }
4224
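/*
 * Common backend for cifs_user_readv() and cifs_direct_readv(): split
 * the request into rsize-sized chunks via cifs_send_async_read(), then
 * either wait for completion (sync kiocb) or return -EIOCBQUEUED and
 * let collect_uncached_read_data() finish the job.
 */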
4225 static ssize_t __cifs_readv(
4226         struct kiocb *iocb, struct iov_iter *to, bool direct)
4227 {
4228         size_t len;
4229         struct file *file = iocb->ki_filp;
4230         struct cifs_sb_info *cifs_sb;
4231         struct cifsFileInfo *cfile;
4232         struct cifs_tcon *tcon;
4233         ssize_t rc, total_read = 0;
4234         loff_t offset = iocb->ki_pos;
4235         struct cifs_aio_ctx *ctx;
4236
4237         /*
4238          * iov_iter_get_pages_alloc() doesn't work with ITER_KVEC,
4239          * so fall back to the data copy read path. This could be
4240          * improved by getting pages directly from an ITER_KVEC.
4241          */
4242         if (direct && iov_iter_is_kvec(to)) {
4243                 cifs_dbg(FYI, "use non-direct cifs_user_readv for kvec I/O\n");
4244                 direct = false;
4245         }
4246
4247         len = iov_iter_count(to);
4248         if (!len)
4249                 return 0;
4250
4251         cifs_sb = CIFS_FILE_SB(file);
4252         cfile = file->private_data;
4253         tcon = tlink_tcon(cfile->tlink);
4254
4255         if (!tcon->ses->server->ops->async_readv)
4256                 return -ENOSYS;
4257
4258         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4259                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4260
4261         ctx = cifs_aio_ctx_alloc();
4262         if (!ctx)
4263                 return -ENOMEM;
4264
4265         ctx->cfile = cifsFileInfo_get(cfile);
4266
4267         if (!is_sync_kiocb(iocb))
4268                 ctx->iocb = iocb;
4269
4270         if (user_backed_iter(to))
4271                 ctx->should_dirty = true;
4272
4273         if (direct) {
4274                 ctx->pos = offset;
4275                 ctx->direct_io = true;
4276                 ctx->iter = *to;
4277                 ctx->len = len;
4278         } else {
4279                 rc = setup_aio_ctx_iter(ctx, to, ITER_DEST);
4280                 if (rc) {
4281                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4282                         return rc;
4283                 }
4284                 len = ctx->len;
4285         }
4286
4287         if (direct) {
4288                 rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4289                                                   offset, offset + len - 1);
4290                 if (rc) {
4291                         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4292                         return -EAGAIN;
4293                 }
4294         }
4295
4296         /* grab a lock here because read response handlers can access ctx */
4297         mutex_lock(&ctx->aio_mutex);
4298
4299         rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4300
4301         /* if sending at least one read request succeeded, then reset rc */
4302         if (!list_empty(&ctx->list))
4303                 rc = 0;
4304
4305         mutex_unlock(&ctx->aio_mutex);
4306
4307         if (rc) {
4308                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4309                 return rc;
4310         }
4311
4312         if (!is_sync_kiocb(iocb)) {
4313                 kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314                 return -EIOCBQUEUED;
4315         }
4316
4317         rc = wait_for_completion_killable(&ctx->done);
4318         if (rc) {
4319                 mutex_lock(&ctx->aio_mutex);
4320                 ctx->rc = rc = -EINTR;
4321                 total_read = ctx->total_len;
4322                 mutex_unlock(&ctx->aio_mutex);
4323         } else {
4324                 rc = ctx->rc;
4325                 total_read = ctx->total_len;
4326         }
4327
4328         kref_put(&ctx->refcount, cifs_aio_ctx_release);
4329
4330         if (total_read) {
4331                 iocb->ki_pos += total_read;
4332                 return total_read;
4333         }
4334         return rc;
4335 }
4336
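/*
 * cifs_direct_readv() reads straight into the caller's buffer, while
 * cifs_user_readv() lands data in rdata pages first and copies it out
 * via cifs_readdata_to_iov().
 */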
4337 ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4338 {
4339         return __cifs_readv(iocb, to, true);
4340 }
4341
4342 ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4343 {
4344         return __cifs_readv(iocb, to, false);
4345 }
4346
4347 ssize_t
4348 cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4349 {
4350         struct inode *inode = file_inode(iocb->ki_filp);
4351         struct cifsInodeInfo *cinode = CIFS_I(inode);
4352         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4353         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4354                                                 iocb->ki_filp->private_data;
4355         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4356         int rc = -EACCES;
4357
4358         /*
4359          * In strict cache mode we need to read from the server all the time
4360          * if we don't have a level II oplock because the server can delay the
4361          * mtime change - so we can't make a decision about invalidating the
4362          * inode. And we can also fail with page reading if there are
4363          * mandatory locks on pages affected by this read but not on the
4364          * region from pos to pos+len-1.
4365          */
4366         if (!CIFS_CACHE_READ(cinode))
4367                 return cifs_user_readv(iocb, to);
4368
4369         if (cap_unix(tcon->ses) &&
4370             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4371             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4372                 return generic_file_read_iter(iocb, to);
4373
4374         /*
4375          * We need to hold the sem to be sure nobody modifies the lock list
4376          * with a brlock that prevents reading.
4377          */
4378         down_read(&cinode->lock_sem);
4379         if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4380                                      tcon->ses->server->vals->shared_lock_type,
4381                                      0, NULL, CIFS_READ_OP))
4382                 rc = generic_file_read_iter(iocb, to);
4383         up_read(&cinode->lock_sem);
4384         return rc;
4385 }
4386
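/*
 * Synchronous, uncached read helper used by cifs_readpage_worker().
 * Loops issuing rsize-bounded sync_read calls, reopening the file and
 * retrying on -EAGAIN, until read_size bytes are read or EOF/error.
 */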
4387 static ssize_t
4388 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4389 {
4390         int rc = -EACCES;
4391         unsigned int bytes_read = 0;
4392         unsigned int total_read;
4393         unsigned int current_read_size;
4394         unsigned int rsize;
4395         struct cifs_sb_info *cifs_sb;
4396         struct cifs_tcon *tcon;
4397         struct TCP_Server_Info *server;
4398         unsigned int xid;
4399         char *cur_offset;
4400         struct cifsFileInfo *open_file;
4401         struct cifs_io_parms io_parms = {0};
4402         int buf_type = CIFS_NO_BUFFER;
4403         __u32 pid;
4404
4405         xid = get_xid();
4406         cifs_sb = CIFS_FILE_SB(file);
4407
4408         /* FIXME: set up handlers for larger reads and/or convert to async */
4409         rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4410
4411         if (file->private_data == NULL) {
4412                 rc = -EBADF;
4413                 free_xid(xid);
4414                 return rc;
4415         }
4416         open_file = file->private_data;
4417         tcon = tlink_tcon(open_file->tlink);
4418         server = cifs_pick_channel(tcon->ses);
4419
4420         if (!server->ops->sync_read) {
4421                 free_xid(xid);
4422                 return -ENOSYS;
4423         }
4424
4425         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4426                 pid = open_file->pid;
4427         else
4428                 pid = current->tgid;
4429
4430         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4431                 cifs_dbg(FYI, "attempting read on write only file instance\n");
4432
4433         for (total_read = 0, cur_offset = read_data; read_size > total_read;
4434              total_read += bytes_read, cur_offset += bytes_read) {
4435                 do {
4436                         current_read_size = min_t(uint, read_size - total_read,
4437                                                   rsize);
4438                         /*
4439                          * For Windows ME and 9x we do not want to request
4440                          * more than was negotiated, since the server will
4441                          * refuse the read otherwise.
4442                          */
4443                         if (!(tcon->ses->capabilities &
4444                                 tcon->ses->server->vals->cap_large_files)) {
4445                                 current_read_size = min_t(uint,
4446                                         current_read_size, CIFSMaxBufSize);
4447                         }
4448                         if (open_file->invalidHandle) {
4449                                 rc = cifs_reopen_file(open_file, true);
4450                                 if (rc != 0)
4451                                         break;
4452                         }
4453                         io_parms.pid = pid;
4454                         io_parms.tcon = tcon;
4455                         io_parms.offset = *offset;
4456                         io_parms.length = current_read_size;
4457                         io_parms.server = server;
4458                         rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4459                                                     &bytes_read, &cur_offset,
4460                                                     &buf_type);
4461                 } while (rc == -EAGAIN);
4462
4463                 if (rc || (bytes_read == 0)) {
4464                         if (total_read) {
4465                                 break;
4466                         } else {
4467                                 free_xid(xid);
4468                                 return rc;
4469                         }
4470                 } else {
4471                         cifs_stats_bytes_read(tcon, total_read);
4472                         *offset += bytes_read;
4473                 }
4474         }
4475         free_xid(xid);
4476         return total_read;
4477 }
4478
4479 /*
4480  * If the page is mmap'ed into a process' page tables, then we need to make
4481  * sure that it doesn't change while being written back.
4482  */
4483 static vm_fault_t
4484 cifs_page_mkwrite(struct vm_fault *vmf)
4485 {
4486         struct page *page = vmf->page;
4487
4488         /* Wait for the page to be written to the cache before we allow it to
4489          * be modified.  We then assume the entire page will need writing back.
4490          */
4491 #ifdef CONFIG_CIFS_FSCACHE
4492         if (PageFsCache(page) &&
4493             wait_on_page_fscache_killable(page) < 0)
4494                 return VM_FAULT_RETRY;
4495 #endif
4496
4497         wait_on_page_writeback(page);
4498
4499         if (lock_page_killable(page) < 0)
4500                 return VM_FAULT_RETRY;
4501         return VM_FAULT_LOCKED;
4502 }
4503
4504 static const struct vm_operations_struct cifs_file_vm_ops = {
4505         .fault = filemap_fault,
4506         .map_pages = filemap_map_pages,
4507         .page_mkwrite = cifs_page_mkwrite,
4508 };
4509
4510 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4511 {
4512         int xid, rc = 0;
4513         struct inode *inode = file_inode(file);
4514
4515         xid = get_xid();
4516
4517         if (!CIFS_CACHE_READ(CIFS_I(inode)))
4518                 rc = cifs_zap_mapping(inode);
4519         if (!rc)
4520                 rc = generic_file_mmap(file, vma);
4521         if (!rc)
4522                 vma->vm_ops = &cifs_file_vm_ops;
4523
4524         free_xid(xid);
4525         return rc;
4526 }
4527
4528 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4529 {
4530         int rc, xid;
4531
4532         xid = get_xid();
4533
4534         rc = cifs_revalidate_file(file);
4535         if (rc)
4536                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4537                          rc);
4538         if (!rc)
4539                 rc = generic_file_mmap(file, vma);
4540         if (!rc)
4541                 vma->vm_ops = &cifs_file_vm_ops;
4542
4543         free_xid(xid);
4544         return rc;
4545 }
4546
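/*
 * Completion work for readahead I/O: mark the pages uptodate (or in
 * error), hand the good ones to fscache, then unlock and release them.
 */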
4547 static void
4548 cifs_readv_complete(struct work_struct *work)
4549 {
4550         unsigned int i, got_bytes;
4551         struct cifs_readdata *rdata = container_of(work,
4552                                                 struct cifs_readdata, work);
4553
4554         got_bytes = rdata->got_bytes;
4555         for (i = 0; i < rdata->nr_pages; i++) {
4556                 struct page *page = rdata->pages[i];
4557
4558                 if (rdata->result == 0 ||
4559                     (rdata->result == -EAGAIN && got_bytes)) {
4560                         flush_dcache_page(page);
4561                         SetPageUptodate(page);
4562                 } else
4563                         SetPageError(page);
4564
4565                 if (rdata->result == 0 ||
4566                     (rdata->result == -EAGAIN && got_bytes))
4567                         cifs_readpage_to_fscache(rdata->mapping->host, page);
4568
4569                 unlock_page(page);
4570
4571                 got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
4572
4573                 put_page(page);
4574                 rdata->pages[i] = NULL;
4575         }
4576         kref_put(&rdata->refcount, cifs_readdata_release);
4577 }
4578
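/*
 * Fill rdata->pages with up to @len bytes: copied from @iter when the
 * payload has already been received (e.g. after decryption), otherwise
 * read straight from the socket. Pages beyond the server's (probable)
 * EOF are zero-filled; remaining pages that got no data are released
 * early.
 */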
4579 static int
4580 readpages_fill_pages(struct TCP_Server_Info *server,
4581                      struct cifs_readdata *rdata, struct iov_iter *iter,
4582                      unsigned int len)
4583 {
4584         int result = 0;
4585         unsigned int i;
4586         u64 eof;
4587         pgoff_t eof_index;
4588         unsigned int nr_pages = rdata->nr_pages;
4589         unsigned int page_offset = rdata->page_offset;
4590
4591         /* determine the eof that the server (probably) has */
4592         eof = CIFS_I(rdata->mapping->host)->server_eof;
4593         eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
4594         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
4595
4596         rdata->got_bytes = 0;
4597         rdata->tailsz = PAGE_SIZE;
4598         for (i = 0; i < nr_pages; i++) {
4599                 struct page *page = rdata->pages[i];
4600                 unsigned int to_read = rdata->pagesz;
4601                 size_t n;
4602
4603                 if (i == 0)
4604                         to_read -= page_offset;
4605                 else
4606                         page_offset = 0;
4607
4608                 n = to_read;
4609
4610                 if (len >= to_read) {
4611                         len -= to_read;
4612                 } else if (len > 0) {
4613                         /* enough for a partial page; fill it and zero the rest */
4614                         zero_user(page, len + page_offset, to_read - len);
4615                         n = rdata->tailsz = len;
4616                         len = 0;
4617                 } else if (page->index > eof_index) {
4618                         /*
4619                          * The VFS will not try to do readahead past the
4620                          * i_size, but it's possible that we have outstanding
4621                          * writes with gaps in the middle and the i_size hasn't
4622                          * caught up yet. Populate those with zeroed out pages
4623                          * to prevent the VFS from repeatedly attempting to
4624                          * fill them until the writes are flushed.
4625                          */
4626                         zero_user(page, 0, PAGE_SIZE);
4627                         flush_dcache_page(page);
4628                         SetPageUptodate(page);
4629                         unlock_page(page);
4630                         put_page(page);
4631                         rdata->pages[i] = NULL;
4632                         rdata->nr_pages--;
4633                         continue;
4634                 } else {
4635                         /* no need to hold page hostage */
4636                         unlock_page(page);
4637                         put_page(page);
4638                         rdata->pages[i] = NULL;
4639                         rdata->nr_pages--;
4640                         continue;
4641                 }
4642
4643                 if (iter)
4644                         result = copy_page_from_iter(
4645                                         page, page_offset, n, iter);
4646 #ifdef CONFIG_CIFS_SMB_DIRECT
4647                 else if (rdata->mr)
4648                         result = n;
4649 #endif
4650                 else
4651                         result = cifs_read_page_from_socket(
4652                                         server, page, page_offset, n);
4653                 if (result < 0)
4654                         break;
4655
4656                 rdata->got_bytes += result;
4657         }
4658
4659         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
4660                                                 rdata->got_bytes : result;
4661 }
4662
4663 static int
4664 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
4665                                struct cifs_readdata *rdata, unsigned int len)
4666 {
4667         return readpages_fill_pages(server, rdata, NULL, len);
4668 }
4669
4670 static int
4671 cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
4672                                struct cifs_readdata *rdata,
4673                                struct iov_iter *iter)
4674 {
4675         return readpages_fill_pages(server, rdata, iter, iter->count);
4676 }
4677
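/*
 * Readahead: satisfy what we can from fscache, then chop the rest of
 * the window into rsize-sized async reads, obtaining credits for each
 * request before dispatching it.
 */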
4678 static void cifs_readahead(struct readahead_control *ractl)
4679 {
4680         int rc;
4681         struct cifsFileInfo *open_file = ractl->file->private_data;
4682         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4683         struct TCP_Server_Info *server;
4684         pid_t pid;
4685         unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0;
4686         pgoff_t next_cached = ULONG_MAX;
4687         bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4688                 cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4689         bool check_cache = caching;
4690
4691         xid = get_xid();
4692
4693         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4694                 pid = open_file->pid;
4695         else
4696                 pid = current->tgid;
4697
4698         rc = 0;
4699         server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4700
4701         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4702                  __func__, ractl->file, ractl->mapping, readahead_count(ractl));
4703
4704         /*
4705          * Chop the readahead request up into rsize-sized read requests.
4706          */
4707         while ((nr_pages = readahead_count(ractl) - last_batch_size)) {
4708                 unsigned int i, got, rsize;
4709                 struct page *page;
4710                 struct cifs_readdata *rdata;
4711                 struct cifs_credits credits_on_stack;
4712                 struct cifs_credits *credits = &credits_on_stack;
4713                 pgoff_t index = readahead_index(ractl) + last_batch_size;
4714
4715                 /*
4716                  * Find out if we have anything cached in the range of
4717                  * interest, and if so, where the next chunk of cached data is.
4718                  */
4719                 if (caching) {
4720                         if (check_cache) {
4721                                 rc = cifs_fscache_query_occupancy(
4722                                         ractl->mapping->host, index, nr_pages,
4723                                         &next_cached, &cache_nr_pages);
4724                                 if (rc < 0)
4725                                         caching = false;
4726                                 check_cache = false;
4727                         }
4728
4729                         if (index == next_cached) {
4730                                 /*
4731                                  * TODO: Send a whole batch of pages to be read
4732                                  * by the cache.
4733                                  */
4734                                 struct folio *folio = readahead_folio(ractl);
4735
4736                                 last_batch_size = folio_nr_pages(folio);
4737                                 if (cifs_readpage_from_fscache(ractl->mapping->host,
4738                                                                &folio->page) < 0) {
4739                                         /*
4740                                          * TODO: Deal with cache read failure
4741                                          * here, but for the moment, delegate
4742                                          * that to readpage.
4743                                          */
4744                                         caching = false;
4745                                 }
4746                                 folio_unlock(folio);
4747                                 next_cached++;
4748                                 cache_nr_pages--;
4749                                 if (cache_nr_pages == 0)
4750                                         check_cache = true;
4751                                 continue;
4752                         }
4753                 }
4754
4755                 if (open_file->invalidHandle) {
4756                         rc = cifs_reopen_file(open_file, true);
4757                         if (rc) {
4758                                 if (rc == -EAGAIN)
4759                                         continue;
4760                                 break;
4761                         }
4762                 }
4763
4764                 if (cifs_sb->ctx->rsize == 0)
4765                         cifs_sb->ctx->rsize =
4766                                 server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4767                                                              cifs_sb->ctx);
4768
4769                 rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4770                                                    &rsize, credits);
4771                 if (rc)
4772                         break;
4773                 nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl));
4774                 nr_pages = min_t(size_t, nr_pages, next_cached - index);
4775
4776                 /*
4777                  * Give up immediately if rsize is too small to read an entire
4778                  * page. The VFS will fall back to readpage. We should never
4779                  * reach this point, however, since we set ra_pages to 0 when
4780                  * rsize is smaller than a cache page.
4781                  */
4782                 if (unlikely(!nr_pages)) {
4783                         add_credits_and_wake_if(server, credits, 0);
4784                         break;
4785                 }
4786
4787                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
4788                 if (!rdata) {
4789                         /* best to give up if we're out of mem */
4790                         add_credits_and_wake_if(server, credits, 0);
4791                         break;
4792                 }
4793
4794                 got = __readahead_batch(ractl, rdata->pages, nr_pages);
4795                 if (got != nr_pages) {
4796                         pr_warn("__readahead_batch() returned %u/%u\n",
4797                                 got, nr_pages);
4798                         nr_pages = got;
4799                 }
4800
4801                 rdata->nr_pages = nr_pages;
4802                 rdata->bytes    = readahead_batch_length(ractl);
4803                 rdata->cfile    = cifsFileInfo_get(open_file);
4804                 rdata->server   = server;
4805                 rdata->mapping  = ractl->mapping;
4806                 rdata->offset   = readahead_pos(ractl);
4807                 rdata->pid      = pid;
4808                 rdata->pagesz   = PAGE_SIZE;
4809                 rdata->tailsz   = PAGE_SIZE;
4810                 rdata->read_into_pages = cifs_readpages_read_into_pages;
4811                 rdata->copy_into_pages = cifs_readpages_copy_into_pages;
4812                 rdata->credits  = credits_on_stack;
4813
4814                 rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4815                 if (!rc) {
4816                         if (rdata->cfile->invalidHandle)
4817                                 rc = -EAGAIN;
4818                         else
4819                                 rc = server->ops->async_readv(rdata);
4820                 }
4821
4822                 if (rc) {
4823                         add_credits_and_wake_if(server, &rdata->credits, 0);
4824                         for (i = 0; i < rdata->nr_pages; i++) {
4825                                 page = rdata->pages[i];
4826                                 unlock_page(page);
4827                                 put_page(page);
4828                         }
4829                         /* Fall back to readpage in error/reconnect cases */
4830                         kref_put(&rdata->refcount, cifs_readdata_release);
4831                         break;
4832                 }
4833
4834                 kref_put(&rdata->refcount, cifs_readdata_release);
4835                 last_batch_size = nr_pages;
4836         }
4837
4838         free_xid(xid);
4839 }
4840
4841 /*
4842  * cifs_readpage_worker must be called with the page pinned
4843  */
4844 static int cifs_readpage_worker(struct file *file, struct page *page,
4845         loff_t *poffset)
4846 {
4847         char *read_data;
4848         int rc;
4849
4850         /* Is the page cached? */
4851         rc = cifs_readpage_from_fscache(file_inode(file), page);
4852         if (rc == 0)
4853                 goto read_complete;
4854
4855         read_data = kmap(page);
4856         /* for reads over a certain size we could initiate async readahead */
4857
4858         rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4859
4860         if (rc < 0)
4861                 goto io_error;
4862         else
4863                 cifs_dbg(FYI, "Bytes read %d\n", rc);
4864
4865         /* we do not want atime to be less than mtime, as it broke some apps */
4866         file_inode(file)->i_atime = current_time(file_inode(file));
4867         if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
4868                 file_inode(file)->i_atime = file_inode(file)->i_mtime;
4869         else
4870                 file_inode(file)->i_atime = current_time(file_inode(file));
4871
4872         if (PAGE_SIZE > rc)
4873                 memset(read_data + rc, 0, PAGE_SIZE - rc);
4874
4875         flush_dcache_page(page);
4876         SetPageUptodate(page);
4877
4878         /* send this page to the cache */
4879         cifs_readpage_to_fscache(file_inode(file), page);
4880
4881         rc = 0;
4882
4883 io_error:
4884         kunmap(page);
4885         unlock_page(page);
4886
4887 read_complete:
4888         return rc;
4889 }
4890
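/* ->read_folio: fill one folio, trying fscache first and falling back
 * to a synchronous cifs_read() in cifs_readpage_worker(). */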
4891 static int cifs_read_folio(struct file *file, struct folio *folio)
4892 {
4893         struct page *page = &folio->page;
4894         loff_t offset = page_file_offset(page);
4895         int rc = -EACCES;
4896         unsigned int xid;
4897
4898         xid = get_xid();
4899
4900         if (file->private_data == NULL) {
4901                 rc = -EBADF;
4902                 free_xid(xid);
4903                 return rc;
4904         }
4905
4906         cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4907                  page, (int)offset, (int)offset);
4908
4909         rc = cifs_readpage_worker(file, page, &offset);
4910
4911         free_xid(xid);
4912         return rc;
4913 }
4914
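/* Return 1 if at least one open file on the inode has write access. */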
4915 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4916 {
4917         struct cifsFileInfo *open_file;
4918
4919         spin_lock(&cifs_inode->open_file_lock);
4920         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4921                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4922                         spin_unlock(&cifs_inode->open_file_lock);
4923                         return 1;
4924                 }
4925         }
4926         spin_unlock(&cifs_inode->open_file_lock);
4927         return 0;
4928 }
4929
4930 /* We do not want to update the file size from the server for inodes
4931    open for write - to avoid races with writepage extending
4932    the file. In the future we could consider allowing
4933    refreshing the inode only on increases in the file size,
4934    but this is tricky to do without racing with writebehind
4935    page caching in the current Linux kernel design */
4936 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
4937 {
4938         if (!cifsInode)
4939                 return true;
4940
4941         if (is_inode_writable(cifsInode)) {
4942                 /* This inode is open for write at least once */
4943                 struct cifs_sb_info *cifs_sb;
4944
4945                 cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4946                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4947                         /* since there is no page cache to corrupt on
4948                            direct I/O, we can change the size safely */
4949                         return true;
4950                 }
4951
4952                 if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4953                         return true;
4954
4955                 return false;
4956         } else
4957                 return true;
4958 }
4959
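/*
 * Prepare a page for a buffered write: grab it from the page cache and,
 * unless the write covers the whole page or (under a read oplock) none
 * of the existing data is needed, pre-read the current contents so
 * cifs_write_end() doesn't have to fall back to a sync write.
 */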
4960 static int cifs_write_begin(struct file *file, struct address_space *mapping,
4961                         loff_t pos, unsigned len,
4962                         struct page **pagep, void **fsdata)
4963 {
4964         int oncethru = 0;
4965         pgoff_t index = pos >> PAGE_SHIFT;
4966         loff_t offset = pos & (PAGE_SIZE - 1);
4967         loff_t page_start = pos & PAGE_MASK;
4968         loff_t i_size;
4969         struct page *page;
4970         int rc = 0;
4971
4972         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4973
4974 start:
4975         page = grab_cache_page_write_begin(mapping, index);
4976         if (!page) {
4977                 rc = -ENOMEM;
4978                 goto out;
4979         }
4980
4981         if (PageUptodate(page))
4982                 goto out;
4983
4984         /*
4985          * If we write a full page it will be up to date, no need to read from
4986          * the server. If the write is short, we'll end up doing a sync write
4987          * instead.
4988          */
4989         if (len == PAGE_SIZE)
4990                 goto out;
4991
4992         /*
4993          * optimize away the read when we have an oplock, and we're not
4994          * expecting to use any of the data we'd be reading in. That
4995          * is, when the page lies beyond the EOF, or straddles the EOF
4996          * and the write will cover all of the existing data.
4997          */
4998         if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4999                 i_size = i_size_read(mapping->host);
5000                 if (page_start >= i_size ||
5001                     (offset == 0 && (pos + len) >= i_size)) {
5002                         zero_user_segments(page, 0, offset,
5003                                            offset + len,
5004                                            PAGE_SIZE);
5005                         /*
5006                          * PageChecked means that the parts of the page
5007                          * to which we're not writing are considered up
5008                          * to date. Once the data is copied to the
5009                          * page, it can be set uptodate.
5010                          */
5011                         SetPageChecked(page);
5012                         goto out;
5013                 }
5014         }
5015
5016         if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
5017                 /*
5018                  * might as well read a page, it is fast enough. If we get
5019                  * an error, we don't need to return it. cifs_write_end will
5020                  * do a sync write instead since PG_uptodate isn't set.
5021                  */
5022                 cifs_readpage_worker(file, page, &page_start);
5023                 put_page(page);
5024                 oncethru = 1;
5025                 goto start;
5026         } else {
5027                 /* we could try using another file handle if there is one -
5028                    but how would we lock it to prevent close of that handle
5029                    racing with this read? In any case, this will be
5030                    written out by write_end, so it is fine */
5031         }
5032 out:
5033         *pagep = page;
5034         return rc;
5035 }
5036
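/*
 * Let the VM release a folio: refuse while it carries private data, and
 * don't wait on fscache from kswapd or non-FS allocation contexts.
 */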
5037 static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
5038 {
5039         if (folio_test_private(folio))
5040                 return false;
5041         if (folio_test_fscache(folio)) {
5042                 if (current_is_kswapd() || !(gfp & __GFP_FS))
5043                         return false;
5044                 folio_wait_fscache(folio);
5045         }
5046         fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
5047         return true;
5048 }
5049
5050 static void cifs_invalidate_folio(struct folio *folio, size_t offset,
5051                                  size_t length)
5052 {
5053         folio_wait_fscache(folio);
5054 }
5055
5056 static int cifs_launder_folio(struct folio *folio)
5057 {
5058         int rc = 0;
5059         loff_t range_start = folio_pos(folio);
5060         loff_t range_end = range_start + folio_size(folio);
5061         struct writeback_control wbc = {
5062                 .sync_mode = WB_SYNC_ALL,
5063                 .nr_to_write = 0,
5064                 .range_start = range_start,
5065                 .range_end = range_end,
5066         };
5067
5068         cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5069
5070         if (folio_clear_dirty_for_io(folio))
5071                 rc = cifs_writepage_locked(&folio->page, &wbc);
5072
5073         folio_wait_fscache(folio);
5074         return rc;
5075 }
5076
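/*
 * Deferred work run when the server breaks our oplock/lease: downgrade
 * the cached state, flush (and possibly zap) the page cache, push any
 * cached byte-range locks to the server, then acknowledge the break.
 */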
5077 void cifs_oplock_break(struct work_struct *work)
5078 {
5079         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5080                                                   oplock_break);
5081         struct inode *inode = d_inode(cfile->dentry);
5082         struct cifsInodeInfo *cinode = CIFS_I(inode);
5083         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
5084         struct TCP_Server_Info *server = tcon->ses->server;
5085         int rc = 0;
5086         bool purge_cache = false;
5087
5088         wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5089                         TASK_UNINTERRUPTIBLE);
5090
5091         server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5092                                       cfile->oplock_epoch, &purge_cache);
5093
5094         if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5095                                                 cifs_has_mand_locks(cinode)) {
5096                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5097                          inode);
5098                 cinode->oplock = 0;
5099         }
5100
5101         if (inode && S_ISREG(inode->i_mode)) {
5102                 if (CIFS_CACHE_READ(cinode))
5103                         break_lease(inode, O_RDONLY);
5104                 else
5105                         break_lease(inode, O_WRONLY);
5106                 rc = filemap_fdatawrite(inode->i_mapping);
5107                 if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5108                         rc = filemap_fdatawait(inode->i_mapping);
5109                         mapping_set_error(inode->i_mapping, rc);
5110                         cifs_zap_mapping(inode);
5111                 }
5112                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5113                 if (CIFS_CACHE_WRITE(cinode))
5114                         goto oplock_break_ack;
5115         }
5116
5117         rc = cifs_push_locks(cfile);
5118         if (rc)
5119                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5120
5121 oplock_break_ack:
5122         /*
5123          * Releasing a stale oplock after a recent reconnect of the SMB
5124          * session, using a now-incorrect file handle, is not a data
5125          * integrity issue, but do not bother sending an oplock release if the
5126          * session is still disconnected, since the server already released it
5127          */
5128         if (!cfile->oplock_break_cancelled) {
5129                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
5130                                                              cinode);
5131                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5132         }
5133
5134         _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5135         cifs_done_oplock_break(cinode);
5136 }
5137
5138 /*
5139  * The presence of cifs_direct_io() in the address space ops vector
5140  * allows open() with the O_DIRECT flag, which would have failed otherwise.
5141  *
5142  * In the non-cached mode (mount with cache=none), we shunt off direct
5143  * read and write requests, so this method should never be called.
5144  *
5145  * Direct IO is not yet supported in the cached mode.
5146  */
5147 static ssize_t
5148 cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5149 {
5150         /*
5151          * FIXME
5152          * Eventually need to support direct IO for non forcedirectio mounts
5153          */
5154         return -EINVAL;
5155 }
5156
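/*
 * Allow a file on a CIFS mount to be used as swap: the file must have
 * no holes (i_blocks must cover i_size) and the address space must
 * provide swap_rw. Support is experimental; see the TODOs below.
 */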
5157 static int cifs_swap_activate(struct swap_info_struct *sis,
5158                               struct file *swap_file, sector_t *span)
5159 {
5160         struct cifsFileInfo *cfile = swap_file->private_data;
5161         struct inode *inode = swap_file->f_mapping->host;
5162         unsigned long blocks;
5163         long long isize;
5164
5165         cifs_dbg(FYI, "swap activate\n");
5166
5167         if (!swap_file->f_mapping->a_ops->swap_rw)
5168                 /* Cannot support swap */
5169                 return -EINVAL;
5170
5171         spin_lock(&inode->i_lock);
5172         blocks = inode->i_blocks;
5173         isize = inode->i_size;
5174         spin_unlock(&inode->i_lock);
5175         if (blocks*512 < isize) {
5176                 pr_warn("swap activate: swapfile has holes\n");
5177                 return -EINVAL;
5178         }
5179         *span = sis->pages;
5180
5181         pr_warn_once("Swap support over SMB3 is experimental\n");
5182
5183         /*
5184          * TODO: consider adding ACL (or documenting how) to prevent other
5185          * users (on this or other systems) from reading it
5186          */
5187
5188
5189         /* TODO: add sk_set_memalloc(inet) or similar */
5190
5191         if (cfile)
5192                 cfile->swapfile = true;
5193         /*
5194          * TODO: Since the file is already open, we can't open it with
5195          * DENY_ALL here, but we could add a call to grab a byte-range
5196          * lock to prevent others from reading or writing the file
5197          */
5198
5199         sis->flags |= SWP_FS_OPS;
5200         return add_swap_extent(sis, 0, sis->max, 0);
5201 }
5202
5203 static void cifs_swap_deactivate(struct file *file)
5204 {
5205         struct cifsFileInfo *cfile = file->private_data;
5206
5207         cifs_dbg(FYI, "swap deactivate\n");
5208
5209         /* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5210
5211         if (cfile)
5212                 cfile->swapfile = false;
5213
5214         /* do we need to unpin (or unlock) the file? */
5215 }
5216
5217 /*
5218  * Mark a page as having been made dirty and thus needing writeback.  We also
5219  * need to pin the cache object to write back to.
5220  */
5221 #ifdef CONFIG_CIFS_FSCACHE
5222 static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
5223 {
5224         return fscache_dirty_folio(mapping, folio,
5225                                         cifs_inode_cookie(mapping->host));
5226 }
5227 #else
5228 #define cifs_dirty_folio filemap_dirty_folio
5229 #endif
5230
5231 const struct address_space_operations cifs_addr_ops = {
5232         .read_folio = cifs_read_folio,
5233         .readahead = cifs_readahead,
5234         .writepage = cifs_writepage,
5235         .writepages = cifs_writepages,
5236         .write_begin = cifs_write_begin,
5237         .write_end = cifs_write_end,
5238         .dirty_folio = cifs_dirty_folio,
5239         .release_folio = cifs_release_folio,
5240         .direct_IO = cifs_direct_io,
5241         .invalidate_folio = cifs_invalidate_folio,
5242         .launder_folio = cifs_launder_folio,
5243         /*
5244          * TODO: investigate and, if useful, add a cifs_migratePage
5245          * helper (under CONFIG_MIGRATION) in the future, and also
5246          * investigate and add an is_dirty_writeback helper if needed
5247          */
5248         .swap_activate = cifs_swap_activate,
5249         .swap_deactivate = cifs_swap_deactivate,
5250 };
5251
5252 /*
5253  * cifs_readahead requires the server to support a buffer large enough to
5254  * contain the header plus one complete page of data.  Otherwise, we need
5255  * to leave cifs_readahead out of the address space operations.
5256  */
5257 const struct address_space_operations cifs_addr_ops_smallbuf = {
5258         .read_folio = cifs_read_folio,
5259         .writepage = cifs_writepage,
5260         .writepages = cifs_writepages,
5261         .write_begin = cifs_write_begin,
5262         .write_end = cifs_write_end,
5263         .dirty_folio = cifs_dirty_folio,
5264         .release_folio = cifs_release_folio,
5265         .invalidate_folio = cifs_invalidate_folio,
5266         .launder_folio = cifs_launder_folio,
5267 };