fix "disabling echoes and oplocks" on SMB2 mounts
[linux-3.10.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46 static inline int cifs_convert_flags(unsigned int flags)
47 {
48         if ((flags & O_ACCMODE) == O_RDONLY)
49                 return GENERIC_READ;
50         else if ((flags & O_ACCMODE) == O_WRONLY)
51                 return GENERIC_WRITE;
52         else if ((flags & O_ACCMODE) == O_RDWR) {
53                 /* GENERIC_ALL is too much permission to request
54                    can cause unnecessary access denied on create */
55                 /* return GENERIC_ALL; */
56                 return (GENERIC_READ | GENERIC_WRITE);
57         }
58
59         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61                 FILE_READ_DATA);
62 }
63
64 static u32 cifs_posix_convert_flags(unsigned int flags)
65 {
66         u32 posix_flags = 0;
67
68         if ((flags & O_ACCMODE) == O_RDONLY)
69                 posix_flags = SMB_O_RDONLY;
70         else if ((flags & O_ACCMODE) == O_WRONLY)
71                 posix_flags = SMB_O_WRONLY;
72         else if ((flags & O_ACCMODE) == O_RDWR)
73                 posix_flags = SMB_O_RDWR;
74
75         if (flags & O_CREAT)
76                 posix_flags |= SMB_O_CREAT;
77         if (flags & O_EXCL)
78                 posix_flags |= SMB_O_EXCL;
79         if (flags & O_TRUNC)
80                 posix_flags |= SMB_O_TRUNC;
81         /* be safe and imply O_SYNC for O_DSYNC */
82         if (flags & O_DSYNC)
83                 posix_flags |= SMB_O_SYNC;
84         if (flags & O_DIRECTORY)
85                 posix_flags |= SMB_O_DIRECTORY;
86         if (flags & O_NOFOLLOW)
87                 posix_flags |= SMB_O_NOFOLLOW;
88         if (flags & O_DIRECT)
89                 posix_flags |= SMB_O_DIRECT;
90
91         return posix_flags;
92 }
93
94 static inline int cifs_get_disposition(unsigned int flags)
95 {
96         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97                 return FILE_CREATE;
98         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99                 return FILE_OVERWRITE_IF;
100         else if ((flags & O_CREAT) == O_CREAT)
101                 return FILE_OPEN_IF;
102         else if ((flags & O_TRUNC) == O_TRUNC)
103                 return FILE_OVERWRITE;
104         else
105                 return FILE_OPEN;
106 }
107
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109                         struct super_block *sb, int mode, unsigned int f_flags,
110                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
111 {
112         int rc;
113         FILE_UNIX_BASIC_INFO *presp_data;
114         __u32 posix_flags = 0;
115         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116         struct cifs_fattr fattr;
117         struct tcon_link *tlink;
118         struct cifs_tcon *tcon;
119
120         cFYI(1, "posix open %s", full_path);
121
122         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123         if (presp_data == NULL)
124                 return -ENOMEM;
125
126         tlink = cifs_sb_tlink(cifs_sb);
127         if (IS_ERR(tlink)) {
128                 rc = PTR_ERR(tlink);
129                 goto posix_open_ret;
130         }
131
132         tcon = tlink_tcon(tlink);
133         mode &= ~current_umask();
134
135         posix_flags = cifs_posix_convert_flags(f_flags);
136         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137                              poplock, full_path, cifs_sb->local_nls,
138                              cifs_sb->mnt_cifs_flags &
139                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
140         cifs_put_tlink(tlink);
141
142         if (rc)
143                 goto posix_open_ret;
144
145         if (presp_data->Type == cpu_to_le32(-1))
146                 goto posix_open_ret; /* open ok, caller does qpathinfo */
147
148         if (!pinode)
149                 goto posix_open_ret; /* caller does not need info */
150
151         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
152
153         /* get new inode and set it up */
154         if (*pinode == NULL) {
155                 cifs_fill_uniqueid(sb, &fattr);
156                 *pinode = cifs_iget(sb, &fattr);
157                 if (!*pinode) {
158                         rc = -ENOMEM;
159                         goto posix_open_ret;
160                 }
161         } else {
162                 cifs_fattr_to_inode(*pinode, &fattr);
163         }
164
165 posix_open_ret:
166         kfree(presp_data);
167         return rc;
168 }
169
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173              struct cifs_fid *fid, unsigned int xid)
174 {
175         int rc;
176         int desired_access;
177         int disposition;
178         int create_options = CREATE_NOT_DIR;
179         FILE_ALL_INFO *buf;
180         struct TCP_Server_Info *server = tcon->ses->server;
181
182         if (!server->ops->open)
183                 return -ENOSYS;
184
185         desired_access = cifs_convert_flags(f_flags);
186
187 /*********************************************************************
188  *  open flag mapping table:
189  *
190  *      POSIX Flag            CIFS Disposition
191  *      ----------            ----------------
192  *      O_CREAT               FILE_OPEN_IF
193  *      O_CREAT | O_EXCL      FILE_CREATE
194  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
195  *      O_TRUNC               FILE_OVERWRITE
196  *      none of the above     FILE_OPEN
197  *
198  *      Note that there is not a direct match between disposition
199  *      FILE_SUPERSEDE (ie create whether or not file exists although
200  *      O_CREAT | O_TRUNC is similar but truncates the existing
201  *      file rather than creating a new file as FILE_SUPERSEDE does
202  *      (which uses the attributes / metadata passed in on open call)
203  *?
204  *?  O_SYNC is a reasonable match to CIFS writethrough flag
205  *?  and the read write flags match reasonably.  O_LARGEFILE
206  *?  is irrelevant because largefile support is always used
207  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
208  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
209  *********************************************************************/
210
211         disposition = cifs_get_disposition(f_flags);
212
213         /* BB pass O_SYNC flag through on file attributes .. BB */
214
215         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
216         if (!buf)
217                 return -ENOMEM;
218
219         if (backup_cred(cifs_sb))
220                 create_options |= CREATE_OPEN_BACKUP_INTENT;
221
222         rc = server->ops->open(xid, tcon, full_path, disposition,
223                                desired_access, create_options, fid, oplock, buf,
224                                cifs_sb);
225
226         if (rc)
227                 goto out;
228
229         if (tcon->unix_ext)
230                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
231                                               xid);
232         else
233                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
234                                          xid, &fid->netfid);
235
236 out:
237         kfree(buf);
238         return rc;
239 }
240
241 struct cifsFileInfo *
242 cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
243                   struct tcon_link *tlink, __u32 oplock)
244 {
245         struct dentry *dentry = file->f_path.dentry;
246         struct inode *inode = dentry->d_inode;
247         struct cifsInodeInfo *cinode = CIFS_I(inode);
248         struct cifsFileInfo *cfile;
249         struct cifs_fid_locks *fdlocks;
250         struct cifs_tcon *tcon = tlink_tcon(tlink);
251
252         cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
253         if (cfile == NULL)
254                 return cfile;
255
256         fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
257         if (!fdlocks) {
258                 kfree(cfile);
259                 return NULL;
260         }
261
262         INIT_LIST_HEAD(&fdlocks->locks);
263         fdlocks->cfile = cfile;
264         cfile->llist = fdlocks;
265         down_write(&cinode->lock_sem);
266         list_add(&fdlocks->llist, &cinode->llist);
267         up_write(&cinode->lock_sem);
268
269         cfile->count = 1;
270         cfile->pid = current->tgid;
271         cfile->uid = current_fsuid();
272         cfile->dentry = dget(dentry);
273         cfile->f_flags = file->f_flags;
274         cfile->invalidHandle = false;
275         cfile->tlink = cifs_get_tlink(tlink);
276         INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
277         mutex_init(&cfile->fh_mutex);
278
279         spin_lock(&cifs_file_list_lock);
280         if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
281                 oplock = fid->pending_open->oplock;
282         list_del(&fid->pending_open->olist);
283
284         tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);
285
286         list_add(&cfile->tlist, &tcon->openFileList);
287         /* if readable file instance put first in list*/
288         if (file->f_mode & FMODE_READ)
289                 list_add(&cfile->flist, &cinode->openFileList);
290         else
291                 list_add_tail(&cfile->flist, &cinode->openFileList);
292         spin_unlock(&cifs_file_list_lock);
293
294         file->private_data = cfile;
295         return cfile;
296 }
297
/*
 * Locked wrapper around cifsFileInfo_get_locked(): takes
 * cifs_file_list_lock, calls the _locked variant on @cifs_file and drops
 * the lock again. Presumably this takes a reference on the file private
 * data (the counterpart of cifsFileInfo_put()) -- the _locked helper is
 * defined elsewhere. Returns @cifs_file for the caller's convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}
306
/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 *
 * When the count drops to zero the structure is unlinked from the inode
 * and tcon open-file lists, the handle is closed on the server (unless the
 * tcon needs reconnect or the handle is already invalid), all byte-range
 * lock records of this fid are freed, and the tlink, dentry and the
 * structure itself are released.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        /* other references remain: nothing to tear down yet */
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        /*
         * NOTE(review): fid is only filled in when the server provides
         * get_lease_key; otherwise it is passed on uninitialised below.
         */
        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need invalidate mapping on the last
                 * close  because it may cause a error when we open this file
                 * again and get at least level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        /* make sure no oplock break work is still queued or running for us */
        cancel_work_sync(&cifs_file->oplock_break);

        /* only talk to the server if the handle can still be valid there */
        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                /* NOTE(review): shadows the outer 'server' with the same value */
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        /* the close is done (or skipped); the pending open is not needed now */
        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        /* unhook and free this fid's lock list head from the inode */
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        /* drop the references taken in cifs_new_fileinfo() */
        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        kfree(cifs_file);
}
385
/*
 * Open handler for regular files.
 *
 * Tries a POSIX open first when the tcon advertises unix extensions with
 * the POSIX path-ops capability and posix open has not been marked broken;
 * otherwise, or when that attempt fails with -EIO / -EREMOTE (or
 * -EINVAL / -EOPNOTSUPP, which also marks posix open as broken on the
 * tcon), falls back to cifs_nt_open(). On success a cifsFileInfo is
 * created and attached to @file.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);

        /* request an oplock only if the server has them enabled */
        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                           " unexpected error on SMB posix open"
                                           ", disabling posix open support."
                                           " Check if server update available.",
                                           tcon->ses->serverName,
                                           tcon->ses->serverNOS);
                        /* do not try posix open on this tcon again */
                        tcon->broken_posix_open = true;
                /*
                 * NOTE(review): the -EOPNOTSUPP test below can never be
                 * false here; that value is consumed by the branch above.
                 */
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Register the open as pending before the handle is attached to a
         * cifsFileInfo; cifs_new_fileinfo() removes it from the list again.
         */
        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                /* NOTE(review): repeats the get_lease_key call made above */
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                /* could not track the handle locally: close it on the server */
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = NO_CHANGE_64,
                        .gid    = NO_CHANGE_64,
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                /* best effort: the return value is not checked */
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}
507
508 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
509
510 /*
511  * Try to reacquire byte range locks that were released when session
512  * to server was lost.
513  */
514 static int
515 cifs_relock_file(struct cifsFileInfo *cfile)
516 {
517         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
518         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
519         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
520         int rc = 0;
521
522         /* we are going to update can_cache_brlcks here - need a write access */
523         down_write(&cinode->lock_sem);
524         if (cinode->can_cache_brlcks) {
525                 /* can cache locks - no need to push them */
526                 up_write(&cinode->lock_sem);
527                 return rc;
528         }
529
530         if (cap_unix(tcon->ses) &&
531             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
532             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
533                 rc = cifs_push_posix_locks(cfile);
534         else
535                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
536
537         up_write(&cinode->lock_sem);
538         return rc;
539 }
540
/*
 * Reopen a file whose handle has been marked invalid (cfile->invalidHandle).
 *
 * @can_flush: when true, dirty pages are written out and the inode is
 *             refreshed from the server after the reopen; when false both
 *             steps are skipped (see the deadlock note below).
 *
 * Serialised per file by cfile->fh_mutex. Returns 0 if the handle was
 * already valid or was reopened; otherwise a negative errno. Note that when
 * @can_flush is set the returned value is that of the inode refresh.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        /* handle is valid (possibly reopened by someone else): nothing to do */
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        /* request an oplock only if the server has them enabled */
        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                /* NULL pinode: the inode is not refreshed by the posix open */
                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        /* handle is usable again; let other users of the file proceed */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                /* a writeback error is recorded on the mapping; rc is reused */
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        /* install the new handle, then re-send byte-range locks (result ignored) */
        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}
673
674 int cifs_close(struct inode *inode, struct file *file)
675 {
676         if (file->private_data != NULL) {
677                 cifsFileInfo_put(file->private_data);
678                 file->private_data = NULL;
679         }
680
681         /* return code from the ->release op is always ignored */
682         return 0;
683 }
684
685 int cifs_closedir(struct inode *inode, struct file *file)
686 {
687         int rc = 0;
688         unsigned int xid;
689         struct cifsFileInfo *cfile = file->private_data;
690         struct cifs_tcon *tcon;
691         struct TCP_Server_Info *server;
692         char *buf;
693
694         cFYI(1, "Closedir inode = 0x%p", inode);
695
696         if (cfile == NULL)
697                 return rc;
698
699         xid = get_xid();
700         tcon = tlink_tcon(cfile->tlink);
701         server = tcon->ses->server;
702
703         cFYI(1, "Freeing private data in close dir");
704         spin_lock(&cifs_file_list_lock);
705         if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
706                 cfile->invalidHandle = true;
707                 spin_unlock(&cifs_file_list_lock);
708                 if (server->ops->close_dir)
709                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
710                 else
711                         rc = -ENOSYS;
712                 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
713                 /* not much we can do if it fails anyway, ignore rc */
714                 rc = 0;
715         } else
716                 spin_unlock(&cifs_file_list_lock);
717
718         buf = cfile->srch_inf.ntwrk_buf_start;
719         if (buf) {
720                 cFYI(1, "closedir free smb buf in srch struct");
721                 cfile->srch_inf.ntwrk_buf_start = NULL;
722                 if (cfile->srch_inf.smallBuf)
723                         cifs_small_buf_release(buf);
724                 else
725                         cifs_buf_release(buf);
726         }
727
728         cifs_put_tlink(cfile->tlink);
729         kfree(file->private_data);
730         file->private_data = NULL;
731         /* BB can we lock the filestruct while this is going on? */
732         free_xid(xid);
733         return rc;
734 }
735
736 static struct cifsLockInfo *
737 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
738 {
739         struct cifsLockInfo *lock =
740                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
741         if (!lock)
742                 return lock;
743         lock->offset = offset;
744         lock->length = length;
745         lock->type = type;
746         lock->pid = current->tgid;
747         INIT_LIST_HEAD(&lock->blist);
748         init_waitqueue_head(&lock->block_q);
749         return lock;
750 }
751
752 void
753 cifs_del_lock_waiters(struct cifsLockInfo *lock)
754 {
755         struct cifsLockInfo *li, *tmp;
756         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
757                 list_del_init(&li->blist);
758                 wake_up(&li->block_q);
759         }
760 }
761
/* Kind of operation a lock-conflict check is performed for (rw_check). */
#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/*
 * Search the locks held through one fid (@fdlocks) for one that conflicts
 * with the range [@offset, @offset + @length) requested via @cfile.
 *
 * @type:      lock type of the request.
 * @conf_lock: if non-NULL, receives the first conflicting lock found.
 * @rw_check : 0 - no op, 1 - read, 2 - write
 *
 * Returns true if a conflicting lock exists, false otherwise.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ranges do not overlap: cannot conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                /*
                 * Read/write check against a lock taken by this thread group
                 * through the same fid: only a shared lock blocks, and only
                 * a write.
                 */
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared request does not conflict with our own lock on
                 * the same fid, nor with a lock of identical type.
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
797
798 bool
799 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
800                         __u8 type, struct cifsLockInfo **conf_lock,
801                         int rw_check)
802 {
803         bool rc = false;
804         struct cifs_fid_locks *cur;
805         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
806
807         list_for_each_entry(cur, &cinode->llist, llist) {
808                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
809                                                  cfile, conf_lock, rw_check);
810                 if (rc)
811                         break;
812         }
813
814         return rc;
815 }
816
817 /*
818  * Check if there is another lock that prevents us to set the lock (mandatory
819  * style). If such a lock exists, update the flock structure with its
820  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
821  * or leave it the same if we can't. Returns 0 if we don't need to request to
822  * the server or 1 otherwise.
823  */
824 static int
825 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
826                __u8 type, struct file_lock *flock)
827 {
828         int rc = 0;
829         struct cifsLockInfo *conf_lock;
830         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
831         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
832         bool exist;
833
834         down_read(&cinode->lock_sem);
835
836         exist = cifs_find_lock_conflict(cfile, offset, length, type,
837                                         &conf_lock, CIFS_LOCK_OP);
838         if (exist) {
839                 flock->fl_start = conf_lock->offset;
840                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
841                 flock->fl_pid = conf_lock->pid;
842                 if (conf_lock->type & server->vals->shared_lock_type)
843                         flock->fl_type = F_RDLCK;
844                 else
845                         flock->fl_type = F_WRLCK;
846         } else if (!cinode->can_cache_brlcks)
847                 rc = 1;
848         else
849                 flock->fl_type = F_UNLCK;
850
851         up_read(&cinode->lock_sem);
852         return rc;
853 }
854
855 static void
856 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
857 {
858         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
859         down_write(&cinode->lock_sem);
860         list_add_tail(&lock->llist, &cfile->llist->locks);
861         up_write(&cinode->lock_sem);
862 }
863
864 /*
865  * Set the byte-range lock (mandatory style). Returns:
866  * 1) 0, if we set the lock and don't need to request to the server;
867  * 2) 1, if no locks prevent us but we need to request to the server;
868  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
869  */
870 static int
871 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
872                  bool wait)
873 {
874         struct cifsLockInfo *conf_lock;
875         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
876         bool exist;
877         int rc = 0;
878
879 try_again:
880         exist = false;
881         down_write(&cinode->lock_sem);
882
883         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
884                                         lock->type, &conf_lock, CIFS_LOCK_OP);
885         if (!exist && cinode->can_cache_brlcks) {
886                 list_add_tail(&lock->llist, &cfile->llist->locks);
887                 up_write(&cinode->lock_sem);
888                 return rc;
889         }
890
891         if (!exist)
892                 rc = 1;
893         else if (!wait)
894                 rc = -EACCES;
895         else {
896                 list_add_tail(&lock->blist, &conf_lock->blist);
897                 up_write(&cinode->lock_sem);
898                 rc = wait_event_interruptible(lock->block_q,
899                                         (lock->blist.prev == &lock->blist) &&
900                                         (lock->blist.next == &lock->blist));
901                 if (!rc)
902                         goto try_again;
903                 down_write(&cinode->lock_sem);
904                 list_del_init(&lock->blist);
905         }
906
907         up_write(&cinode->lock_sem);
908         return rc;
909 }
910
911 /*
912  * Check if there is another lock that prevents us to set the lock (posix
913  * style). If such a lock exists, update the flock structure with its
914  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
915  * or leave it the same if we can't. Returns 0 if we don't need to request to
916  * the server or 1 otherwise.
917  */
918 static int
919 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
920 {
921         int rc = 0;
922         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
923         unsigned char saved_type = flock->fl_type;
924
925         if ((flock->fl_flags & FL_POSIX) == 0)
926                 return 1;
927
928         down_read(&cinode->lock_sem);
929         posix_test_lock(file, flock);
930
931         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
932                 flock->fl_type = saved_type;
933                 rc = 1;
934         }
935
936         up_read(&cinode->lock_sem);
937         return rc;
938 }
939
940 /*
941  * Set the byte-range lock (posix style). Returns:
942  * 1) 0, if we set the lock and don't need to request to the server;
943  * 2) 1, if we need to request to the server;
944  * 3) <0, if the error occurs while setting the lock.
945  */
946 static int
947 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
948 {
949         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
950         int rc = 1;
951
952         if ((flock->fl_flags & FL_POSIX) == 0)
953                 return rc;
954
955 try_again:
956         down_write(&cinode->lock_sem);
957         if (!cinode->can_cache_brlcks) {
958                 up_write(&cinode->lock_sem);
959                 return rc;
960         }
961
962         rc = posix_lock_file(file, flock, NULL);
963         up_write(&cinode->lock_sem);
964         if (rc == FILE_LOCK_DEFERRED) {
965                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
966                 if (!rc)
967                         goto try_again;
968                 locks_delete_block(flock);
969         }
970         return rc;
971 }
972
973 int
974 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
975 {
976         unsigned int xid;
977         int rc = 0, stored_rc;
978         struct cifsLockInfo *li, *tmp;
979         struct cifs_tcon *tcon;
980         unsigned int num, max_num, max_buf;
981         LOCKING_ANDX_RANGE *buf, *cur;
982         int types[] = {LOCKING_ANDX_LARGE_FILES,
983                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
984         int i;
985
986         xid = get_xid();
987         tcon = tlink_tcon(cfile->tlink);
988
989         /*
990          * Accessing maxBuf is racy with cifs_reconnect - need to store value
991          * and check it for zero before using.
992          */
993         max_buf = tcon->ses->server->maxBuf;
994         if (!max_buf) {
995                 free_xid(xid);
996                 return -EINVAL;
997         }
998
999         max_num = (max_buf - sizeof(struct smb_hdr)) /
1000                                                 sizeof(LOCKING_ANDX_RANGE);
1001         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1002         if (!buf) {
1003                 free_xid(xid);
1004                 return -ENOMEM;
1005         }
1006
1007         for (i = 0; i < 2; i++) {
1008                 cur = buf;
1009                 num = 0;
1010                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1011                         if (li->type != types[i])
1012                                 continue;
1013                         cur->Pid = cpu_to_le16(li->pid);
1014                         cur->LengthLow = cpu_to_le32((u32)li->length);
1015                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1016                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1017                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1018                         if (++num == max_num) {
1019                                 stored_rc = cifs_lockv(xid, tcon,
1020                                                        cfile->fid.netfid,
1021                                                        (__u8)li->type, 0, num,
1022                                                        buf);
1023                                 if (stored_rc)
1024                                         rc = stored_rc;
1025                                 cur = buf;
1026                                 num = 0;
1027                         } else
1028                                 cur++;
1029                 }
1030
1031                 if (num) {
1032                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1033                                                (__u8)types[i], 0, num, buf);
1034                         if (stored_rc)
1035                                 rc = stored_rc;
1036                 }
1037         }
1038
1039         kfree(buf);
1040         free_xid(xid);
1041         return rc;
1042 }
1043
/*
 * copied from fs/locks.c with a name change
 *
 * Walk the file_lock chain that starts at inode->i_flock. @lockp is a
 * struct file_lock ** cursor; inside the loop body *lockp is the current
 * lock. Both arguments are parenthesized so that arbitrary expressions can
 * be passed safely.
 */
#define cifs_for_each_lock(inode, lockp) \
	for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
	     (lockp) = &(*(lockp))->fl_next)
1048
1049 struct lock_to_push {
1050         struct list_head llist;
1051         __u64 offset;
1052         __u64 length;
1053         __u32 pid;
1054         __u16 netfid;
1055         __u8 type;
1056 };
1057
1058 static int
1059 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1060 {
1061         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1062         struct file_lock *flock, **before;
1063         unsigned int count = 0, i = 0;
1064         int rc = 0, xid, type;
1065         struct list_head locks_to_send, *el;
1066         struct lock_to_push *lck, *tmp;
1067         __u64 length;
1068
1069         xid = get_xid();
1070
1071         lock_flocks();
1072         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1073                 if ((*before)->fl_flags & FL_POSIX)
1074                         count++;
1075         }
1076         unlock_flocks();
1077
1078         INIT_LIST_HEAD(&locks_to_send);
1079
1080         /*
1081          * Allocating count locks is enough because no FL_POSIX locks can be
1082          * added to the list while we are holding cinode->lock_sem that
1083          * protects locking operations of this inode.
1084          */
1085         for (; i < count; i++) {
1086                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1087                 if (!lck) {
1088                         rc = -ENOMEM;
1089                         goto err_out;
1090                 }
1091                 list_add_tail(&lck->llist, &locks_to_send);
1092         }
1093
1094         el = locks_to_send.next;
1095         lock_flocks();
1096         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1097                 flock = *before;
1098                 if ((flock->fl_flags & FL_POSIX) == 0)
1099                         continue;
1100                 if (el == &locks_to_send) {
1101                         /*
1102                          * The list ended. We don't have enough allocated
1103                          * structures - something is really wrong.
1104                          */
1105                         cERROR(1, "Can't push all brlocks!");
1106                         break;
1107                 }
1108                 length = 1 + flock->fl_end - flock->fl_start;
1109                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1110                         type = CIFS_RDLCK;
1111                 else
1112                         type = CIFS_WRLCK;
1113                 lck = list_entry(el, struct lock_to_push, llist);
1114                 lck->pid = flock->fl_pid;
1115                 lck->netfid = cfile->fid.netfid;
1116                 lck->length = length;
1117                 lck->type = type;
1118                 lck->offset = flock->fl_start;
1119                 el = el->next;
1120         }
1121         unlock_flocks();
1122
1123         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1124                 int stored_rc;
1125
1126                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1127                                              lck->offset, lck->length, NULL,
1128                                              lck->type, 0);
1129                 if (stored_rc)
1130                         rc = stored_rc;
1131                 list_del(&lck->llist);
1132                 kfree(lck);
1133         }
1134
1135 out:
1136         free_xid(xid);
1137         return rc;
1138 err_out:
1139         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1140                 list_del(&lck->llist);
1141                 kfree(lck);
1142         }
1143         goto out;
1144 }
1145
1146 static int
1147 cifs_push_locks(struct cifsFileInfo *cfile)
1148 {
1149         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1150         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1151         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1152         int rc = 0;
1153
1154         /* we are going to update can_cache_brlcks here - need a write access */
1155         down_write(&cinode->lock_sem);
1156         if (!cinode->can_cache_brlcks) {
1157                 up_write(&cinode->lock_sem);
1158                 return rc;
1159         }
1160
1161         if (cap_unix(tcon->ses) &&
1162             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1163             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1164                 rc = cifs_push_posix_locks(cfile);
1165         else
1166                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1167
1168         cinode->can_cache_brlcks = false;
1169         up_write(&cinode->lock_sem);
1170         return rc;
1171 }
1172
1173 static void
1174 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1175                 bool *wait_flag, struct TCP_Server_Info *server)
1176 {
1177         if (flock->fl_flags & FL_POSIX)
1178                 cFYI(1, "Posix");
1179         if (flock->fl_flags & FL_FLOCK)
1180                 cFYI(1, "Flock");
1181         if (flock->fl_flags & FL_SLEEP) {
1182                 cFYI(1, "Blocking lock");
1183                 *wait_flag = true;
1184         }
1185         if (flock->fl_flags & FL_ACCESS)
1186                 cFYI(1, "Process suspended by mandatory locking - "
1187                         "not implemented yet");
1188         if (flock->fl_flags & FL_LEASE)
1189                 cFYI(1, "Lease on file - not implemented yet");
1190         if (flock->fl_flags &
1191             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1192                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1193                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1194
1195         *type = server->vals->large_lock_type;
1196         if (flock->fl_type == F_WRLCK) {
1197                 cFYI(1, "F_WRLCK ");
1198                 *type |= server->vals->exclusive_lock_type;
1199                 *lock = 1;
1200         } else if (flock->fl_type == F_UNLCK) {
1201                 cFYI(1, "F_UNLCK");
1202                 *type |= server->vals->unlock_lock_type;
1203                 *unlock = 1;
1204                 /* Check if unlock includes more than one lock range */
1205         } else if (flock->fl_type == F_RDLCK) {
1206                 cFYI(1, "F_RDLCK");
1207                 *type |= server->vals->shared_lock_type;
1208                 *lock = 1;
1209         } else if (flock->fl_type == F_EXLCK) {
1210                 cFYI(1, "F_EXLCK");
1211                 *type |= server->vals->exclusive_lock_type;
1212                 *lock = 1;
1213         } else if (flock->fl_type == F_SHLCK) {
1214                 cFYI(1, "F_SHLCK");
1215                 *type |= server->vals->shared_lock_type;
1216                 *lock = 1;
1217         } else
1218                 cFYI(1, "Unknown type of lock");
1219 }
1220
1221 static int
1222 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1223            bool wait_flag, bool posix_lck, unsigned int xid)
1224 {
1225         int rc = 0;
1226         __u64 length = 1 + flock->fl_end - flock->fl_start;
1227         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1228         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1229         struct TCP_Server_Info *server = tcon->ses->server;
1230         __u16 netfid = cfile->fid.netfid;
1231
1232         if (posix_lck) {
1233                 int posix_lock_type;
1234
1235                 rc = cifs_posix_lock_test(file, flock);
1236                 if (!rc)
1237                         return rc;
1238
1239                 if (type & server->vals->shared_lock_type)
1240                         posix_lock_type = CIFS_RDLCK;
1241                 else
1242                         posix_lock_type = CIFS_WRLCK;
1243                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1244                                       flock->fl_start, length, flock,
1245                                       posix_lock_type, wait_flag);
1246                 return rc;
1247         }
1248
1249         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1250         if (!rc)
1251                 return rc;
1252
1253         /* BB we could chain these into one lock request BB */
1254         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1255                                     1, 0, false);
1256         if (rc == 0) {
1257                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1258                                             type, 0, 1, false);
1259                 flock->fl_type = F_UNLCK;
1260                 if (rc != 0)
1261                         cERROR(1, "Error unlocking previously locked "
1262                                   "range %d during test of lock", rc);
1263                 return 0;
1264         }
1265
1266         if (type & server->vals->shared_lock_type) {
1267                 flock->fl_type = F_WRLCK;
1268                 return 0;
1269         }
1270
1271         type &= ~server->vals->exclusive_lock_type;
1272
1273         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1274                                     type | server->vals->shared_lock_type,
1275                                     1, 0, false);
1276         if (rc == 0) {
1277                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1278                         type | server->vals->shared_lock_type, 0, 1, false);
1279                 flock->fl_type = F_RDLCK;
1280                 if (rc != 0)
1281                         cERROR(1, "Error unlocking previously locked "
1282                                   "range %d during test of lock", rc);
1283         } else
1284                 flock->fl_type = F_WRLCK;
1285
1286         return 0;
1287 }
1288
1289 void
1290 cifs_move_llist(struct list_head *source, struct list_head *dest)
1291 {
1292         struct list_head *li, *tmp;
1293         list_for_each_safe(li, tmp, source)
1294                 list_move(li, dest);
1295 }
1296
1297 void
1298 cifs_free_llist(struct list_head *llist)
1299 {
1300         struct cifsLockInfo *li, *tmp;
1301         list_for_each_entry_safe(li, tmp, llist, llist) {
1302                 cifs_del_lock_waiters(li);
1303                 list_del(&li->llist);
1304                 kfree(li);
1305         }
1306 }
1307
1308 int
1309 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1310                   unsigned int xid)
1311 {
1312         int rc = 0, stored_rc;
1313         int types[] = {LOCKING_ANDX_LARGE_FILES,
1314                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1315         unsigned int i;
1316         unsigned int max_num, num, max_buf;
1317         LOCKING_ANDX_RANGE *buf, *cur;
1318         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1319         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1320         struct cifsLockInfo *li, *tmp;
1321         __u64 length = 1 + flock->fl_end - flock->fl_start;
1322         struct list_head tmp_llist;
1323
1324         INIT_LIST_HEAD(&tmp_llist);
1325
1326         /*
1327          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1328          * and check it for zero before using.
1329          */
1330         max_buf = tcon->ses->server->maxBuf;
1331         if (!max_buf)
1332                 return -EINVAL;
1333
1334         max_num = (max_buf - sizeof(struct smb_hdr)) /
1335                                                 sizeof(LOCKING_ANDX_RANGE);
1336         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1337         if (!buf)
1338                 return -ENOMEM;
1339
1340         down_write(&cinode->lock_sem);
1341         for (i = 0; i < 2; i++) {
1342                 cur = buf;
1343                 num = 0;
1344                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1345                         if (flock->fl_start > li->offset ||
1346                             (flock->fl_start + length) <
1347                             (li->offset + li->length))
1348                                 continue;
1349                         if (current->tgid != li->pid)
1350                                 continue;
1351                         if (types[i] != li->type)
1352                                 continue;
1353                         if (cinode->can_cache_brlcks) {
1354                                 /*
1355                                  * We can cache brlock requests - simply remove
1356                                  * a lock from the file's list.
1357                                  */
1358                                 list_del(&li->llist);
1359                                 cifs_del_lock_waiters(li);
1360                                 kfree(li);
1361                                 continue;
1362                         }
1363                         cur->Pid = cpu_to_le16(li->pid);
1364                         cur->LengthLow = cpu_to_le32((u32)li->length);
1365                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1366                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1367                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1368                         /*
1369                          * We need to save a lock here to let us add it again to
1370                          * the file's list if the unlock range request fails on
1371                          * the server.
1372                          */
1373                         list_move(&li->llist, &tmp_llist);
1374                         if (++num == max_num) {
1375                                 stored_rc = cifs_lockv(xid, tcon,
1376                                                        cfile->fid.netfid,
1377                                                        li->type, num, 0, buf);
1378                                 if (stored_rc) {
1379                                         /*
1380                                          * We failed on the unlock range
1381                                          * request - add all locks from the tmp
1382                                          * list to the head of the file's list.
1383                                          */
1384                                         cifs_move_llist(&tmp_llist,
1385                                                         &cfile->llist->locks);
1386                                         rc = stored_rc;
1387                                 } else
1388                                         /*
1389                                          * The unlock range request succeed -
1390                                          * free the tmp list.
1391                                          */
1392                                         cifs_free_llist(&tmp_llist);
1393                                 cur = buf;
1394                                 num = 0;
1395                         } else
1396                                 cur++;
1397                 }
1398                 if (num) {
1399                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1400                                                types[i], num, 0, buf);
1401                         if (stored_rc) {
1402                                 cifs_move_llist(&tmp_llist,
1403                                                 &cfile->llist->locks);
1404                                 rc = stored_rc;
1405                         } else
1406                                 cifs_free_llist(&tmp_llist);
1407                 }
1408         }
1409
1410         up_write(&cinode->lock_sem);
1411         kfree(buf);
1412         return rc;
1413 }
1414
1415 static int
1416 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1417            bool wait_flag, bool posix_lck, int lock, int unlock,
1418            unsigned int xid)
1419 {
1420         int rc = 0;
1421         __u64 length = 1 + flock->fl_end - flock->fl_start;
1422         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1423         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424         struct TCP_Server_Info *server = tcon->ses->server;
1425
1426         if (posix_lck) {
1427                 int posix_lock_type;
1428
1429                 rc = cifs_posix_lock_set(file, flock);
1430                 if (!rc || rc < 0)
1431                         return rc;
1432
1433                 if (type & server->vals->shared_lock_type)
1434                         posix_lock_type = CIFS_RDLCK;
1435                 else
1436                         posix_lock_type = CIFS_WRLCK;
1437
1438                 if (unlock == 1)
1439                         posix_lock_type = CIFS_UNLCK;
1440
1441                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1442                                       current->tgid, flock->fl_start, length,
1443                                       NULL, posix_lock_type, wait_flag);
1444                 goto out;
1445         }
1446
1447         if (lock) {
1448                 struct cifsLockInfo *lock;
1449
1450                 lock = cifs_lock_init(flock->fl_start, length, type);
1451                 if (!lock)
1452                         return -ENOMEM;
1453
1454                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1455                 if (rc < 0) {
1456                         kfree(lock);
1457                         return rc;
1458                 }
1459                 if (!rc)
1460                         goto out;
1461
1462                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1463                                             type, 1, 0, wait_flag);
1464                 if (rc) {
1465                         kfree(lock);
1466                         return rc;
1467                 }
1468
1469                 cifs_lock_add(cfile, lock);
1470         } else if (unlock)
1471                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1472
1473 out:
1474         if (flock->fl_flags & FL_POSIX)
1475                 posix_lock_file_wait(file, flock);
1476         return rc;
1477 }
1478
1479 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1480 {
1481         int rc, xid;
1482         int lock = 0, unlock = 0;
1483         bool wait_flag = false;
1484         bool posix_lck = false;
1485         struct cifs_sb_info *cifs_sb;
1486         struct cifs_tcon *tcon;
1487         struct cifsInodeInfo *cinode;
1488         struct cifsFileInfo *cfile;
1489         __u16 netfid;
1490         __u32 type;
1491
1492         rc = -EACCES;
1493         xid = get_xid();
1494
1495         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1496                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1497                 flock->fl_start, flock->fl_end);
1498
1499         cfile = (struct cifsFileInfo *)file->private_data;
1500         tcon = tlink_tcon(cfile->tlink);
1501
1502         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1503                         tcon->ses->server);
1504
1505         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1506         netfid = cfile->fid.netfid;
1507         cinode = CIFS_I(file->f_path.dentry->d_inode);
1508
1509         if (cap_unix(tcon->ses) &&
1510             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1511             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1512                 posix_lck = true;
1513         /*
1514          * BB add code here to normalize offset and length to account for
1515          * negative length which we can not accept over the wire.
1516          */
1517         if (IS_GETLK(cmd)) {
1518                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1519                 free_xid(xid);
1520                 return rc;
1521         }
1522
1523         if (!lock && !unlock) {
1524                 /*
1525                  * if no lock or unlock then nothing to do since we do not
1526                  * know what it is
1527                  */
1528                 free_xid(xid);
1529                 return -EOPNOTSUPP;
1530         }
1531
1532         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1533                         xid);
1534         free_xid(xid);
1535         return rc;
1536 }
1537
1538 /*
1539  * update the file size (if needed) after a write. Should be called with
1540  * the inode->i_lock held
1541  */
1542 void
1543 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1544                       unsigned int bytes_written)
1545 {
1546         loff_t end_of_write = offset + bytes_written;
1547
1548         if (end_of_write > cifsi->server_eof)
1549                 cifsi->server_eof = end_of_write;
1550 }
1551
1552 static ssize_t
1553 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1554            size_t write_size, loff_t *offset)
1555 {
1556         int rc = 0;
1557         unsigned int bytes_written = 0;
1558         unsigned int total_written;
1559         struct cifs_sb_info *cifs_sb;
1560         struct cifs_tcon *tcon;
1561         struct TCP_Server_Info *server;
1562         unsigned int xid;
1563         struct dentry *dentry = open_file->dentry;
1564         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1565         struct cifs_io_parms io_parms;
1566
1567         cifs_sb = CIFS_SB(dentry->d_sb);
1568
1569         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1570              *offset, dentry->d_name.name);
1571
1572         tcon = tlink_tcon(open_file->tlink);
1573         server = tcon->ses->server;
1574
1575         if (!server->ops->sync_write)
1576                 return -ENOSYS;
1577
1578         xid = get_xid();
1579
1580         for (total_written = 0; write_size > total_written;
1581              total_written += bytes_written) {
1582                 rc = -EAGAIN;
1583                 while (rc == -EAGAIN) {
1584                         struct kvec iov[2];
1585                         unsigned int len;
1586
1587                         if (open_file->invalidHandle) {
1588                                 /* we could deadlock if we called
1589                                    filemap_fdatawait from here so tell
1590                                    reopen_file not to flush data to
1591                                    server now */
1592                                 rc = cifs_reopen_file(open_file, false);
1593                                 if (rc != 0)
1594                                         break;
1595                         }
1596
1597                         len = min((size_t)cifs_sb->wsize,
1598                                   write_size - total_written);
1599                         /* iov[0] is reserved for smb header */
1600                         iov[1].iov_base = (char *)write_data + total_written;
1601                         iov[1].iov_len = len;
1602                         io_parms.pid = pid;
1603                         io_parms.tcon = tcon;
1604                         io_parms.offset = *offset;
1605                         io_parms.length = len;
1606                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1607                                                      &bytes_written, iov, 1);
1608                 }
1609                 if (rc || (bytes_written == 0)) {
1610                         if (total_written)
1611                                 break;
1612                         else {
1613                                 free_xid(xid);
1614                                 return rc;
1615                         }
1616                 } else {
1617                         spin_lock(&dentry->d_inode->i_lock);
1618                         cifs_update_eof(cifsi, *offset, bytes_written);
1619                         spin_unlock(&dentry->d_inode->i_lock);
1620                         *offset += bytes_written;
1621                 }
1622         }
1623
1624         cifs_stats_bytes_written(tcon, total_written);
1625
1626         if (total_written > 0) {
1627                 spin_lock(&dentry->d_inode->i_lock);
1628                 if (*offset > dentry->d_inode->i_size)
1629                         i_size_write(dentry->d_inode, *offset);
1630                 spin_unlock(&dentry->d_inode->i_lock);
1631         }
1632         mark_inode_dirty_sync(dentry->d_inode);
1633         free_xid(xid);
1634         return total_written;
1635 }
1636
1637 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1638                                         bool fsuid_only)
1639 {
1640         struct cifsFileInfo *open_file = NULL;
1641         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1642
1643         /* only filter by fsuid on multiuser mounts */
1644         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1645                 fsuid_only = false;
1646
1647         spin_lock(&cifs_file_list_lock);
1648         /* we could simply get the first_list_entry since write-only entries
1649            are always at the end of the list but since the first entry might
1650            have a close pending, we go through the whole list */
1651         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1652                 if (fsuid_only && open_file->uid != current_fsuid())
1653                         continue;
1654                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1655                         if (!open_file->invalidHandle) {
1656                                 /* found a good file */
1657                                 /* lock it so it will not be closed on us */
1658                                 cifsFileInfo_get_locked(open_file);
1659                                 spin_unlock(&cifs_file_list_lock);
1660                                 return open_file;
1661                         } /* else might as well continue, and look for
1662                              another, or simply have the caller reopen it
1663                              again rather than trying to fix this handle */
1664                 } else /* write only file */
1665                         break; /* write only files are last so must be done */
1666         }
1667         spin_unlock(&cifs_file_list_lock);
1668         return NULL;
1669 }
1670
1671 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1672                                         bool fsuid_only)
1673 {
1674         struct cifsFileInfo *open_file, *inv_file = NULL;
1675         struct cifs_sb_info *cifs_sb;
1676         bool any_available = false;
1677         int rc;
1678         unsigned int refind = 0;
1679
1680         /* Having a null inode here (because mapping->host was set to zero by
1681         the VFS or MM) should not happen but we had reports of on oops (due to
1682         it being zero) during stress testcases so we need to check for it */
1683
1684         if (cifs_inode == NULL) {
1685                 cERROR(1, "Null inode passed to cifs_writeable_file");
1686                 dump_stack();
1687                 return NULL;
1688         }
1689
1690         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1691
1692         /* only filter by fsuid on multiuser mounts */
1693         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1694                 fsuid_only = false;
1695
1696         spin_lock(&cifs_file_list_lock);
1697 refind_writable:
1698         if (refind > MAX_REOPEN_ATT) {
1699                 spin_unlock(&cifs_file_list_lock);
1700                 return NULL;
1701         }
1702         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1703                 if (!any_available && open_file->pid != current->tgid)
1704                         continue;
1705                 if (fsuid_only && open_file->uid != current_fsuid())
1706                         continue;
1707                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1708                         if (!open_file->invalidHandle) {
1709                                 /* found a good writable file */
1710                                 cifsFileInfo_get_locked(open_file);
1711                                 spin_unlock(&cifs_file_list_lock);
1712                                 return open_file;
1713                         } else {
1714                                 if (!inv_file)
1715                                         inv_file = open_file;
1716                         }
1717                 }
1718         }
1719         /* couldn't find useable FH with same pid, try any available */
1720         if (!any_available) {
1721                 any_available = true;
1722                 goto refind_writable;
1723         }
1724
1725         if (inv_file) {
1726                 any_available = false;
1727                 cifsFileInfo_get_locked(inv_file);
1728         }
1729
1730         spin_unlock(&cifs_file_list_lock);
1731
1732         if (inv_file) {
1733                 rc = cifs_reopen_file(inv_file, false);
1734                 if (!rc)
1735                         return inv_file;
1736                 else {
1737                         spin_lock(&cifs_file_list_lock);
1738                         list_move_tail(&inv_file->flist,
1739                                         &cifs_inode->openFileList);
1740                         spin_unlock(&cifs_file_list_lock);
1741                         cifsFileInfo_put(inv_file);
1742                         spin_lock(&cifs_file_list_lock);
1743                         ++refind;
1744                         goto refind_writable;
1745                 }
1746         }
1747
1748         return NULL;
1749 }
1750
1751 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1752 {
1753         struct address_space *mapping = page->mapping;
1754         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1755         char *write_data;
1756         int rc = -EFAULT;
1757         int bytes_written = 0;
1758         struct inode *inode;
1759         struct cifsFileInfo *open_file;
1760
1761         if (!mapping || !mapping->host)
1762                 return -EFAULT;
1763
1764         inode = page->mapping->host;
1765
1766         offset += (loff_t)from;
1767         write_data = kmap(page);
1768         write_data += from;
1769
1770         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1771                 kunmap(page);
1772                 return -EIO;
1773         }
1774
1775         /* racing with truncate? */
1776         if (offset > mapping->host->i_size) {
1777                 kunmap(page);
1778                 return 0; /* don't care */
1779         }
1780
1781         /* check to make sure that we are not extending the file */
1782         if (mapping->host->i_size - offset < (loff_t)to)
1783                 to = (unsigned)(mapping->host->i_size - offset);
1784
1785         open_file = find_writable_file(CIFS_I(mapping->host), false);
1786         if (open_file) {
1787                 bytes_written = cifs_write(open_file, open_file->pid,
1788                                            write_data, to - from, &offset);
1789                 cifsFileInfo_put(open_file);
1790                 /* Does mm or vfs already set times? */
1791                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1792                 if ((bytes_written > 0) && (offset))
1793                         rc = 0;
1794                 else if (bytes_written < 0)
1795                         rc = bytes_written;
1796         } else {
1797                 cFYI(1, "No writeable filehandles for inode");
1798                 rc = -EIO;
1799         }
1800
1801         kunmap(page);
1802         return rc;
1803 }
1804
1805 static int cifs_writepages(struct address_space *mapping,
1806                            struct writeback_control *wbc)
1807 {
1808         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1809         bool done = false, scanned = false, range_whole = false;
1810         pgoff_t end, index;
1811         struct cifs_writedata *wdata;
1812         struct TCP_Server_Info *server;
1813         struct page *page;
1814         int rc = 0;
1815
1816         /*
1817          * If wsize is smaller than the page cache size, default to writing
1818          * one page at a time via cifs_writepage
1819          */
1820         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1821                 return generic_writepages(mapping, wbc);
1822
1823         if (wbc->range_cyclic) {
1824                 index = mapping->writeback_index; /* Start from prev offset */
1825                 end = -1;
1826         } else {
1827                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1828                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1829                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1830                         range_whole = true;
1831                 scanned = true;
1832         }
1833 retry:
1834         while (!done && index <= end) {
1835                 unsigned int i, nr_pages, found_pages;
1836                 pgoff_t next = 0, tofind;
1837                 struct page **pages;
1838
1839                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1840                                 end - index) + 1;
1841
1842                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1843                                              cifs_writev_complete);
1844                 if (!wdata) {
1845                         rc = -ENOMEM;
1846                         break;
1847                 }
1848
1849                 /*
1850                  * find_get_pages_tag seems to return a max of 256 on each
1851                  * iteration, so we must call it several times in order to
1852                  * fill the array or the wsize is effectively limited to
1853                  * 256 * PAGE_CACHE_SIZE.
1854                  */
1855                 found_pages = 0;
1856                 pages = wdata->pages;
1857                 do {
1858                         nr_pages = find_get_pages_tag(mapping, &index,
1859                                                         PAGECACHE_TAG_DIRTY,
1860                                                         tofind, pages);
1861                         found_pages += nr_pages;
1862                         tofind -= nr_pages;
1863                         pages += nr_pages;
1864                 } while (nr_pages && tofind && index <= end);
1865
1866                 if (found_pages == 0) {
1867                         kref_put(&wdata->refcount, cifs_writedata_release);
1868                         break;
1869                 }
1870
1871                 nr_pages = 0;
1872                 for (i = 0; i < found_pages; i++) {
1873                         page = wdata->pages[i];
1874                         /*
1875                          * At this point we hold neither mapping->tree_lock nor
1876                          * lock on the page itself: the page may be truncated or
1877                          * invalidated (changing page->mapping to NULL), or even
1878                          * swizzled back from swapper_space to tmpfs file
1879                          * mapping
1880                          */
1881
1882                         if (nr_pages == 0)
1883                                 lock_page(page);
1884                         else if (!trylock_page(page))
1885                                 break;
1886
1887                         if (unlikely(page->mapping != mapping)) {
1888                                 unlock_page(page);
1889                                 break;
1890                         }
1891
1892                         if (!wbc->range_cyclic && page->index > end) {
1893                                 done = true;
1894                                 unlock_page(page);
1895                                 break;
1896                         }
1897
1898                         if (next && (page->index != next)) {
1899                                 /* Not next consecutive page */
1900                                 unlock_page(page);
1901                                 break;
1902                         }
1903
1904                         if (wbc->sync_mode != WB_SYNC_NONE)
1905                                 wait_on_page_writeback(page);
1906
1907                         if (PageWriteback(page) ||
1908                                         !clear_page_dirty_for_io(page)) {
1909                                 unlock_page(page);
1910                                 break;
1911                         }
1912
1913                         /*
1914                          * This actually clears the dirty bit in the radix tree.
1915                          * See cifs_writepage() for more commentary.
1916                          */
1917                         set_page_writeback(page);
1918
1919                         if (page_offset(page) >= i_size_read(mapping->host)) {
1920                                 done = true;
1921                                 unlock_page(page);
1922                                 end_page_writeback(page);
1923                                 break;
1924                         }
1925
1926                         wdata->pages[i] = page;
1927                         next = page->index + 1;
1928                         ++nr_pages;
1929                 }
1930
1931                 /* reset index to refind any pages skipped */
1932                 if (nr_pages == 0)
1933                         index = wdata->pages[0]->index + 1;
1934
1935                 /* put any pages we aren't going to use */
1936                 for (i = nr_pages; i < found_pages; i++) {
1937                         page_cache_release(wdata->pages[i]);
1938                         wdata->pages[i] = NULL;
1939                 }
1940
1941                 /* nothing to write? */
1942                 if (nr_pages == 0) {
1943                         kref_put(&wdata->refcount, cifs_writedata_release);
1944                         continue;
1945                 }
1946
1947                 wdata->sync_mode = wbc->sync_mode;
1948                 wdata->nr_pages = nr_pages;
1949                 wdata->offset = page_offset(wdata->pages[0]);
1950                 wdata->pagesz = PAGE_CACHE_SIZE;
1951                 wdata->tailsz =
1952                         min(i_size_read(mapping->host) -
1953                             page_offset(wdata->pages[nr_pages - 1]),
1954                             (loff_t)PAGE_CACHE_SIZE);
1955                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
1956                                         wdata->tailsz;
1957
1958                 do {
1959                         if (wdata->cfile != NULL)
1960                                 cifsFileInfo_put(wdata->cfile);
1961                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1962                                                           false);
1963                         if (!wdata->cfile) {
1964                                 cERROR(1, "No writable handles for inode");
1965                                 rc = -EBADF;
1966                                 break;
1967                         }
1968                         wdata->pid = wdata->cfile->pid;
1969                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
1970                         rc = server->ops->async_writev(wdata);
1971                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1972
1973                 for (i = 0; i < nr_pages; ++i)
1974                         unlock_page(wdata->pages[i]);
1975
1976                 /* send failure -- clean up the mess */
1977                 if (rc != 0) {
1978                         for (i = 0; i < nr_pages; ++i) {
1979                                 if (rc == -EAGAIN)
1980                                         redirty_page_for_writepage(wbc,
1981                                                            wdata->pages[i]);
1982                                 else
1983                                         SetPageError(wdata->pages[i]);
1984                                 end_page_writeback(wdata->pages[i]);
1985                                 page_cache_release(wdata->pages[i]);
1986                         }
1987                         if (rc != -EAGAIN)
1988                                 mapping_set_error(mapping, rc);
1989                 }
1990                 kref_put(&wdata->refcount, cifs_writedata_release);
1991
1992                 wbc->nr_to_write -= nr_pages;
1993                 if (wbc->nr_to_write <= 0)
1994                         done = true;
1995
1996                 index = next;
1997         }
1998
1999         if (!scanned && !done) {
2000                 /*
2001                  * We hit the last page and there is more work to be done: wrap
2002                  * back to the start of the file
2003                  */
2004                 scanned = true;
2005                 index = 0;
2006                 goto retry;
2007         }
2008
2009         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2010                 mapping->writeback_index = index;
2011
2012         return rc;
2013 }
2014
2015 static int
2016 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2017 {
2018         int rc;
2019         unsigned int xid;
2020
2021         xid = get_xid();
2022 /* BB add check for wbc flags */
2023         page_cache_get(page);
2024         if (!PageUptodate(page))
2025                 cFYI(1, "ppw - page not up to date");
2026
2027         /*
2028          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2029          *
2030          * A writepage() implementation always needs to do either this,
2031          * or re-dirty the page with "redirty_page_for_writepage()" in
2032          * the case of a failure.
2033          *
2034          * Just unlocking the page will cause the radix tree tag-bits
2035          * to fail to update with the state of the page correctly.
2036          */
2037         set_page_writeback(page);
2038 retry_write:
2039         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2040         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2041                 goto retry_write;
2042         else if (rc == -EAGAIN)
2043                 redirty_page_for_writepage(wbc, page);
2044         else if (rc != 0)
2045                 SetPageError(page);
2046         else
2047                 SetPageUptodate(page);
2048         end_page_writeback(page);
2049         page_cache_release(page);
2050         free_xid(xid);
2051         return rc;
2052 }
2053
/*
 * Write @page via cifs_writepage_locked() and then unlock it.
 * Returns whatever cifs_writepage_locked() returned.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);

	return rc;
}
2060
2061 static int cifs_write_end(struct file *file, struct address_space *mapping,
2062                         loff_t pos, unsigned len, unsigned copied,
2063                         struct page *page, void *fsdata)
2064 {
2065         int rc;
2066         struct inode *inode = mapping->host;
2067         struct cifsFileInfo *cfile = file->private_data;
2068         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2069         __u32 pid;
2070
2071         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2072                 pid = cfile->pid;
2073         else
2074                 pid = current->tgid;
2075
2076         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2077                  page, pos, copied);
2078
2079         if (PageChecked(page)) {
2080                 if (copied == len)
2081                         SetPageUptodate(page);
2082                 ClearPageChecked(page);
2083         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2084                 SetPageUptodate(page);
2085
2086         if (!PageUptodate(page)) {
2087                 char *page_data;
2088                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2089                 unsigned int xid;
2090
2091                 xid = get_xid();
2092                 /* this is probably better than directly calling
2093                    partialpage_write since in this function the file handle is
2094                    known which we might as well leverage */
2095                 /* BB check if anything else missing out of ppw
2096                    such as updating last write time */
2097                 page_data = kmap(page);
2098                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2099                 /* if (rc < 0) should we set writebehind rc? */
2100                 kunmap(page);
2101
2102                 free_xid(xid);
2103         } else {
2104                 rc = copied;
2105                 pos += copied;
2106                 set_page_dirty(page);
2107         }
2108
2109         if (rc > 0) {
2110                 spin_lock(&inode->i_lock);
2111                 if (pos > inode->i_size)
2112                         i_size_write(inode, pos);
2113                 spin_unlock(&inode->i_lock);
2114         }
2115
2116         unlock_page(page);
2117         page_cache_release(page);
2118
2119         return rc;
2120 }
2121
2122 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2123                       int datasync)
2124 {
2125         unsigned int xid;
2126         int rc = 0;
2127         struct cifs_tcon *tcon;
2128         struct TCP_Server_Info *server;
2129         struct cifsFileInfo *smbfile = file->private_data;
2130         struct inode *inode = file->f_path.dentry->d_inode;
2131         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2132
2133         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2134         if (rc)
2135                 return rc;
2136         mutex_lock(&inode->i_mutex);
2137
2138         xid = get_xid();
2139
2140         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2141                 file->f_path.dentry->d_name.name, datasync);
2142
2143         if (!CIFS_I(inode)->clientCanCacheRead) {
2144                 rc = cifs_invalidate_mapping(inode);
2145                 if (rc) {
2146                         cFYI(1, "rc: %d during invalidate phase", rc);
2147                         rc = 0; /* don't care about it in fsync */
2148                 }
2149         }
2150
2151         tcon = tlink_tcon(smbfile->tlink);
2152         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2153                 server = tcon->ses->server;
2154                 if (server->ops->flush)
2155                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2156                 else
2157                         rc = -ENOSYS;
2158         }
2159
2160         free_xid(xid);
2161         mutex_unlock(&inode->i_mutex);
2162         return rc;
2163 }
2164
2165 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2166 {
2167         unsigned int xid;
2168         int rc = 0;
2169         struct cifs_tcon *tcon;
2170         struct TCP_Server_Info *server;
2171         struct cifsFileInfo *smbfile = file->private_data;
2172         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2173         struct inode *inode = file->f_mapping->host;
2174
2175         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2176         if (rc)
2177                 return rc;
2178         mutex_lock(&inode->i_mutex);
2179
2180         xid = get_xid();
2181
2182         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2183                 file->f_path.dentry->d_name.name, datasync);
2184
2185         tcon = tlink_tcon(smbfile->tlink);
2186         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2187                 server = tcon->ses->server;
2188                 if (server->ops->flush)
2189                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2190                 else
2191                         rc = -ENOSYS;
2192         }
2193
2194         free_xid(xid);
2195         mutex_unlock(&inode->i_mutex);
2196         return rc;
2197 }
2198
2199 /*
2200  * As file closes, flush all cached write data for this inode checking
2201  * for write behind errors.
2202  */
2203 int cifs_flush(struct file *file, fl_owner_t id)
2204 {
2205         struct inode *inode = file->f_path.dentry->d_inode;
2206         int rc = 0;
2207
2208         if (file->f_mode & FMODE_WRITE)
2209                 rc = filemap_write_and_wait(inode->i_mapping);
2210
2211         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2212
2213         return rc;
2214 }
2215
2216 static int
2217 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2218 {
2219         int rc = 0;
2220         unsigned long i;
2221
2222         for (i = 0; i < num_pages; i++) {
2223                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2224                 if (!pages[i]) {
2225                         /*
2226                          * save number of pages we have already allocated and
2227                          * return with ENOMEM error
2228                          */
2229                         num_pages = i;
2230                         rc = -ENOMEM;
2231                         break;
2232                 }
2233         }
2234
2235         if (rc) {
2236                 for (i = 0; i < num_pages; i++)
2237                         put_page(pages[i]);
2238         }
2239         return rc;
2240 }
2241
2242 static inline
2243 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2244 {
2245         size_t num_pages;
2246         size_t clen;
2247
2248         clen = min_t(const size_t, len, wsize);
2249         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2250
2251         if (cur_len)
2252                 *cur_len = clen;
2253
2254         return num_pages;
2255 }
2256
2257 static void
2258 cifs_uncached_writev_complete(struct work_struct *work)
2259 {
2260         int i;
2261         struct cifs_writedata *wdata = container_of(work,
2262                                         struct cifs_writedata, work);
2263         struct inode *inode = wdata->cfile->dentry->d_inode;
2264         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2265
2266         spin_lock(&inode->i_lock);
2267         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2268         if (cifsi->server_eof > inode->i_size)
2269                 i_size_write(inode, cifsi->server_eof);
2270         spin_unlock(&inode->i_lock);
2271
2272         complete(&wdata->done);
2273
2274         if (wdata->result != -EAGAIN) {
2275                 for (i = 0; i < wdata->nr_pages; i++)
2276                         put_page(wdata->pages[i]);
2277         }
2278
2279         kref_put(&wdata->refcount, cifs_writedata_release);
2280 }
2281
2282 /* attempt to send write to server, retry on any -EAGAIN errors */
2283 static int
2284 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2285 {
2286         int rc;
2287         struct TCP_Server_Info *server;
2288
2289         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2290
2291         do {
2292                 if (wdata->cfile->invalidHandle) {
2293                         rc = cifs_reopen_file(wdata->cfile, false);
2294                         if (rc != 0)
2295                                 continue;
2296                 }
2297                 rc = server->ops->async_writev(wdata);
2298         } while (rc == -EAGAIN);
2299
2300         return rc;
2301 }
2302
2303 static ssize_t
2304 cifs_iovec_write(struct file *file, const struct iovec *iov,
2305                  unsigned long nr_segs, loff_t *poffset)
2306 {
2307         unsigned long nr_pages, i;
2308         size_t copied, len, cur_len;
2309         ssize_t total_written = 0;
2310         loff_t offset;
2311         struct iov_iter it;
2312         struct cifsFileInfo *open_file;
2313         struct cifs_tcon *tcon;
2314         struct cifs_sb_info *cifs_sb;
2315         struct cifs_writedata *wdata, *tmp;
2316         struct list_head wdata_list;
2317         int rc;
2318         pid_t pid;
2319
2320         len = iov_length(iov, nr_segs);
2321         if (!len)
2322                 return 0;
2323
2324         rc = generic_write_checks(file, poffset, &len, 0);
2325         if (rc)
2326                 return rc;
2327
2328         INIT_LIST_HEAD(&wdata_list);
2329         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2330         open_file = file->private_data;
2331         tcon = tlink_tcon(open_file->tlink);
2332
2333         if (!tcon->ses->server->ops->async_writev)
2334                 return -ENOSYS;
2335
2336         offset = *poffset;
2337
2338         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2339                 pid = open_file->pid;
2340         else
2341                 pid = current->tgid;
2342
2343         iov_iter_init(&it, iov, nr_segs, len, 0);
2344         do {
2345                 size_t save_len;
2346
2347                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2348                 wdata = cifs_writedata_alloc(nr_pages,
2349                                              cifs_uncached_writev_complete);
2350                 if (!wdata) {
2351                         rc = -ENOMEM;
2352                         break;
2353                 }
2354
2355                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2356                 if (rc) {
2357                         kfree(wdata);
2358                         break;
2359                 }
2360
2361                 save_len = cur_len;
2362                 for (i = 0; i < nr_pages; i++) {
2363                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2364                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2365                                                          0, copied);
2366                         cur_len -= copied;
2367                         iov_iter_advance(&it, copied);
2368                 }
2369                 cur_len = save_len - cur_len;
2370
2371                 wdata->sync_mode = WB_SYNC_ALL;
2372                 wdata->nr_pages = nr_pages;
2373                 wdata->offset = (__u64)offset;
2374                 wdata->cfile = cifsFileInfo_get(open_file);
2375                 wdata->pid = pid;
2376                 wdata->bytes = cur_len;
2377                 wdata->pagesz = PAGE_SIZE;
2378                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2379                 rc = cifs_uncached_retry_writev(wdata);
2380                 if (rc) {
2381                         kref_put(&wdata->refcount, cifs_writedata_release);
2382                         break;
2383                 }
2384
2385                 list_add_tail(&wdata->list, &wdata_list);
2386                 offset += cur_len;
2387                 len -= cur_len;
2388         } while (len > 0);
2389
2390         /*
2391          * If at least one write was successfully sent, then discard any rc
2392          * value from the later writes. If the other write succeeds, then
2393          * we'll end up returning whatever was written. If it fails, then
2394          * we'll get a new rc value from that.
2395          */
2396         if (!list_empty(&wdata_list))
2397                 rc = 0;
2398
2399         /*
2400          * Wait for and collect replies for any successful sends in order of
2401          * increasing offset. Once an error is hit or we get a fatal signal
2402          * while waiting, then return without waiting for any more replies.
2403          */
2404 restart_loop:
2405         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2406                 if (!rc) {
2407                         /* FIXME: freezable too? */
2408                         rc = wait_for_completion_killable(&wdata->done);
2409                         if (rc)
2410                                 rc = -EINTR;
2411                         else if (wdata->result)
2412                                 rc = wdata->result;
2413                         else
2414                                 total_written += wdata->bytes;
2415
2416                         /* resend call if it's a retryable error */
2417                         if (rc == -EAGAIN) {
2418                                 rc = cifs_uncached_retry_writev(wdata);
2419                                 goto restart_loop;
2420                         }
2421                 }
2422                 list_del_init(&wdata->list);
2423                 kref_put(&wdata->refcount, cifs_writedata_release);
2424         }
2425
2426         if (total_written > 0)
2427                 *poffset += total_written;
2428
2429         cifs_stats_bytes_written(tcon, total_written);
2430         return total_written ? total_written : (ssize_t)rc;
2431 }
2432
2433 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2434                                 unsigned long nr_segs, loff_t pos)
2435 {
2436         ssize_t written;
2437         struct inode *inode;
2438
2439         inode = iocb->ki_filp->f_path.dentry->d_inode;
2440
2441         /*
2442          * BB - optimize the way when signing is disabled. We can drop this
2443          * extra memory-to-memory copying and use iovec buffers for constructing
2444          * write request.
2445          */
2446
2447         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2448         if (written > 0) {
2449                 CIFS_I(inode)->invalid_mapping = true;
2450                 iocb->ki_pos = pos;
2451         }
2452
2453         return written;
2454 }
2455
2456 static ssize_t
2457 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2458             unsigned long nr_segs, loff_t pos)
2459 {
2460         struct file *file = iocb->ki_filp;
2461         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2462         struct inode *inode = file->f_mapping->host;
2463         struct cifsInodeInfo *cinode = CIFS_I(inode);
2464         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2465         ssize_t rc = -EACCES;
2466
2467         BUG_ON(iocb->ki_pos != pos);
2468
2469         sb_start_write(inode->i_sb);
2470
2471         /*
2472          * We need to hold the sem to be sure nobody modifies lock list
2473          * with a brlock that prevents writing.
2474          */
2475         down_read(&cinode->lock_sem);
2476         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2477                                      server->vals->exclusive_lock_type, NULL,
2478                                      CIFS_WRITE_OP)) {
2479                 mutex_lock(&inode->i_mutex);
2480                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2481                                                &iocb->ki_pos);
2482                 mutex_unlock(&inode->i_mutex);
2483         }
2484
2485         if (rc > 0 || rc == -EIOCBQUEUED) {
2486                 ssize_t err;
2487
2488                 err = generic_write_sync(file, pos, rc);
2489                 if (err < 0 && rc > 0)
2490                         rc = err;
2491         }
2492
2493         up_read(&cinode->lock_sem);
2494         sb_end_write(inode->i_sb);
2495         return rc;
2496 }
2497
2498 ssize_t
2499 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2500                    unsigned long nr_segs, loff_t pos)
2501 {
2502         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2503         struct cifsInodeInfo *cinode = CIFS_I(inode);
2504         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2505         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2506                                                 iocb->ki_filp->private_data;
2507         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2508
2509 #ifdef CONFIG_CIFS_SMB2
2510         /*
2511          * If we have an oplock for read and want to write a data to the file
2512          * we need to store it in the page cache and then push it to the server
2513          * to be sure the next read will get a valid data.
2514          */
2515         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead) {
2516                 ssize_t written;
2517                 int rc;
2518
2519                 written = generic_file_aio_write(iocb, iov, nr_segs, pos);
2520                 rc = filemap_fdatawrite(inode->i_mapping);
2521                 if (rc)
2522                         return (ssize_t)rc;
2523
2524                 return written;
2525         }
2526 #endif
2527
2528         /*
2529          * For non-oplocked files in strict cache mode we need to write the data
2530          * to the server exactly from the pos to pos+len-1 rather than flush all
2531          * affected pages because it may cause a error with mandatory locks on
2532          * these pages but not on the region from pos to ppos+len-1.
2533          */
2534
2535         if (!cinode->clientCanCacheAll)
2536                 return cifs_user_writev(iocb, iov, nr_segs, pos);
2537
2538         if (cap_unix(tcon->ses) &&
2539             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2540             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2541                 return generic_file_aio_write(iocb, iov, nr_segs, pos);
2542
2543         return cifs_writev(iocb, iov, nr_segs, pos);
2544 }
2545
2546 static struct cifs_readdata *
2547 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2548 {
2549         struct cifs_readdata *rdata;
2550
2551         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2552                         GFP_KERNEL);
2553         if (rdata != NULL) {
2554                 kref_init(&rdata->refcount);
2555                 INIT_LIST_HEAD(&rdata->list);
2556                 init_completion(&rdata->done);
2557                 INIT_WORK(&rdata->work, complete);
2558         }
2559
2560         return rdata;
2561 }
2562
2563 void
2564 cifs_readdata_release(struct kref *refcount)
2565 {
2566         struct cifs_readdata *rdata = container_of(refcount,
2567                                         struct cifs_readdata, refcount);
2568
2569         if (rdata->cfile)
2570                 cifsFileInfo_put(rdata->cfile);
2571
2572         kfree(rdata);
2573 }
2574
2575 static int
2576 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2577 {
2578         int rc = 0;
2579         struct page *page;
2580         unsigned int i;
2581
2582         for (i = 0; i < nr_pages; i++) {
2583                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2584                 if (!page) {
2585                         rc = -ENOMEM;
2586                         break;
2587                 }
2588                 rdata->pages[i] = page;
2589         }
2590
2591         if (rc) {
2592                 for (i = 0; i < nr_pages; i++) {
2593                         put_page(rdata->pages[i]);
2594                         rdata->pages[i] = NULL;
2595                 }
2596         }
2597         return rc;
2598 }
2599
2600 static void
2601 cifs_uncached_readdata_release(struct kref *refcount)
2602 {
2603         struct cifs_readdata *rdata = container_of(refcount,
2604                                         struct cifs_readdata, refcount);
2605         unsigned int i;
2606
2607         for (i = 0; i < rdata->nr_pages; i++) {
2608                 put_page(rdata->pages[i]);
2609                 rdata->pages[i] = NULL;
2610         }
2611         cifs_readdata_release(refcount);
2612 }
2613
2614 static int
2615 cifs_retry_async_readv(struct cifs_readdata *rdata)
2616 {
2617         int rc;
2618         struct TCP_Server_Info *server;
2619
2620         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2621
2622         do {
2623                 if (rdata->cfile->invalidHandle) {
2624                         rc = cifs_reopen_file(rdata->cfile, true);
2625                         if (rc != 0)
2626                                 continue;
2627                 }
2628                 rc = server->ops->async_readv(rdata);
2629         } while (rc == -EAGAIN);
2630
2631         return rc;
2632 }
2633
2634 /**
2635  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2636  * @rdata:      the readdata response with list of pages holding data
2637  * @iov:        vector in which we should copy the data
2638  * @nr_segs:    number of segments in vector
2639  * @offset:     offset into file of the first iovec
2640  * @copied:     used to return the amount of data copied to the iov
2641  *
2642  * This function copies data from a list of pages in a readdata response into
2643  * an array of iovecs. It will first calculate where the data should go
2644  * based on the info in the readdata and then copy the data into that spot.
2645  */
2646 static ssize_t
2647 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2648                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2649 {
2650         int rc = 0;
2651         struct iov_iter ii;
2652         size_t pos = rdata->offset - offset;
2653         ssize_t remaining = rdata->bytes;
2654         unsigned char *pdata;
2655         unsigned int i;
2656
2657         /* set up iov_iter and advance to the correct offset */
2658         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2659         iov_iter_advance(&ii, pos);
2660
2661         *copied = 0;
2662         for (i = 0; i < rdata->nr_pages; i++) {
2663                 ssize_t copy;
2664                 struct page *page = rdata->pages[i];
2665
2666                 /* copy a whole page or whatever's left */
2667                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2668
2669                 /* ...but limit it to whatever space is left in the iov */
2670                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2671
2672                 /* go while there's data to be copied and no errors */
2673                 if (copy && !rc) {
2674                         pdata = kmap(page);
2675                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2676                                                 (int)copy);
2677                         kunmap(page);
2678                         if (!rc) {
2679                                 *copied += copy;
2680                                 remaining -= copy;
2681                                 iov_iter_advance(&ii, copy);
2682                         }
2683                 }
2684         }
2685
2686         return rc;
2687 }
2688
2689 static void
2690 cifs_uncached_readv_complete(struct work_struct *work)
2691 {
2692         struct cifs_readdata *rdata = container_of(work,
2693                                                 struct cifs_readdata, work);
2694
2695         complete(&rdata->done);
2696         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2697 }
2698
2699 static int
2700 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2701                         struct cifs_readdata *rdata, unsigned int len)
2702 {
2703         int total_read = 0, result = 0;
2704         unsigned int i;
2705         unsigned int nr_pages = rdata->nr_pages;
2706         struct kvec iov;
2707
2708         rdata->tailsz = PAGE_SIZE;
2709         for (i = 0; i < nr_pages; i++) {
2710                 struct page *page = rdata->pages[i];
2711
2712                 if (len >= PAGE_SIZE) {
2713                         /* enough data to fill the page */
2714                         iov.iov_base = kmap(page);
2715                         iov.iov_len = PAGE_SIZE;
2716                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2717                                 i, iov.iov_base, iov.iov_len);
2718                         len -= PAGE_SIZE;
2719                 } else if (len > 0) {
2720                         /* enough for partial page, fill and zero the rest */
2721                         iov.iov_base = kmap(page);
2722                         iov.iov_len = len;
2723                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2724                                 i, iov.iov_base, iov.iov_len);
2725                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2726                         rdata->tailsz = len;
2727                         len = 0;
2728                 } else {
2729                         /* no need to hold page hostage */
2730                         rdata->pages[i] = NULL;
2731                         rdata->nr_pages--;
2732                         put_page(page);
2733                         continue;
2734                 }
2735
2736                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2737                 kunmap(page);
2738                 if (result < 0)
2739                         break;
2740
2741                 total_read += result;
2742         }
2743
2744         return total_read > 0 ? total_read : result;
2745 }
2746
2747 static ssize_t
2748 cifs_iovec_read(struct file *file, const struct iovec *iov,
2749                  unsigned long nr_segs, loff_t *poffset)
2750 {
2751         ssize_t rc;
2752         size_t len, cur_len;
2753         ssize_t total_read = 0;
2754         loff_t offset = *poffset;
2755         unsigned int npages;
2756         struct cifs_sb_info *cifs_sb;
2757         struct cifs_tcon *tcon;
2758         struct cifsFileInfo *open_file;
2759         struct cifs_readdata *rdata, *tmp;
2760         struct list_head rdata_list;
2761         pid_t pid;
2762
2763         if (!nr_segs)
2764                 return 0;
2765
2766         len = iov_length(iov, nr_segs);
2767         if (!len)
2768                 return 0;
2769
2770         INIT_LIST_HEAD(&rdata_list);
2771         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2772         open_file = file->private_data;
2773         tcon = tlink_tcon(open_file->tlink);
2774
2775         if (!tcon->ses->server->ops->async_readv)
2776                 return -ENOSYS;
2777
2778         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2779                 pid = open_file->pid;
2780         else
2781                 pid = current->tgid;
2782
2783         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2784                 cFYI(1, "attempting read on write only file instance");
2785
2786         do {
2787                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2788                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2789
2790                 /* allocate a readdata struct */
2791                 rdata = cifs_readdata_alloc(npages,
2792                                             cifs_uncached_readv_complete);
2793                 if (!rdata) {
2794                         rc = -ENOMEM;
2795                         goto error;
2796                 }
2797
2798                 rc = cifs_read_allocate_pages(rdata, npages);
2799                 if (rc)
2800                         goto error;
2801
2802                 rdata->cfile = cifsFileInfo_get(open_file);
2803                 rdata->nr_pages = npages;
2804                 rdata->offset = offset;
2805                 rdata->bytes = cur_len;
2806                 rdata->pid = pid;
2807                 rdata->pagesz = PAGE_SIZE;
2808                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2809
2810                 rc = cifs_retry_async_readv(rdata);
2811 error:
2812                 if (rc) {
2813                         kref_put(&rdata->refcount,
2814                                  cifs_uncached_readdata_release);
2815                         break;
2816                 }
2817
2818                 list_add_tail(&rdata->list, &rdata_list);
2819                 offset += cur_len;
2820                 len -= cur_len;
2821         } while (len > 0);
2822
2823         /* if at least one read request send succeeded, then reset rc */
2824         if (!list_empty(&rdata_list))
2825                 rc = 0;
2826
2827         /* the loop below should proceed in the order of increasing offsets */
2828 restart_loop:
2829         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2830                 if (!rc) {
2831                         ssize_t copied;
2832
2833                         /* FIXME: freezable sleep too? */
2834                         rc = wait_for_completion_killable(&rdata->done);
2835                         if (rc)
2836                                 rc = -EINTR;
2837                         else if (rdata->result)
2838                                 rc = rdata->result;
2839                         else {
2840                                 rc = cifs_readdata_to_iov(rdata, iov,
2841                                                         nr_segs, *poffset,
2842                                                         &copied);
2843                                 total_read += copied;
2844                         }
2845
2846                         /* resend call if it's a retryable error */
2847                         if (rc == -EAGAIN) {
2848                                 rc = cifs_retry_async_readv(rdata);
2849                                 goto restart_loop;
2850                         }
2851                 }
2852                 list_del_init(&rdata->list);
2853                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2854         }
2855
2856         cifs_stats_bytes_read(tcon, total_read);
2857         *poffset += total_read;
2858
2859         /* mask nodata case */
2860         if (rc == -ENODATA)
2861                 rc = 0;
2862
2863         return total_read ? total_read : rc;
2864 }
2865
2866 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2867                                unsigned long nr_segs, loff_t pos)
2868 {
2869         ssize_t read;
2870
2871         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2872         if (read > 0)
2873                 iocb->ki_pos = pos;
2874
2875         return read;
2876 }
2877
2878 ssize_t
2879 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2880                   unsigned long nr_segs, loff_t pos)
2881 {
2882         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2883         struct cifsInodeInfo *cinode = CIFS_I(inode);
2884         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2885         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2886                                                 iocb->ki_filp->private_data;
2887         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2888         int rc = -EACCES;
2889
2890         /*
2891          * In strict cache mode we need to read from the server all the time
2892          * if we don't have level II oplock because the server can delay mtime
2893          * change - so we can't make a decision about inode invalidating.
2894          * And we can also fail with pagereading if there are mandatory locks
2895          * on pages affected by this read but not on the region from pos to
2896          * pos+len-1.
2897          */
2898         if (!cinode->clientCanCacheRead)
2899                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2900
2901         if (cap_unix(tcon->ses) &&
2902             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2903             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2904                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2905
2906         /*
2907          * We need to hold the sem to be sure nobody modifies lock list
2908          * with a brlock that prevents reading.
2909          */
2910         down_read(&cinode->lock_sem);
2911         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2912                                      tcon->ses->server->vals->shared_lock_type,
2913                                      NULL, CIFS_READ_OP))
2914                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2915         up_read(&cinode->lock_sem);
2916         return rc;
2917 }
2918
2919 static ssize_t
2920 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2921 {
2922         int rc = -EACCES;
2923         unsigned int bytes_read = 0;
2924         unsigned int total_read;
2925         unsigned int current_read_size;
2926         unsigned int rsize;
2927         struct cifs_sb_info *cifs_sb;
2928         struct cifs_tcon *tcon;
2929         struct TCP_Server_Info *server;
2930         unsigned int xid;
2931         char *cur_offset;
2932         struct cifsFileInfo *open_file;
2933         struct cifs_io_parms io_parms;
2934         int buf_type = CIFS_NO_BUFFER;
2935         __u32 pid;
2936
2937         xid = get_xid();
2938         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2939
2940         /* FIXME: set up handlers for larger reads and/or convert to async */
2941         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2942
2943         if (file->private_data == NULL) {
2944                 rc = -EBADF;
2945                 free_xid(xid);
2946                 return rc;
2947         }
2948         open_file = file->private_data;
2949         tcon = tlink_tcon(open_file->tlink);
2950         server = tcon->ses->server;
2951
2952         if (!server->ops->sync_read) {
2953                 free_xid(xid);
2954                 return -ENOSYS;
2955         }
2956
2957         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2958                 pid = open_file->pid;
2959         else
2960                 pid = current->tgid;
2961
2962         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2963                 cFYI(1, "attempting read on write only file instance");
2964
2965         for (total_read = 0, cur_offset = read_data; read_size > total_read;
2966              total_read += bytes_read, cur_offset += bytes_read) {
2967                 current_read_size = min_t(uint, read_size - total_read, rsize);
2968                 /*
2969                  * For windows me and 9x we do not want to request more than it
2970                  * negotiated since it will refuse the read then.
2971                  */
2972                 if ((tcon->ses) && !(tcon->ses->capabilities &
2973                                 tcon->ses->server->vals->cap_large_files)) {
2974                         current_read_size = min_t(uint, current_read_size,
2975                                         CIFSMaxBufSize);
2976                 }
2977                 rc = -EAGAIN;
2978                 while (rc == -EAGAIN) {
2979                         if (open_file->invalidHandle) {
2980                                 rc = cifs_reopen_file(open_file, true);
2981                                 if (rc != 0)
2982                                         break;
2983                         }
2984                         io_parms.pid = pid;
2985                         io_parms.tcon = tcon;
2986                         io_parms.offset = *offset;
2987                         io_parms.length = current_read_size;
2988                         rc = server->ops->sync_read(xid, open_file, &io_parms,
2989                                                     &bytes_read, &cur_offset,
2990                                                     &buf_type);
2991                 }
2992                 if (rc || (bytes_read == 0)) {
2993                         if (total_read) {
2994                                 break;
2995                         } else {
2996                                 free_xid(xid);
2997                                 return rc;
2998                         }
2999                 } else {
3000                         cifs_stats_bytes_read(tcon, total_read);
3001                         *offset += bytes_read;
3002                 }
3003         }
3004         free_xid(xid);
3005         return total_read;
3006 }
3007
3008 /*
3009  * If the page is mmap'ed into a process' page tables, then we need to make
3010  * sure that it doesn't change while being written back.
3011  */
3012 static int
3013 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3014 {
3015         struct page *page = vmf->page;
3016
3017         lock_page(page);
3018         return VM_FAULT_LOCKED;
3019 }
3020
3021 static struct vm_operations_struct cifs_file_vm_ops = {
3022         .fault = filemap_fault,
3023         .page_mkwrite = cifs_page_mkwrite,
3024         .remap_pages = generic_file_remap_pages,
3025 };
3026
3027 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3028 {
3029         int rc, xid;
3030         struct inode *inode = file->f_path.dentry->d_inode;
3031
3032         xid = get_xid();
3033
3034         if (!CIFS_I(inode)->clientCanCacheRead) {
3035                 rc = cifs_invalidate_mapping(inode);
3036                 if (rc)
3037                         return rc;
3038         }
3039
3040         rc = generic_file_mmap(file, vma);
3041         if (rc == 0)
3042                 vma->vm_ops = &cifs_file_vm_ops;
3043         free_xid(xid);
3044         return rc;
3045 }
3046
3047 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3048 {
3049         int rc, xid;
3050
3051         xid = get_xid();
3052         rc = cifs_revalidate_file(file);
3053         if (rc) {
3054                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3055                 free_xid(xid);
3056                 return rc;
3057         }
3058         rc = generic_file_mmap(file, vma);
3059         if (rc == 0)
3060                 vma->vm_ops = &cifs_file_vm_ops;
3061         free_xid(xid);
3062         return rc;
3063 }
3064
3065 static void
3066 cifs_readv_complete(struct work_struct *work)
3067 {
3068         unsigned int i;
3069         struct cifs_readdata *rdata = container_of(work,
3070                                                 struct cifs_readdata, work);
3071
3072         for (i = 0; i < rdata->nr_pages; i++) {
3073                 struct page *page = rdata->pages[i];
3074
3075                 lru_cache_add_file(page);
3076
3077                 if (rdata->result == 0) {
3078                         flush_dcache_page(page);
3079                         SetPageUptodate(page);
3080                 }
3081
3082                 unlock_page(page);
3083
3084                 if (rdata->result == 0)
3085                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3086
3087                 page_cache_release(page);
3088                 rdata->pages[i] = NULL;
3089         }
3090         kref_put(&rdata->refcount, cifs_readdata_release);
3091 }
3092
3093 static int
3094 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3095                         struct cifs_readdata *rdata, unsigned int len)
3096 {
3097         int total_read = 0, result = 0;
3098         unsigned int i;
3099         u64 eof;
3100         pgoff_t eof_index;
3101         unsigned int nr_pages = rdata->nr_pages;
3102         struct kvec iov;
3103
3104         /* determine the eof that the server (probably) has */
3105         eof = CIFS_I(rdata->mapping->host)->server_eof;
3106         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3107         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3108
3109         rdata->tailsz = PAGE_CACHE_SIZE;
3110         for (i = 0; i < nr_pages; i++) {
3111                 struct page *page = rdata->pages[i];
3112
3113                 if (len >= PAGE_CACHE_SIZE) {
3114                         /* enough data to fill the page */
3115                         iov.iov_base = kmap(page);
3116                         iov.iov_len = PAGE_CACHE_SIZE;
3117                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3118                                 i, page->index, iov.iov_base, iov.iov_len);
3119                         len -= PAGE_CACHE_SIZE;
3120                 } else if (len > 0) {
3121                         /* enough for partial page, fill and zero the rest */
3122                         iov.iov_base = kmap(page);
3123                         iov.iov_len = len;
3124                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3125                                 i, page->index, iov.iov_base, iov.iov_len);
3126                         memset(iov.iov_base + len,
3127                                 '\0', PAGE_CACHE_SIZE - len);
3128                         rdata->tailsz = len;
3129                         len = 0;
3130                 } else if (page->index > eof_index) {
3131                         /*
3132                          * The VFS will not try to do readahead past the
3133                          * i_size, but it's possible that we have outstanding
3134                          * writes with gaps in the middle and the i_size hasn't
3135                          * caught up yet. Populate those with zeroed out pages
3136                          * to prevent the VFS from repeatedly attempting to
3137                          * fill them until the writes are flushed.
3138                          */
3139                         zero_user(page, 0, PAGE_CACHE_SIZE);
3140                         lru_cache_add_file(page);
3141                         flush_dcache_page(page);
3142                         SetPageUptodate(page);
3143                         unlock_page(page);
3144                         page_cache_release(page);
3145                         rdata->pages[i] = NULL;
3146                         rdata->nr_pages--;
3147                         continue;
3148                 } else {
3149                         /* no need to hold page hostage */
3150                         lru_cache_add_file(page);
3151                         unlock_page(page);
3152                         page_cache_release(page);
3153                         rdata->pages[i] = NULL;
3154                         rdata->nr_pages--;
3155                         continue;
3156                 }
3157
3158                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3159                 kunmap(page);
3160                 if (result < 0)
3161                         break;
3162
3163                 total_read += result;
3164         }
3165
3166         return total_read > 0 ? total_read : result;
3167 }
3168
3169 static int cifs_readpages(struct file *file, struct address_space *mapping,
3170         struct list_head *page_list, unsigned num_pages)
3171 {
3172         int rc;
3173         struct list_head tmplist;
3174         struct cifsFileInfo *open_file = file->private_data;
3175         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3176         unsigned int rsize = cifs_sb->rsize;
3177         pid_t pid;
3178
3179         /*
3180          * Give up immediately if rsize is too small to read an entire page.
3181          * The VFS will fall back to readpage. We should never reach this
3182          * point however since we set ra_pages to 0 when the rsize is smaller
3183          * than a cache page.
3184          */
3185         if (unlikely(rsize < PAGE_CACHE_SIZE))
3186                 return 0;
3187
3188         /*
3189          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3190          * immediately if the cookie is negative
3191          */
3192         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3193                                          &num_pages);
3194         if (rc == 0)
3195                 return rc;
3196
3197         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3198                 pid = open_file->pid;
3199         else
3200                 pid = current->tgid;
3201
3202         rc = 0;
3203         INIT_LIST_HEAD(&tmplist);
3204
3205         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3206                 mapping, num_pages);
3207
3208         /*
3209          * Start with the page at end of list and move it to private
3210          * list. Do the same with any following pages until we hit
3211          * the rsize limit, hit an index discontinuity, or run out of
3212          * pages. Issue the async read and then start the loop again
3213          * until the list is empty.
3214          *
3215          * Note that list order is important. The page_list is in
3216          * the order of declining indexes. When we put the pages in
3217          * the rdata->pages, then we want them in increasing order.
3218          */
3219         while (!list_empty(page_list)) {
3220                 unsigned int i;
3221                 unsigned int bytes = PAGE_CACHE_SIZE;
3222                 unsigned int expected_index;
3223                 unsigned int nr_pages = 1;
3224                 loff_t offset;
3225                 struct page *page, *tpage;
3226                 struct cifs_readdata *rdata;
3227
3228                 page = list_entry(page_list->prev, struct page, lru);
3229
3230                 /*
3231                  * Lock the page and put it in the cache. Since no one else
3232                  * should have access to this page, we're safe to simply set
3233                  * PG_locked without checking it first.
3234                  */
3235                 __set_page_locked(page);
3236                 rc = add_to_page_cache_locked(page, mapping,
3237                                               page->index, GFP_KERNEL);
3238
3239                 /* give up if we can't stick it in the cache */
3240                 if (rc) {
3241                         __clear_page_locked(page);
3242                         break;
3243                 }
3244
3245                 /* move first page to the tmplist */
3246                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3247                 list_move_tail(&page->lru, &tmplist);
3248
3249                 /* now try and add more pages onto the request */
3250                 expected_index = page->index + 1;
3251                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3252                         /* discontinuity ? */
3253                         if (page->index != expected_index)
3254                                 break;
3255
3256                         /* would this page push the read over the rsize? */
3257                         if (bytes + PAGE_CACHE_SIZE > rsize)
3258                                 break;
3259
3260                         __set_page_locked(page);
3261                         if (add_to_page_cache_locked(page, mapping,
3262                                                 page->index, GFP_KERNEL)) {
3263                                 __clear_page_locked(page);
3264                                 break;
3265                         }
3266                         list_move_tail(&page->lru, &tmplist);
3267                         bytes += PAGE_CACHE_SIZE;
3268                         expected_index++;
3269                         nr_pages++;
3270                 }
3271
3272                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3273                 if (!rdata) {
3274                         /* best to give up if we're out of mem */
3275                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3276                                 list_del(&page->lru);
3277                                 lru_cache_add_file(page);
3278                                 unlock_page(page);
3279                                 page_cache_release(page);
3280                         }
3281                         rc = -ENOMEM;
3282                         break;
3283                 }
3284
3285                 rdata->cfile = cifsFileInfo_get(open_file);
3286                 rdata->mapping = mapping;
3287                 rdata->offset = offset;
3288                 rdata->bytes = bytes;
3289                 rdata->pid = pid;
3290                 rdata->pagesz = PAGE_CACHE_SIZE;
3291                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3292
3293                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3294                         list_del(&page->lru);
3295                         rdata->pages[rdata->nr_pages++] = page;
3296                 }
3297
3298                 rc = cifs_retry_async_readv(rdata);
3299                 if (rc != 0) {
3300                         for (i = 0; i < rdata->nr_pages; i++) {
3301                                 page = rdata->pages[i];
3302                                 lru_cache_add_file(page);
3303                                 unlock_page(page);
3304                                 page_cache_release(page);
3305                         }
3306                         kref_put(&rdata->refcount, cifs_readdata_release);
3307                         break;
3308                 }
3309
3310                 kref_put(&rdata->refcount, cifs_readdata_release);
3311         }
3312
3313         return rc;
3314 }
3315
3316 static int cifs_readpage_worker(struct file *file, struct page *page,
3317         loff_t *poffset)
3318 {
3319         char *read_data;
3320         int rc;
3321
3322         /* Is the page cached? */
3323         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3324         if (rc == 0)
3325                 goto read_complete;
3326
3327         page_cache_get(page);
3328         read_data = kmap(page);
3329         /* for reads over a certain size could initiate async read ahead */
3330
3331         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3332
3333         if (rc < 0)
3334                 goto io_error;
3335         else
3336                 cFYI(1, "Bytes read %d", rc);
3337
3338         file->f_path.dentry->d_inode->i_atime =
3339                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3340
3341         if (PAGE_CACHE_SIZE > rc)
3342                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3343
3344         flush_dcache_page(page);
3345         SetPageUptodate(page);
3346
3347         /* send this page to the cache */
3348         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3349
3350         rc = 0;
3351
3352 io_error:
3353         kunmap(page);
3354         page_cache_release(page);
3355
3356 read_complete:
3357         return rc;
3358 }
3359
3360 static int cifs_readpage(struct file *file, struct page *page)
3361 {
3362         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3363         int rc = -EACCES;
3364         unsigned int xid;
3365
3366         xid = get_xid();
3367
3368         if (file->private_data == NULL) {
3369                 rc = -EBADF;
3370                 free_xid(xid);
3371                 return rc;
3372         }
3373
3374         cFYI(1, "readpage %p at offset %d 0x%x",
3375                  page, (int)offset, (int)offset);
3376
3377         rc = cifs_readpage_worker(file, page, &offset);
3378
3379         unlock_page(page);
3380
3381         free_xid(xid);
3382         return rc;
3383 }
3384
3385 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3386 {
3387         struct cifsFileInfo *open_file;
3388
3389         spin_lock(&cifs_file_list_lock);
3390         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3391                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3392                         spin_unlock(&cifs_file_list_lock);
3393                         return 1;
3394                 }
3395         }
3396         spin_unlock(&cifs_file_list_lock);
3397         return 0;
3398 }
3399
3400 /* We do not want to update the file size from server for inodes
3401    open for write - to avoid races with writepage extending
3402    the file - in the future we could consider allowing
3403    refreshing the inode only on increases in the file size
3404    but this is tricky to do without racing with writebehind
3405    page caching in the current Linux kernel design */
3406 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3407 {
3408         if (!cifsInode)
3409                 return true;
3410
3411         if (is_inode_writable(cifsInode)) {
3412                 /* This inode is open for write at least once */
3413                 struct cifs_sb_info *cifs_sb;
3414
3415                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3416                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3417                         /* since no page cache to corrupt on directio
3418                         we can change size safely */
3419                         return true;
3420                 }
3421
3422                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3423                         return true;
3424
3425                 return false;
3426         } else
3427                 return true;
3428 }
3429
3430 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3431                         loff_t pos, unsigned len, unsigned flags,
3432                         struct page **pagep, void **fsdata)
3433 {
3434         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3435         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3436         loff_t page_start = pos & PAGE_MASK;
3437         loff_t i_size;
3438         struct page *page;
3439         int rc = 0;
3440
3441         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3442
3443         page = grab_cache_page_write_begin(mapping, index, flags);
3444         if (!page) {
3445                 rc = -ENOMEM;
3446                 goto out;
3447         }
3448
3449         if (PageUptodate(page))
3450                 goto out;
3451
3452         /*
3453          * If we write a full page it will be up to date, no need to read from
3454          * the server. If the write is short, we'll end up doing a sync write
3455          * instead.
3456          */
3457         if (len == PAGE_CACHE_SIZE)
3458                 goto out;
3459
3460         /*
3461          * optimize away the read when we have an oplock, and we're not
3462          * expecting to use any of the data we'd be reading in. That
3463          * is, when the page lies beyond the EOF, or straddles the EOF
3464          * and the write will cover all of the existing data.
3465          */
3466         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3467                 i_size = i_size_read(mapping->host);
3468                 if (page_start >= i_size ||
3469                     (offset == 0 && (pos + len) >= i_size)) {
3470                         zero_user_segments(page, 0, offset,
3471                                            offset + len,
3472                                            PAGE_CACHE_SIZE);
3473                         /*
3474                          * PageChecked means that the parts of the page
3475                          * to which we're not writing are considered up
3476                          * to date. Once the data is copied to the
3477                          * page, it can be set uptodate.
3478                          */
3479                         SetPageChecked(page);
3480                         goto out;
3481                 }
3482         }
3483
3484         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3485                 /*
3486                  * might as well read a page, it is fast enough. If we get
3487                  * an error, we don't need to return it. cifs_write_end will
3488                  * do a sync write instead since PG_uptodate isn't set.
3489                  */
3490                 cifs_readpage_worker(file, page, &page_start);
3491         } else {
3492                 /* we could try using another file handle if there is one -
3493                    but how would we lock it to prevent close of that handle
3494                    racing with this read? In any case
3495                    this will be written out by write_end so is fine */
3496         }
3497 out:
3498         *pagep = page;
3499         return rc;
3500 }
3501
3502 static int cifs_release_page(struct page *page, gfp_t gfp)
3503 {
3504         if (PagePrivate(page))
3505                 return 0;
3506
3507         return cifs_fscache_release_page(page, gfp);
3508 }
3509
3510 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3511 {
3512         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3513
3514         if (offset == 0)
3515                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3516 }
3517
3518 static int cifs_launder_page(struct page *page)
3519 {
3520         int rc = 0;
3521         loff_t range_start = page_offset(page);
3522         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3523         struct writeback_control wbc = {
3524                 .sync_mode = WB_SYNC_ALL,
3525                 .nr_to_write = 0,
3526                 .range_start = range_start,
3527                 .range_end = range_end,
3528         };
3529
3530         cFYI(1, "Launder page: %p", page);
3531
3532         if (clear_page_dirty_for_io(page))
3533                 rc = cifs_writepage_locked(page, &wbc);
3534
3535         cifs_fscache_invalidate_page(page, page->mapping->host);
3536         return rc;
3537 }
3538
3539 void cifs_oplock_break(struct work_struct *work)
3540 {
3541         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3542                                                   oplock_break);
3543         struct inode *inode = cfile->dentry->d_inode;
3544         struct cifsInodeInfo *cinode = CIFS_I(inode);
3545         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3546         int rc = 0;
3547
3548         if (inode && S_ISREG(inode->i_mode)) {
3549                 if (cinode->clientCanCacheRead)
3550                         break_lease(inode, O_RDONLY);
3551                 else
3552                         break_lease(inode, O_WRONLY);
3553                 rc = filemap_fdatawrite(inode->i_mapping);
3554                 if (cinode->clientCanCacheRead == 0) {
3555                         rc = filemap_fdatawait(inode->i_mapping);
3556                         mapping_set_error(inode->i_mapping, rc);
3557                         cifs_invalidate_mapping(inode);
3558                 }
3559                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3560         }
3561
3562         rc = cifs_push_locks(cfile);
3563         if (rc)
3564                 cERROR(1, "Push locks rc = %d", rc);
3565
3566         /*
3567          * releasing stale oplock after recent reconnect of smb session using
3568          * a now incorrect file handle is not a data integrity issue but do
3569          * not bother sending an oplock release if session to server still is
3570          * disconnected since oplock already released by the server
3571          */
3572         if (!cfile->oplock_break_cancelled) {
3573                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3574                                                              cinode);
3575                 cFYI(1, "Oplock release rc = %d", rc);
3576         }
3577 }
3578
3579 const struct address_space_operations cifs_addr_ops = {
3580         .readpage = cifs_readpage,
3581         .readpages = cifs_readpages,
3582         .writepage = cifs_writepage,
3583         .writepages = cifs_writepages,
3584         .write_begin = cifs_write_begin,
3585         .write_end = cifs_write_end,
3586         .set_page_dirty = __set_page_dirty_nobuffers,
3587         .releasepage = cifs_release_page,
3588         .invalidatepage = cifs_invalidate_page,
3589         .launder_page = cifs_launder_page,
3590 };
3591
3592 /*
3593  * cifs_readpages requires the server to support a buffer large enough to
3594  * contain the header plus one complete page of data.  Otherwise, we need
3595  * to leave cifs_readpages out of the address space operations.
3596  */
3597 const struct address_space_operations cifs_addr_ops_smallbuf = {
3598         .readpage = cifs_readpage,
3599         .writepage = cifs_writepage,
3600         .writepages = cifs_writepages,
3601         .write_begin = cifs_write_begin,
3602         .write_end = cifs_write_end,
3603         .set_page_dirty = __set_page_dirty_nobuffers,
3604         .releasepage = cifs_release_page,
3605         .invalidatepage = cifs_invalidate_page,
3606         .launder_page = cifs_launder_page,
3607 };