0a6677ba212b11ef73fbf83465c32f6ffe4fa2c1
[linux-3.10.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46 static inline int cifs_convert_flags(unsigned int flags)
47 {
48         if ((flags & O_ACCMODE) == O_RDONLY)
49                 return GENERIC_READ;
50         else if ((flags & O_ACCMODE) == O_WRONLY)
51                 return GENERIC_WRITE;
52         else if ((flags & O_ACCMODE) == O_RDWR) {
53                 /* GENERIC_ALL is too much permission to request
54                    can cause unnecessary access denied on create */
55                 /* return GENERIC_ALL; */
56                 return (GENERIC_READ | GENERIC_WRITE);
57         }
58
59         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
61                 FILE_READ_DATA);
62 }
63
64 static u32 cifs_posix_convert_flags(unsigned int flags)
65 {
66         u32 posix_flags = 0;
67
68         if ((flags & O_ACCMODE) == O_RDONLY)
69                 posix_flags = SMB_O_RDONLY;
70         else if ((flags & O_ACCMODE) == O_WRONLY)
71                 posix_flags = SMB_O_WRONLY;
72         else if ((flags & O_ACCMODE) == O_RDWR)
73                 posix_flags = SMB_O_RDWR;
74
75         if (flags & O_CREAT)
76                 posix_flags |= SMB_O_CREAT;
77         if (flags & O_EXCL)
78                 posix_flags |= SMB_O_EXCL;
79         if (flags & O_TRUNC)
80                 posix_flags |= SMB_O_TRUNC;
81         /* be safe and imply O_SYNC for O_DSYNC */
82         if (flags & O_DSYNC)
83                 posix_flags |= SMB_O_SYNC;
84         if (flags & O_DIRECTORY)
85                 posix_flags |= SMB_O_DIRECTORY;
86         if (flags & O_NOFOLLOW)
87                 posix_flags |= SMB_O_NOFOLLOW;
88         if (flags & O_DIRECT)
89                 posix_flags |= SMB_O_DIRECT;
90
91         return posix_flags;
92 }
93
94 static inline int cifs_get_disposition(unsigned int flags)
95 {
96         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97                 return FILE_CREATE;
98         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99                 return FILE_OVERWRITE_IF;
100         else if ((flags & O_CREAT) == O_CREAT)
101                 return FILE_OPEN_IF;
102         else if ((flags & O_TRUNC) == O_TRUNC)
103                 return FILE_OVERWRITE;
104         else
105                 return FILE_OPEN;
106 }
107
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109                         struct super_block *sb, int mode, unsigned int f_flags,
110                         __u32 *poplock, __u16 *pnetfid, unsigned int xid)
111 {
112         int rc;
113         FILE_UNIX_BASIC_INFO *presp_data;
114         __u32 posix_flags = 0;
115         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116         struct cifs_fattr fattr;
117         struct tcon_link *tlink;
118         struct cifs_tcon *tcon;
119
120         cFYI(1, "posix open %s", full_path);
121
122         presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123         if (presp_data == NULL)
124                 return -ENOMEM;
125
126         tlink = cifs_sb_tlink(cifs_sb);
127         if (IS_ERR(tlink)) {
128                 rc = PTR_ERR(tlink);
129                 goto posix_open_ret;
130         }
131
132         tcon = tlink_tcon(tlink);
133         mode &= ~current_umask();
134
135         posix_flags = cifs_posix_convert_flags(f_flags);
136         rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137                              poplock, full_path, cifs_sb->local_nls,
138                              cifs_sb->mnt_cifs_flags &
139                                         CIFS_MOUNT_MAP_SPECIAL_CHR);
140         cifs_put_tlink(tlink);
141
142         if (rc)
143                 goto posix_open_ret;
144
145         if (presp_data->Type == cpu_to_le32(-1))
146                 goto posix_open_ret; /* open ok, caller does qpathinfo */
147
148         if (!pinode)
149                 goto posix_open_ret; /* caller does not need info */
150
151         cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
152
153         /* get new inode and set it up */
154         if (*pinode == NULL) {
155                 cifs_fill_uniqueid(sb, &fattr);
156                 *pinode = cifs_iget(sb, &fattr);
157                 if (!*pinode) {
158                         rc = -ENOMEM;
159                         goto posix_open_ret;
160                 }
161         } else {
162                 cifs_fattr_to_inode(*pinode, &fattr);
163         }
164
165 posix_open_ret:
166         kfree(presp_data);
167         return rc;
168 }
169
170 static int
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172              struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
173              struct cifs_fid *fid, unsigned int xid)
174 {
175         int rc;
176         int desired_access;
177         int disposition;
178         int create_options = CREATE_NOT_DIR;
179         FILE_ALL_INFO *buf;
180         struct TCP_Server_Info *server = tcon->ses->server;
181
182         if (!server->ops->open)
183                 return -ENOSYS;
184
185         desired_access = cifs_convert_flags(f_flags);
186
187 /*********************************************************************
188  *  open flag mapping table:
189  *
190  *      POSIX Flag            CIFS Disposition
191  *      ----------            ----------------
192  *      O_CREAT               FILE_OPEN_IF
193  *      O_CREAT | O_EXCL      FILE_CREATE
194  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
195  *      O_TRUNC               FILE_OVERWRITE
196  *      none of the above     FILE_OPEN
197  *
198  *      Note that there is not a direct match between disposition
199  *      FILE_SUPERSEDE (ie create whether or not file exists although
200  *      O_CREAT | O_TRUNC is similar but truncates the existing
201  *      file rather than creating a new file as FILE_SUPERSEDE does
202  *      (which uses the attributes / metadata passed in on open call)
203  *?
204  *?  O_SYNC is a reasonable match to CIFS writethrough flag
205  *?  and the read write flags match reasonably.  O_LARGEFILE
206  *?  is irrelevant because largefile support is always used
207  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
208  *       O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
209  *********************************************************************/
210
211         disposition = cifs_get_disposition(f_flags);
212
213         /* BB pass O_SYNC flag through on file attributes .. BB */
214
215         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
216         if (!buf)
217                 return -ENOMEM;
218
219         if (backup_cred(cifs_sb))
220                 create_options |= CREATE_OPEN_BACKUP_INTENT;
221
222         rc = server->ops->open(xid, tcon, full_path, disposition,
223                                desired_access, create_options, fid, oplock, buf,
224                                cifs_sb);
225
226         if (rc)
227                 goto out;
228
229         if (tcon->unix_ext)
230                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
231                                               xid);
232         else
233                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
234                                          xid, &fid->netfid);
235
236 out:
237         kfree(buf);
238         return rc;
239 }
240
/*
 * Allocate and initialise the cifsFileInfo for @file and link it onto the
 * tcon and inode open-file lists.
 *
 * @fid:    file id handed to the server's ->set_fid op; its pending_open
 *          entry is removed from its list here
 * @file:   VFS file; its private_data is pointed at the new structure
 * @tlink:  tcon link for the open; an additional reference is taken
 * @oplock: oplock value to record, unless the pending open carries a
 *          different one (see below)
 *
 * Returns the new structure, or NULL if either allocation fails.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        /* per-fid byte-range lock list, hooked onto the inode's list */
        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        /* initial reference; dentry and tlink references are taken too */
        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        spin_lock(&cifs_file_list_lock);
        /* an oplock value recorded on the pending open overrides @oplock */
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        tlink_tcon(tlink)->ses->server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable file instance put first in list*/
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}
297
298 struct cifsFileInfo *
299 cifsFileInfo_get(struct cifsFileInfo *cifs_file)
300 {
301         spin_lock(&cifs_file_list_lock);
302         cifsFileInfo_get_locked(cifs_file);
303         spin_unlock(&cifs_file_list_lock);
304         return cifs_file;
305 }
306
307 /*
308  * Release a reference on the file private data. This may involve closing
309  * the filehandle out on the server. Must be called without holding
310  * cifs_file_list_lock.
311  */
312 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
313 {
314         struct inode *inode = cifs_file->dentry->d_inode;
315         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
316         struct TCP_Server_Info *server = tcon->ses->server;
317         struct cifsInodeInfo *cifsi = CIFS_I(inode);
318         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
319         struct cifsLockInfo *li, *tmp;
320         struct cifs_fid fid;
321         struct cifs_pending_open open;
322
323         spin_lock(&cifs_file_list_lock);
324         if (--cifs_file->count > 0) {
325                 spin_unlock(&cifs_file_list_lock);
326                 return;
327         }
328
329         if (server->ops->get_lease_key)
330                 server->ops->get_lease_key(inode, &fid);
331
332         /* store open in pending opens to make sure we don't miss lease break */
333         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
334
335         /* remove it from the lists */
336         list_del(&cifs_file->flist);
337         list_del(&cifs_file->tlist);
338
339         if (list_empty(&cifsi->openFileList)) {
340                 cFYI(1, "closing last open instance for inode %p",
341                         cifs_file->dentry->d_inode);
342                 /*
343                  * In strict cache mode we need invalidate mapping on the last
344                  * close  because it may cause a error when we open this file
345                  * again and get at least level II oplock.
346                  */
347                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
348                         CIFS_I(inode)->invalid_mapping = true;
349                 cifs_set_oplock_level(cifsi, 0);
350         }
351         spin_unlock(&cifs_file_list_lock);
352
353         cancel_work_sync(&cifs_file->oplock_break);
354
355         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
356                 struct TCP_Server_Info *server = tcon->ses->server;
357                 unsigned int xid;
358
359                 xid = get_xid();
360                 if (server->ops->close)
361                         server->ops->close(xid, tcon, &cifs_file->fid);
362                 _free_xid(xid);
363         }
364
365         cifs_del_pending_open(&open);
366
367         /*
368          * Delete any outstanding lock records. We'll lose them when the file
369          * is closed anyway.
370          */
371         down_write(&cifsi->lock_sem);
372         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
373                 list_del(&li->llist);
374                 cifs_del_lock_waiters(li);
375                 kfree(li);
376         }
377         list_del(&cifs_file->llist->llist);
378         kfree(cifs_file->llist);
379         up_write(&cifsi->lock_sem);
380
381         cifs_put_tlink(cifs_file->tlink);
382         dput(cifs_file->dentry);
383         kfree(cifs_file);
384 }
385
/*
 * Open handler for regular files.
 *
 * Tries a POSIX open first when the tcon advertises the POSIX path
 * operations capability and posix open has not been marked broken; otherwise,
 * or on selected errors, falls back to cifs_nt_open(). On success a
 * cifsFileInfo is created for @file via cifs_new_fileinfo().
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                /* no tlink reference to drop yet, so do not use the out label */
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
                 inode, file->f_flags, full_path);

        /* request an oplock only if the server has oplocks enabled */
        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* can not refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix open succeeded");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        /* remember the failure so posix open is not retried */
                        if (tcon->ses->serverNOS)
                                cERROR(1, "server %s of type %s returned"
                                           " unexpected error on SMB posix open"
                                           ", disabling posix open support."
                                           " Check if server update available.",
                                           tcon->ses->serverName,
                                           tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fallthrough to retry open the old way on network i/o
                 * or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* registered before the open; removed again on every failure below */
        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                /* could not track the handle locally: close it on the server */
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = NO_CHANGE_64,
                        .gid    = NO_CHANGE_64,
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                /* return value is not checked; rc from the open is kept */
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}
507
508 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
509
510 /*
511  * Try to reacquire byte range locks that were released when session
512  * to server was lost.
513  */
514 static int
515 cifs_relock_file(struct cifsFileInfo *cfile)
516 {
517         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
518         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
519         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
520         int rc = 0;
521
522         /* we are going to update can_cache_brlcks here - need a write access */
523         down_write(&cinode->lock_sem);
524         if (cinode->can_cache_brlcks) {
525                 /* can cache locks - no need to push them */
526                 up_write(&cinode->lock_sem);
527                 return rc;
528         }
529
530         if (cap_unix(tcon->ses) &&
531             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
532             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
533                 rc = cifs_push_posix_locks(cfile);
534         else
535                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
536
537         up_write(&cinode->lock_sem);
538         return rc;
539 }
540
/*
 * Reopen a file whose handle has been marked invalid.
 *
 * @cfile:     open file instance to revalidate
 * @can_flush: when true, dirty pages are written out and the inode info is
 *             refreshed after the reopen; callers pass false when doing so
 *             could deadlock (see the comment near the end of the function)
 *
 * Returns 0 if the handle was already valid. After a successful reopen the
 * return value is that of the last step performed (the inode refresh when
 * @can_flush is set). Otherwise a negative errno is returned.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        /* fh_mutex is held while invalidHandle is tested and the reopen runs */
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab rename sem here because various ops, including those
         * that already have the rename sem can end up causing writepage to get
         * called and if the server was down that means we end up here, and we
         * can never tell if the caller already has the rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cFYI(1, "inode = 0x%p file flags 0x%x for %s", inode, cfile->f_flags,
             full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                /* NULL pinode: no inode refresh is requested from the open */
                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cFYI(1, "posix reopen succeeded");
                        goto reopen_success;
                }
                /*
                 * fallthrough to retry open the old way on errors, especially
                 * in the reconnect path it is important to retry hard
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh inode by passing in file_info buf to be returned by
         * CIFSSMBOpen and then calling get_inode_info with returned buf since
         * file might have write behind data that needs to be flushed and server
         * version of file size can be stale. If we knew for sure that inode was
         * not dirty locally we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cFYI(1, "cifs_reopen returned 0x%x", rc);
                cFYI(1, "oplock: %d", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        /* handle is valid again; fh_mutex is dropped before the flush below */
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                /* a writeback error is recorded on the mapping, then rc is reused */
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to server already and could deadlock if
         * we tried to flush data, and since we do not know if we have data that
         * would invalidate the current end of file on the server we can not go
         * to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        /* return value of the relock is not checked */
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}
673
674 int cifs_close(struct inode *inode, struct file *file)
675 {
676         if (file->private_data != NULL) {
677                 cifsFileInfo_put(file->private_data);
678                 file->private_data = NULL;
679         }
680
681         /* return code from the ->release op is always ignored */
682         return 0;
683 }
684
685 int cifs_closedir(struct inode *inode, struct file *file)
686 {
687         int rc = 0;
688         unsigned int xid;
689         struct cifsFileInfo *cfile = file->private_data;
690         struct cifs_tcon *tcon;
691         struct TCP_Server_Info *server;
692         char *buf;
693
694         cFYI(1, "Closedir inode = 0x%p", inode);
695
696         if (cfile == NULL)
697                 return rc;
698
699         xid = get_xid();
700         tcon = tlink_tcon(cfile->tlink);
701         server = tcon->ses->server;
702
703         cFYI(1, "Freeing private data in close dir");
704         spin_lock(&cifs_file_list_lock);
705         if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
706                 cfile->invalidHandle = true;
707                 spin_unlock(&cifs_file_list_lock);
708                 if (server->ops->close_dir)
709                         rc = server->ops->close_dir(xid, tcon, &cfile->fid);
710                 else
711                         rc = -ENOSYS;
712                 cFYI(1, "Closing uncompleted readdir with rc %d", rc);
713                 /* not much we can do if it fails anyway, ignore rc */
714                 rc = 0;
715         } else
716                 spin_unlock(&cifs_file_list_lock);
717
718         buf = cfile->srch_inf.ntwrk_buf_start;
719         if (buf) {
720                 cFYI(1, "closedir free smb buf in srch struct");
721                 cfile->srch_inf.ntwrk_buf_start = NULL;
722                 if (cfile->srch_inf.smallBuf)
723                         cifs_small_buf_release(buf);
724                 else
725                         cifs_buf_release(buf);
726         }
727
728         cifs_put_tlink(cfile->tlink);
729         kfree(file->private_data);
730         file->private_data = NULL;
731         /* BB can we lock the filestruct while this is going on? */
732         free_xid(xid);
733         return rc;
734 }
735
736 static struct cifsLockInfo *
737 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
738 {
739         struct cifsLockInfo *lock =
740                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
741         if (!lock)
742                 return lock;
743         lock->offset = offset;
744         lock->length = length;
745         lock->type = type;
746         lock->pid = current->tgid;
747         INIT_LIST_HEAD(&lock->blist);
748         init_waitqueue_head(&lock->block_q);
749         return lock;
750 }
751
752 void
753 cifs_del_lock_waiters(struct cifsLockInfo *lock)
754 {
755         struct cifsLockInfo *li, *tmp;
756         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
757                 list_del_init(&li->blist);
758                 wake_up(&li->block_q);
759         }
760 }
761
762 #define CIFS_LOCK_OP    0
763 #define CIFS_READ_OP    1
764 #define CIFS_WRITE_OP   2
765
/*
 * Look through the locks recorded for one fid (@fdlocks) for one that
 * conflicts with the range [@offset, @offset + @length) and lock @type
 * requested through @cfile.
 *
 * @rw_check : 0 - no op, 1 - read, 2 - write
 *             (CIFS_LOCK_OP / CIFS_READ_OP / CIFS_WRITE_OP)
 * @conf_lock: if non-NULL, receives the first conflicting lock found
 *
 * Returns true if a conflicting lock exists, false otherwise.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                /* ranges do not overlap: this lock cannot conflict */
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                /*
                 * Read/write check against a lock taken by this thread group
                 * through the same fid: only a shared lock combined with a
                 * write goes on to the checks below.
                 */
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                /*
                 * A shared request does not conflict with a lock held by the
                 * same thread group on the same fid, nor with a lock of the
                 * same type.
                 */
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}
797
798 bool
799 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
800                         __u8 type, struct cifsLockInfo **conf_lock,
801                         int rw_check)
802 {
803         bool rc = false;
804         struct cifs_fid_locks *cur;
805         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
806
807         list_for_each_entry(cur, &cinode->llist, llist) {
808                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
809                                                  cfile, conf_lock, rw_check);
810                 if (rc)
811                         break;
812         }
813
814         return rc;
815 }
816
817 /*
818  * Check if there is another lock that prevents us to set the lock (mandatory
819  * style). If such a lock exists, update the flock structure with its
820  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
821  * or leave it the same if we can't. Returns 0 if we don't need to request to
822  * the server or 1 otherwise.
823  */
824 static int
825 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
826                __u8 type, struct file_lock *flock)
827 {
828         int rc = 0;
829         struct cifsLockInfo *conf_lock;
830         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
831         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
832         bool exist;
833
834         down_read(&cinode->lock_sem);
835
836         exist = cifs_find_lock_conflict(cfile, offset, length, type,
837                                         &conf_lock, CIFS_LOCK_OP);
838         if (exist) {
839                 flock->fl_start = conf_lock->offset;
840                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
841                 flock->fl_pid = conf_lock->pid;
842                 if (conf_lock->type & server->vals->shared_lock_type)
843                         flock->fl_type = F_RDLCK;
844                 else
845                         flock->fl_type = F_WRLCK;
846         } else if (!cinode->can_cache_brlcks)
847                 rc = 1;
848         else
849                 flock->fl_type = F_UNLCK;
850
851         up_read(&cinode->lock_sem);
852         return rc;
853 }
854
855 static void
856 cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
857 {
858         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
859         down_write(&cinode->lock_sem);
860         list_add_tail(&lock->llist, &cfile->llist->locks);
861         up_write(&cinode->lock_sem);
862 }
863
864 /*
865  * Set the byte-range lock (mandatory style). Returns:
866  * 1) 0, if we set the lock and don't need to request to the server;
867  * 2) 1, if no locks prevent us but we need to request to the server;
868  * 3) -EACCESS, if there is a lock that prevents us and wait is false.
869  */
870 static int
871 cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
872                  bool wait)
873 {
874         struct cifsLockInfo *conf_lock;
875         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
876         bool exist;
877         int rc = 0;
878
879 try_again:
880         exist = false;
881         down_write(&cinode->lock_sem);
882
883         exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
884                                         lock->type, &conf_lock, CIFS_LOCK_OP);
885         if (!exist && cinode->can_cache_brlcks) {
886                 list_add_tail(&lock->llist, &cfile->llist->locks);
887                 up_write(&cinode->lock_sem);
888                 return rc;
889         }
890
891         if (!exist)
892                 rc = 1;
893         else if (!wait)
894                 rc = -EACCES;
895         else {
896                 list_add_tail(&lock->blist, &conf_lock->blist);
897                 up_write(&cinode->lock_sem);
898                 rc = wait_event_interruptible(lock->block_q,
899                                         (lock->blist.prev == &lock->blist) &&
900                                         (lock->blist.next == &lock->blist));
901                 if (!rc)
902                         goto try_again;
903                 down_write(&cinode->lock_sem);
904                 list_del_init(&lock->blist);
905         }
906
907         up_write(&cinode->lock_sem);
908         return rc;
909 }
910
911 /*
912  * Check if there is another lock that prevents us to set the lock (posix
913  * style). If such a lock exists, update the flock structure with its
914  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
915  * or leave it the same if we can't. Returns 0 if we don't need to request to
916  * the server or 1 otherwise.
917  */
918 static int
919 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
920 {
921         int rc = 0;
922         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
923         unsigned char saved_type = flock->fl_type;
924
925         if ((flock->fl_flags & FL_POSIX) == 0)
926                 return 1;
927
928         down_read(&cinode->lock_sem);
929         posix_test_lock(file, flock);
930
931         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
932                 flock->fl_type = saved_type;
933                 rc = 1;
934         }
935
936         up_read(&cinode->lock_sem);
937         return rc;
938 }
939
940 /*
941  * Set the byte-range lock (posix style). Returns:
942  * 1) 0, if we set the lock and don't need to request to the server;
943  * 2) 1, if we need to request to the server;
944  * 3) <0, if the error occurs while setting the lock.
945  */
946 static int
947 cifs_posix_lock_set(struct file *file, struct file_lock *flock)
948 {
949         struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
950         int rc = 1;
951
952         if ((flock->fl_flags & FL_POSIX) == 0)
953                 return rc;
954
955 try_again:
956         down_write(&cinode->lock_sem);
957         if (!cinode->can_cache_brlcks) {
958                 up_write(&cinode->lock_sem);
959                 return rc;
960         }
961
962         rc = posix_lock_file(file, flock, NULL);
963         up_write(&cinode->lock_sem);
964         if (rc == FILE_LOCK_DEFERRED) {
965                 rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
966                 if (!rc)
967                         goto try_again;
968                 locks_delete_block(flock);
969         }
970         return rc;
971 }
972
973 int
974 cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
975 {
976         unsigned int xid;
977         int rc = 0, stored_rc;
978         struct cifsLockInfo *li, *tmp;
979         struct cifs_tcon *tcon;
980         unsigned int num, max_num, max_buf;
981         LOCKING_ANDX_RANGE *buf, *cur;
982         int types[] = {LOCKING_ANDX_LARGE_FILES,
983                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
984         int i;
985
986         xid = get_xid();
987         tcon = tlink_tcon(cfile->tlink);
988
989         /*
990          * Accessing maxBuf is racy with cifs_reconnect - need to store value
991          * and check it for zero before using.
992          */
993         max_buf = tcon->ses->server->maxBuf;
994         if (!max_buf) {
995                 free_xid(xid);
996                 return -EINVAL;
997         }
998
999         max_num = (max_buf - sizeof(struct smb_hdr)) /
1000                                                 sizeof(LOCKING_ANDX_RANGE);
1001         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1002         if (!buf) {
1003                 free_xid(xid);
1004                 return -ENOMEM;
1005         }
1006
1007         for (i = 0; i < 2; i++) {
1008                 cur = buf;
1009                 num = 0;
1010                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1011                         if (li->type != types[i])
1012                                 continue;
1013                         cur->Pid = cpu_to_le16(li->pid);
1014                         cur->LengthLow = cpu_to_le32((u32)li->length);
1015                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1016                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1017                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1018                         if (++num == max_num) {
1019                                 stored_rc = cifs_lockv(xid, tcon,
1020                                                        cfile->fid.netfid,
1021                                                        (__u8)li->type, 0, num,
1022                                                        buf);
1023                                 if (stored_rc)
1024                                         rc = stored_rc;
1025                                 cur = buf;
1026                                 num = 0;
1027                         } else
1028                                 cur++;
1029                 }
1030
1031                 if (num) {
1032                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1033                                                (__u8)types[i], 0, num, buf);
1034                         if (stored_rc)
1035                                 rc = stored_rc;
1036                 }
1037         }
1038
1039         kfree(buf);
1040         free_xid(xid);
1041         return rc;
1042 }
1043
/*
 * Walk an inode's lock chain: lockp (a struct file_lock **) starts at the
 * address of inode->i_flock and advances to the address of each entry's
 * fl_next until it points at a NULL link.
 * Copied from fs/locks.c with a name change; the arguments are parenthesized
 * so that arbitrary expressions can be passed safely.
 */
#define cifs_for_each_lock(inode, lockp) \
        for ((lockp) = &(inode)->i_flock; *(lockp) != NULL; \
             (lockp) = &(*(lockp))->fl_next)
1048
/*
 * Private copy of one POSIX byte-range lock, filled in by
 * cifs_push_posix_locks() while the flock list is locked and passed to
 * CIFSSMBPosixLock() afterwards.
 */
1049 struct lock_to_push {
1050         struct list_head llist; /* link in the caller's locks_to_send list */
1051         __u64 offset;           /* copied from the file_lock's fl_start */
1052         __u64 length;           /* 1 + fl_end - fl_start */
1053         __u32 pid;              /* copied from the file_lock's fl_pid */
1054         __u16 netfid;           /* copied from cfile->fid.netfid */
1055         __u8 type;              /* CIFS_RDLCK or CIFS_WRLCK */
1056 };
1057
1058 static int
1059 cifs_push_posix_locks(struct cifsFileInfo *cfile)
1060 {
1061         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1062         struct file_lock *flock, **before;
1063         unsigned int count = 0, i = 0;
1064         int rc = 0, xid, type;
1065         struct list_head locks_to_send, *el;
1066         struct lock_to_push *lck, *tmp;
1067         __u64 length;
1068
1069         xid = get_xid();
1070
1071         lock_flocks();
1072         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1073                 if ((*before)->fl_flags & FL_POSIX)
1074                         count++;
1075         }
1076         unlock_flocks();
1077
1078         INIT_LIST_HEAD(&locks_to_send);
1079
1080         /*
1081          * Allocating count locks is enough because no FL_POSIX locks can be
1082          * added to the list while we are holding cinode->lock_sem that
1083          * protects locking operations of this inode.
1084          */
1085         for (; i < count; i++) {
1086                 lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1087                 if (!lck) {
1088                         rc = -ENOMEM;
1089                         goto err_out;
1090                 }
1091                 list_add_tail(&lck->llist, &locks_to_send);
1092         }
1093
1094         el = locks_to_send.next;
1095         lock_flocks();
1096         cifs_for_each_lock(cfile->dentry->d_inode, before) {
1097                 flock = *before;
1098                 if ((flock->fl_flags & FL_POSIX) == 0)
1099                         continue;
1100                 if (el == &locks_to_send) {
1101                         /*
1102                          * The list ended. We don't have enough allocated
1103                          * structures - something is really wrong.
1104                          */
1105                         cERROR(1, "Can't push all brlocks!");
1106                         break;
1107                 }
1108                 length = 1 + flock->fl_end - flock->fl_start;
1109                 if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
1110                         type = CIFS_RDLCK;
1111                 else
1112                         type = CIFS_WRLCK;
1113                 lck = list_entry(el, struct lock_to_push, llist);
1114                 lck->pid = flock->fl_pid;
1115                 lck->netfid = cfile->fid.netfid;
1116                 lck->length = length;
1117                 lck->type = type;
1118                 lck->offset = flock->fl_start;
1119                 el = el->next;
1120         }
1121         unlock_flocks();
1122
1123         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1124                 int stored_rc;
1125
1126                 stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1127                                              lck->offset, lck->length, NULL,
1128                                              lck->type, 0);
1129                 if (stored_rc)
1130                         rc = stored_rc;
1131                 list_del(&lck->llist);
1132                 kfree(lck);
1133         }
1134
1135 out:
1136         free_xid(xid);
1137         return rc;
1138 err_out:
1139         list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1140                 list_del(&lck->llist);
1141                 kfree(lck);
1142         }
1143         goto out;
1144 }
1145
1146 static int
1147 cifs_push_locks(struct cifsFileInfo *cfile)
1148 {
1149         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1150         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1151         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1152         int rc = 0;
1153
1154         /* we are going to update can_cache_brlcks here - need a write access */
1155         down_write(&cinode->lock_sem);
1156         if (!cinode->can_cache_brlcks) {
1157                 up_write(&cinode->lock_sem);
1158                 return rc;
1159         }
1160
1161         if (cap_unix(tcon->ses) &&
1162             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1163             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1164                 rc = cifs_push_posix_locks(cfile);
1165         else
1166                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1167
1168         cinode->can_cache_brlcks = false;
1169         up_write(&cinode->lock_sem);
1170         return rc;
1171 }
1172
1173 static void
1174 cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1175                 bool *wait_flag, struct TCP_Server_Info *server)
1176 {
1177         if (flock->fl_flags & FL_POSIX)
1178                 cFYI(1, "Posix");
1179         if (flock->fl_flags & FL_FLOCK)
1180                 cFYI(1, "Flock");
1181         if (flock->fl_flags & FL_SLEEP) {
1182                 cFYI(1, "Blocking lock");
1183                 *wait_flag = true;
1184         }
1185         if (flock->fl_flags & FL_ACCESS)
1186                 cFYI(1, "Process suspended by mandatory locking - "
1187                         "not implemented yet");
1188         if (flock->fl_flags & FL_LEASE)
1189                 cFYI(1, "Lease on file - not implemented yet");
1190         if (flock->fl_flags &
1191             (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1192                FL_ACCESS | FL_LEASE | FL_CLOSE)))
1193                 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
1194
1195         *type = server->vals->large_lock_type;
1196         if (flock->fl_type == F_WRLCK) {
1197                 cFYI(1, "F_WRLCK ");
1198                 *type |= server->vals->exclusive_lock_type;
1199                 *lock = 1;
1200         } else if (flock->fl_type == F_UNLCK) {
1201                 cFYI(1, "F_UNLCK");
1202                 *type |= server->vals->unlock_lock_type;
1203                 *unlock = 1;
1204                 /* Check if unlock includes more than one lock range */
1205         } else if (flock->fl_type == F_RDLCK) {
1206                 cFYI(1, "F_RDLCK");
1207                 *type |= server->vals->shared_lock_type;
1208                 *lock = 1;
1209         } else if (flock->fl_type == F_EXLCK) {
1210                 cFYI(1, "F_EXLCK");
1211                 *type |= server->vals->exclusive_lock_type;
1212                 *lock = 1;
1213         } else if (flock->fl_type == F_SHLCK) {
1214                 cFYI(1, "F_SHLCK");
1215                 *type |= server->vals->shared_lock_type;
1216                 *lock = 1;
1217         } else
1218                 cFYI(1, "Unknown type of lock");
1219 }
1220
1221 static int
1222 cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1223            bool wait_flag, bool posix_lck, unsigned int xid)
1224 {
1225         int rc = 0;
1226         __u64 length = 1 + flock->fl_end - flock->fl_start;
1227         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1228         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1229         struct TCP_Server_Info *server = tcon->ses->server;
1230         __u16 netfid = cfile->fid.netfid;
1231
1232         if (posix_lck) {
1233                 int posix_lock_type;
1234
1235                 rc = cifs_posix_lock_test(file, flock);
1236                 if (!rc)
1237                         return rc;
1238
1239                 if (type & server->vals->shared_lock_type)
1240                         posix_lock_type = CIFS_RDLCK;
1241                 else
1242                         posix_lock_type = CIFS_WRLCK;
1243                 rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
1244                                       flock->fl_start, length, flock,
1245                                       posix_lock_type, wait_flag);
1246                 return rc;
1247         }
1248
1249         rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1250         if (!rc)
1251                 return rc;
1252
1253         /* BB we could chain these into one lock request BB */
1254         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1255                                     1, 0, false);
1256         if (rc == 0) {
1257                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1258                                             type, 0, 1, false);
1259                 flock->fl_type = F_UNLCK;
1260                 if (rc != 0)
1261                         cERROR(1, "Error unlocking previously locked "
1262                                   "range %d during test of lock", rc);
1263                 return 0;
1264         }
1265
1266         if (type & server->vals->shared_lock_type) {
1267                 flock->fl_type = F_WRLCK;
1268                 return 0;
1269         }
1270
1271         type &= ~server->vals->exclusive_lock_type;
1272
1273         rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1274                                     type | server->vals->shared_lock_type,
1275                                     1, 0, false);
1276         if (rc == 0) {
1277                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1278                         type | server->vals->shared_lock_type, 0, 1, false);
1279                 flock->fl_type = F_RDLCK;
1280                 if (rc != 0)
1281                         cERROR(1, "Error unlocking previously locked "
1282                                   "range %d during test of lock", rc);
1283         } else
1284                 flock->fl_type = F_WRLCK;
1285
1286         return 0;
1287 }
1288
1289 void
1290 cifs_move_llist(struct list_head *source, struct list_head *dest)
1291 {
1292         struct list_head *li, *tmp;
1293         list_for_each_safe(li, tmp, source)
1294                 list_move(li, dest);
1295 }
1296
1297 void
1298 cifs_free_llist(struct list_head *llist)
1299 {
1300         struct cifsLockInfo *li, *tmp;
1301         list_for_each_entry_safe(li, tmp, llist, llist) {
1302                 cifs_del_lock_waiters(li);
1303                 list_del(&li->llist);
1304                 kfree(li);
1305         }
1306 }
1307
1308 int
1309 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1310                   unsigned int xid)
1311 {
1312         int rc = 0, stored_rc;
1313         int types[] = {LOCKING_ANDX_LARGE_FILES,
1314                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1315         unsigned int i;
1316         unsigned int max_num, num, max_buf;
1317         LOCKING_ANDX_RANGE *buf, *cur;
1318         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1319         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1320         struct cifsLockInfo *li, *tmp;
1321         __u64 length = 1 + flock->fl_end - flock->fl_start;
1322         struct list_head tmp_llist;
1323
1324         INIT_LIST_HEAD(&tmp_llist);
1325
1326         /*
1327          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1328          * and check it for zero before using.
1329          */
1330         max_buf = tcon->ses->server->maxBuf;
1331         if (!max_buf)
1332                 return -EINVAL;
1333
1334         max_num = (max_buf - sizeof(struct smb_hdr)) /
1335                                                 sizeof(LOCKING_ANDX_RANGE);
1336         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1337         if (!buf)
1338                 return -ENOMEM;
1339
1340         down_write(&cinode->lock_sem);
1341         for (i = 0; i < 2; i++) {
1342                 cur = buf;
1343                 num = 0;
1344                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1345                         if (flock->fl_start > li->offset ||
1346                             (flock->fl_start + length) <
1347                             (li->offset + li->length))
1348                                 continue;
1349                         if (current->tgid != li->pid)
1350                                 continue;
1351                         if (types[i] != li->type)
1352                                 continue;
1353                         if (cinode->can_cache_brlcks) {
1354                                 /*
1355                                  * We can cache brlock requests - simply remove
1356                                  * a lock from the file's list.
1357                                  */
1358                                 list_del(&li->llist);
1359                                 cifs_del_lock_waiters(li);
1360                                 kfree(li);
1361                                 continue;
1362                         }
1363                         cur->Pid = cpu_to_le16(li->pid);
1364                         cur->LengthLow = cpu_to_le32((u32)li->length);
1365                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1366                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1367                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1368                         /*
1369                          * We need to save a lock here to let us add it again to
1370                          * the file's list if the unlock range request fails on
1371                          * the server.
1372                          */
1373                         list_move(&li->llist, &tmp_llist);
1374                         if (++num == max_num) {
1375                                 stored_rc = cifs_lockv(xid, tcon,
1376                                                        cfile->fid.netfid,
1377                                                        li->type, num, 0, buf);
1378                                 if (stored_rc) {
1379                                         /*
1380                                          * We failed on the unlock range
1381                                          * request - add all locks from the tmp
1382                                          * list to the head of the file's list.
1383                                          */
1384                                         cifs_move_llist(&tmp_llist,
1385                                                         &cfile->llist->locks);
1386                                         rc = stored_rc;
1387                                 } else
1388                                         /*
1389                                          * The unlock range request succeed -
1390                                          * free the tmp list.
1391                                          */
1392                                         cifs_free_llist(&tmp_llist);
1393                                 cur = buf;
1394                                 num = 0;
1395                         } else
1396                                 cur++;
1397                 }
1398                 if (num) {
1399                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1400                                                types[i], num, 0, buf);
1401                         if (stored_rc) {
1402                                 cifs_move_llist(&tmp_llist,
1403                                                 &cfile->llist->locks);
1404                                 rc = stored_rc;
1405                         } else
1406                                 cifs_free_llist(&tmp_llist);
1407                 }
1408         }
1409
1410         up_write(&cinode->lock_sem);
1411         kfree(buf);
1412         return rc;
1413 }
1414
1415 static int
1416 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1417            bool wait_flag, bool posix_lck, int lock, int unlock,
1418            unsigned int xid)
1419 {
1420         int rc = 0;
1421         __u64 length = 1 + flock->fl_end - flock->fl_start;
1422         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1423         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1424         struct TCP_Server_Info *server = tcon->ses->server;
1425
1426         if (posix_lck) {
1427                 int posix_lock_type;
1428
1429                 rc = cifs_posix_lock_set(file, flock);
1430                 if (!rc || rc < 0)
1431                         return rc;
1432
1433                 if (type & server->vals->shared_lock_type)
1434                         posix_lock_type = CIFS_RDLCK;
1435                 else
1436                         posix_lock_type = CIFS_WRLCK;
1437
1438                 if (unlock == 1)
1439                         posix_lock_type = CIFS_UNLCK;
1440
1441                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1442                                       current->tgid, flock->fl_start, length,
1443                                       NULL, posix_lock_type, wait_flag);
1444                 goto out;
1445         }
1446
1447         if (lock) {
1448                 struct cifsLockInfo *lock;
1449
1450                 lock = cifs_lock_init(flock->fl_start, length, type);
1451                 if (!lock)
1452                         return -ENOMEM;
1453
1454                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1455                 if (rc < 0) {
1456                         kfree(lock);
1457                         return rc;
1458                 }
1459                 if (!rc)
1460                         goto out;
1461
1462                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1463                                             type, 1, 0, wait_flag);
1464                 if (rc) {
1465                         kfree(lock);
1466                         return rc;
1467                 }
1468
1469                 cifs_lock_add(cfile, lock);
1470         } else if (unlock)
1471                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1472
1473 out:
1474         if (flock->fl_flags & FL_POSIX)
1475                 posix_lock_file_wait(file, flock);
1476         return rc;
1477 }
1478
1479 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1480 {
1481         int rc, xid;
1482         int lock = 0, unlock = 0;
1483         bool wait_flag = false;
1484         bool posix_lck = false;
1485         struct cifs_sb_info *cifs_sb;
1486         struct cifs_tcon *tcon;
1487         struct cifsInodeInfo *cinode;
1488         struct cifsFileInfo *cfile;
1489         __u16 netfid;
1490         __u32 type;
1491
1492         rc = -EACCES;
1493         xid = get_xid();
1494
1495         cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
1496                 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
1497                 flock->fl_start, flock->fl_end);
1498
1499         cfile = (struct cifsFileInfo *)file->private_data;
1500         tcon = tlink_tcon(cfile->tlink);
1501
1502         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1503                         tcon->ses->server);
1504
1505         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1506         netfid = cfile->fid.netfid;
1507         cinode = CIFS_I(file->f_path.dentry->d_inode);
1508
1509         if (cap_unix(tcon->ses) &&
1510             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1511             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1512                 posix_lck = true;
1513         /*
1514          * BB add code here to normalize offset and length to account for
1515          * negative length which we can not accept over the wire.
1516          */
1517         if (IS_GETLK(cmd)) {
1518                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1519                 free_xid(xid);
1520                 return rc;
1521         }
1522
1523         if (!lock && !unlock) {
1524                 /*
1525                  * if no lock or unlock then nothing to do since we do not
1526                  * know what it is
1527                  */
1528                 free_xid(xid);
1529                 return -EOPNOTSUPP;
1530         }
1531
1532         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1533                         xid);
1534         free_xid(xid);
1535         return rc;
1536 }
1537
1538 /*
1539  * update the file size (if needed) after a write. Should be called with
1540  * the inode->i_lock held
1541  */
1542 void
1543 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1544                       unsigned int bytes_written)
1545 {
1546         loff_t end_of_write = offset + bytes_written;
1547
1548         if (end_of_write > cifsi->server_eof)
1549                 cifsi->server_eof = end_of_write;
1550 }
1551
1552 static ssize_t
1553 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1554            size_t write_size, loff_t *offset)
1555 {
1556         int rc = 0;
1557         unsigned int bytes_written = 0;
1558         unsigned int total_written;
1559         struct cifs_sb_info *cifs_sb;
1560         struct cifs_tcon *tcon;
1561         struct TCP_Server_Info *server;
1562         unsigned int xid;
1563         struct dentry *dentry = open_file->dentry;
1564         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1565         struct cifs_io_parms io_parms;
1566
1567         cifs_sb = CIFS_SB(dentry->d_sb);
1568
1569         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1570              *offset, dentry->d_name.name);
1571
1572         tcon = tlink_tcon(open_file->tlink);
1573         server = tcon->ses->server;
1574
1575         if (!server->ops->sync_write)
1576                 return -ENOSYS;
1577
1578         xid = get_xid();
1579
1580         for (total_written = 0; write_size > total_written;
1581              total_written += bytes_written) {
1582                 rc = -EAGAIN;
1583                 while (rc == -EAGAIN) {
1584                         struct kvec iov[2];
1585                         unsigned int len;
1586
1587                         if (open_file->invalidHandle) {
1588                                 /* we could deadlock if we called
1589                                    filemap_fdatawait from here so tell
1590                                    reopen_file not to flush data to
1591                                    server now */
1592                                 rc = cifs_reopen_file(open_file, false);
1593                                 if (rc != 0)
1594                                         break;
1595                         }
1596
1597                         len = min((size_t)cifs_sb->wsize,
1598                                   write_size - total_written);
1599                         /* iov[0] is reserved for smb header */
1600                         iov[1].iov_base = (char *)write_data + total_written;
1601                         iov[1].iov_len = len;
1602                         io_parms.pid = pid;
1603                         io_parms.tcon = tcon;
1604                         io_parms.offset = *offset;
1605                         io_parms.length = len;
1606                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1607                                                      &bytes_written, iov, 1);
1608                 }
1609                 if (rc || (bytes_written == 0)) {
1610                         if (total_written)
1611                                 break;
1612                         else {
1613                                 free_xid(xid);
1614                                 return rc;
1615                         }
1616                 } else {
1617                         spin_lock(&dentry->d_inode->i_lock);
1618                         cifs_update_eof(cifsi, *offset, bytes_written);
1619                         spin_unlock(&dentry->d_inode->i_lock);
1620                         *offset += bytes_written;
1621                 }
1622         }
1623
1624         cifs_stats_bytes_written(tcon, total_written);
1625
1626         if (total_written > 0) {
1627                 spin_lock(&dentry->d_inode->i_lock);
1628                 if (*offset > dentry->d_inode->i_size)
1629                         i_size_write(dentry->d_inode, *offset);
1630                 spin_unlock(&dentry->d_inode->i_lock);
1631         }
1632         mark_inode_dirty_sync(dentry->d_inode);
1633         free_xid(xid);
1634         return total_written;
1635 }
1636
1637 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1638                                         bool fsuid_only)
1639 {
1640         struct cifsFileInfo *open_file = NULL;
1641         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1642
1643         /* only filter by fsuid on multiuser mounts */
1644         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1645                 fsuid_only = false;
1646
1647         spin_lock(&cifs_file_list_lock);
1648         /* we could simply get the first_list_entry since write-only entries
1649            are always at the end of the list but since the first entry might
1650            have a close pending, we go through the whole list */
1651         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1652                 if (fsuid_only && open_file->uid != current_fsuid())
1653                         continue;
1654                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1655                         if (!open_file->invalidHandle) {
1656                                 /* found a good file */
1657                                 /* lock it so it will not be closed on us */
1658                                 cifsFileInfo_get_locked(open_file);
1659                                 spin_unlock(&cifs_file_list_lock);
1660                                 return open_file;
1661                         } /* else might as well continue, and look for
1662                              another, or simply have the caller reopen it
1663                              again rather than trying to fix this handle */
1664                 } else /* write only file */
1665                         break; /* write only files are last so must be done */
1666         }
1667         spin_unlock(&cifs_file_list_lock);
1668         return NULL;
1669 }
1670
1671 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1672                                         bool fsuid_only)
1673 {
1674         struct cifsFileInfo *open_file, *inv_file = NULL;
1675         struct cifs_sb_info *cifs_sb;
1676         bool any_available = false;
1677         int rc;
1678         unsigned int refind = 0;
1679
1680         /* Having a null inode here (because mapping->host was set to zero by
1681         the VFS or MM) should not happen but we had reports of on oops (due to
1682         it being zero) during stress testcases so we need to check for it */
1683
1684         if (cifs_inode == NULL) {
1685                 cERROR(1, "Null inode passed to cifs_writeable_file");
1686                 dump_stack();
1687                 return NULL;
1688         }
1689
1690         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1691
1692         /* only filter by fsuid on multiuser mounts */
1693         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1694                 fsuid_only = false;
1695
1696         spin_lock(&cifs_file_list_lock);
1697 refind_writable:
1698         if (refind > MAX_REOPEN_ATT) {
1699                 spin_unlock(&cifs_file_list_lock);
1700                 return NULL;
1701         }
1702         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1703                 if (!any_available && open_file->pid != current->tgid)
1704                         continue;
1705                 if (fsuid_only && open_file->uid != current_fsuid())
1706                         continue;
1707                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1708                         if (!open_file->invalidHandle) {
1709                                 /* found a good writable file */
1710                                 cifsFileInfo_get_locked(open_file);
1711                                 spin_unlock(&cifs_file_list_lock);
1712                                 return open_file;
1713                         } else {
1714                                 if (!inv_file)
1715                                         inv_file = open_file;
1716                         }
1717                 }
1718         }
1719         /* couldn't find useable FH with same pid, try any available */
1720         if (!any_available) {
1721                 any_available = true;
1722                 goto refind_writable;
1723         }
1724
1725         if (inv_file) {
1726                 any_available = false;
1727                 cifsFileInfo_get_locked(inv_file);
1728         }
1729
1730         spin_unlock(&cifs_file_list_lock);
1731
1732         if (inv_file) {
1733                 rc = cifs_reopen_file(inv_file, false);
1734                 if (!rc)
1735                         return inv_file;
1736                 else {
1737                         spin_lock(&cifs_file_list_lock);
1738                         list_move_tail(&inv_file->flist,
1739                                         &cifs_inode->openFileList);
1740                         spin_unlock(&cifs_file_list_lock);
1741                         cifsFileInfo_put(inv_file);
1742                         spin_lock(&cifs_file_list_lock);
1743                         ++refind;
1744                         goto refind_writable;
1745                 }
1746         }
1747
1748         return NULL;
1749 }
1750
1751 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1752 {
1753         struct address_space *mapping = page->mapping;
1754         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1755         char *write_data;
1756         int rc = -EFAULT;
1757         int bytes_written = 0;
1758         struct inode *inode;
1759         struct cifsFileInfo *open_file;
1760
1761         if (!mapping || !mapping->host)
1762                 return -EFAULT;
1763
1764         inode = page->mapping->host;
1765
1766         offset += (loff_t)from;
1767         write_data = kmap(page);
1768         write_data += from;
1769
1770         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1771                 kunmap(page);
1772                 return -EIO;
1773         }
1774
1775         /* racing with truncate? */
1776         if (offset > mapping->host->i_size) {
1777                 kunmap(page);
1778                 return 0; /* don't care */
1779         }
1780
1781         /* check to make sure that we are not extending the file */
1782         if (mapping->host->i_size - offset < (loff_t)to)
1783                 to = (unsigned)(mapping->host->i_size - offset);
1784
1785         open_file = find_writable_file(CIFS_I(mapping->host), false);
1786         if (open_file) {
1787                 bytes_written = cifs_write(open_file, open_file->pid,
1788                                            write_data, to - from, &offset);
1789                 cifsFileInfo_put(open_file);
1790                 /* Does mm or vfs already set times? */
1791                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1792                 if ((bytes_written > 0) && (offset))
1793                         rc = 0;
1794                 else if (bytes_written < 0)
1795                         rc = bytes_written;
1796         } else {
1797                 cFYI(1, "No writeable filehandles for inode");
1798                 rc = -EIO;
1799         }
1800
1801         kunmap(page);
1802         return rc;
1803 }
1804
/*
 * Write back dirty pages of @mapping as directed by @wbc.
 *
 * Runs of consecutive dirty pages are gathered into a cifs_writedata and
 * handed to the server's async_writev operation, at most wsize bytes per
 * request. When wsize is smaller than one page the work is delegated to
 * generic_writepages() instead.
 *
 * Returns the rc of the most recent step: -ENOMEM if a cifs_writedata could
 * not be allocated, -EBADF if no writable handle was found, or whatever
 * async_writev returned for the last batch sent. Non-EAGAIN send failures
 * are also recorded on the mapping via mapping_set_error().
 */
static int cifs_writepages(struct address_space *mapping,
                           struct writeback_control *wbc)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
        bool done = false, scanned = false, range_whole = false;
        pgoff_t end, index;
        struct cifs_writedata *wdata;
        struct TCP_Server_Info *server;
        struct page *page;
        int rc = 0;

        /*
         * If wsize is smaller than the page cache size, default to writing
         * one page at a time via cifs_writepage
         */
        if (cifs_sb->wsize < PAGE_CACHE_SIZE)
                return generic_writepages(mapping, wbc);

        if (wbc->range_cyclic) {
                index = mapping->writeback_index; /* Start from prev offset */
                end = -1;
        } else {
                index = wbc->range_start >> PAGE_CACHE_SHIFT;
                end = wbc->range_end >> PAGE_CACHE_SHIFT;
                if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
                        range_whole = true;
                /* explicit range: no wrap-around pass afterwards */
                scanned = true;
        }
retry:
        while (!done && index <= end) {
                unsigned int i, nr_pages, found_pages;
                pgoff_t next = 0, tofind;
                struct page **pages;

                /*
                 * Look for at most one wsize worth of pages, and never
                 * further than the end of the requested range.
                 */
                tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
                                end - index) + 1;

                wdata = cifs_writedata_alloc((unsigned int)tofind,
                                             cifs_writev_complete);
                if (!wdata) {
                        rc = -ENOMEM;
                        break;
                }

                /*
                 * find_get_pages_tag seems to return a max of 256 on each
                 * iteration, so we must call it several times in order to
                 * fill the array or the wsize is effectively limited to
                 * 256 * PAGE_CACHE_SIZE.
                 */
                found_pages = 0;
                pages = wdata->pages;
                do {
                        nr_pages = find_get_pages_tag(mapping, &index,
                                                        PAGECACHE_TAG_DIRTY,
                                                        tofind, pages);
                        found_pages += nr_pages;
                        tofind -= nr_pages;
                        pages += nr_pages;
                } while (nr_pages && tofind && index <= end);

                if (found_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        break;
                }

                /*
                 * Lock the pages found and put them under writeback,
                 * stopping at the first one that cannot join the batch.
                 * From here on nr_pages counts the pages accepted.
                 */
                nr_pages = 0;
                for (i = 0; i < found_pages; i++) {
                        page = wdata->pages[i];
                        /*
                         * At this point we hold neither mapping->tree_lock nor
                         * lock on the page itself: the page may be truncated or
                         * invalidated (changing page->mapping to NULL), or even
                         * swizzled back from swapper_space to tmpfs file
                         * mapping
                         */

                        /*
                         * Block only for the first page of a batch; later
                         * pages are taken with trylock and a contended one
                         * simply ends the batch.
                         */
                        if (nr_pages == 0)
                                lock_page(page);
                        else if (!trylock_page(page))
                                break;

                        if (unlikely(page->mapping != mapping)) {
                                unlock_page(page);
                                break;
                        }

                        if (!wbc->range_cyclic && page->index > end) {
                                done = true;
                                unlock_page(page);
                                break;
                        }

                        if (next && (page->index != next)) {
                                /* Not next consecutive page */
                                unlock_page(page);
                                break;
                        }

                        if (wbc->sync_mode != WB_SYNC_NONE)
                                wait_on_page_writeback(page);

                        if (PageWriteback(page) ||
                                        !clear_page_dirty_for_io(page)) {
                                unlock_page(page);
                                break;
                        }

                        /*
                         * This actually clears the dirty bit in the radix tree.
                         * See cifs_writepage() for more commentary.
                         */
                        set_page_writeback(page);

                        /* page starts at or beyond i_size: stop scanning */
                        if (page_offset(page) >= i_size_read(mapping->host)) {
                                done = true;
                                unlock_page(page);
                                end_page_writeback(page);
                                break;
                        }

                        wdata->pages[i] = page;
                        next = page->index + 1;
                        ++nr_pages;
                }

                /* reset index to refind any pages skipped */
                if (nr_pages == 0)
                        index = wdata->pages[0]->index + 1;

                /* put any pages we aren't going to use */
                for (i = nr_pages; i < found_pages; i++) {
                        page_cache_release(wdata->pages[i]);
                        wdata->pages[i] = NULL;
                }

                /* nothing to write? */
                if (nr_pages == 0) {
                        kref_put(&wdata->refcount, cifs_writedata_release);
                        continue;
                }

                wdata->sync_mode = wbc->sync_mode;
                wdata->nr_pages = nr_pages;
                wdata->offset = page_offset(wdata->pages[0]);
                wdata->pagesz = PAGE_CACHE_SIZE;
                /* the last page is only sent up to i_size */
                wdata->tailsz =
                        min(i_size_read(mapping->host) -
                            page_offset(wdata->pages[nr_pages - 1]),
                            (loff_t)PAGE_CACHE_SIZE);
                wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
                                        wdata->tailsz;

                /*
                 * Send the batch. Each attempt looks up a writable handle
                 * again, dropping the reference taken by the previous
                 * attempt; -EAGAIN is retried only for WB_SYNC_ALL.
                 */
                do {
                        if (wdata->cfile != NULL)
                                cifsFileInfo_put(wdata->cfile);
                        wdata->cfile = find_writable_file(CIFS_I(mapping->host),
                                                          false);
                        if (!wdata->cfile) {
                                cERROR(1, "No writable handles for inode");
                                rc = -EBADF;
                                break;
                        }
                        wdata->pid = wdata->cfile->pid;
                        server = tlink_tcon(wdata->cfile->tlink)->ses->server;
                        rc = server->ops->async_writev(wdata);
                } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);

                for (i = 0; i < nr_pages; ++i)
                        unlock_page(wdata->pages[i]);

                /* send failure -- clean up the mess */
                if (rc != 0) {
                        for (i = 0; i < nr_pages; ++i) {
                                /* -EAGAIN: leave the page dirty for later */
                                if (rc == -EAGAIN)
                                        redirty_page_for_writepage(wbc,
                                                           wdata->pages[i]);
                                else
                                        SetPageError(wdata->pages[i]);
                                end_page_writeback(wdata->pages[i]);
                                page_cache_release(wdata->pages[i]);
                        }
                        if (rc != -EAGAIN)
                                mapping_set_error(mapping, rc);
                }
                /* drop this function's reference on the writedata */
                kref_put(&wdata->refcount, cifs_writedata_release);

                wbc->nr_to_write -= nr_pages;
                if (wbc->nr_to_write <= 0)
                        done = true;

                /* continue after the last page put into this batch */
                index = next;
        }

        if (!scanned && !done) {
                /*
                 * We hit the last page and there is more work to be done: wrap
                 * back to the start of the file
                 */
                scanned = true;
                index = 0;
                goto retry;
        }

        /* remember where to resume for the next cyclic writeback */
        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
                mapping->writeback_index = index;

        return rc;
}
2014
2015 static int
2016 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2017 {
2018         int rc;
2019         unsigned int xid;
2020
2021         xid = get_xid();
2022 /* BB add check for wbc flags */
2023         page_cache_get(page);
2024         if (!PageUptodate(page))
2025                 cFYI(1, "ppw - page not up to date");
2026
2027         /*
2028          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2029          *
2030          * A writepage() implementation always needs to do either this,
2031          * or re-dirty the page with "redirty_page_for_writepage()" in
2032          * the case of a failure.
2033          *
2034          * Just unlocking the page will cause the radix tree tag-bits
2035          * to fail to update with the state of the page correctly.
2036          */
2037         set_page_writeback(page);
2038 retry_write:
2039         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2040         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2041                 goto retry_write;
2042         else if (rc == -EAGAIN)
2043                 redirty_page_for_writepage(wbc, page);
2044         else if (rc != 0)
2045                 SetPageError(page);
2046         else
2047                 SetPageUptodate(page);
2048         end_page_writeback(page);
2049         page_cache_release(page);
2050         free_xid(xid);
2051         return rc;
2052 }
2053
/*
 * Write @page out via cifs_writepage_locked(), then unlock it.
 * Returns the result of the write.
 */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
        int ret;

        ret = cifs_writepage_locked(page, wbc);
        unlock_page(page);

        return ret;
}
2060
2061 static int cifs_write_end(struct file *file, struct address_space *mapping,
2062                         loff_t pos, unsigned len, unsigned copied,
2063                         struct page *page, void *fsdata)
2064 {
2065         int rc;
2066         struct inode *inode = mapping->host;
2067         struct cifsFileInfo *cfile = file->private_data;
2068         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2069         __u32 pid;
2070
2071         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2072                 pid = cfile->pid;
2073         else
2074                 pid = current->tgid;
2075
2076         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
2077                  page, pos, copied);
2078
2079         if (PageChecked(page)) {
2080                 if (copied == len)
2081                         SetPageUptodate(page);
2082                 ClearPageChecked(page);
2083         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2084                 SetPageUptodate(page);
2085
2086         if (!PageUptodate(page)) {
2087                 char *page_data;
2088                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2089                 unsigned int xid;
2090
2091                 xid = get_xid();
2092                 /* this is probably better than directly calling
2093                    partialpage_write since in this function the file handle is
2094                    known which we might as well leverage */
2095                 /* BB check if anything else missing out of ppw
2096                    such as updating last write time */
2097                 page_data = kmap(page);
2098                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2099                 /* if (rc < 0) should we set writebehind rc? */
2100                 kunmap(page);
2101
2102                 free_xid(xid);
2103         } else {
2104                 rc = copied;
2105                 pos += copied;
2106                 /*
2107                  * When we use strict cache mode and cifs_strict_writev was run
2108                  * with level II oplock (indicated by leave_pages_clean field of
2109                  * CIFS_I(inode)), we can leave pages clean - cifs_strict_writev
2110                  * sent the data to the server itself.
2111                  */
2112                 if (!CIFS_I(inode)->leave_pages_clean ||
2113                     !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO))
2114                         set_page_dirty(page);
2115         }
2116
2117         if (rc > 0) {
2118                 spin_lock(&inode->i_lock);
2119                 if (pos > inode->i_size)
2120                         i_size_write(inode, pos);
2121                 spin_unlock(&inode->i_lock);
2122         }
2123
2124         unlock_page(page);
2125         page_cache_release(page);
2126
2127         return rc;
2128 }
2129
2130 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2131                       int datasync)
2132 {
2133         unsigned int xid;
2134         int rc = 0;
2135         struct cifs_tcon *tcon;
2136         struct TCP_Server_Info *server;
2137         struct cifsFileInfo *smbfile = file->private_data;
2138         struct inode *inode = file->f_path.dentry->d_inode;
2139         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2140
2141         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2142         if (rc)
2143                 return rc;
2144         mutex_lock(&inode->i_mutex);
2145
2146         xid = get_xid();
2147
2148         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2149                 file->f_path.dentry->d_name.name, datasync);
2150
2151         if (!CIFS_I(inode)->clientCanCacheRead) {
2152                 rc = cifs_invalidate_mapping(inode);
2153                 if (rc) {
2154                         cFYI(1, "rc: %d during invalidate phase", rc);
2155                         rc = 0; /* don't care about it in fsync */
2156                 }
2157         }
2158
2159         tcon = tlink_tcon(smbfile->tlink);
2160         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2161                 server = tcon->ses->server;
2162                 if (server->ops->flush)
2163                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2164                 else
2165                         rc = -ENOSYS;
2166         }
2167
2168         free_xid(xid);
2169         mutex_unlock(&inode->i_mutex);
2170         return rc;
2171 }
2172
2173 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2174 {
2175         unsigned int xid;
2176         int rc = 0;
2177         struct cifs_tcon *tcon;
2178         struct TCP_Server_Info *server;
2179         struct cifsFileInfo *smbfile = file->private_data;
2180         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2181         struct inode *inode = file->f_mapping->host;
2182
2183         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2184         if (rc)
2185                 return rc;
2186         mutex_lock(&inode->i_mutex);
2187
2188         xid = get_xid();
2189
2190         cFYI(1, "Sync file - name: %s datasync: 0x%x",
2191                 file->f_path.dentry->d_name.name, datasync);
2192
2193         tcon = tlink_tcon(smbfile->tlink);
2194         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2195                 server = tcon->ses->server;
2196                 if (server->ops->flush)
2197                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2198                 else
2199                         rc = -ENOSYS;
2200         }
2201
2202         free_xid(xid);
2203         mutex_unlock(&inode->i_mutex);
2204         return rc;
2205 }
2206
2207 /*
2208  * As file closes, flush all cached write data for this inode checking
2209  * for write behind errors.
2210  */
2211 int cifs_flush(struct file *file, fl_owner_t id)
2212 {
2213         struct inode *inode = file->f_path.dentry->d_inode;
2214         int rc = 0;
2215
2216         if (file->f_mode & FMODE_WRITE)
2217                 rc = filemap_write_and_wait(inode->i_mapping);
2218
2219         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
2220
2221         return rc;
2222 }
2223
2224 static int
2225 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2226 {
2227         int rc = 0;
2228         unsigned long i;
2229
2230         for (i = 0; i < num_pages; i++) {
2231                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2232                 if (!pages[i]) {
2233                         /*
2234                          * save number of pages we have already allocated and
2235                          * return with ENOMEM error
2236                          */
2237                         num_pages = i;
2238                         rc = -ENOMEM;
2239                         break;
2240                 }
2241         }
2242
2243         if (rc) {
2244                 for (i = 0; i < num_pages; i++)
2245                         put_page(pages[i]);
2246         }
2247         return rc;
2248 }
2249
2250 static inline
2251 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2252 {
2253         size_t num_pages;
2254         size_t clen;
2255
2256         clen = min_t(const size_t, len, wsize);
2257         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2258
2259         if (cur_len)
2260                 *cur_len = clen;
2261
2262         return num_pages;
2263 }
2264
2265 static void
2266 cifs_uncached_writev_complete(struct work_struct *work)
2267 {
2268         int i;
2269         struct cifs_writedata *wdata = container_of(work,
2270                                         struct cifs_writedata, work);
2271         struct inode *inode = wdata->cfile->dentry->d_inode;
2272         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2273
2274         spin_lock(&inode->i_lock);
2275         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2276         if (cifsi->server_eof > inode->i_size)
2277                 i_size_write(inode, cifsi->server_eof);
2278         spin_unlock(&inode->i_lock);
2279
2280         complete(&wdata->done);
2281
2282         if (wdata->result != -EAGAIN) {
2283                 for (i = 0; i < wdata->nr_pages; i++)
2284                         put_page(wdata->pages[i]);
2285         }
2286
2287         kref_put(&wdata->refcount, cifs_writedata_release);
2288 }
2289
2290 /* attempt to send write to server, retry on any -EAGAIN errors */
2291 static int
2292 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2293 {
2294         int rc;
2295         struct TCP_Server_Info *server;
2296
2297         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2298
2299         do {
2300                 if (wdata->cfile->invalidHandle) {
2301                         rc = cifs_reopen_file(wdata->cfile, false);
2302                         if (rc != 0)
2303                                 continue;
2304                 }
2305                 rc = server->ops->async_writev(wdata);
2306         } while (rc == -EAGAIN);
2307
2308         return rc;
2309 }
2310
2311 static ssize_t
2312 cifs_iovec_write(struct file *file, const struct iovec *iov,
2313                  unsigned long nr_segs, loff_t *poffset)
2314 {
2315         unsigned long nr_pages, i;
2316         size_t copied, len, cur_len;
2317         ssize_t total_written = 0;
2318         loff_t offset;
2319         struct iov_iter it;
2320         struct cifsFileInfo *open_file;
2321         struct cifs_tcon *tcon;
2322         struct cifs_sb_info *cifs_sb;
2323         struct cifs_writedata *wdata, *tmp;
2324         struct list_head wdata_list;
2325         int rc;
2326         pid_t pid;
2327
2328         len = iov_length(iov, nr_segs);
2329         if (!len)
2330                 return 0;
2331
2332         rc = generic_write_checks(file, poffset, &len, 0);
2333         if (rc)
2334                 return rc;
2335
2336         INIT_LIST_HEAD(&wdata_list);
2337         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2338         open_file = file->private_data;
2339         tcon = tlink_tcon(open_file->tlink);
2340
2341         if (!tcon->ses->server->ops->async_writev)
2342                 return -ENOSYS;
2343
2344         offset = *poffset;
2345
2346         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2347                 pid = open_file->pid;
2348         else
2349                 pid = current->tgid;
2350
2351         iov_iter_init(&it, iov, nr_segs, len, 0);
2352         do {
2353                 size_t save_len;
2354
2355                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2356                 wdata = cifs_writedata_alloc(nr_pages,
2357                                              cifs_uncached_writev_complete);
2358                 if (!wdata) {
2359                         rc = -ENOMEM;
2360                         break;
2361                 }
2362
2363                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2364                 if (rc) {
2365                         kfree(wdata);
2366                         break;
2367                 }
2368
2369                 save_len = cur_len;
2370                 for (i = 0; i < nr_pages; i++) {
2371                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2372                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2373                                                          0, copied);
2374                         cur_len -= copied;
2375                         iov_iter_advance(&it, copied);
2376                 }
2377                 cur_len = save_len - cur_len;
2378
2379                 wdata->sync_mode = WB_SYNC_ALL;
2380                 wdata->nr_pages = nr_pages;
2381                 wdata->offset = (__u64)offset;
2382                 wdata->cfile = cifsFileInfo_get(open_file);
2383                 wdata->pid = pid;
2384                 wdata->bytes = cur_len;
2385                 wdata->pagesz = PAGE_SIZE;
2386                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2387                 rc = cifs_uncached_retry_writev(wdata);
2388                 if (rc) {
2389                         kref_put(&wdata->refcount, cifs_writedata_release);
2390                         break;
2391                 }
2392
2393                 list_add_tail(&wdata->list, &wdata_list);
2394                 offset += cur_len;
2395                 len -= cur_len;
2396         } while (len > 0);
2397
2398         /*
2399          * If at least one write was successfully sent, then discard any rc
2400          * value from the later writes. If the other write succeeds, then
2401          * we'll end up returning whatever was written. If it fails, then
2402          * we'll get a new rc value from that.
2403          */
2404         if (!list_empty(&wdata_list))
2405                 rc = 0;
2406
2407         /*
2408          * Wait for and collect replies for any successful sends in order of
2409          * increasing offset. Once an error is hit or we get a fatal signal
2410          * while waiting, then return without waiting for any more replies.
2411          */
2412 restart_loop:
2413         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2414                 if (!rc) {
2415                         /* FIXME: freezable too? */
2416                         rc = wait_for_completion_killable(&wdata->done);
2417                         if (rc)
2418                                 rc = -EINTR;
2419                         else if (wdata->result)
2420                                 rc = wdata->result;
2421                         else
2422                                 total_written += wdata->bytes;
2423
2424                         /* resend call if it's a retryable error */
2425                         if (rc == -EAGAIN) {
2426                                 rc = cifs_uncached_retry_writev(wdata);
2427                                 goto restart_loop;
2428                         }
2429                 }
2430                 list_del_init(&wdata->list);
2431                 kref_put(&wdata->refcount, cifs_writedata_release);
2432         }
2433
2434         if (total_written > 0)
2435                 *poffset += total_written;
2436
2437         cifs_stats_bytes_written(tcon, total_written);
2438         return total_written ? total_written : (ssize_t)rc;
2439 }
2440
2441 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2442                                 unsigned long nr_segs, loff_t pos)
2443 {
2444         ssize_t written;
2445         struct inode *inode;
2446
2447         inode = iocb->ki_filp->f_path.dentry->d_inode;
2448
2449         /*
2450          * BB - optimize the way when signing is disabled. We can drop this
2451          * extra memory-to-memory copying and use iovec buffers for constructing
2452          * write request.
2453          */
2454
2455         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2456         if (written > 0) {
2457                 CIFS_I(inode)->invalid_mapping = true;
2458                 iocb->ki_pos = pos;
2459         }
2460
2461         return written;
2462 }
2463
2464 static ssize_t
2465 cifs_pagecache_writev(struct kiocb *iocb, const struct iovec *iov,
2466                       unsigned long nr_segs, loff_t pos, bool cache_ex)
2467 {
2468         struct file *file = iocb->ki_filp;
2469         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2470         struct inode *inode = file->f_mapping->host;
2471         struct cifsInodeInfo *cinode = CIFS_I(inode);
2472         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2473         ssize_t rc = -EACCES;
2474
2475         BUG_ON(iocb->ki_pos != pos);
2476
2477         sb_start_write(inode->i_sb);
2478
2479         /*
2480          * We need to hold the sem to be sure nobody modifies lock list
2481          * with a brlock that prevents writing.
2482          */
2483         down_read(&cinode->lock_sem);
2484         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2485                                      server->vals->exclusive_lock_type, NULL,
2486                                      CIFS_WRITE_OP)) {
2487                 mutex_lock(&inode->i_mutex);
2488                 if (!cache_ex)
2489                         cinode->leave_pages_clean = true;
2490                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2491                                               &iocb->ki_pos);
2492                 if (!cache_ex)
2493                         cinode->leave_pages_clean = false;
2494                 mutex_unlock(&inode->i_mutex);
2495         }
2496
2497         if (rc > 0 || rc == -EIOCBQUEUED) {
2498                 ssize_t err;
2499
2500                 err = generic_write_sync(file, pos, rc);
2501                 if (err < 0 && rc > 0)
2502                         rc = err;
2503         }
2504
2505         up_read(&cinode->lock_sem);
2506         sb_end_write(inode->i_sb);
2507         return rc;
2508 }
2509
2510 ssize_t
2511 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2512                    unsigned long nr_segs, loff_t pos)
2513 {
2514         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2515         struct cifsInodeInfo *cinode = CIFS_I(inode);
2516         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2517         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2518                                                 iocb->ki_filp->private_data;
2519         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2520         ssize_t written, written2;
2521         /*
2522          * We need to store clientCanCacheAll here to prevent race
2523          * conditions - this value can be changed during an execution
2524          * of generic_file_aio_write. For CIFS it can be changed from
2525          * true to false only, but for SMB2 it can be changed both from
2526          * true to false and vice versa. So, we can end up with a data
2527          * stored in the cache, not marked dirty and not sent to the
2528          * server if this value changes its state from false to true
2529          * after cifs_write_end.
2530          */
2531         bool cache_ex = cinode->clientCanCacheAll;
2532         bool cache_read = cinode->clientCanCacheRead;
2533         int rc;
2534         loff_t saved_pos;
2535
2536         if (cache_ex) {
2537                 if (cap_unix(tcon->ses) &&
2538                     ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) &&
2539                     (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(
2540                                                 tcon->fsUnixInfo.Capability)))
2541                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2542                 return cifs_pagecache_writev(iocb, iov, nr_segs, pos, cache_ex);
2543         }
2544
2545         /*
2546          * For files without exclusive oplock in strict cache mode we need to
2547          * write the data to the server exactly from the pos to pos+len-1 rather
2548          * than flush all affected pages because it may cause a error with
2549          * mandatory locks on these pages but not on the region from pos to
2550          * ppos+len-1.
2551          */
2552         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2553         if (!cache_read || written <= 0)
2554                 return written;
2555
2556         saved_pos = iocb->ki_pos;
2557         iocb->ki_pos = pos;
2558         /* we have a read oplock - need to store a data in the page cache */
2559         if (cap_unix(tcon->ses) &&
2560             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) &&
2561             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(
2562                                         tcon->fsUnixInfo.Capability)))
2563                 written2 = generic_file_aio_write(iocb, iov, nr_segs, pos);
2564         else
2565                 written2 = cifs_pagecache_writev(iocb, iov, nr_segs, pos,
2566                                                  cache_ex);
2567         /* errors occured during writing - invalidate the page cache */
2568         if (written2 < 0) {
2569                 rc = cifs_invalidate_mapping(inode);
2570                 if (rc)
2571                         written = (ssize_t)rc;
2572                 else
2573                         iocb->ki_pos = saved_pos;
2574         }
2575         return written;
2576 }
2577
2578 static struct cifs_readdata *
2579 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2580 {
2581         struct cifs_readdata *rdata;
2582
2583         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2584                         GFP_KERNEL);
2585         if (rdata != NULL) {
2586                 kref_init(&rdata->refcount);
2587                 INIT_LIST_HEAD(&rdata->list);
2588                 init_completion(&rdata->done);
2589                 INIT_WORK(&rdata->work, complete);
2590         }
2591
2592         return rdata;
2593 }
2594
2595 void
2596 cifs_readdata_release(struct kref *refcount)
2597 {
2598         struct cifs_readdata *rdata = container_of(refcount,
2599                                         struct cifs_readdata, refcount);
2600
2601         if (rdata->cfile)
2602                 cifsFileInfo_put(rdata->cfile);
2603
2604         kfree(rdata);
2605 }
2606
2607 static int
2608 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2609 {
2610         int rc = 0;
2611         struct page *page;
2612         unsigned int i;
2613
2614         for (i = 0; i < nr_pages; i++) {
2615                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2616                 if (!page) {
2617                         rc = -ENOMEM;
2618                         break;
2619                 }
2620                 rdata->pages[i] = page;
2621         }
2622
2623         if (rc) {
2624                 for (i = 0; i < nr_pages; i++) {
2625                         put_page(rdata->pages[i]);
2626                         rdata->pages[i] = NULL;
2627                 }
2628         }
2629         return rc;
2630 }
2631
2632 static void
2633 cifs_uncached_readdata_release(struct kref *refcount)
2634 {
2635         struct cifs_readdata *rdata = container_of(refcount,
2636                                         struct cifs_readdata, refcount);
2637         unsigned int i;
2638
2639         for (i = 0; i < rdata->nr_pages; i++) {
2640                 put_page(rdata->pages[i]);
2641                 rdata->pages[i] = NULL;
2642         }
2643         cifs_readdata_release(refcount);
2644 }
2645
2646 static int
2647 cifs_retry_async_readv(struct cifs_readdata *rdata)
2648 {
2649         int rc;
2650         struct TCP_Server_Info *server;
2651
2652         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2653
2654         do {
2655                 if (rdata->cfile->invalidHandle) {
2656                         rc = cifs_reopen_file(rdata->cfile, true);
2657                         if (rc != 0)
2658                                 continue;
2659                 }
2660                 rc = server->ops->async_readv(rdata);
2661         } while (rc == -EAGAIN);
2662
2663         return rc;
2664 }
2665
2666 /**
2667  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2668  * @rdata:      the readdata response with list of pages holding data
2669  * @iov:        vector in which we should copy the data
2670  * @nr_segs:    number of segments in vector
2671  * @offset:     offset into file of the first iovec
2672  * @copied:     used to return the amount of data copied to the iov
2673  *
2674  * This function copies data from a list of pages in a readdata response into
2675  * an array of iovecs. It will first calculate where the data should go
2676  * based on the info in the readdata and then copy the data into that spot.
2677  */
2678 static ssize_t
2679 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2680                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2681 {
2682         int rc = 0;
2683         struct iov_iter ii;
2684         size_t pos = rdata->offset - offset;
2685         ssize_t remaining = rdata->bytes;
2686         unsigned char *pdata;
2687         unsigned int i;
2688
2689         /* set up iov_iter and advance to the correct offset */
2690         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2691         iov_iter_advance(&ii, pos);
2692
2693         *copied = 0;
2694         for (i = 0; i < rdata->nr_pages; i++) {
2695                 ssize_t copy;
2696                 struct page *page = rdata->pages[i];
2697
2698                 /* copy a whole page or whatever's left */
2699                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2700
2701                 /* ...but limit it to whatever space is left in the iov */
2702                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2703
2704                 /* go while there's data to be copied and no errors */
2705                 if (copy && !rc) {
2706                         pdata = kmap(page);
2707                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2708                                                 (int)copy);
2709                         kunmap(page);
2710                         if (!rc) {
2711                                 *copied += copy;
2712                                 remaining -= copy;
2713                                 iov_iter_advance(&ii, copy);
2714                         }
2715                 }
2716         }
2717
2718         return rc;
2719 }
2720
2721 static void
2722 cifs_uncached_readv_complete(struct work_struct *work)
2723 {
2724         struct cifs_readdata *rdata = container_of(work,
2725                                                 struct cifs_readdata, work);
2726
2727         complete(&rdata->done);
2728         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2729 }
2730
2731 static int
2732 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2733                         struct cifs_readdata *rdata, unsigned int len)
2734 {
2735         int total_read = 0, result = 0;
2736         unsigned int i;
2737         unsigned int nr_pages = rdata->nr_pages;
2738         struct kvec iov;
2739
2740         rdata->tailsz = PAGE_SIZE;
2741         for (i = 0; i < nr_pages; i++) {
2742                 struct page *page = rdata->pages[i];
2743
2744                 if (len >= PAGE_SIZE) {
2745                         /* enough data to fill the page */
2746                         iov.iov_base = kmap(page);
2747                         iov.iov_len = PAGE_SIZE;
2748                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2749                                 i, iov.iov_base, iov.iov_len);
2750                         len -= PAGE_SIZE;
2751                 } else if (len > 0) {
2752                         /* enough for partial page, fill and zero the rest */
2753                         iov.iov_base = kmap(page);
2754                         iov.iov_len = len;
2755                         cFYI(1, "%u: iov_base=%p iov_len=%zu",
2756                                 i, iov.iov_base, iov.iov_len);
2757                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2758                         rdata->tailsz = len;
2759                         len = 0;
2760                 } else {
2761                         /* no need to hold page hostage */
2762                         rdata->pages[i] = NULL;
2763                         rdata->nr_pages--;
2764                         put_page(page);
2765                         continue;
2766                 }
2767
2768                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2769                 kunmap(page);
2770                 if (result < 0)
2771                         break;
2772
2773                 total_read += result;
2774         }
2775
2776         return total_read > 0 ? total_read : result;
2777 }
2778
2779 static ssize_t
2780 cifs_iovec_read(struct file *file, const struct iovec *iov,
2781                  unsigned long nr_segs, loff_t *poffset)
2782 {
2783         ssize_t rc;
2784         size_t len, cur_len;
2785         ssize_t total_read = 0;
2786         loff_t offset = *poffset;
2787         unsigned int npages;
2788         struct cifs_sb_info *cifs_sb;
2789         struct cifs_tcon *tcon;
2790         struct cifsFileInfo *open_file;
2791         struct cifs_readdata *rdata, *tmp;
2792         struct list_head rdata_list;
2793         pid_t pid;
2794
2795         if (!nr_segs)
2796                 return 0;
2797
2798         len = iov_length(iov, nr_segs);
2799         if (!len)
2800                 return 0;
2801
2802         INIT_LIST_HEAD(&rdata_list);
2803         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2804         open_file = file->private_data;
2805         tcon = tlink_tcon(open_file->tlink);
2806
2807         if (!tcon->ses->server->ops->async_readv)
2808                 return -ENOSYS;
2809
2810         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2811                 pid = open_file->pid;
2812         else
2813                 pid = current->tgid;
2814
2815         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2816                 cFYI(1, "attempting read on write only file instance");
2817
2818         do {
2819                 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
2820                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2821
2822                 /* allocate a readdata struct */
2823                 rdata = cifs_readdata_alloc(npages,
2824                                             cifs_uncached_readv_complete);
2825                 if (!rdata) {
2826                         rc = -ENOMEM;
2827                         goto error;
2828                 }
2829
2830                 rc = cifs_read_allocate_pages(rdata, npages);
2831                 if (rc)
2832                         goto error;
2833
2834                 rdata->cfile = cifsFileInfo_get(open_file);
2835                 rdata->nr_pages = npages;
2836                 rdata->offset = offset;
2837                 rdata->bytes = cur_len;
2838                 rdata->pid = pid;
2839                 rdata->pagesz = PAGE_SIZE;
2840                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2841
2842                 rc = cifs_retry_async_readv(rdata);
2843 error:
2844                 if (rc) {
2845                         kref_put(&rdata->refcount,
2846                                  cifs_uncached_readdata_release);
2847                         break;
2848                 }
2849
2850                 list_add_tail(&rdata->list, &rdata_list);
2851                 offset += cur_len;
2852                 len -= cur_len;
2853         } while (len > 0);
2854
2855         /* if at least one read request send succeeded, then reset rc */
2856         if (!list_empty(&rdata_list))
2857                 rc = 0;
2858
2859         /* the loop below should proceed in the order of increasing offsets */
2860 restart_loop:
2861         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2862                 if (!rc) {
2863                         ssize_t copied;
2864
2865                         /* FIXME: freezable sleep too? */
2866                         rc = wait_for_completion_killable(&rdata->done);
2867                         if (rc)
2868                                 rc = -EINTR;
2869                         else if (rdata->result)
2870                                 rc = rdata->result;
2871                         else {
2872                                 rc = cifs_readdata_to_iov(rdata, iov,
2873                                                         nr_segs, *poffset,
2874                                                         &copied);
2875                                 total_read += copied;
2876                         }
2877
2878                         /* resend call if it's a retryable error */
2879                         if (rc == -EAGAIN) {
2880                                 rc = cifs_retry_async_readv(rdata);
2881                                 goto restart_loop;
2882                         }
2883                 }
2884                 list_del_init(&rdata->list);
2885                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2886         }
2887
2888         cifs_stats_bytes_read(tcon, total_read);
2889         *poffset += total_read;
2890
2891         /* mask nodata case */
2892         if (rc == -ENODATA)
2893                 rc = 0;
2894
2895         return total_read ? total_read : rc;
2896 }
2897
2898 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2899                                unsigned long nr_segs, loff_t pos)
2900 {
2901         ssize_t read;
2902
2903         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2904         if (read > 0)
2905                 iocb->ki_pos = pos;
2906
2907         return read;
2908 }
2909
2910 ssize_t
2911 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2912                   unsigned long nr_segs, loff_t pos)
2913 {
2914         struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
2915         struct cifsInodeInfo *cinode = CIFS_I(inode);
2916         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2917         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2918                                                 iocb->ki_filp->private_data;
2919         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2920         int rc = -EACCES;
2921
2922         /*
2923          * In strict cache mode we need to read from the server all the time
2924          * if we don't have level II oplock because the server can delay mtime
2925          * change - so we can't make a decision about inode invalidating.
2926          * And we can also fail with pagereading if there are mandatory locks
2927          * on pages affected by this read but not on the region from pos to
2928          * pos+len-1.
2929          */
2930         if (!cinode->clientCanCacheRead)
2931                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2932
2933         if (cap_unix(tcon->ses) &&
2934             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2935             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2936                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2937
2938         /*
2939          * We need to hold the sem to be sure nobody modifies lock list
2940          * with a brlock that prevents reading.
2941          */
2942         down_read(&cinode->lock_sem);
2943         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2944                                      tcon->ses->server->vals->shared_lock_type,
2945                                      NULL, CIFS_READ_OP))
2946                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2947         up_read(&cinode->lock_sem);
2948         return rc;
2949 }
2950
2951 static ssize_t
2952 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2953 {
2954         int rc = -EACCES;
2955         unsigned int bytes_read = 0;
2956         unsigned int total_read;
2957         unsigned int current_read_size;
2958         unsigned int rsize;
2959         struct cifs_sb_info *cifs_sb;
2960         struct cifs_tcon *tcon;
2961         struct TCP_Server_Info *server;
2962         unsigned int xid;
2963         char *cur_offset;
2964         struct cifsFileInfo *open_file;
2965         struct cifs_io_parms io_parms;
2966         int buf_type = CIFS_NO_BUFFER;
2967         __u32 pid;
2968
2969         xid = get_xid();
2970         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2971
2972         /* FIXME: set up handlers for larger reads and/or convert to async */
2973         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2974
2975         if (file->private_data == NULL) {
2976                 rc = -EBADF;
2977                 free_xid(xid);
2978                 return rc;
2979         }
2980         open_file = file->private_data;
2981         tcon = tlink_tcon(open_file->tlink);
2982         server = tcon->ses->server;
2983
2984         if (!server->ops->sync_read) {
2985                 free_xid(xid);
2986                 return -ENOSYS;
2987         }
2988
2989         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2990                 pid = open_file->pid;
2991         else
2992                 pid = current->tgid;
2993
2994         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2995                 cFYI(1, "attempting read on write only file instance");
2996
2997         for (total_read = 0, cur_offset = read_data; read_size > total_read;
2998              total_read += bytes_read, cur_offset += bytes_read) {
2999                 current_read_size = min_t(uint, read_size - total_read, rsize);
3000                 /*
3001                  * For windows me and 9x we do not want to request more than it
3002                  * negotiated since it will refuse the read then.
3003                  */
3004                 if ((tcon->ses) && !(tcon->ses->capabilities &
3005                                 tcon->ses->server->vals->cap_large_files)) {
3006                         current_read_size = min_t(uint, current_read_size,
3007                                         CIFSMaxBufSize);
3008                 }
3009                 rc = -EAGAIN;
3010                 while (rc == -EAGAIN) {
3011                         if (open_file->invalidHandle) {
3012                                 rc = cifs_reopen_file(open_file, true);
3013                                 if (rc != 0)
3014                                         break;
3015                         }
3016                         io_parms.pid = pid;
3017                         io_parms.tcon = tcon;
3018                         io_parms.offset = *offset;
3019                         io_parms.length = current_read_size;
3020                         rc = server->ops->sync_read(xid, open_file, &io_parms,
3021                                                     &bytes_read, &cur_offset,
3022                                                     &buf_type);
3023                 }
3024                 if (rc || (bytes_read == 0)) {
3025                         if (total_read) {
3026                                 break;
3027                         } else {
3028                                 free_xid(xid);
3029                                 return rc;
3030                         }
3031                 } else {
3032                         cifs_stats_bytes_read(tcon, total_read);
3033                         *offset += bytes_read;
3034                 }
3035         }
3036         free_xid(xid);
3037         return total_read;
3038 }
3039
3040 /*
3041  * If the page is mmap'ed into a process' page tables, then we need to make
3042  * sure that it doesn't change while being written back.
3043  */
3044 static int
3045 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3046 {
3047         struct page *page = vmf->page;
3048
3049         lock_page(page);
3050         return VM_FAULT_LOCKED;
3051 }
3052
/*
 * VM operations installed on a vma by cifs_file_mmap() and
 * cifs_file_strict_mmap() after generic_file_mmap() succeeds: the generic
 * filemap fault and remap handlers, plus cifs_page_mkwrite() for write
 * faults.
 */
static struct vm_operations_struct cifs_file_vm_ops = {
        .fault = filemap_fault,
        .page_mkwrite = cifs_page_mkwrite,
        .remap_pages = generic_file_remap_pages,
};
3058
3059 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3060 {
3061         int rc, xid;
3062         struct inode *inode = file->f_path.dentry->d_inode;
3063
3064         xid = get_xid();
3065
3066         if (!CIFS_I(inode)->clientCanCacheRead) {
3067                 rc = cifs_invalidate_mapping(inode);
3068                 if (rc)
3069                         return rc;
3070         }
3071
3072         rc = generic_file_mmap(file, vma);
3073         if (rc == 0)
3074                 vma->vm_ops = &cifs_file_vm_ops;
3075         free_xid(xid);
3076         return rc;
3077 }
3078
3079 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3080 {
3081         int rc, xid;
3082
3083         xid = get_xid();
3084         rc = cifs_revalidate_file(file);
3085         if (rc) {
3086                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
3087                 free_xid(xid);
3088                 return rc;
3089         }
3090         rc = generic_file_mmap(file, vma);
3091         if (rc == 0)
3092                 vma->vm_ops = &cifs_file_vm_ops;
3093         free_xid(xid);
3094         return rc;
3095 }
3096
3097 static void
3098 cifs_readv_complete(struct work_struct *work)
3099 {
3100         unsigned int i;
3101         struct cifs_readdata *rdata = container_of(work,
3102                                                 struct cifs_readdata, work);
3103
3104         for (i = 0; i < rdata->nr_pages; i++) {
3105                 struct page *page = rdata->pages[i];
3106
3107                 lru_cache_add_file(page);
3108
3109                 if (rdata->result == 0) {
3110                         flush_dcache_page(page);
3111                         SetPageUptodate(page);
3112                 }
3113
3114                 unlock_page(page);
3115
3116                 if (rdata->result == 0)
3117                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3118
3119                 page_cache_release(page);
3120                 rdata->pages[i] = NULL;
3121         }
3122         kref_put(&rdata->refcount, cifs_readdata_release);
3123 }
3124
3125 static int
3126 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3127                         struct cifs_readdata *rdata, unsigned int len)
3128 {
3129         int total_read = 0, result = 0;
3130         unsigned int i;
3131         u64 eof;
3132         pgoff_t eof_index;
3133         unsigned int nr_pages = rdata->nr_pages;
3134         struct kvec iov;
3135
3136         /* determine the eof that the server (probably) has */
3137         eof = CIFS_I(rdata->mapping->host)->server_eof;
3138         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3139         cFYI(1, "eof=%llu eof_index=%lu", eof, eof_index);
3140
3141         rdata->tailsz = PAGE_CACHE_SIZE;
3142         for (i = 0; i < nr_pages; i++) {
3143                 struct page *page = rdata->pages[i];
3144
3145                 if (len >= PAGE_CACHE_SIZE) {
3146                         /* enough data to fill the page */
3147                         iov.iov_base = kmap(page);
3148                         iov.iov_len = PAGE_CACHE_SIZE;
3149                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3150                                 i, page->index, iov.iov_base, iov.iov_len);
3151                         len -= PAGE_CACHE_SIZE;
3152                 } else if (len > 0) {
3153                         /* enough for partial page, fill and zero the rest */
3154                         iov.iov_base = kmap(page);
3155                         iov.iov_len = len;
3156                         cFYI(1, "%u: idx=%lu iov_base=%p iov_len=%zu",
3157                                 i, page->index, iov.iov_base, iov.iov_len);
3158                         memset(iov.iov_base + len,
3159                                 '\0', PAGE_CACHE_SIZE - len);
3160                         rdata->tailsz = len;
3161                         len = 0;
3162                 } else if (page->index > eof_index) {
3163                         /*
3164                          * The VFS will not try to do readahead past the
3165                          * i_size, but it's possible that we have outstanding
3166                          * writes with gaps in the middle and the i_size hasn't
3167                          * caught up yet. Populate those with zeroed out pages
3168                          * to prevent the VFS from repeatedly attempting to
3169                          * fill them until the writes are flushed.
3170                          */
3171                         zero_user(page, 0, PAGE_CACHE_SIZE);
3172                         lru_cache_add_file(page);
3173                         flush_dcache_page(page);
3174                         SetPageUptodate(page);
3175                         unlock_page(page);
3176                         page_cache_release(page);
3177                         rdata->pages[i] = NULL;
3178                         rdata->nr_pages--;
3179                         continue;
3180                 } else {
3181                         /* no need to hold page hostage */
3182                         lru_cache_add_file(page);
3183                         unlock_page(page);
3184                         page_cache_release(page);
3185                         rdata->pages[i] = NULL;
3186                         rdata->nr_pages--;
3187                         continue;
3188                 }
3189
3190                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3191                 kunmap(page);
3192                 if (result < 0)
3193                         break;
3194
3195                 total_read += result;
3196         }
3197
3198         return total_read > 0 ? total_read : result;
3199 }
3200
3201 static int cifs_readpages(struct file *file, struct address_space *mapping,
3202         struct list_head *page_list, unsigned num_pages)
3203 {
3204         int rc;
3205         struct list_head tmplist;
3206         struct cifsFileInfo *open_file = file->private_data;
3207         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3208         unsigned int rsize = cifs_sb->rsize;
3209         pid_t pid;
3210
3211         /*
3212          * Give up immediately if rsize is too small to read an entire page.
3213          * The VFS will fall back to readpage. We should never reach this
3214          * point however since we set ra_pages to 0 when the rsize is smaller
3215          * than a cache page.
3216          */
3217         if (unlikely(rsize < PAGE_CACHE_SIZE))
3218                 return 0;
3219
3220         /*
3221          * Reads as many pages as possible from fscache. Returns -ENOBUFS
3222          * immediately if the cookie is negative
3223          */
3224         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3225                                          &num_pages);
3226         if (rc == 0)
3227                 return rc;
3228
3229         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3230                 pid = open_file->pid;
3231         else
3232                 pid = current->tgid;
3233
3234         rc = 0;
3235         INIT_LIST_HEAD(&tmplist);
3236
3237         cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
3238                 mapping, num_pages);
3239
3240         /*
3241          * Start with the page at end of list and move it to private
3242          * list. Do the same with any following pages until we hit
3243          * the rsize limit, hit an index discontinuity, or run out of
3244          * pages. Issue the async read and then start the loop again
3245          * until the list is empty.
3246          *
3247          * Note that list order is important. The page_list is in
3248          * the order of declining indexes. When we put the pages in
3249          * the rdata->pages, then we want them in increasing order.
3250          */
3251         while (!list_empty(page_list)) {
3252                 unsigned int i;
3253                 unsigned int bytes = PAGE_CACHE_SIZE;
3254                 unsigned int expected_index;
3255                 unsigned int nr_pages = 1;
3256                 loff_t offset;
3257                 struct page *page, *tpage;
3258                 struct cifs_readdata *rdata;
3259
3260                 page = list_entry(page_list->prev, struct page, lru);
3261
3262                 /*
3263                  * Lock the page and put it in the cache. Since no one else
3264                  * should have access to this page, we're safe to simply set
3265                  * PG_locked without checking it first.
3266                  */
3267                 __set_page_locked(page);
3268                 rc = add_to_page_cache_locked(page, mapping,
3269                                               page->index, GFP_KERNEL);
3270
3271                 /* give up if we can't stick it in the cache */
3272                 if (rc) {
3273                         __clear_page_locked(page);
3274                         break;
3275                 }
3276
3277                 /* move first page to the tmplist */
3278                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3279                 list_move_tail(&page->lru, &tmplist);
3280
3281                 /* now try and add more pages onto the request */
3282                 expected_index = page->index + 1;
3283                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
3284                         /* discontinuity ? */
3285                         if (page->index != expected_index)
3286                                 break;
3287
3288                         /* would this page push the read over the rsize? */
3289                         if (bytes + PAGE_CACHE_SIZE > rsize)
3290                                 break;
3291
3292                         __set_page_locked(page);
3293                         if (add_to_page_cache_locked(page, mapping,
3294                                                 page->index, GFP_KERNEL)) {
3295                                 __clear_page_locked(page);
3296                                 break;
3297                         }
3298                         list_move_tail(&page->lru, &tmplist);
3299                         bytes += PAGE_CACHE_SIZE;
3300                         expected_index++;
3301                         nr_pages++;
3302                 }
3303
3304                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3305                 if (!rdata) {
3306                         /* best to give up if we're out of mem */
3307                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3308                                 list_del(&page->lru);
3309                                 lru_cache_add_file(page);
3310                                 unlock_page(page);
3311                                 page_cache_release(page);
3312                         }
3313                         rc = -ENOMEM;
3314                         break;
3315                 }
3316
3317                 rdata->cfile = cifsFileInfo_get(open_file);
3318                 rdata->mapping = mapping;
3319                 rdata->offset = offset;
3320                 rdata->bytes = bytes;
3321                 rdata->pid = pid;
3322                 rdata->pagesz = PAGE_CACHE_SIZE;
3323                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3324
3325                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3326                         list_del(&page->lru);
3327                         rdata->pages[rdata->nr_pages++] = page;
3328                 }
3329
3330                 rc = cifs_retry_async_readv(rdata);
3331                 if (rc != 0) {
3332                         for (i = 0; i < rdata->nr_pages; i++) {
3333                                 page = rdata->pages[i];
3334                                 lru_cache_add_file(page);
3335                                 unlock_page(page);
3336                                 page_cache_release(page);
3337                         }
3338                         kref_put(&rdata->refcount, cifs_readdata_release);
3339                         break;
3340                 }
3341
3342                 kref_put(&rdata->refcount, cifs_readdata_release);
3343         }
3344
3345         return rc;
3346 }
3347
3348 static int cifs_readpage_worker(struct file *file, struct page *page,
3349         loff_t *poffset)
3350 {
3351         char *read_data;
3352         int rc;
3353
3354         /* Is the page cached? */
3355         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
3356         if (rc == 0)
3357                 goto read_complete;
3358
3359         page_cache_get(page);
3360         read_data = kmap(page);
3361         /* for reads over a certain size could initiate async read ahead */
3362
3363         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3364
3365         if (rc < 0)
3366                 goto io_error;
3367         else
3368                 cFYI(1, "Bytes read %d", rc);
3369
3370         file->f_path.dentry->d_inode->i_atime =
3371                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
3372
3373         if (PAGE_CACHE_SIZE > rc)
3374                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3375
3376         flush_dcache_page(page);
3377         SetPageUptodate(page);
3378
3379         /* send this page to the cache */
3380         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
3381
3382         rc = 0;
3383
3384 io_error:
3385         kunmap(page);
3386         page_cache_release(page);
3387
3388 read_complete:
3389         return rc;
3390 }
3391
3392 static int cifs_readpage(struct file *file, struct page *page)
3393 {
3394         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3395         int rc = -EACCES;
3396         unsigned int xid;
3397
3398         xid = get_xid();
3399
3400         if (file->private_data == NULL) {
3401                 rc = -EBADF;
3402                 free_xid(xid);
3403                 return rc;
3404         }
3405
3406         cFYI(1, "readpage %p at offset %d 0x%x",
3407                  page, (int)offset, (int)offset);
3408
3409         rc = cifs_readpage_worker(file, page, &offset);
3410
3411         unlock_page(page);
3412
3413         free_xid(xid);
3414         return rc;
3415 }
3416
3417 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3418 {
3419         struct cifsFileInfo *open_file;
3420
3421         spin_lock(&cifs_file_list_lock);
3422         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3423                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3424                         spin_unlock(&cifs_file_list_lock);
3425                         return 1;
3426                 }
3427         }
3428         spin_unlock(&cifs_file_list_lock);
3429         return 0;
3430 }
3431
3432 /* We do not want to update the file size from server for inodes
3433    open for write - to avoid races with writepage extending
3434    the file - in the future we could consider allowing
3435    refreshing the inode only on increases in the file size
3436    but this is tricky to do without racing with writebehind
3437    page caching in the current Linux kernel design */
3438 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3439 {
3440         if (!cifsInode)
3441                 return true;
3442
3443         if (is_inode_writable(cifsInode)) {
3444                 /* This inode is open for write at least once */
3445                 struct cifs_sb_info *cifs_sb;
3446
3447                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
3448                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
3449                         /* since no page cache to corrupt on directio
3450                         we can change size safely */
3451                         return true;
3452                 }
3453
3454                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3455                         return true;
3456
3457                 return false;
3458         } else
3459                 return true;
3460 }
3461
3462 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3463                         loff_t pos, unsigned len, unsigned flags,
3464                         struct page **pagep, void **fsdata)
3465 {
3466         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3467         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3468         loff_t page_start = pos & PAGE_MASK;
3469         loff_t i_size;
3470         struct page *page;
3471         int rc = 0;
3472
3473         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
3474
3475         page = grab_cache_page_write_begin(mapping, index, flags);
3476         if (!page) {
3477                 rc = -ENOMEM;
3478                 goto out;
3479         }
3480
3481         if (PageUptodate(page))
3482                 goto out;
3483
3484         /*
3485          * If we write a full page it will be up to date, no need to read from
3486          * the server. If the write is short, we'll end up doing a sync write
3487          * instead.
3488          */
3489         if (len == PAGE_CACHE_SIZE)
3490                 goto out;
3491
3492         /*
3493          * optimize away the read when we have an oplock, and we're not
3494          * expecting to use any of the data we'd be reading in. That
3495          * is, when the page lies beyond the EOF, or straddles the EOF
3496          * and the write will cover all of the existing data.
3497          */
3498         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3499                 i_size = i_size_read(mapping->host);
3500                 if (page_start >= i_size ||
3501                     (offset == 0 && (pos + len) >= i_size)) {
3502                         zero_user_segments(page, 0, offset,
3503                                            offset + len,
3504                                            PAGE_CACHE_SIZE);
3505                         /*
3506                          * PageChecked means that the parts of the page
3507                          * to which we're not writing are considered up
3508                          * to date. Once the data is copied to the
3509                          * page, it can be set uptodate.
3510                          */
3511                         SetPageChecked(page);
3512                         goto out;
3513                 }
3514         }
3515
3516         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3517                 /*
3518                  * might as well read a page, it is fast enough. If we get
3519                  * an error, we don't need to return it. cifs_write_end will
3520                  * do a sync write instead since PG_uptodate isn't set.
3521                  */
3522                 cifs_readpage_worker(file, page, &page_start);
3523         } else {
3524                 /* we could try using another file handle if there is one -
3525                    but how would we lock it to prevent close of that handle
3526                    racing with this read? In any case
3527                    this will be written out by write_end so is fine */
3528         }
3529 out:
3530         *pagep = page;
3531         return rc;
3532 }
3533
3534 static int cifs_release_page(struct page *page, gfp_t gfp)
3535 {
3536         if (PagePrivate(page))
3537                 return 0;
3538
3539         return cifs_fscache_release_page(page, gfp);
3540 }
3541
3542 static void cifs_invalidate_page(struct page *page, unsigned long offset)
3543 {
3544         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3545
3546         if (offset == 0)
3547                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3548 }
3549
3550 static int cifs_launder_page(struct page *page)
3551 {
3552         int rc = 0;
3553         loff_t range_start = page_offset(page);
3554         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3555         struct writeback_control wbc = {
3556                 .sync_mode = WB_SYNC_ALL,
3557                 .nr_to_write = 0,
3558                 .range_start = range_start,
3559                 .range_end = range_end,
3560         };
3561
3562         cFYI(1, "Launder page: %p", page);
3563
3564         if (clear_page_dirty_for_io(page))
3565                 rc = cifs_writepage_locked(page, &wbc);
3566
3567         cifs_fscache_invalidate_page(page, page->mapping->host);
3568         return rc;
3569 }
3570
3571 void cifs_oplock_break(struct work_struct *work)
3572 {
3573         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3574                                                   oplock_break);
3575         struct inode *inode = cfile->dentry->d_inode;
3576         struct cifsInodeInfo *cinode = CIFS_I(inode);
3577         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3578         int rc = 0;
3579
3580         if (inode && S_ISREG(inode->i_mode)) {
3581                 if (cinode->clientCanCacheRead)
3582                         break_lease(inode, O_RDONLY);
3583                 else
3584                         break_lease(inode, O_WRONLY);
3585                 rc = filemap_fdatawrite(inode->i_mapping);
3586                 if (cinode->clientCanCacheRead == 0) {
3587                         rc = filemap_fdatawait(inode->i_mapping);
3588                         mapping_set_error(inode->i_mapping, rc);
3589                         cifs_invalidate_mapping(inode);
3590                 }
3591                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
3592         }
3593
3594         rc = cifs_push_locks(cfile);
3595         if (rc)
3596                 cERROR(1, "Push locks rc = %d", rc);
3597
3598         /*
3599          * releasing stale oplock after recent reconnect of smb session using
3600          * a now incorrect file handle is not a data integrity issue but do
3601          * not bother sending an oplock release if session to server still is
3602          * disconnected since oplock already released by the server
3603          */
3604         if (!cfile->oplock_break_cancelled) {
3605                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3606                                                              cinode);
3607                 cFYI(1, "Oplock release rc = %d", rc);
3608         }
3609 }
3610
3611 const struct address_space_operations cifs_addr_ops = {
3612         .readpage = cifs_readpage,
3613         .readpages = cifs_readpages,
3614         .writepage = cifs_writepage,
3615         .writepages = cifs_writepages,
3616         .write_begin = cifs_write_begin,
3617         .write_end = cifs_write_end,
3618         .set_page_dirty = __set_page_dirty_nobuffers,
3619         .releasepage = cifs_release_page,
3620         .invalidatepage = cifs_invalidate_page,
3621         .launder_page = cifs_launder_page,
3622 };
3623
3624 /*
3625  * cifs_readpages requires the server to support a buffer large enough to
3626  * contain the header plus one complete page of data.  Otherwise, we need
3627  * to leave cifs_readpages out of the address space operations.
3628  */
3629 const struct address_space_operations cifs_addr_ops_smallbuf = {
3630         .readpage = cifs_readpage,
3631         .writepage = cifs_writepage,
3632         .writepages = cifs_writepages,
3633         .write_begin = cifs_write_begin,
3634         .write_end = cifs_write_end,
3635         .set_page_dirty = __set_page_dirty_nobuffers,
3636         .releasepage = cifs_release_page,
3637         .invalidatepage = cifs_invalidate_page,
3638         .launder_page = cifs_launder_page,
3639 };