xfs: limit xfs_imap_to_bmap to a single mapping
[linux-2.6.git] / fs / xfs / xfs_iomap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_attr.h"
46 #include "xfs_buf_item.h"
47 #include "xfs_trans_space.h"
48 #include "xfs_utils.h"
49 #include "xfs_iomap.h"
50 #include "xfs_trace.h"
51
52
/*
 * Round the byte offset 'off' down to a multiple of 2^m_writeio_log, where
 * m_writeio_log is read from the mount structure 'mp'.  Both macro arguments
 * are parenthesised so that arbitrary expressions (e.g. &mount) expand safely.
 * Note that 'mp' is evaluated twice.
 */
#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> (mp)->m_writeio_log) \
						<< (mp)->m_writeio_log)
#define XFS_STRAT_WRITE_IMAPS	2
/* Number of mapping records in the array used by xfs_iomap_write_delay(). */
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
57
58 STATIC void
59 xfs_imap_to_bmap(
60         xfs_inode_t     *ip,
61         xfs_off_t       offset,
62         xfs_bmbt_irec_t *imap,
63         xfs_iomap_t     *iomapp,
64         int             imaps,                  /* Number of imap entries */
65         int             flags)
66 {
67         xfs_mount_t     *mp = ip->i_mount;
68         xfs_fsblock_t   start_block;
69
70         iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
71         iomapp->iomap_delta = offset - iomapp->iomap_offset;
72         iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
73         iomapp->iomap_flags = flags;
74
75         if (XFS_IS_REALTIME_INODE(ip)) {
76                 iomapp->iomap_flags |= IOMAP_REALTIME;
77                 iomapp->iomap_target = mp->m_rtdev_targp;
78         } else {
79                 iomapp->iomap_target = mp->m_ddev_targp;
80         }
81         start_block = imap->br_startblock;
82         if (start_block == HOLESTARTBLOCK) {
83                 iomapp->iomap_bn = IOMAP_DADDR_NULL;
84                 iomapp->iomap_flags |= IOMAP_HOLE;
85         } else if (start_block == DELAYSTARTBLOCK) {
86                 iomapp->iomap_bn = IOMAP_DADDR_NULL;
87                 iomapp->iomap_flags |= IOMAP_DELAY;
88         } else {
89                 iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
90                 if (ISUNWRITTEN(imap))
91                         iomapp->iomap_flags |= IOMAP_UNWRITTEN;
92         }
93 }
94
95 int
96 xfs_iomap(
97         xfs_inode_t     *ip,
98         xfs_off_t       offset,
99         ssize_t         count,
100         int             flags,
101         xfs_iomap_t     *iomapp,
102         int             *niomaps)
103 {
104         xfs_mount_t     *mp = ip->i_mount;
105         xfs_fileoff_t   offset_fsb, end_fsb;
106         int             error = 0;
107         int             lockmode = 0;
108         xfs_bmbt_irec_t imap;
109         int             nimaps = 1;
110         int             bmapi_flags = 0;
111         int             iomap_flags = 0;
112
113         ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
114         ASSERT(niomaps && *niomaps == 1);
115
116         if (XFS_FORCED_SHUTDOWN(mp))
117                 return XFS_ERROR(EIO);
118
119         trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
120
121         switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
122         case BMAPI_READ:
123                 lockmode = xfs_ilock_map_shared(ip);
124                 bmapi_flags = XFS_BMAPI_ENTIRE;
125                 break;
126         case BMAPI_WRITE:
127                 lockmode = XFS_ILOCK_EXCL;
128                 if (flags & BMAPI_IGNSTATE)
129                         bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
130                 xfs_ilock(ip, lockmode);
131                 break;
132         case BMAPI_ALLOCATE:
133                 lockmode = XFS_ILOCK_SHARED;
134                 bmapi_flags = XFS_BMAPI_ENTIRE;
135
136                 /* Attempt non-blocking lock */
137                 if (flags & BMAPI_TRYLOCK) {
138                         if (!xfs_ilock_nowait(ip, lockmode))
139                                 return XFS_ERROR(EAGAIN);
140                 } else {
141                         xfs_ilock(ip, lockmode);
142                 }
143                 break;
144         default:
145                 BUG();
146         }
147
148         ASSERT(offset <= mp->m_maxioffset);
149         if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
150                 count = mp->m_maxioffset - offset;
151         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
152         offset_fsb = XFS_B_TO_FSBT(mp, offset);
153
154         error = xfs_bmapi(NULL, ip, offset_fsb,
155                         (xfs_filblks_t)(end_fsb - offset_fsb),
156                         bmapi_flags,  NULL, 0, &imap,
157                         &nimaps, NULL, NULL);
158
159         if (error)
160                 goto out;
161
162         switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
163         case BMAPI_WRITE:
164                 /* If we found an extent, return it */
165                 if (nimaps &&
166                     (imap.br_startblock != HOLESTARTBLOCK) &&
167                     (imap.br_startblock != DELAYSTARTBLOCK)) {
168                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
169                         break;
170                 }
171
172                 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
173                         error = xfs_iomap_write_direct(ip, offset, count, flags,
174                                                        &imap, &nimaps, nimaps);
175                 } else {
176                         error = xfs_iomap_write_delay(ip, offset, count, flags,
177                                                       &imap, &nimaps);
178                 }
179                 if (!error) {
180                         trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
181                 }
182                 iomap_flags = IOMAP_NEW;
183                 break;
184         case BMAPI_ALLOCATE:
185                 /* If we found an extent, return it */
186                 xfs_iunlock(ip, lockmode);
187                 lockmode = 0;
188
189                 if (nimaps && !isnullstartblock(imap.br_startblock)) {
190                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
191                         break;
192                 }
193
194                 error = xfs_iomap_write_allocate(ip, offset, count,
195                                                  &imap, &nimaps);
196                 break;
197         }
198
199         ASSERT(nimaps <= 1);
200
201         if (nimaps)
202                 xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, iomap_flags);
203         *niomaps = nimaps;
204
205 out:
206         if (lockmode)
207                 xfs_iunlock(ip, lockmode);
208         return XFS_ERROR(error);
209 }
210
211
212 STATIC int
213 xfs_iomap_eof_align_last_fsb(
214         xfs_mount_t     *mp,
215         xfs_inode_t     *ip,
216         xfs_extlen_t    extsize,
217         xfs_fileoff_t   *last_fsb)
218 {
219         xfs_fileoff_t   new_last_fsb = 0;
220         xfs_extlen_t    align;
221         int             eof, error;
222
223         if (XFS_IS_REALTIME_INODE(ip))
224                 ;
225         /*
226          * If mounted with the "-o swalloc" option, roundup the allocation
227          * request to a stripe width boundary if the file size is >=
228          * stripe width and we are allocating past the allocation eof.
229          */
230         else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
231                 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
232                 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
233         /*
234          * Roundup the allocation request to a stripe unit (m_dalign) boundary
235          * if the file size is >= stripe unit size, and we are allocating past
236          * the allocation eof.
237          */
238         else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
239                 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
240
241         /*
242          * Always round up the allocation request to an extent boundary
243          * (when file on a real-time subvolume or has di_extsize hint).
244          */
245         if (extsize) {
246                 if (new_last_fsb)
247                         align = roundup_64(new_last_fsb, extsize);
248                 else
249                         align = extsize;
250                 new_last_fsb = roundup_64(*last_fsb, align);
251         }
252
253         if (new_last_fsb) {
254                 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
255                 if (error)
256                         return error;
257                 if (eof)
258                         *last_fsb = new_last_fsb;
259         }
260         return 0;
261 }
262
263 STATIC int
264 xfs_cmn_err_fsblock_zero(
265         xfs_inode_t     *ip,
266         xfs_bmbt_irec_t *imap)
267 {
268         xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
269                         "Access to block zero in inode %llu "
270                         "start_block: %llx start_off: %llx "
271                         "blkcnt: %llx extent-state: %x\n",
272                 (unsigned long long)ip->i_ino,
273                 (unsigned long long)imap->br_startblock,
274                 (unsigned long long)imap->br_startoff,
275                 (unsigned long long)imap->br_blockcount,
276                 imap->br_state);
277         return EFSCORRUPTED;
278 }
279
280 int
281 xfs_iomap_write_direct(
282         xfs_inode_t     *ip,
283         xfs_off_t       offset,
284         size_t          count,
285         int             flags,
286         xfs_bmbt_irec_t *ret_imap,
287         int             *nmaps,
288         int             found)
289 {
290         xfs_mount_t     *mp = ip->i_mount;
291         xfs_fileoff_t   offset_fsb;
292         xfs_fileoff_t   last_fsb;
293         xfs_filblks_t   count_fsb, resaligned;
294         xfs_fsblock_t   firstfsb;
295         xfs_extlen_t    extsz, temp;
296         int             nimaps;
297         int             bmapi_flag;
298         int             quota_flag;
299         int             rt;
300         xfs_trans_t     *tp;
301         xfs_bmbt_irec_t imap;
302         xfs_bmap_free_t free_list;
303         uint            qblocks, resblks, resrtextents;
304         int             committed;
305         int             error;
306
307         /*
308          * Make sure that the dquots are there. This doesn't hold
309          * the ilock across a disk read.
310          */
311         error = xfs_qm_dqattach_locked(ip, 0);
312         if (error)
313                 return XFS_ERROR(error);
314
315         rt = XFS_IS_REALTIME_INODE(ip);
316         extsz = xfs_get_extsz_hint(ip);
317
318         offset_fsb = XFS_B_TO_FSBT(mp, offset);
319         last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
320         if ((offset + count) > ip->i_size) {
321                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
322                 if (error)
323                         goto error_out;
324         } else {
325                 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
326                         last_fsb = MIN(last_fsb, (xfs_fileoff_t)
327                                         ret_imap->br_blockcount +
328                                         ret_imap->br_startoff);
329         }
330         count_fsb = last_fsb - offset_fsb;
331         ASSERT(count_fsb > 0);
332
333         resaligned = count_fsb;
334         if (unlikely(extsz)) {
335                 if ((temp = do_mod(offset_fsb, extsz)))
336                         resaligned += temp;
337                 if ((temp = do_mod(resaligned, extsz)))
338                         resaligned += extsz - temp;
339         }
340
341         if (unlikely(rt)) {
342                 resrtextents = qblocks = resaligned;
343                 resrtextents /= mp->m_sb.sb_rextsize;
344                 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
345                 quota_flag = XFS_QMOPT_RES_RTBLKS;
346         } else {
347                 resrtextents = 0;
348                 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
349                 quota_flag = XFS_QMOPT_RES_REGBLKS;
350         }
351
352         /*
353          * Allocate and setup the transaction
354          */
355         xfs_iunlock(ip, XFS_ILOCK_EXCL);
356         tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
357         error = xfs_trans_reserve(tp, resblks,
358                         XFS_WRITE_LOG_RES(mp), resrtextents,
359                         XFS_TRANS_PERM_LOG_RES,
360                         XFS_WRITE_LOG_COUNT);
361         /*
362          * Check for running out of space, note: need lock to return
363          */
364         if (error)
365                 xfs_trans_cancel(tp, 0);
366         xfs_ilock(ip, XFS_ILOCK_EXCL);
367         if (error)
368                 goto error_out;
369
370         error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
371         if (error)
372                 goto error1;
373
374         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
375         xfs_trans_ihold(tp, ip);
376
377         bmapi_flag = XFS_BMAPI_WRITE;
378         if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
379                 bmapi_flag |= XFS_BMAPI_PREALLOC;
380
381         /*
382          * Issue the xfs_bmapi() call to allocate the blocks
383          */
384         xfs_bmap_init(&free_list, &firstfsb);
385         nimaps = 1;
386         error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
387                 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
388         if (error)
389                 goto error0;
390
391         /*
392          * Complete the transaction
393          */
394         error = xfs_bmap_finish(&tp, &free_list, &committed);
395         if (error)
396                 goto error0;
397         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
398         if (error)
399                 goto error_out;
400
401         /*
402          * Copy any maps to caller's array and return any error.
403          */
404         if (nimaps == 0) {
405                 error = ENOSPC;
406                 goto error_out;
407         }
408
409         if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
410                 error = xfs_cmn_err_fsblock_zero(ip, &imap);
411                 goto error_out;
412         }
413
414         *ret_imap = imap;
415         *nmaps = 1;
416         return 0;
417
418 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
419         xfs_bmap_cancel(&free_list);
420         xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
421
422 error1: /* Just cancel transaction */
423         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
424         *nmaps = 0;     /* nothing set-up here */
425
426 error_out:
427         return XFS_ERROR(error);
428 }
429
430 /*
431  * If the caller is doing a write at the end of the file, then extend the
432  * allocation out to the file system's write iosize.  We clean up any extra
433  * space left over when the file is closed in xfs_inactive().
434  */
435 STATIC int
436 xfs_iomap_eof_want_preallocate(
437         xfs_mount_t     *mp,
438         xfs_inode_t     *ip,
439         xfs_off_t       offset,
440         size_t          count,
441         int             ioflag,
442         xfs_bmbt_irec_t *imap,
443         int             nimaps,
444         int             *prealloc)
445 {
446         xfs_fileoff_t   start_fsb;
447         xfs_filblks_t   count_fsb;
448         xfs_fsblock_t   firstblock;
449         int             n, error, imaps;
450
451         *prealloc = 0;
452         if ((offset + count) <= ip->i_size)
453                 return 0;
454
455         /*
456          * If there are any real blocks past eof, then don't
457          * do any speculative allocation.
458          */
459         start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
460         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
461         while (count_fsb > 0) {
462                 imaps = nimaps;
463                 firstblock = NULLFSBLOCK;
464                 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
465                                   &firstblock, 0, imap, &imaps, NULL, NULL);
466                 if (error)
467                         return error;
468                 for (n = 0; n < imaps; n++) {
469                         if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
470                             (imap[n].br_startblock != DELAYSTARTBLOCK))
471                                 return 0;
472                         start_fsb += imap[n].br_blockcount;
473                         count_fsb -= imap[n].br_blockcount;
474                 }
475         }
476         *prealloc = 1;
477         return 0;
478 }
479
480 int
481 xfs_iomap_write_delay(
482         xfs_inode_t     *ip,
483         xfs_off_t       offset,
484         size_t          count,
485         int             ioflag,
486         xfs_bmbt_irec_t *ret_imap,
487         int             *nmaps)
488 {
489         xfs_mount_t     *mp = ip->i_mount;
490         xfs_fileoff_t   offset_fsb;
491         xfs_fileoff_t   last_fsb;
492         xfs_off_t       aligned_offset;
493         xfs_fileoff_t   ioalign;
494         xfs_fsblock_t   firstblock;
495         xfs_extlen_t    extsz;
496         int             nimaps;
497         xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
498         int             prealloc, flushed = 0;
499         int             error;
500
501         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
502
503         /*
504          * Make sure that the dquots are there. This doesn't hold
505          * the ilock across a disk read.
506          */
507         error = xfs_qm_dqattach_locked(ip, 0);
508         if (error)
509                 return XFS_ERROR(error);
510
511         extsz = xfs_get_extsz_hint(ip);
512         offset_fsb = XFS_B_TO_FSBT(mp, offset);
513
514         error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
515                                 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
516         if (error)
517                 return error;
518
519 retry:
520         if (prealloc) {
521                 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
522                 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
523                 last_fsb = ioalign + mp->m_writeio_blocks;
524         } else {
525                 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
526         }
527
528         if (prealloc || extsz) {
529                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
530                 if (error)
531                         return error;
532         }
533
534         nimaps = XFS_WRITE_IMAPS;
535         firstblock = NULLFSBLOCK;
536         error = xfs_bmapi(NULL, ip, offset_fsb,
537                           (xfs_filblks_t)(last_fsb - offset_fsb),
538                           XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
539                           XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
540                           &nimaps, NULL, NULL);
541         if (error && (error != ENOSPC))
542                 return XFS_ERROR(error);
543
544         /*
545          * If bmapi returned us nothing, and if we didn't get back EDQUOT,
546          * then we must have run out of space - flush all other inodes with
547          * delalloc blocks and retry without EOF preallocation.
548          */
549         if (nimaps == 0) {
550                 trace_xfs_delalloc_enospc(ip, offset, count);
551                 if (flushed)
552                         return XFS_ERROR(ENOSPC);
553
554                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
555                 xfs_flush_inodes(ip);
556                 xfs_ilock(ip, XFS_ILOCK_EXCL);
557
558                 flushed = 1;
559                 error = 0;
560                 prealloc = 0;
561                 goto retry;
562         }
563
564         if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
565                 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
566
567         *ret_imap = imap[0];
568         *nmaps = 1;
569
570         return 0;
571 }
572
573 /*
574  * Pass in a delayed allocate extent, convert it to real extents;
575  * return to the caller the extent we create which maps on top of
576  * the originating callers request.
577  *
578  * Called without a lock on the inode.
579  *
580  * We no longer bother to look at the incoming map - all we have to
581  * guarantee is that whatever we allocate fills the required range.
582  */
583 int
584 xfs_iomap_write_allocate(
585         xfs_inode_t     *ip,
586         xfs_off_t       offset,
587         size_t          count,
588         xfs_bmbt_irec_t *map,
589         int             *retmap)
590 {
591         xfs_mount_t     *mp = ip->i_mount;
592         xfs_fileoff_t   offset_fsb, last_block;
593         xfs_fileoff_t   end_fsb, map_start_fsb;
594         xfs_fsblock_t   first_block;
595         xfs_bmap_free_t free_list;
596         xfs_filblks_t   count_fsb;
597         xfs_bmbt_irec_t imap;
598         xfs_trans_t     *tp;
599         int             nimaps, committed;
600         int             error = 0;
601         int             nres;
602
603         *retmap = 0;
604
605         /*
606          * Make sure that the dquots are there.
607          */
608         error = xfs_qm_dqattach(ip, 0);
609         if (error)
610                 return XFS_ERROR(error);
611
612         offset_fsb = XFS_B_TO_FSBT(mp, offset);
613         count_fsb = map->br_blockcount;
614         map_start_fsb = map->br_startoff;
615
616         XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
617
618         while (count_fsb != 0) {
619                 /*
620                  * Set up a transaction with which to allocate the
621                  * backing store for the file.  Do allocations in a
622                  * loop until we get some space in the range we are
623                  * interested in.  The other space that might be allocated
624                  * is in the delayed allocation extent on which we sit
625                  * but before our buffer starts.
626                  */
627
628                 nimaps = 0;
629                 while (nimaps == 0) {
630                         tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
631                         tp->t_flags |= XFS_TRANS_RESERVE;
632                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
633                         error = xfs_trans_reserve(tp, nres,
634                                         XFS_WRITE_LOG_RES(mp),
635                                         0, XFS_TRANS_PERM_LOG_RES,
636                                         XFS_WRITE_LOG_COUNT);
637                         if (error) {
638                                 xfs_trans_cancel(tp, 0);
639                                 return XFS_ERROR(error);
640                         }
641                         xfs_ilock(ip, XFS_ILOCK_EXCL);
642                         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
643                         xfs_trans_ihold(tp, ip);
644
645                         xfs_bmap_init(&free_list, &first_block);
646
647                         /*
648                          * it is possible that the extents have changed since
649                          * we did the read call as we dropped the ilock for a
650                          * while. We have to be careful about truncates or hole
651                          * punchs here - we are not allowed to allocate
652                          * non-delalloc blocks here.
653                          *
654                          * The only protection against truncation is the pages
655                          * for the range we are being asked to convert are
656                          * locked and hence a truncate will block on them
657                          * first.
658                          *
659                          * As a result, if we go beyond the range we really
660                          * need and hit an delalloc extent boundary followed by
661                          * a hole while we have excess blocks in the map, we
662                          * will fill the hole incorrectly and overrun the
663                          * transaction reservation.
664                          *
665                          * Using a single map prevents this as we are forced to
666                          * check each map we look for overlap with the desired
667                          * range and abort as soon as we find it. Also, given
668                          * that we only return a single map, having one beyond
669                          * what we can return is probably a bit silly.
670                          *
671                          * We also need to check that we don't go beyond EOF;
672                          * this is a truncate optimisation as a truncate sets
673                          * the new file size before block on the pages we
674                          * currently have locked under writeback. Because they
675                          * are about to be tossed, we don't need to write them
676                          * back....
677                          */
678                         nimaps = 1;
679                         end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
680                         error = xfs_bmap_last_offset(NULL, ip, &last_block,
681                                                         XFS_DATA_FORK);
682                         if (error)
683                                 goto trans_cancel;
684
685                         last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
686                         if ((map_start_fsb + count_fsb) > last_block) {
687                                 count_fsb = last_block - map_start_fsb;
688                                 if (count_fsb == 0) {
689                                         error = EAGAIN;
690                                         goto trans_cancel;
691                                 }
692                         }
693
694                         /* Go get the actual blocks */
695                         error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
696                                         XFS_BMAPI_WRITE, &first_block, 1,
697                                         &imap, &nimaps, &free_list, NULL);
698                         if (error)
699                                 goto trans_cancel;
700
701                         error = xfs_bmap_finish(&tp, &free_list, &committed);
702                         if (error)
703                                 goto trans_cancel;
704
705                         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
706                         if (error)
707                                 goto error0;
708
709                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
710                 }
711
712                 /*
713                  * See if we were able to allocate an extent that
714                  * covers at least part of the callers request
715                  */
716                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
717                         return xfs_cmn_err_fsblock_zero(ip, &imap);
718
719                 if ((offset_fsb >= imap.br_startoff) &&
720                     (offset_fsb < (imap.br_startoff +
721                                    imap.br_blockcount))) {
722                         *map = imap;
723                         *retmap = 1;
724                         XFS_STATS_INC(xs_xstrat_quick);
725                         return 0;
726                 }
727
728                 /*
729                  * So far we have not mapped the requested part of the
730                  * file, just surrounding data, try again.
731                  */
732                 count_fsb -= imap.br_blockcount;
733                 map_start_fsb = imap.br_startoff + imap.br_blockcount;
734         }
735
736 trans_cancel:
737         xfs_bmap_cancel(&free_list);
738         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
739 error0:
740         xfs_iunlock(ip, XFS_ILOCK_EXCL);
741         return XFS_ERROR(error);
742 }
743
744 int
745 xfs_iomap_write_unwritten(
746         xfs_inode_t     *ip,
747         xfs_off_t       offset,
748         size_t          count)
749 {
750         xfs_mount_t     *mp = ip->i_mount;
751         xfs_fileoff_t   offset_fsb;
752         xfs_filblks_t   count_fsb;
753         xfs_filblks_t   numblks_fsb;
754         xfs_fsblock_t   firstfsb;
755         int             nimaps;
756         xfs_trans_t     *tp;
757         xfs_bmbt_irec_t imap;
758         xfs_bmap_free_t free_list;
759         uint            resblks;
760         int             committed;
761         int             error;
762
763         trace_xfs_unwritten_convert(ip, offset, count);
764
765         offset_fsb = XFS_B_TO_FSBT(mp, offset);
766         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
767         count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
768
769         /*
770          * Reserve enough blocks in this transaction for two complete extent
771          * btree splits.  We may be converting the middle part of an unwritten
772          * extent and in this case we will insert two new extents in the btree
773          * each of which could cause a full split.
774          *
775          * This reservation amount will be used in the first call to
776          * xfs_bmbt_split() to select an AG with enough space to satisfy the
777          * rest of the operation.
778          */
779         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
780
781         do {
782                 /*
783                  * set up a transaction to convert the range of extents
784                  * from unwritten to real. Do allocations in a loop until
785                  * we have covered the range passed in.
786                  *
787                  * Note that we open code the transaction allocation here
788                  * to pass KM_NOFS--we can't risk to recursing back into
789                  * the filesystem here as we might be asked to write out
790                  * the same inode that we complete here and might deadlock
791                  * on the iolock.
792                  */
793                 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
794                 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
795                 tp->t_flags |= XFS_TRANS_RESERVE;
796                 error = xfs_trans_reserve(tp, resblks,
797                                 XFS_WRITE_LOG_RES(mp), 0,
798                                 XFS_TRANS_PERM_LOG_RES,
799                                 XFS_WRITE_LOG_COUNT);
800                 if (error) {
801                         xfs_trans_cancel(tp, 0);
802                         return XFS_ERROR(error);
803                 }
804
805                 xfs_ilock(ip, XFS_ILOCK_EXCL);
806                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
807                 xfs_trans_ihold(tp, ip);
808
809                 /*
810                  * Modify the unwritten extent state of the buffer.
811                  */
812                 xfs_bmap_init(&free_list, &firstfsb);
813                 nimaps = 1;
814                 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
815                                   XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
816                                   1, &imap, &nimaps, &free_list, NULL);
817                 if (error)
818                         goto error_on_bmapi_transaction;
819
820                 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
821                 if (error)
822                         goto error_on_bmapi_transaction;
823
824                 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
825                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
826                 if (error)
827                         return XFS_ERROR(error);
828
829                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
830                         return xfs_cmn_err_fsblock_zero(ip, &imap);
831
832                 if ((numblks_fsb = imap.br_blockcount) == 0) {
833                         /*
834                          * The numblks_fsb value should always get
835                          * smaller, otherwise the loop is stuck.
836                          */
837                         ASSERT(imap.br_blockcount);
838                         break;
839                 }
840                 offset_fsb += numblks_fsb;
841                 count_fsb -= numblks_fsb;
842         } while (count_fsb > 0);
843
844         return 0;
845
846 error_on_bmapi_transaction:
847         xfs_bmap_cancel(&free_list);
848         xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
849         xfs_iunlock(ip, XFS_ILOCK_EXCL);
850         return XFS_ERROR(error);
851 }