ext4: stop looping in ext4_num_dirty_pages when max_pages reached
[linux-2.6.git] / fs / ext4 / inode.c
index 0b1d7c8..d88ba4a 100644 (file)
@@ -167,11 +167,16 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 /*
  * Called at the last iput() if i_nlink is zero.
  */
-void ext4_delete_inode(struct inode *inode)
+void ext4_evict_inode(struct inode *inode)
 {
        handle_t *handle;
        int err;
 
+       if (inode->i_nlink) {
+               truncate_inode_pages(&inode->i_data, 0);
+               goto no_delete;
+       }
+
        if (!is_bad_inode(inode))
                dquot_initialize(inode);
 
@@ -221,6 +226,7 @@ void ext4_delete_inode(struct inode *inode)
                                     "couldn't extend journal (err %d)", err);
                stop_handle:
                        ext4_journal_stop(handle);
+                       ext4_orphan_del(NULL, inode);
                        goto no_delete;
                }
        }
@@ -245,13 +251,13 @@ void ext4_delete_inode(struct inode *inode)
         */
        if (ext4_mark_inode_dirty(handle, inode))
                /* If that failed, just do the required in-core inode clear. */
-               clear_inode(inode);
+               ext4_clear_inode(inode);
        else
                ext4_free_inode(handle, inode);
        ext4_journal_stop(handle);
        return;
 no_delete:
-       clear_inode(inode);     /* We must guarantee clearing of inode... */
+       ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
 }
 
 typedef struct {
@@ -337,9 +343,11 @@ static int ext4_block_to_path(struct inode *inode,
        return n;
 }
 
-static int __ext4_check_blockref(const char *function, struct inode *inode,
+static int __ext4_check_blockref(const char *function, unsigned int line,
+                                struct inode *inode,
                                 __le32 *p, unsigned int max)
 {
+       struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
        __le32 *bref = p;
        unsigned int blk;
 
@@ -348,9 +356,9 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
                if (blk &&
                    unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                    blk, 1))) {
-                       __ext4_error(inode->i_sb, function,
-                                  "invalid block reference %u "
-                                  "in inode #%lu", blk, inode->i_ino);
+                       es->s_last_error_block = cpu_to_le64(blk);
+                       ext4_error_inode(inode, function, line, blk,
+                                        "invalid block");
                        return -EIO;
                }
        }
@@ -359,11 +367,13 @@ static int __ext4_check_blockref(const char *function, struct inode *inode,
 
 
 #define ext4_check_indirect_blockref(inode, bh)                         \
-       __ext4_check_blockref(__func__, inode, (__le32 *)(bh)->b_data,  \
+       __ext4_check_blockref(__func__, __LINE__, inode,                \
+                             (__le32 *)(bh)->b_data,                   \
                              EXT4_ADDR_PER_BLOCK((inode)->i_sb))
 
 #define ext4_check_inode_blockref(inode)                                \
-       __ext4_check_blockref(__func__, inode, EXT4_I(inode)->i_data,   \
+       __ext4_check_blockref(__func__, __LINE__, inode,                \
+                             EXT4_I(inode)->i_data,                    \
                              EXT4_NDIR_BLOCKS)
 
 /**
@@ -785,7 +795,7 @@ failed:
        /* Allocation failed, free what we already allocated */
        ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
        for (i = 1; i <= n ; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -875,7 +885,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
 
 err_out:
        for (i = 1; i <= num; i++) {
-               /* 
+               /*
                 * branch[i].bh is newly allocated, so there is no
                 * need to revoke the block, which is why we don't
                 * need to set EXT4_FREE_BLOCKS_METADATA.
@@ -932,7 +942,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
        int count = 0;
        ext4_fsblk_t first_block = 0;
 
-       J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL));
+       J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
        J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
        depth = ext4_block_to_path(inode, map->m_lblk, offsets,
                                   &blocks_to_boundary);
@@ -1060,7 +1070,7 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode,
  */
 static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
 {
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_calc_metadata_amount(inode, lblock);
 
        return ext4_indirect_calc_metadata_amount(inode, lblock);
@@ -1129,20 +1139,24 @@ void ext4_da_update_reserve_space(struct inode *inode,
                ext4_discard_preallocations(inode);
 }
 
-static int check_block_validity(struct inode *inode, const char *msg,
-                               sector_t logical, sector_t phys, int len)
+static int __check_block_validity(struct inode *inode, const char *func,
+                               unsigned int line,
+                               struct ext4_map_blocks *map)
 {
-       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) {
-               __ext4_error(inode->i_sb, msg,
-                          "inode #%lu logical block %llu mapped to %llu "
-                          "(size %d)", inode->i_ino,
-                          (unsigned long long) logical,
-                          (unsigned long long) phys, len);
+       if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk,
+                                  map->m_len)) {
+               ext4_error_inode(inode, func, line, map->m_pblk,
+                                "lblock %lu mapped to illegal pblock "
+                                "(length %d)", (unsigned long) map->m_lblk,
+                                map->m_len);
                return -EIO;
        }
        return 0;
 }
 
+#define check_block_validity(inode, map)       \
+       __check_block_validity((inode), __func__, __LINE__, (map))
+
 /*
  * Return the number of contiguous dirty pages in a given inode
  * starting at page frame idx.
@@ -1193,8 +1207,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
                                break;
                        idx++;
                        num++;
-                       if (num >= max_pages)
+                       if (num >= max_pages) {
+                               done = 1;
                                break;
+                       }
                }
                pagevec_release(&pvec);
        }
@@ -1237,7 +1253,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * file system block.
         */
        down_read((&EXT4_I(inode)->i_data_sem));
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, 0);
        } else {
                retval = ext4_ind_map_blocks(handle, inode, map, 0);
@@ -1245,8 +1261,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        up_read((&EXT4_I(inode)->i_data_sem));
 
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret = check_block_validity(inode, "file system corruption",
-                                       map->m_lblk, map->m_pblk, retval);
+               int ret = check_block_validity(inode, map);
                if (ret != 0)
                        return ret;
        }
@@ -1297,7 +1312,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
         * We need to check for EXT4 here because migrate
         * could have changed the inode type in between
         */
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                retval = ext4_ext_map_blocks(handle, inode, map, flags);
        } else {
                retval = ext4_ind_map_blocks(handle, inode, map, flags);
@@ -1326,10 +1341,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 
        up_write((&EXT4_I(inode)->i_data_sem));
        if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
-               int ret = check_block_validity(inode, "file system "
-                                              "corruption after allocation",
-                                              map->m_lblk, map->m_pblk,
-                                              retval);
+               int ret = check_block_validity(inode, map);
                if (ret != 0)
                        return ret;
        }
@@ -1522,9 +1534,25 @@ static int walk_page_buffers(handle_t *handle,
 static int do_journal_get_write_access(handle_t *handle,
                                       struct buffer_head *bh)
 {
+       int dirty = buffer_dirty(bh);
+       int ret;
+
        if (!buffer_mapped(bh) || buffer_freed(bh))
                return 0;
-       return ext4_journal_get_write_access(handle, bh);
+       /*
+        * __block_prepare_write() could have dirtied some buffers. Clean
+        * the dirty bit as jbd2_journal_get_write_access() could complain
+        * otherwise about fs integrity issues. Setting of the dirty bit
+        * by __block_prepare_write() isn't a real problem here as we clear
+        * the bit before releasing a page lock and thus writeback cannot
+        * ever write the buffer.
+        */
+       if (dirty)
+               clear_buffer_dirty(bh);
+       ret = ext4_journal_get_write_access(handle, bh);
+       if (!ret && dirty)
+               ret = ext4_handle_dirty_metadata(handle, NULL, bh);
+       return ret;
 }
 
 /*
@@ -1581,11 +1609,9 @@ retry:
        *pagep = page;
 
        if (ext4_should_dioread_nolock(inode))
-               ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-                               fsdata, ext4_get_block_write);
+               ret = __block_write_begin(page, pos, len, ext4_get_block_write);
        else
-               ret = block_write_begin(file, mapping, pos, len, flags, pagep,
-                               fsdata, ext4_get_block);
+               ret = __block_write_begin(page, pos, len, ext4_get_block);
 
        if (!ret && ext4_should_journal_data(inode)) {
                ret = walk_page_buffers(handle, page_buffers(page),
@@ -1596,7 +1622,7 @@ retry:
                unlock_page(page);
                page_cache_release(page);
                /*
-                * block_write_begin may have instantiated a few blocks
+                * __block_write_begin may have instantiated a few blocks
                 * outside i_size.  Trim these off again. Don't need
                 * i_size_read because we hold i_mutex.
                 *
@@ -1896,6 +1922,7 @@ static void ext4_da_release_space(struct inode *inode, int to_free)
 
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
+       trace_ext4_da_release_space(inode, to_free);
        if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                /*
                 * if there aren't enough reserved blocks, then the
@@ -2196,7 +2223,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
        BUG_ON(!handle);
 
        /*
-        * Call ext4_get_blocks() to allocate any delayed allocation
+        * Call ext4_map_blocks() to allocate any delayed allocation
         * blocks, or to convert an uninitialized extent to be
         * initialized (in the case where we have written into
         * one or more preallocated blocks).
@@ -2205,7 +2232,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
         * indicate that we are on the delayed allocation path.  This
         * affects functions in many different parts of the allocation
         * call path.  This flag exists primarily because we don't
-        * want to change *many* call functions, so ext4_get_blocks()
+        * want to change *many* call functions, so ext4_map_blocks()
         * will set the magic i_delalloc_reserved_flag once the
         * inode's allocation semaphore is taken.
         *
@@ -2223,6 +2250,8 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 
        blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags);
        if (blks < 0) {
+               struct super_block *sb = mpd->inode->i_sb;
+
                err = blks;
                /*
                 * If get block returns with error we simply
@@ -2233,7 +2262,7 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                        return 0;
 
                if (err == -ENOSPC &&
-                   ext4_count_free_blocks(mpd->inode->i_sb)) {
+                   ext4_count_free_blocks(sb)) {
                        mpd->retval = err;
                        return 0;
                }
@@ -2245,16 +2274,17 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
                 * writepage and writepages will again try to write
                 * the same.
                 */
-               ext4_msg(mpd->inode->i_sb, KERN_CRIT,
-                        "delayed block allocation failed for inode %lu at "
-                        "logical offset %llu with max blocks %zd with "
-                        "error %d", mpd->inode->i_ino,
-                        (unsigned long long) next,
-                        mpd->b_size >> mpd->inode->i_blkbits, err);
-               printk(KERN_CRIT "This should not happen!!  "
-                      "Data will be lost\n");
-               if (err == -ENOSPC) {
-                       ext4_print_free_blocks(mpd->inode);
+               if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) {
+                       ext4_msg(sb, KERN_CRIT,
+                                "delayed block allocation failed for inode %lu "
+                                "at logical offset %llu with max blocks %zd "
+                                "with error %d", mpd->inode->i_ino,
+                                (unsigned long long) next,
+                                mpd->b_size >> mpd->inode->i_blkbits, err);
+                       ext4_msg(sb, KERN_CRIT,
+                               "This should not happen!! Data will be lost\n");
+                       if (err == -ENOSPC)
+                               ext4_print_free_blocks(mpd->inode);
                }
                /* invalidate all the pages */
                ext4_da_block_invalidatepages(mpd, next,
@@ -2322,13 +2352,13 @@ static void mpage_add_bh_to_extent(struct mpage_da_data *mpd,
         * XXX Don't go larger than mballoc is willing to allocate
         * This is a stopgap solution.  We eventually need to fold
         * mpage_da_submit_io() into this function and then call
-        * ext4_get_blocks() multiple times in a loop
+        * ext4_map_blocks() multiple times in a loop
         */
        if (nrblocks >= 8*1024*1024/mpd->inode->i_sb->s_blocksize)
                goto flush_it;
 
        /* check if thereserved journal credits might overflow */
-       if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS))) {
                if (nrblocks >= EXT4_MAX_TRANS_DATA) {
                        /*
                         * With non-extent format we are limited by the journal
@@ -2555,18 +2585,16 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 /*
  * This function is used as a standard get_block_t calback function
  * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_prepare_write(), nobh_writepage(), and
- * block_write_full_page().  These functions should only try to map a
- * single block at a time.
+ * callback function for block_prepare_write() and block_write_full_page().
+ * These functions should only try to map a single block at a time.
  *
  * Since this function doesn't do block allocations even if the caller
  * requests it by passing in create=1, it is critically important that
  * any caller checks to make sure that any buffer heads are returned
  * by this function are either all already mapped or marked for
- * delayed allocation before calling nobh_writepage() or
- * block_write_full_page().  Otherwise, b_blocknr could be left
- * unitialized, and the page write functions will be taken by
- * surprise.
+ * delayed allocation before calling  block_write_full_page().  Otherwise,
+ * b_blocknr could be left unitialized, and the page write functions will
+ * be taken by surprise.
  */
 static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
                                   struct buffer_head *bh_result, int create)
@@ -2751,9 +2779,7 @@ static int ext4_writepage(struct page *page,
                return __ext4_journalled_writepage(page, len);
        }
 
-       if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
-               ret = nobh_writepage(page, noalloc_get_block_write, wbc);
-       else if (page_bufs && buffer_uninit(page_bufs)) {
+       if (page_bufs && buffer_uninit(page_bufs)) {
                ext4_set_bh_endio(page_bufs, inode);
                ret = block_write_full_page_endio(page, noalloc_get_block_write,
                                            wbc, ext4_end_io_buffer_write);
@@ -2782,7 +2808,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
         * number of contiguous block. So we will limit
         * number of contiguous block to a sane value
         */
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) &&
            (max_blocks > EXT4_MAX_TRANS_DATA))
                max_blocks = EXT4_MAX_TRANS_DATA;
 
@@ -3148,13 +3174,10 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
        int ret, retries = 0;
        struct page *page;
        pgoff_t index;
-       unsigned from, to;
        struct inode *inode = mapping->host;
        handle_t *handle;
 
        index = pos >> PAGE_CACHE_SHIFT;
-       from = pos & (PAGE_CACHE_SIZE - 1);
-       to = from + len;
 
        if (ext4_nonda_switch(inode->i_sb)) {
                *fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -3187,8 +3210,7 @@ retry:
        }
        *pagep = page;
 
-       ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
-                               ext4_da_get_block_prep);
+       ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
        if (ret < 0) {
                unlock_page(page);
                ext4_journal_stop(handle);
@@ -3547,15 +3569,24 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 
 retry:
        if (rw == READ && ext4_should_dioread_nolock(inode))
-               ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
+               ret = __blockdev_direct_IO(rw, iocb, inode,
                                 inode->i_sb->s_bdev, iov,
                                 offset, nr_segs,
-                                ext4_get_block, NULL);
-       else
+                                ext4_get_block, NULL, NULL, 0);
+       else {
                ret = blockdev_direct_IO(rw, iocb, inode,
                                 inode->i_sb->s_bdev, iov,
                                 offset, nr_segs,
                                 ext4_get_block, NULL);
+
+               if (unlikely((rw & WRITE) && ret < 0)) {
+                       loff_t isize = i_size_read(inode);
+                       loff_t end = offset + iov_length(iov, nr_segs);
+
+                       if (end > isize)
+                               vmtruncate(inode, isize);
+               }
+       }
        if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
 
@@ -3670,6 +3701,8 @@ static int ext4_end_io_nolock(ext4_io_end_t *io)
                return ret;
        }
 
+       if (io->iocb)
+               aio_complete(io->iocb, io->result, 0);
        /* clear the DIO AIO unwritten flag */
        io->flag = 0;
        return ret;
@@ -3769,6 +3802,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
                io->offset = 0;
                io->size = 0;
                io->page = NULL;
+               io->iocb = NULL;
+               io->result = 0;
                INIT_WORK(&io->work, ext4_end_io_work);
                INIT_LIST_HEAD(&io->list);
        }
@@ -3777,7 +3812,8 @@ static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
 }
 
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-                           ssize_t size, void *private)
+                           ssize_t size, void *private, int ret,
+                           bool is_async)
 {
         ext4_io_end_t *io_end = iocb->private;
        struct workqueue_struct *wq;
@@ -3786,7 +3822,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 
        /* if not async direct IO or dio with 0 bytes write, just return */
        if (!io_end || !size)
-               return;
+               goto out;
 
        ext_debug("ext4_end_io_dio(): io_end 0x%p"
                  "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
@@ -3797,12 +3833,18 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
        if (io_end->flag != EXT4_IO_UNWRITTEN){
                ext4_free_io_end(io_end);
                iocb->private = NULL;
+out:
+               if (is_async)
+                       aio_complete(iocb, ret, 0);
                return;
        }
 
        io_end->offset = offset;
        io_end->size = size;
-       io_end->flag = EXT4_IO_UNWRITTEN;
+       if (is_async) {
+               io_end->iocb = iocb;
+               io_end->result = ret;
+       }
        wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
        /* queue the work to convert unwritten extents to written */
@@ -3939,7 +3981,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                                return -ENOMEM;
                        /*
                         * we save the io structure for current async
-                        * direct IO, so that later ext4_get_blocks()
+                        * direct IO, so that later ext4_map_blocks()
                         * could flag the io structure whether there
                         * is a unwritten extents needs to be converted
                         * when IO is completed.
@@ -3998,7 +4040,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs);
 
        return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
@@ -4130,17 +4172,6 @@ int ext4_block_truncate_page(handle_t *handle,
        length = blocksize - (offset & (blocksize - 1));
        iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
 
-       /*
-        * For "nobh" option,  we can only work if we don't need to
-        * read-in the page - otherwise we create buffers to do the IO.
-        */
-       if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
-            ext4_should_writeback_data(inode) && PageUptodate(page)) {
-               zero_user(page, offset, length);
-               set_page_dirty(page);
-               goto unlock;
-       }
-
        if (!page_has_buffers(page))
                create_empty_buffers(page, blocksize, 0);
 
@@ -4327,10 +4358,9 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode,
 
        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free,
                                   count)) {
-               ext4_error(inode->i_sb, "inode #%lu: "
-                          "attempt to clear blocks %llu len %lu, invalid",
-                          inode->i_ino, (unsigned long long) block_to_free,
-                          count);
+               EXT4_ERROR_INODE(inode, "attempt to clear invalid "
+                                "blocks %llu len %lu",
+                                (unsigned long long) block_to_free, count);
                return 1;
        }
 
@@ -4435,11 +4465,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode,
                if ((EXT4_JOURNAL(inode) == NULL) || bh2jh(this_bh))
                        ext4_handle_dirty_metadata(handle, inode, this_bh);
                else
-                       ext4_error(inode->i_sb,
-                                  "circular indirect block detected, "
-                                  "inode=%lu, block=%llu",
-                                  inode->i_ino,
-                                  (unsigned long long) this_bh->b_blocknr);
+                       EXT4_ERROR_INODE(inode,
+                                        "circular indirect block detected at "
+                                        "block %llu",
+                               (unsigned long long) this_bh->b_blocknr);
        }
 }
 
@@ -4477,11 +4506,10 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
 
                        if (!ext4_data_block_valid(EXT4_SB(inode->i_sb),
                                                   nr, 1)) {
-                               ext4_error(inode->i_sb,
-                                          "indirect mapped block in inode "
-                                          "#%lu invalid (level %d, blk #%lu)",
-                                          inode->i_ino, depth,
-                                          (unsigned long) nr);
+                               EXT4_ERROR_INODE(inode,
+                                                "invalid indirect mapped "
+                                                "block %lu (level %d)",
+                                                (unsigned long) nr, depth);
                                break;
                        }
 
@@ -4493,9 +4521,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                         * (should be rare).
                         */
                        if (!bh) {
-                               ext4_error(inode->i_sb,
-                                          "Read failure, inode=%lu, block=%llu",
-                                          inode->i_ino, nr);
+                               EXT4_ERROR_INODE_BLOCK(inode, nr,
+                                                      "Read failure");
                                continue;
                        }
 
@@ -4507,27 +4534,6 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                        depth);
 
                        /*
-                        * We've probably journalled the indirect block several
-                        * times during the truncate.  But it's no longer
-                        * needed and we now drop it from the transaction via
-                        * jbd2_journal_revoke().
-                        *
-                        * That's easy if it's exclusively part of this
-                        * transaction.  But if it's part of the committing
-                        * transaction then jbd2_journal_forget() will simply
-                        * brelse() it.  That means that if the underlying
-                        * block is reallocated in ext4_get_block(),
-                        * unmap_underlying_metadata() will find this block
-                        * and will try to get rid of it.  damn, damn.
-                        *
-                        * If this block has already been committed to the
-                        * journal, a revoke record will be written.  And
-                        * revoke records must be emitted *before* clearing
-                        * this block's bit in the bitmaps.
-                        */
-                       ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
-
-                       /*
                         * Everything below this this pointer has been
                         * released.  Now let this top-of-subtree go.
                         *
@@ -4551,8 +4557,20 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
                                            blocks_for_truncate(inode));
                        }
 
+                       /*
+                        * The forget flag here is critical because if
+                        * we are journaling (and not doing data
+                        * journaling), we have to make sure a revoke
+                        * record is written to prevent the journal
+                        * replay from overwriting the (former)
+                        * indirect block if it gets reallocated as a
+                        * data block.  This must happen in the same
+                        * transaction where the data blocks are
+                        * actually freed.
+                        */
                        ext4_free_blocks(handle, inode, 0, nr, 1,
-                                        EXT4_FREE_BLOCKS_METADATA);
+                                        EXT4_FREE_BLOCKS_METADATA|
+                                        EXT4_FREE_BLOCKS_FORGET);
 
                        if (parent_bh) {
                                /*
@@ -4637,12 +4655,12 @@ void ext4_truncate(struct inode *inode)
        if (!ext4_can_truncate(inode))
                return;
 
-       EXT4_I(inode)->i_flags &= ~EXT4_EOFBLOCKS_FL;
+       ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
 
        if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
                ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
 
-       if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
+       if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                ext4_ext_truncate(inode);
                return;
        }
@@ -4810,8 +4828,8 @@ static int __ext4_get_inode_loc(struct inode *inode,
 
        bh = sb_getblk(sb, block);
        if (!bh) {
-               ext4_error(sb, "unable to read inode block - "
-                          "inode=%lu, block=%llu", inode->i_ino, block);
+               EXT4_ERROR_INODE_BLOCK(inode, block,
+                                      "unable to read itable block");
                return -EIO;
        }
        if (!buffer_uptodate(bh)) {
@@ -4909,8 +4927,8 @@ make_io:
                submit_bh(READ_META, bh);
                wait_on_buffer(bh);
                if (!buffer_uptodate(bh)) {
-                       ext4_error(sb, "unable to read inode block - inode=%lu,"
-                                  " block=%llu", inode->i_ino, block);
+                       EXT4_ERROR_INODE_BLOCK(inode, block,
+                                              "unable to read itable block");
                        brelse(bh);
                        return -EIO;
                }
@@ -4947,20 +4965,26 @@ void ext4_set_inode_flags(struct inode *inode)
 /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
 void ext4_get_inode_flags(struct ext4_inode_info *ei)
 {
-       unsigned int flags = ei->vfs_inode.i_flags;
-
-       ei->i_flags &= ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
-                       EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|EXT4_DIRSYNC_FL);
-       if (flags & S_SYNC)
-               ei->i_flags |= EXT4_SYNC_FL;
-       if (flags & S_APPEND)
-               ei->i_flags |= EXT4_APPEND_FL;
-       if (flags & S_IMMUTABLE)
-               ei->i_flags |= EXT4_IMMUTABLE_FL;
-       if (flags & S_NOATIME)
-               ei->i_flags |= EXT4_NOATIME_FL;
-       if (flags & S_DIRSYNC)
-               ei->i_flags |= EXT4_DIRSYNC_FL;
+       unsigned int vfs_fl;
+       unsigned long old_fl, new_fl;
+
+       do {
+               vfs_fl = ei->vfs_inode.i_flags;
+               old_fl = ei->i_flags;
+               new_fl = old_fl & ~(EXT4_SYNC_FL|EXT4_APPEND_FL|
+                               EXT4_IMMUTABLE_FL|EXT4_NOATIME_FL|
+                               EXT4_DIRSYNC_FL);
+               if (vfs_fl & S_SYNC)
+                       new_fl |= EXT4_SYNC_FL;
+               if (vfs_fl & S_APPEND)
+                       new_fl |= EXT4_APPEND_FL;
+               if (vfs_fl & S_IMMUTABLE)
+                       new_fl |= EXT4_IMMUTABLE_FL;
+               if (vfs_fl & S_NOATIME)
+                       new_fl |= EXT4_NOATIME_FL;
+               if (vfs_fl & S_DIRSYNC)
+                       new_fl |= EXT4_DIRSYNC_FL;
+       } while (cmpxchg(&ei->i_flags, old_fl, new_fl) != old_fl);
 }
 
 static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
@@ -4975,7 +4999,7 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
                /* we are using combined 48 bit field */
                i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
                                        le32_to_cpu(raw_inode->i_blocks_lo);
-               if (ei->i_flags & EXT4_HUGE_FILE_FL) {
+               if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
                        /* i_blocks represent file system block size */
                        return i_blocks  << (inode->i_blkbits - 9);
                } else {
@@ -5071,7 +5095,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                transaction_t *transaction;
                tid_t tid;
 
-               spin_lock(&journal->j_state_lock);
+               read_lock(&journal->j_state_lock);
                if (journal->j_running_transaction)
                        transaction = journal->j_running_transaction;
                else
@@ -5080,7 +5104,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                        tid = transaction->t_tid;
                else
                        tid = journal->j_commit_sequence;
-               spin_unlock(&journal->j_state_lock);
+               read_unlock(&journal->j_state_lock);
                ei->i_sync_tid = tid;
                ei->i_datasync_tid = tid;
        }
@@ -5121,11 +5145,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
        ret = 0;
        if (ei->i_file_acl &&
            !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
-               ext4_error(sb, "bad extended attribute block %llu inode #%lu",
-                          ei->i_file_acl, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bad extended attribute block %llu",
+                                ei->i_file_acl);
                ret = -EIO;
                goto bad_inode;
-       } else if (ei->i_flags & EXT4_EXTENTS_FL) {
+       } else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
                if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
                    (S_ISLNK(inode->i_mode) &&
                     !ext4_inode_is_fast_symlink(inode)))
@@ -5167,8 +5191,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
                           new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
        } else {
                ret = -EIO;
-               ext4_error(inode->i_sb, "bogus i_mode (%o) for inode=%lu",
-                          inode->i_mode, inode->i_ino);
+               EXT4_ERROR_INODE(inode, "bogus i_mode (%o)", inode->i_mode);
                goto bad_inode;
        }
        brelse(iloc.bh);
@@ -5197,7 +5220,7 @@ static int ext4_inode_blocks_set(handle_t *handle,
                 */
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                raw_inode->i_blocks_high = 0;
-               ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
                return 0;
        }
        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
@@ -5210,9 +5233,9 @@ static int ext4_inode_blocks_set(handle_t *handle,
                 */
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
                raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
-               ei->i_flags &= ~EXT4_HUGE_FILE_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
        } else {
-               ei->i_flags |= EXT4_HUGE_FILE_FL;
+               ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
                /* i_block is stored in file system block size */
                i_blocks = i_blocks >> (inode->i_blkbits - 9);
                raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
@@ -5406,9 +5429,8 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
                if (wbc->sync_mode == WB_SYNC_ALL)
                        sync_dirty_buffer(iloc.bh);
                if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
-                       ext4_error(inode->i_sb, "IO error syncing inode, "
-                                  "inode=%lu, block=%llu", inode->i_ino,
-                                  (unsigned long long)iloc.bh->b_blocknr);
+                       EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr,
+                                        "IO error syncing inode");
                        err = -EIO;
                }
                brelse(iloc.bh);
@@ -5450,7 +5472,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        if (error)
                return error;
 
-       if (ia_valid & ATTR_SIZE)
+       if (is_quota_modification(inode, attr))
                dquot_initialize(inode);
        if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
                (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
@@ -5480,20 +5502,18 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
-               if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) {
+               if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 
-                       if (attr->ia_size > sbi->s_bitmap_maxbytes) {
-                               error = -EFBIG;
-                               goto err_out;
-                       }
+                       if (attr->ia_size > sbi->s_bitmap_maxbytes)
+                               return -EFBIG;
                }
        }
 
        if (S_ISREG(inode->i_mode) &&
            attr->ia_valid & ATTR_SIZE &&
            (attr->ia_size < inode->i_size ||
-            (EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))) {
+            (ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))) {
                handle_t *handle;
 
                handle = ext4_journal_start(inode, 3);
@@ -5525,15 +5545,23 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        }
                }
                /* ext4_truncate will clear the flag */
-               if ((EXT4_I(inode)->i_flags & EXT4_EOFBLOCKS_FL))
+               if ((ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS)))
                        ext4_truncate(inode);
        }
 
-       rc = inode_setattr(inode, attr);
+       if ((attr->ia_valid & ATTR_SIZE) &&
+           attr->ia_size != i_size_read(inode))
+               rc = vmtruncate(inode, attr->ia_size);
+
+       if (!rc) {
+               setattr_copy(inode, attr);
+               mark_inode_dirty(inode);
+       }
 
-       /* If inode_setattr's call to ext4_truncate failed to get a
-        * transaction handle at all, we need to clean up the in-core
-        * orphan list manually. */
+       /*
+        * If the call to ext4_truncate failed to get a transaction handle at
+        * all, we need to clean up the in-core orphan list manually.
+        */
        if (inode->i_nlink)
                ext4_orphan_del(NULL, inode);
 
@@ -5601,7 +5629,7 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks,
 
 static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
-       if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
+       if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return ext4_indirect_trans_blocks(inode, nrblocks, chunk);
        return ext4_ext_index_trans_blocks(inode, nrblocks, chunk);
 }
@@ -5688,7 +5716,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
  * Calculate the journal credits for a chunk of data modification.
  *
  * This is called from DIO, fallocate or whoever calling
- * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
+ * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks.
  *
  * journal buffers for data blocks are not included here, as DIO
  * and fallocate do no need to journal data buffers.
@@ -5754,7 +5782,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
 {
        struct ext4_inode *raw_inode;
        struct ext4_xattr_ibody_header *header;
-       struct ext4_xattr_entry *entry;
 
        if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
                return 0;
@@ -5762,7 +5789,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
        raw_inode = ext4_raw_inode(&iloc);
 
        header = IHDR(inode, raw_inode);
-       entry = IFIRST(header);
 
        /* No extended attributes present */
        if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
@@ -5936,9 +5962,9 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
         */
 
        if (val)
-               EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL;
+               ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        else
-               EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL;
+               ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
        ext4_set_aops(inode);
 
        jbd2_journal_unlock_updates(journal);