ext4: Patch up how we claim metadata blocks for quota purposes
[linux-2.6.git] / fs / ext4 / inode.c
index 3e3b454..84eeb8f 100644 (file)
@@ -1043,43 +1043,47 @@ static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
        return ext4_indirect_calc_metadata_amount(inode, blocks);
 }
 
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
 static void ext4_da_update_reserve_space(struct inode *inode, int used)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       int total, mdb, mdb_free, mdb_claim = 0;
-
-       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-       /* recalculate the number of metablocks still need to be reserved */
-       total = EXT4_I(inode)->i_reserved_data_blocks - used;
-       mdb = ext4_calc_metadata_amount(inode, total);
-
-       /* figure out how many metablocks to release */
-       BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-       mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-       if (mdb_free) {
-               /* Account for allocated meta_blocks */
-               mdb_claim = EXT4_I(inode)->i_allocated_meta_blocks;
-               BUG_ON(mdb_free < mdb_claim);
-               mdb_free -= mdb_claim;
-
-               /* update fs dirty blocks counter */
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       int mdb_free = 0;
+
+       spin_lock(&ei->i_block_reservation_lock);
+       if (unlikely(used > ei->i_reserved_data_blocks)) {
+               ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+                        "with only %d reserved data blocks\n",
+                        __func__, inode->i_ino, used,
+                        ei->i_reserved_data_blocks);
+               WARN_ON(1);
+               used = ei->i_reserved_data_blocks;
+       }
+
+       /* Update per-inode reservations */
+       ei->i_reserved_data_blocks -= used;
+       used += ei->i_allocated_meta_blocks;
+       ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+       ei->i_allocated_meta_blocks = 0;
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+       if (ei->i_reserved_data_blocks == 0) {
+               /*
+                * We can release all of the reserved metadata blocks
+                * only when we have written all of the delayed
+                * allocation blocks.
+                */
+               mdb_free = ei->i_allocated_meta_blocks;
                percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
-               EXT4_I(inode)->i_allocated_meta_blocks = 0;
-               EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+               ei->i_allocated_meta_blocks = 0;
        }
-
-       /* update per-inode reservations */
-       BUG_ON(used  > EXT4_I(inode)->i_reserved_data_blocks);
-       EXT4_I(inode)->i_reserved_data_blocks -= used;
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, used + mdb_claim);
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       vfs_dq_claim_block(inode, used + mdb_claim);
-
-       /*
-        * free those over-booking quota for metadata blocks
-        */
+       /* Update quota subsystem */
+       vfs_dq_claim_block(inode, used);
        if (mdb_free)
                vfs_dq_release_reservation_block(inode, mdb_free);
 
@@ -1088,7 +1092,8 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used)
         * there aren't any writers on the inode, we can discard the
         * inode's preallocations.
         */
-       if (!total && (atomic_read(&inode->i_writecount) == 0))
+       if ((ei->i_reserved_data_blocks == 0) &&
+           (atomic_read(&inode->i_writecount) == 0))
                ext4_discard_preallocations(inode);
 }
 
@@ -1801,7 +1806,8 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
 {
        int retries = 0;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       unsigned long md_needed, mdblocks, total = 0;
+       struct ext4_inode_info *ei = EXT4_I(inode);
+       unsigned long md_needed, md_reserved, total = 0;
 
        /*
         * recalculate the amount of metadata blocks to reserve
@@ -1809,35 +1815,44 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
         * worse case is one extent per block
         */
 repeat:
-       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-       total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
-       mdblocks = ext4_calc_metadata_amount(inode, total);
-       BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
-       md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+       spin_lock(&ei->i_block_reservation_lock);
+       md_reserved = ei->i_reserved_meta_blocks;
+       md_needed = ext4_calc_metadata_amount(inode, nrblocks);
        total = md_needed + nrblocks;
-       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       spin_unlock(&ei->i_block_reservation_lock);
 
        /*
         * Make quota reservation here to prevent quota overflow
         * later. Real quota accounting is done at pages writeout
         * time.
         */
-       if (vfs_dq_reserve_block(inode, total))
+       if (vfs_dq_reserve_block(inode, total)) {
+               /* 
+                * We tend to badly over-estimate the amount of
+                * metadata blocks which are needed, so if we have
+                * reserved any metadata blocks, try to force out the
+                * inode and see if we have any better luck.
+                */
+               if (md_reserved && retries++ <= 3)
+                       goto retry;
                return -EDQUOT;
+       }
 
        if (ext4_claim_free_blocks(sbi, total)) {
                vfs_dq_release_reservation_block(inode, total);
                if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+               retry:
+                       if (md_reserved)
+                               write_inode_now(inode, (retries == 3));
                        yield();
                        goto repeat;
                }
                return -ENOSPC;
        }
-       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
-       EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
-       EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
-       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+       spin_lock(&ei->i_block_reservation_lock);
+       ei->i_reserved_data_blocks += nrblocks;
+       ei->i_reserved_meta_blocks += md_needed;
+       spin_unlock(&ei->i_block_reservation_lock);
 
        return 0;       /* success */
 }
@@ -1845,49 +1860,45 @@ repeat:
 static void ext4_da_release_space(struct inode *inode, int to_free)
 {
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-       int total, mdb, mdb_free, release;
+       struct ext4_inode_info *ei = EXT4_I(inode);
 
        if (!to_free)
                return;         /* Nothing to release, exit */
 
        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       if (!EXT4_I(inode)->i_reserved_data_blocks) {
+       if (unlikely(to_free > ei->i_reserved_data_blocks)) {
                /*
-                * if there is no reserved blocks, but we try to free some
-                * then the counter is messed up somewhere.
-                * but since this function is called from invalidate
-                * page, it's harmless to return without any action
+                * if there aren't enough reserved blocks, then the
+                * counter is messed up somewhere.  Since this
+                * function is called from invalidate page, it's
+                * harmless to return without any action.
                 */
-               printk(KERN_INFO "ext4 delalloc try to release %d reserved "
-                           "blocks for inode %lu, but there is no reserved "
-                           "data blocks\n", to_free, inode->i_ino);
-               spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-               return;
+               ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+                        "ino %lu, to_free %d with only %d reserved "
+                        "data blocks\n", inode->i_ino, to_free,
+                        ei->i_reserved_data_blocks);
+               WARN_ON(1);
+               to_free = ei->i_reserved_data_blocks;
        }
+       ei->i_reserved_data_blocks -= to_free;
 
-       /* recalculate the number of metablocks still need to be reserved */
-       total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
-       mdb = ext4_calc_metadata_amount(inode, total);
-
-       /* figure out how many metablocks to release */
-       BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-       mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
-       release = to_free + mdb_free;
-
-       /* update fs dirty blocks counter for truncate case */
-       percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+       if (ei->i_reserved_data_blocks == 0) {
+               /*
+                * We can release all of the reserved metadata blocks
+                * only when we have written all of the delayed
+                * allocation blocks.
+                */
+               to_free += ei->i_allocated_meta_blocks;
+               ei->i_allocated_meta_blocks = 0;
+       }
 
-       /* update per-inode reservations */
-       BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
-       EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+       /* update fs dirty blocks counter */
+       percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
 
-       BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
-       EXT4_I(inode)->i_reserved_meta_blocks = mdb;
        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
-       vfs_dq_release_reservation_block(inode, release);
+       vfs_dq_release_reservation_block(inode, to_free);
 }
 
 static void ext4_da_page_release_reservation(struct page *page,