ocfs2: better group descriptor consistency checks
[linux-2.6.git] / fs / ocfs2 / suballoc.c
index c46c164..7ac68ca 100644 (file)
@@ -85,11 +85,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     u64 *bg_blkno);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
                                         int nr);
-static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
-                                            struct buffer_head *bg_bh,
-                                            unsigned int bits_wanted,
-                                            u16 *bit_off,
-                                            u16 *bits_found);
 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
                                             struct inode *alloc_inode,
                                             struct ocfs2_group_desc *bg,
@@ -143,6 +138,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
        return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
 
+/* Validate on-disk group descriptor @gd against its chain allocator @di.
+ * Returns 0 if it is consistent, -EIO (after reporting the problem) if not.
+ * Somewhat more expensive than our other checks, so use sparingly. */
+static int ocfs2_check_group_descriptor(struct super_block *sb,
+                                       struct ocfs2_dinode *di,
+                                       struct ocfs2_group_desc *gd)
+{
+       unsigned int max_bits = ocfs2_bits_per_group(&di->id2.i_chain);
+
+       if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+               OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
+               return -EIO;
+       }
+       /* Both fields are little-endian on disk, so compare them raw. */
+       if (di->i_blkno != gd->bg_parent_dinode) {
+               ocfs2_error(sb, "Group descriptor # %llu has bad parent "
+                           "pointer (%llu, expected %llu)",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+                           (unsigned long long)le64_to_cpu(di->i_blkno));
+               return -EIO;
+       }
+       /* The helper computes cpg * bpc in u32 rather than in int. */
+       if (le16_to_cpu(gd->bg_bits) > max_bits) {
+               ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_bits));
+               return -EIO;
+       }
+       /* The chain index must fall below the allocator's cl_next_free_rec. */
+       if (le16_to_cpu(gd->bg_chain) >=
+           le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+               ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_chain));
+               return -EIO;
+       }
+       /* A group cannot claim more free bits than it has bits. */
+       if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+               ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+                           "claims that %u are free",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_bits),
+                           le16_to_cpu(gd->bg_free_bits_count));
+               return -EIO;
+       }
+       /* bg_bits must fit in the bitmap: 8 bits for each unit of bg_size. */
+       if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+               ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+                           "max bitmap bits of %u",
+                           (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                           le16_to_cpu(gd->bg_bits),
+                           8 * le16_to_cpu(gd->bg_size));
+               return -EIO;
+       }
+       return 0;
+}
+
 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
                                  struct inode *alloc_inode,
                                  struct buffer_head *bg_bh,
@@ -157,8 +210,9 @@ static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
        mlog_entry_void();
 
        if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
-               ocfs2_error(alloc_inode->i_sb, "group block (%"MLFu64") "
-                           "!= b_blocknr (%llu)", group_blkno,
+               ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
+                           "b_blocknr (%llu)",
+                           (unsigned long long)group_blkno,
                            (unsigned long long) bg_bh->b_blocknr);
                status = -EIO;
                goto bail;
@@ -280,8 +334,8 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 
        /* setup the group */
        bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
-       mlog(0, "new descriptor, record %u, at block %"MLFu64"\n",
-            alloc_rec, bg_blkno);
+       mlog(0, "new descriptor, record %u, at block %llu\n",
+            alloc_rec, (unsigned long long)bg_blkno);
 
        bg_bh = sb_getblk(osb->sb, bg_blkno);
        if (!bg_bh) {
@@ -382,8 +436,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
                goto bail;
        }
        if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
-               ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator "
-                           "# %"MLFu64, le64_to_cpu(fe->i_blkno));
+               ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
+                           (unsigned long long)le64_to_cpu(fe->i_blkno));
                status = -EIO;
                goto bail;
        }
@@ -662,6 +716,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
                                             struct buffer_head *bg_bh,
                                             unsigned int bits_wanted,
+                                            unsigned int total_bits,
                                             u16 *bit_off,
                                             u16 *bits_found)
 {
@@ -678,10 +733,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
        found = start = best_offset = best_size = 0;
        bitmap = bg->bg_bitmap;
 
-       while((offset = ocfs2_find_next_zero_bit(bitmap,
-                                                le16_to_cpu(bg->bg_bits),
-                                                start)) != -1) {
-               if (offset == le16_to_cpu(bg->bg_bits))
+       while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
+               if (offset == total_bits)
                        break;
 
                if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -829,9 +882,10 @@ static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle,
                goto out;
        }
 
-       mlog(0, "In suballoc %"MLFu64", chain %u, move group %"MLFu64" to "
-            "top, prev = %"MLFu64"\n",
-            fe->i_blkno, chain, bg->bg_blkno, prev_bg->bg_blkno);
+       mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
+            (unsigned long long)fe->i_blkno, chain,
+            (unsigned long long)bg->bg_blkno,
+            (unsigned long long)prev_bg->bg_blkno);
 
        fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
        bg_ptr = le64_to_cpu(bg->bg_next_group);
@@ -909,14 +963,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 {
        int search = -ENOSPC;
        int ret;
-       struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+       struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
        u16 tmp_off, tmp_found;
+       unsigned int max_bits, gd_cluster_off;
 
        BUG_ON(!ocfs2_is_cluster_bitmap(inode));
 
-       if (bg->bg_free_bits_count) {
+       if (gd->bg_free_bits_count) {
+               max_bits = le16_to_cpu(gd->bg_bits);
+
+               /* Tail groups in cluster bitmaps which aren't cpg
+                * aligned are prone to partial extension by a failed
+                * fs resize. If the file system resize never got to
+                * update the dinode cluster count, then we don't want
+                * to trust any clusters past it, regardless of what
+                * the group descriptor says. */
+               gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+                                                         le64_to_cpu(gd->bg_blkno));
+               if ((gd_cluster_off + max_bits) >
+                   OCFS2_I(inode)->ip_clusters) {
+                       max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+                       mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            le16_to_cpu(gd->bg_bits),
+                            OCFS2_I(inode)->ip_clusters, max_bits);
+               }
+
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
+                                                       max_bits,
                                                        &tmp_off, &tmp_found);
                if (ret)
                        return ret;
@@ -949,6 +1024,7 @@ static int ocfs2_block_group_search(struct inode *inode,
        if (bg->bg_free_bits_count)
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
+                                                       le16_to_cpu(bg->bg_bits),
                                                        bit_off, bits_found);
 
        return ret;
@@ -974,8 +1050,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
        struct ocfs2_group_desc *bg;
 
        chain = ac->ac_chain;
-       mlog(0, "trying to alloc %u bits from chain %u, inode %"MLFu64"\n",
-            bits_wanted, chain, OCFS2_I(alloc_inode)->ip_blkno);
+       mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
+            bits_wanted, chain,
+            (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
 
        status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
                                  le64_to_cpu(cl->cl_recs[chain].c_blkno),
@@ -985,9 +1062,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
        bg = (struct ocfs2_group_desc *) group_bh->b_data;
-       if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-               OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-               status = -EIO;
+       status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+       if (status) {
+               mlog_errno(status);
                goto bail;
        }
 
@@ -1015,9 +1092,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                        goto bail;
                }
                bg = (struct ocfs2_group_desc *) group_bh->b_data;
-               if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
-                       OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
-                       status = -EIO;
+               status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
+               if (status) {
+                       mlog_errno(status);
                        goto bail;
                }
        }
@@ -1027,8 +1104,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
 
-       mlog(0, "alloc succeeds: we give %u bits from block group %"MLFu64"\n",
-            tmp_bits, bg->bg_blkno);
+       mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
+            tmp_bits, (unsigned long long)bg->bg_blkno);
 
        *num_bits = tmp_bits;
 
@@ -1092,8 +1169,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
 
-       mlog(0, "Allocated %u bits from suballocator %"MLFu64"\n",
-            *num_bits, fe->i_blkno);
+       mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
+            (unsigned long long)fe->i_blkno);
 
        *bg_blkno = le64_to_cpu(bg->bg_blkno);
 bail:
@@ -1134,9 +1211,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
        }
        if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
            le32_to_cpu(fe->id1.bitmap1.i_total)) {
-               ocfs2_error(osb->sb, "Chain allocator dinode %"MLFu64" has %u"
-                           "used bits but only %u total.",
-                           le64_to_cpu(fe->i_blkno),
+               ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
+                           "bits but only %u total.",
+                           (unsigned long long)le64_to_cpu(fe->i_blkno),
                            le32_to_cpu(fe->id1.bitmap1.i_used),
                            le32_to_cpu(fe->id1.bitmap1.i_total));
                status = -EIO;
@@ -1479,10 +1556,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
        }
        BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
 
-       mlog(0, "suballocator %"MLFu64": freeing %u bits from group %"MLFu64
-            ", starting at %u\n",
-            OCFS2_I(alloc_inode)->ip_blkno, count, bg_blkno,
-            start_bit);
+       mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
+            (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
+            (unsigned long long)bg_blkno, start_bit);
 
        status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
                                  alloc_inode);
@@ -1492,9 +1568,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
        }
 
        group = (struct ocfs2_group_desc *) group_bh->b_data;
-       if (!OCFS2_IS_VALID_GROUP_DESC(group)) {
-               OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group);
-               status = -EIO;
+       status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
+       if (status) {
+               mlog_errno(status);
                goto bail;
        }
        BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
@@ -1592,10 +1668,10 @@ int ocfs2_free_clusters(struct ocfs2_journal_handle *handle,
        ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
                                     &bg_start_bit);
 
-       mlog(0, "want to free %u clusters starting at block %"MLFu64"\n",
-            num_clusters, start_blk);
-       mlog(0, "bg_blkno = %"MLFu64", bg_start_bit = %u\n",
-            bg_blkno, bg_start_bit);
+       mlog(0, "want to free %u clusters starting at block %llu\n",
+            num_clusters, (unsigned long long)start_blk);
+       mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
+            (unsigned long long)bg_blkno, bg_start_bit);
 
        status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
                                          bg_start_bit, bg_blkno,
@@ -1616,18 +1692,22 @@ static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
        printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
        printk("bg_chain:           %u\n", bg->bg_chain);
        printk("bg_generation:      %u\n", le32_to_cpu(bg->bg_generation));
-       printk("bg_next_group:      %"MLFu64"\n", bg->bg_next_group);
-       printk("bg_parent_dinode:   %"MLFu64"\n", bg->bg_parent_dinode);
-       printk("bg_blkno:           %"MLFu64"\n", bg->bg_blkno);
+       printk("bg_next_group:      %llu\n",
+              (unsigned long long)bg->bg_next_group);
+       printk("bg_parent_dinode:   %llu\n",
+              (unsigned long long)bg->bg_parent_dinode);
+       printk("bg_blkno:           %llu\n",
+              (unsigned long long)bg->bg_blkno);
 }
 
 static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
 {
        int i;
 
-       printk("Suballoc Inode %"MLFu64":\n", fe->i_blkno);
+       printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
        printk("i_signature:                  %s\n", fe->i_signature);
-       printk("i_size:                       %"MLFu64"\n", fe->i_size);
+       printk("i_size:                       %llu\n",
+              (unsigned long long)fe->i_size);
        printk("i_clusters:                   %u\n", fe->i_clusters);
        printk("i_generation:                 %u\n",
               le32_to_cpu(fe->i_generation));
@@ -1645,7 +1725,7 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
                       fe->id2.i_chain.cl_recs[i].c_free);
                printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
                       fe->id2.i_chain.cl_recs[i].c_total);
-               printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %"MLFu64"\n", i,
-                      fe->id2.i_chain.cl_recs[i].c_blkno);
+               printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
+                      (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
        }
 }