ocfs2: Store dir index records inline
Mark Fasheh [Tue, 25 Nov 2008 01:02:08 +0000 (17:02 -0800)]
Allow us to store a small number of directory index records in the
ocfs2_dx_root_block. This saves us a disk read on small to medium sized
directories (less than about 250 entries). The inline root is automatically
turned into a root block with extents if the directory size increases beyond
it's capacity.

Signed-off-by: Mark Fasheh <mfasheh@suse.com>
Acked-by: Joel Becker <joel.becker@oracle.com>

fs/ocfs2/dir.c
fs/ocfs2/dir.h
fs/ocfs2/journal.h
fs/ocfs2/namei.c
fs/ocfs2/ocfs2_fs.h

index 0b8c88b..47de649 100644 (file)
@@ -151,6 +151,7 @@ static void ocfs2_init_dir_trailer(struct inode *inode,
 
 void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
 {
+       brelse(res->dl_dx_root_bh);
        brelse(res->dl_leaf_bh);
        brelse(res->dl_dx_leaf_bh);
 }
@@ -162,6 +163,11 @@ static int ocfs2_dir_indexed(struct inode *inode)
        return 0;
 }
 
+static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
+{
+       return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
+}
+
 /*
  * Hashing code adapted from ext3
  */
@@ -799,13 +805,18 @@ out:
  * Returns the block index, from the start of the cluster which this
  * hash belongs too.
  */
-static unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
-                                         struct ocfs2_dx_hinfo *hinfo)
+static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
+                                                  u32 minor_hash)
 {
-       u32 minor_hash = hinfo->minor_hash;
        return minor_hash & osb->osb_dx_mask;
 }
 
+static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
+                                         struct ocfs2_dx_hinfo *hinfo)
+{
+       return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
+}
+
 static int ocfs2_dx_dir_lookup(struct inode *inode,
                               struct ocfs2_extent_list *el,
                               struct ocfs2_dx_hinfo *hinfo,
@@ -855,7 +866,7 @@ out:
 
 static int ocfs2_dx_dir_search(const char *name, int namelen,
                               struct inode *dir,
-                              struct ocfs2_extent_list *dr_el,
+                              struct ocfs2_dx_root_block *dx_root,
                               struct ocfs2_dir_lookup_result *res)
 {
        int ret, i, found;
@@ -866,9 +877,18 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
        struct buffer_head *dir_ent_bh = NULL;
        struct ocfs2_dir_entry *dir_ent = NULL;
        struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
+       struct ocfs2_extent_list *dr_el;
+       struct ocfs2_dx_entry_list *entry_list;
 
        ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
 
+       if (ocfs2_dx_root_inline(dx_root)) {
+               entry_list = &dx_root->dr_entries;
+               goto search;
+       }
+
+       dr_el = &dx_root->dr_list;
+
        ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
        if (ret) {
                mlog_errno(ret);
@@ -893,12 +913,15 @@ static int ocfs2_dx_dir_search(const char *name, int namelen,
             le16_to_cpu(dx_leaf->dl_list.de_num_used),
             le16_to_cpu(dx_leaf->dl_list.de_count));
 
+       entry_list = &dx_leaf->dl_list;
+
+search:
        /*
         * Empty leaf is legal, so no need to check for that.
         */
        found = 0;
-       for (i = 0; i < le16_to_cpu(dx_leaf->dl_list.de_num_used); i++) {
-               dx_entry = &dx_leaf->dl_list.de_entries[i];
+       for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
+               dx_entry = &entry_list->de_entries[i];
 
                if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
                    || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
@@ -982,14 +1005,15 @@ static int ocfs2_find_entry_dx(const char *name, int namelen,
        }
        dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
 
-       ret = ocfs2_dx_dir_search(name, namelen, dir, &dx_root->dr_list,
-                                 lookup);
+       ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
        if (ret) {
                if (ret != -ENOENT)
                        mlog_errno(ret);
                goto out;
        }
 
+       lookup->dl_dx_root_bh = dx_root_bh;
+       dx_root_bh = NULL;
 out:
        brelse(di_bh);
        brelse(dx_root_bh);
@@ -1126,64 +1150,88 @@ bail:
        return status;
 }
 
-static void ocfs2_dx_leaf_remove_entry(struct ocfs2_dx_leaf *dx_leaf, int index)
+static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
+                                      int index)
 {
-       struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
-       int num_used = le16_to_cpu(dl_list->de_num_used);
+       int num_used = le16_to_cpu(entry_list->de_num_used);
 
        if (num_used == 1 || index == (num_used - 1))
                goto clear;
 
-       memmove(&dl_list->de_entries[index], &dl_list->de_entries[index + 1],
+       memmove(&entry_list->de_entries[index],
+               &entry_list->de_entries[index + 1],
                (num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
 clear:
        num_used--;
-       memset(&dl_list->de_entries[num_used], 0,
+       memset(&entry_list->de_entries[num_used], 0,
               sizeof(struct ocfs2_dx_entry));
-       dl_list->de_num_used = cpu_to_le16(num_used);
+       entry_list->de_num_used = cpu_to_le16(num_used);
 }
 
 static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
                                 struct ocfs2_dir_lookup_result *lookup)
 {
        int ret, index;
+       struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
        struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
        struct ocfs2_dx_leaf *dx_leaf;
        struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
+       struct ocfs2_dx_root_block *dx_root;
+       struct ocfs2_dx_entry_list *entry_list;
+
+       dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+       if (ocfs2_dx_root_inline(dx_root)) {
+               entry_list = &dx_root->dr_entries;
+       } else {
+               dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
+               entry_list = &dx_leaf->dl_list;
+       }
 
-       dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
        /* Neither of these are a disk corruption - that should have
         * been caught by lookup, before we got here. */
-       BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_count) <= 0);
-       BUG_ON(le16_to_cpu(dx_leaf->dl_list.de_num_used) <= 0);
+       BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
+       BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);
 
-       index = (char *)dx_entry - (char *)dx_leaf->dl_list.de_entries;
+       index = (char *)dx_entry - (char *)entry_list->de_entries;
        index /= sizeof(*dx_entry);
 
-       if (index >= le16_to_cpu(dx_leaf->dl_list.de_num_used)) {
+       if (index >= le16_to_cpu(entry_list->de_num_used)) {
                mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
-                    (unsigned long long)OCFS2_I(dir)->ip_blkno, index, dx_leaf,
-                    dx_entry);
+                    (unsigned long long)OCFS2_I(dir)->ip_blkno, index,
+                    entry_list, dx_entry);
                return -EIO;
        }
 
-       mlog(0, "Dir %llu: delete entry at index: %d\n",
-            (unsigned long long)OCFS2_I(dir)->ip_blkno, index);
-
        /*
-        * Add the index leaf into the journal before removing the
-        * unindexed entry. If we get an error return from
-        * __ocfs2_delete_entry(), then it hasn't removed the entry
-        * yet. Likewise, successful return means we *must* remove the
-        * indexed entry.
+        * Add the block holding our index into the journal before
+        * removing the unindexed entry. If we get an error return
+        * from __ocfs2_delete_entry(), then it hasn't removed the
+        * entry yet. Likewise, successful return means we *must*
+        * remove the indexed entry.
+        *
+        * We're also careful to journal the root tree block here if
+        * we're going to be adding to the start of the free list.
         */
-       ret = ocfs2_journal_access_dl(handle, dir, lookup->dl_dx_leaf_bh,
-                                     OCFS2_JOURNAL_ACCESS_WRITE);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
+       if (ocfs2_dx_root_inline(dx_root)) {
+               ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+                                             OCFS2_JOURNAL_ACCESS_WRITE);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+       } else {
+               ret = ocfs2_journal_access_dl(handle, dir,
+                                             lookup->dl_dx_leaf_bh,
+                                             OCFS2_JOURNAL_ACCESS_WRITE);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
        }
 
+       mlog(0, "Dir %llu: delete entry at index: %d\n",
+            (unsigned long long)OCFS2_I(dir)->ip_blkno, index);
+
        ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
                                   leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
        if (ret) {
@@ -1191,9 +1239,12 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
                goto out;
        }
 
-       ocfs2_dx_leaf_remove_entry(dx_leaf, index);
+       ocfs2_dx_list_remove_entry(entry_list, index);
 
-       ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
+       if (ocfs2_dx_root_inline(dx_root))
+               ocfs2_journal_dirty(handle, dx_root_bh);
+       else
+               ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
 
 out:
        return ret;
@@ -1290,13 +1341,30 @@ static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
        le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
 }
 
+static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
+                                      struct ocfs2_dx_hinfo *hinfo,
+                                      u64 dirent_blk)
+{
+       int i;
+       struct ocfs2_dx_entry *dx_entry;
+
+       i = le16_to_cpu(entry_list->de_num_used);
+       dx_entry = &entry_list->de_entries[i];
+
+       memset(dx_entry, 0, sizeof(*dx_entry));
+       dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
+       dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
+       dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
+
+       le16_add_cpu(&entry_list->de_num_used, 1);
+}
+
 static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
                                      struct ocfs2_dx_hinfo *hinfo,
                                      u64 dirent_blk,
                                      struct buffer_head *dx_leaf_bh)
 {
-       int ret, i;
-       struct ocfs2_dx_entry *dx_entry;
+       int ret;
        struct ocfs2_dx_leaf *dx_leaf;
 
        ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh,
@@ -1307,25 +1375,48 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
        }
 
        dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
-       i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
-       dx_entry = &dx_leaf->dl_list.de_entries[i];
+       ocfs2_dx_entry_list_insert(&dx_leaf->dl_list, hinfo, dirent_blk);
+       ocfs2_journal_dirty(handle, dx_leaf_bh);
 
-       memset(dx_entry, 0, sizeof(*dx_entry));
-       dx_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
-       dx_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);
-       dx_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
+out:
+       return ret;
+}
 
-       le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
+static int ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
+                                      struct ocfs2_dx_hinfo *hinfo,
+                                      u64 dirent_blk,
+                                      struct buffer_head *dx_root_bh)
+{
+       int ret;
+       struct ocfs2_dx_root_block *dx_root;
 
-       ocfs2_journal_dirty(handle, dx_leaf_bh);
+       ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+                                     OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+       ocfs2_dx_entry_list_insert(&dx_root->dr_entries, hinfo, dirent_blk);
+       ocfs2_journal_dirty(handle, dx_root_bh);
 
 out:
        return ret;
 }
 
-static int ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
-                                   struct ocfs2_dir_lookup_result *lookup)
+static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
+                              struct ocfs2_dir_lookup_result *lookup)
 {
+       struct ocfs2_dx_root_block *dx_root;
+
+       dx_root = (struct ocfs2_dx_root_block *)lookup->dl_dx_root_bh->b_data;
+       if (ocfs2_dx_root_inline(dx_root))
+               return ocfs2_dx_inline_root_insert(dir, handle,
+                                                  &lookup->dl_hinfo,
+                                                  lookup->dl_leaf_bh->b_blocknr,
+                                                  lookup->dl_dx_root_bh);
+
        return __ocfs2_dx_dir_leaf_insert(dir, handle, &lookup->dl_hinfo,
                                          lookup->dl_leaf_bh->b_blocknr,
                                          lookup->dl_dx_leaf_bh);
@@ -1409,11 +1500,12 @@ int __ocfs2_add_entry(handle_t *handle,
                        else {
                                status = ocfs2_journal_access_db(handle, dir,
                                                                 insert_bh,
-                                                                OCFS2_JOURNAL_ACCESS_WRITE);
+                                             OCFS2_JOURNAL_ACCESS_WRITE);
+
                                if (ocfs2_dir_indexed(dir)) {
-                                       status = ocfs2_dx_dir_leaf_insert(dir,
-                                                                       handle,
-                                                                       lookup);
+                                       status = ocfs2_dx_dir_insert(dir,
+                                                               handle,
+                                                               lookup);
                                        if (status) {
                                                mlog_errno(status);
                                                goto bail;
@@ -2019,6 +2111,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
                                     handle_t *handle, struct inode *dir,
                                     struct buffer_head *di_bh,
                                     struct ocfs2_alloc_context *meta_ac,
+                                    int dx_inline,
                                     struct buffer_head **ret_dx_root_bh)
 {
        int ret;
@@ -2062,8 +2155,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
        dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
        dx_root->dr_blkno = cpu_to_le64(dr_blkno);
        dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
-       dx_root->dr_list.l_count =
-               cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
+
+       if (dx_inline) {
+               dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
+               dx_root->dr_entries.de_count =
+                       cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
+       } else {
+               dx_root->dr_list.l_count =
+                       cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
+       }
 
        ret = ocfs2_journal_dirty(handle, dx_root_bh);
        if (ret)
@@ -2236,20 +2336,12 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
                                 struct ocfs2_alloc_context *data_ac,
                                 struct ocfs2_alloc_context *meta_ac)
 {
-       int ret, num_dx_leaves, i;
+       int ret;
        struct buffer_head *leaf_bh = NULL;
        struct buffer_head *dx_root_bh = NULL;
-       struct buffer_head **dx_leaves = NULL;
-       struct ocfs2_extent_tree et;
        struct ocfs2_dx_hinfo hinfo;
-       u64 insert_blkno;
-
-       dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
-       if (!dx_leaves) {
-               ret = -ENOMEM;
-               mlog_errno(ret);
-               goto out;
-       }
+       struct ocfs2_dx_root_block *dx_root;
+       struct ocfs2_dx_entry_list *entry_list;
 
        /*
         * Our strategy is to create the directory as though it were
@@ -2258,7 +2350,8 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
         * very well known quantity.
         *
         * Essentially, we have two dirents ("." and ".."), in the 1st
-        * block which need indexing.
+        * block which need indexing. These are easily inserted into
+        * the index block.
         */
 
        ret = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
@@ -2268,61 +2361,22 @@ static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
                goto out;
        }
 
-       /*
-        * Allocate and format the index leaf first, before attaching
-        * the index root. That way we're sure that the main bitmap
-        * won't -enospc on us with a half-created dir index.
-        *
-        * The meta data allocation for our index block will not
-        * -enospc on us unless there is a disk corruption.
-        */
-
-       ret = __ocfs2_dx_dir_new_cluster(inode, 0, handle, data_ac, dx_leaves,
-                                        num_dx_leaves, &insert_blkno);
+       ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh,
+                                       meta_ac, 1, &dx_root_bh);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }
+       dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+       entry_list = &dx_root->dr_entries;
 
-       ocfs2_dx_dir_name_hash(inode, ".", 1, &hinfo);
-       i = ocfs2_dx_dir_hash_idx(osb, &hinfo);
-       ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo,
-                                        leaf_bh->b_blocknr, dx_leaves[i]);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
-       }
+       /* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
+       ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
 
        ocfs2_dx_dir_name_hash(inode, "..", 2, &hinfo);
-       i = ocfs2_dx_dir_hash_idx(osb, &hinfo);
-       ret = __ocfs2_dx_dir_leaf_insert(inode, handle, &hinfo,
-                                        leaf_bh->b_blocknr, dx_leaves[i]);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
-       }
-
-       ret = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh, meta_ac,
-                                       &dx_root_bh);
-       if (ret) {
-               mlog_errno(ret);
-               goto out;
-       }
-
-       /* This should never fail considering we start with an empty
-        * dx_root. */
-       ocfs2_init_dx_root_extent_tree(&et, inode, dx_root_bh);
-       ret = ocfs2_insert_extent(osb, handle, inode, &et, 0,
-                                 insert_blkno, 1, 0, NULL);
-       if (ret)
-               mlog_errno(ret);
+       ocfs2_dx_entry_list_insert(entry_list, &hinfo, leaf_bh->b_blocknr);
 
 out:
-       if (dx_leaves) {
-               for (i = 0; i < num_dx_leaves; i++)
-                       brelse(dx_leaves[i]);
-               kfree(dx_leaves);
-       }
        brelse(dx_root_bh);
        brelse(leaf_bh);
        return ret;
@@ -2392,6 +2446,74 @@ inc:
 out:
        return ret;
 }
+ /*
+ * XXX: This expects dx_root_bh to already be part of the transaction.
+ */
+static void ocfs2_dx_dir_index_root_block(struct inode *dir,
+                                        struct buffer_head *dx_root_bh,
+                                        struct buffer_head *dirent_bh)
+{
+       char *de_buf, *limit;
+       struct ocfs2_dx_root_block *dx_root;
+       struct ocfs2_dir_entry *de;
+       struct ocfs2_dx_hinfo hinfo;
+       u64 dirent_blk = dirent_bh->b_blocknr;
+
+       dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+
+       de_buf = dirent_bh->b_data;
+       limit = de_buf + dir->i_sb->s_blocksize;
+
+       while (de_buf < limit) {
+               de = (struct ocfs2_dir_entry *)de_buf;
+
+               if (!de->name_len || !de->inode)
+                       goto inc;
+
+               ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
+
+               mlog(0,
+                    "dir: %llu, major: 0x%x minor: 0x%x, index: %u, name: %.*s\n",
+                    (unsigned long long)dir->i_ino, hinfo.major_hash,
+                    hinfo.minor_hash,
+                    le16_to_cpu(dx_root->dr_entries.de_num_used),
+                    de->name_len, de->name);
+
+               ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
+                                          dirent_blk);
+inc:
+               de_buf += le16_to_cpu(de->rec_len);
+       }
+}
+
+/*
+ * Count the number of inline directory entries in di_bh and compare
+ * them against the number of entries we can hold in an inline dx root
+ * block.
+ */
+static int ocfs2_new_dx_should_be_inline(struct inode *dir,
+                                        struct buffer_head *di_bh)
+{
+       int dirent_count = 0;
+       char *de_buf, *limit;
+       struct ocfs2_dir_entry *de;
+       struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
+
+       de_buf = di->id2.i_data.id_data;
+       limit = de_buf + i_size_read(dir);
+
+       while (de_buf < limit) {
+               de = (struct ocfs2_dir_entry *)de_buf;
+
+               if (de->name_len && de->inode)
+                       dirent_count++;
+
+               de_buf += le16_to_cpu(de->rec_len);
+       }
+
+       /* We are careful to leave room for one extra record. */
+       return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
+}
 
 /*
  * Expand rec_len of the rightmost dirent in a directory block so that it
@@ -2442,7 +2564,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 {
        u32 alloc, dx_alloc, bit_off, len;
        struct super_block *sb = dir->i_sb;
-       int ret, i, num_dx_leaves = 0,
+       int ret, i, num_dx_leaves = 0, dx_inline = 0,
                credits = ocfs2_inline_to_extents_credits(sb);
        u64 dx_insert_blkno, blkno,
                bytes = blocks_wanted << sb->s_blocksize_bits;
@@ -2465,15 +2587,19 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
        dx_alloc = 0;
 
        if (ocfs2_supports_indexed_dirs(osb)) {
-               /* Add one more cluster for an index leaf */
-               dx_alloc++;
                credits += ocfs2_add_dir_index_credits(sb);
 
-               dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb, &num_dx_leaves);
-               if (!dx_leaves) {
-                       ret = -ENOMEM;
-                       mlog_errno(ret);
-                       goto out;
+               dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
+               if (!dx_inline) {
+                       /* Add one more cluster for an index leaf */
+                       dx_alloc++;
+                       dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
+                                                               &num_dx_leaves);
+                       if (!dx_leaves) {
+                               ret = -ENOMEM;
+                               mlog_errno(ret);
+                               goto out;
+                       }
                }
 
                /* This gets us the dx_root */
@@ -2524,7 +2650,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
        }
        did_quota = 1;
 
-       if (ocfs2_supports_indexed_dirs(osb)) {
+       if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
                /*
                 * Allocate our index cluster first, to maximize the
                 * possibility that unindexed leaves grow
@@ -2587,7 +2713,12 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
                goto out_commit;
        }
 
-       if (ocfs2_supports_indexed_dirs(osb)) {
+       if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
+               /*
+                * Dx dirs with an external cluster need to do this up
+                * front. Inline dx root's get handled later, after
+                * we've allocated our root block.
+                */
                ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
                                               num_dx_leaves, dirdata_bh);
                if (ret) {
@@ -2650,17 +2781,23 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
 
        if (ocfs2_supports_indexed_dirs(osb)) {
                ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
-                                               meta_ac, &dx_root_bh);
+                                               meta_ac, dx_inline,
+                                               &dx_root_bh);
                if (ret) {
                        mlog_errno(ret);
                        goto out_commit;
                }
 
-               ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
-               ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
-                                         dx_insert_blkno, 1, 0, NULL);
-               if (ret)
-                       mlog_errno(ret);
+               if (dx_inline) {
+                       ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
+                                                     dirdata_bh);
+               } else {
+                       ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh);
+                       ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0,
+                                                 dx_insert_blkno, 1, 0, NULL);
+                       if (ret)
+                               mlog_errno(ret);
+               }
        }
 
        /*
@@ -2690,14 +2827,18 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
        if (ocfs2_supports_indexed_dirs(osb)) {
                unsigned int off;
 
-               /*
-                * We need to return the correct block within the
-                * cluster which should hold our entry.
-                */
-               off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb),
-                                           &lookup->dl_hinfo);
-               get_bh(dx_leaves[off]);
-               lookup->dl_dx_leaf_bh = dx_leaves[off];
+               if (!dx_inline) {
+                       /*
+                        * We need to return the correct block within the
+                        * cluster which should hold our entry.
+                        */
+                       off = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb),
+                                                   &lookup->dl_hinfo);
+                       get_bh(dx_leaves[off]);
+                       lookup->dl_dx_leaf_bh = dx_leaves[off];
+               }
+               lookup->dl_dx_root_bh = dx_root_bh;
+               dx_root_bh = NULL;
        }
 
 out_commit:
@@ -3506,6 +3647,138 @@ out:
        return ret;
 }
 
+static int ocfs2_expand_inline_dx_root(struct inode *dir,
+                                      struct buffer_head *dx_root_bh)
+{
+       int ret, num_dx_leaves, i, j, did_quota = 0;
+       struct buffer_head **dx_leaves = NULL;
+       struct ocfs2_extent_tree et;
+       u64 insert_blkno;
+       struct ocfs2_alloc_context *data_ac = NULL;
+       struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
+       handle_t *handle = NULL;
+       struct ocfs2_dx_root_block *dx_root;
+       struct ocfs2_dx_entry_list *entry_list;
+       struct ocfs2_dx_entry *dx_entry;
+       struct ocfs2_dx_leaf *target_leaf;
+
+       ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+
+       dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
+       if (!dx_leaves) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
+
+       handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               mlog_errno(ret);
+               goto out;
+       }
+
+       if (vfs_dq_alloc_space_nodirty(dir,
+                                      ocfs2_clusters_to_bytes(osb->sb, 1))) {
+               ret = -EDQUOT;
+               goto out_commit;
+       }
+       did_quota = 1;
+
+       /*
+        * We do this up front, before the allocation, so that a
+        * failure to add the dx_root_bh to the journal won't result
+        * us losing clusters.
+        */
+       ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh,
+                                     OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret) {
+               mlog_errno(ret);
+               goto out_commit;
+       }
+
+       ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
+                                        num_dx_leaves, &insert_blkno);
+       if (ret) {
+               mlog_errno(ret);
+               goto out_commit;
+       }
+
+       /*
+        * Transfer the entries from our dx_root into the appropriate
+        * block
+        */
+       dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
+       entry_list = &dx_root->dr_entries;
+
+       for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
+               dx_entry = &entry_list->de_entries[i];
+
+               j = __ocfs2_dx_dir_hash_idx(osb,
+                                           le32_to_cpu(dx_entry->dx_minor_hash));
+               target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;
+
+               ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);
+
+               /* Each leaf has been passed to the journal already
+                * via __ocfs2_dx_dir_new_cluster() */
+       }
+
+       dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
+       memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
+              offsetof(struct ocfs2_dx_root_block, dr_list));
+       dx_root->dr_list.l_count =
+               cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
+
+       /* This should never fail considering we start with an empty
+        * dx_root. */
+       ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+       ret = ocfs2_insert_extent(osb, handle, dir, &et, 0,
+                                 insert_blkno, 1, 0, NULL);
+       if (ret)
+               mlog_errno(ret);
+       did_quota = 0;
+
+       ocfs2_journal_dirty(handle, dx_root_bh);
+
+out_commit:
+       if (ret < 0 && did_quota)
+               vfs_dq_free_space_nodirty(dir,
+                                         ocfs2_clusters_to_bytes(dir->i_sb, 1));
+
+       ocfs2_commit_trans(osb, handle);
+
+out:
+       if (data_ac)
+               ocfs2_free_alloc_context(data_ac);
+
+       if (dx_leaves) {
+               for (i = 0; i < num_dx_leaves; i++)
+                       brelse(dx_leaves[i]);
+               kfree(dx_leaves);
+       }
+       return ret;
+}
+
+static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
+{
+       struct ocfs2_dx_root_block *dx_root;
+       struct ocfs2_dx_entry_list *entry_list;
+
+       dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
+       entry_list = &dx_root->dr_entries;
+
+       if (le16_to_cpu(entry_list->de_num_used) >=
+           le16_to_cpu(entry_list->de_count))
+               return -ENOSPC;
+
+       return 0;
+}
+
 static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
                                   struct buffer_head *di_bh, const char *name,
                                   int namelen,
@@ -3527,6 +3800,23 @@ static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
        }
 
        dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+       if (ocfs2_dx_root_inline(dx_root)) {
+               ret = ocfs2_inline_dx_has_space(dx_root_bh);
+
+               if (ret == 0)
+                       goto search_el;
+
+               /*
+                * We ran out of room in the root block. Expand it to
+                * an extent, then allow ocfs2_find_dir_space_dx to do
+                * the rest.
+                */
+               ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+       }
 
 restart_search:
        ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
@@ -3578,8 +3868,11 @@ restart_search:
                goto restart_search;
        }
 
+search_el:
        lookup->dl_dx_leaf_bh = dx_leaf_bh;
        dx_leaf_bh = NULL;
+       lookup->dl_dx_root_bh = dx_root_bh;
+       dx_root_bh = NULL;
 
 out:
        brelse(dx_leaf_bh);
@@ -3774,10 +4067,12 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
                mlog_errno(ret);
                goto out;
        }
+       dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
 
-       ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
+       if (ocfs2_dx_root_inline(dx_root))
+               goto remove_index;
 
-       dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
+       ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh);
 
        /* XXX: What if dr_clusters is too large? */
        while (le32_to_cpu(dx_root->dr_clusters)) {
@@ -3803,6 +4098,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
                major_hash = cpos - 1;
        }
 
+remove_index:
        ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
        if (ret) {
                mlog_errno(ret);
index d273aae..07b0416 100644 (file)
@@ -37,6 +37,8 @@ struct ocfs2_dir_lookup_result {
        struct ocfs2_dir_entry          *dl_entry;      /* Target dirent in
                                                         * unindexed leaf */
 
+       struct buffer_head              *dl_dx_root_bh; /* Root of indexed
+                                                        * tree */
        struct buffer_head              *dl_dx_leaf_bh; /* Indexed leaf block */
        struct ocfs2_dx_entry           *dl_dx_entry;   /* Target dx_entry in
                                                         * indexed leaf */
index 4939c04..5585dde 100644 (file)
@@ -458,6 +458,16 @@ static inline int ocfs2_rename_credits(struct super_block *sb)
 #define OCFS2_DX_ROOT_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS +     \
                                      OCFS2_SUBALLOC_FREE)
 
+static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
+{
+       int credits = 1 + OCFS2_SUBALLOC_ALLOC;
+
+       credits += ocfs2_clusters_to_blocks(sb, 1);
+       credits += ocfs2_quota_trans_credits(sb);
+
+       return credits;
+}
+
 /*
  * Please note that the caller must make sure that root_el is the root
  * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
index 0c55071..f911edc 100644 (file)
@@ -321,10 +321,8 @@ static int ocfs2_mknod(struct inode *dir,
                want_clusters += 1;
 
                /* Dir indexing requires extra space as well */
-               if (ocfs2_supports_indexed_dirs(osb)) {
-                       want_clusters++;
+               if (ocfs2_supports_indexed_dirs(osb))
                        want_meta++;
-               }
        }
 
        status = ocfs2_reserve_new_metadata_blocks(osb, want_meta, &meta_ac);
index 036eb03..1d1c54e 100644 (file)
@@ -815,6 +815,8 @@ struct ocfs2_dx_entry_list {
                                                         * length de_num_used */
 };
 
+#define OCFS2_DX_FLAG_INLINE   0x01
+
 /*
  * A directory indexing block. Each indexed directory has one of these,
  * pointed to by ocfs2_dinode.
@@ -835,13 +837,21 @@ struct ocfs2_dx_root_block {
                                                 * extent block */
        __le32          dr_clusters;            /* Clusters allocated
                                                 * to the indexed tree. */
-       __le32          dr_reserved1;
+       __u8            dr_flags;               /* OCFS2_DX_FLAG_* flags */
+       __u8            dr_reserved0;
+       __le16          dr_reserved1;
        __le64          dr_dir_blkno;           /* Pointer to parent inode */
        __le64          dr_reserved2;
        __le64          dr_reserved3[16];
-       struct ocfs2_extent_list        dr_list; /* Keep this aligned to 128
-                                                 * bits for maximum space
-                                                 * efficiency. */
+       union {
+               struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
+                                                  * bits for maximum space
+                                                  * efficiency. */
+               struct ocfs2_dx_entry_list dr_entries; /* In-root-block list of
+                                                       * entries. We grow out
+                                                       * to extents if this
+                                                       * gets too big. */
+       };
 };
 
 /*
@@ -1228,6 +1238,16 @@ static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
        return size / sizeof(struct ocfs2_dx_entry);
 }
 
+static inline int ocfs2_dx_entries_per_root(struct super_block *sb)
+{
+       int size;
+
+       size = sb->s_blocksize -
+               offsetof(struct ocfs2_dx_root_block, dr_entries.de_entries);
+
+       return size / sizeof(struct ocfs2_dx_entry);
+}
+
 static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
 {
        u16 size;