radix_tree: exceptional entries and indices
Hugh Dickins [Wed, 3 Aug 2011 23:21:18 +0000 (16:21 -0700)]
A patchset to extend tmpfs to MAX_LFS_FILESIZE by abandoning its
peculiar swap vector, instead keeping a file's swap entries in the same
radix tree as its struct page pointers: thus saving memory, and
simplifying its code and locking.

This patch:

The radix_tree is used by several subsystems for different purposes.  A
major use is to store the struct page pointers of a file's pagecache for
memory management.  But what if mm wanted to store something other than
page pointers there too?

The low bit of a radix_tree entry is already used to denote an indirect
pointer, for internal use, and the unlikely radix_tree_deref_retry()
case.

Define the next bit as denoting an exceptional entry, and supply inline
functions radix_tree_exception() to return non-0 in either unlikely
case, and radix_tree_exceptional_entry() to return non-0 in the second
case.

If a subsystem already uses radix_tree with that bit set, no problem: it
does not affect internal workings at all, but is defined for the
convenience of those storing well-aligned pointers in the radix_tree.

The radix_tree_gang_lookups have an implicit assumption that the caller
can deduce the offset of each entry returned, e.g. from the page->index
of a struct page.  But that may not be feasible for some kinds of item
to be stored there.

Make radix_tree_gang_lookup_slot() allow for an optional indices
argument: an output array in which to return those offsets.  The same
could be added to other radix_tree_gang_lookups, but for now keep it to
the only one for which we need it.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/radix-tree.h
lib/radix-tree.c
mm/filemap.c

index 23241c2..b7edf82 100644 (file)
  * when it is shrunk, before we rcu free the node. See shrink code for
  * details.
  */
-#define RADIX_TREE_INDIRECT_PTR        1
+#define RADIX_TREE_INDIRECT_PTR                1
+/*
+ * A common use of the radix tree is to store pointers to struct pages;
+ * but shmem/tmpfs needs also to store swap entries in the same tree:
+ * those are marked as exceptional entries to distinguish them.
+ * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.
+ */
+#define RADIX_TREE_EXCEPTIONAL_ENTRY   2
+#define RADIX_TREE_EXCEPTIONAL_SHIFT   2
 
 #define radix_tree_indirect_to_ptr(ptr) \
        radix_tree_indirect_to_ptr((void __force *)(ptr))
@@ -174,6 +182,28 @@ static inline int radix_tree_deref_retry(void *arg)
 }
 
 /**
+ * radix_tree_exceptional_entry        - radix_tree_deref_slot gave exceptional entry?
+ * @arg:       value returned by radix_tree_deref_slot
+ * Returns:    0 if well-aligned pointer, non-0 if exceptional entry.
+ */
+static inline int radix_tree_exceptional_entry(void *arg)
+{
+       /* Not unlikely because radix_tree_exception often tested first */
+       return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
+}
+
+/**
+ * radix_tree_exception        - radix_tree_deref_slot returned either exception?
+ * @arg:       value returned by radix_tree_deref_slot
+ * Returns:    0 if well-aligned pointer, non-0 if either kind of exception.
+ */
+static inline int radix_tree_exception(void *arg)
+{
+       return unlikely((unsigned long)arg &
+               (RADIX_TREE_INDIRECT_PTR | RADIX_TREE_EXCEPTIONAL_ENTRY));
+}
+
+/**
  * radix_tree_replace_slot     - replace item in a slot
  * @pslot:     pointer to slot, returned by radix_tree_lookup_slot
  * @item:      new item to store in the slot.
@@ -194,8 +224,8 @@ void *radix_tree_delete(struct radix_tree_root *, unsigned long);
 unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                        unsigned long first_index, unsigned int max_items);
-unsigned int
-radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
+unsigned int radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+                       void ***results, unsigned long *indices,
                        unsigned long first_index, unsigned int max_items);
 unsigned long radix_tree_next_hole(struct radix_tree_root *root,
                                unsigned long index, unsigned long max_scan);
index 7ea2e03..348eaef 100644 (file)
@@ -823,8 +823,8 @@ unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
 EXPORT_SYMBOL(radix_tree_prev_hole);
 
 static unsigned int
-__lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
-       unsigned int max_items, unsigned long *next_index)
+__lookup(struct radix_tree_node *slot, void ***results, unsigned long *indices,
+       unsigned long index, unsigned int max_items, unsigned long *next_index)
 {
        unsigned int nr_found = 0;
        unsigned int shift, height;
@@ -857,12 +857,16 @@ __lookup(struct radix_tree_node *slot, void ***results, unsigned long index,
 
        /* Bottom level: grab some items */
        for (i = index & RADIX_TREE_MAP_MASK; i < RADIX_TREE_MAP_SIZE; i++) {
-               index++;
                if (slot->slots[i]) {
-                       results[nr_found++] = &(slot->slots[i]);
-                       if (nr_found == max_items)
+                       results[nr_found] = &(slot->slots[i]);
+                       if (indices)
+                               indices[nr_found] = index;
+                       if (++nr_found == max_items) {
+                               index++;
                                goto out;
+                       }
                }
+               index++;
        }
 out:
        *next_index = index;
@@ -918,8 +922,8 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
 
                if (cur_index > max_index)
                        break;
-               slots_found = __lookup(node, (void ***)results + ret, cur_index,
-                                       max_items - ret, &next_index);
+               slots_found = __lookup(node, (void ***)results + ret, NULL,
+                               cur_index, max_items - ret, &next_index);
                nr_found = 0;
                for (i = 0; i < slots_found; i++) {
                        struct radix_tree_node *slot;
@@ -944,6 +948,7 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  *     radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
  *     @root:          radix tree root
  *     @results:       where the results of the lookup are placed
+ *     @indices:       where their indices should be placed (but usually NULL)
  *     @first_index:   start the lookup from this key
  *     @max_items:     place up to this many items at *results
  *
@@ -958,7 +963,8 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  *     protection, radix_tree_deref_slot may fail requiring a retry.
  */
 unsigned int
-radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
+radix_tree_gang_lookup_slot(struct radix_tree_root *root,
+                       void ***results, unsigned long *indices,
                        unsigned long first_index, unsigned int max_items)
 {
        unsigned long max_index;
@@ -974,6 +980,8 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
                if (first_index > 0)
                        return 0;
                results[0] = (void **)&root->rnode;
+               if (indices)
+                       indices[0] = 0;
                return 1;
        }
        node = indirect_to_ptr(node);
@@ -987,8 +995,9 @@ radix_tree_gang_lookup_slot(struct radix_tree_root *root, void ***results,
 
                if (cur_index > max_index)
                        break;
-               slots_found = __lookup(node, results + ret, cur_index,
-                                       max_items - ret, &next_index);
+               slots_found = __lookup(node, results + ret,
+                               indices ? indices + ret : NULL,
+                               cur_index, max_items - ret, &next_index);
                ret += slots_found;
                if (next_index == 0)
                        break;
index 867d402..b83aebf 100644 (file)
@@ -840,7 +840,7 @@ unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
        rcu_read_lock();
 restart:
        nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                               (void ***)pages, start, nr_pages);
+                               (void ***)pages, NULL, start, nr_pages);
        ret = 0;
        for (i = 0; i < nr_found; i++) {
                struct page *page;
@@ -903,7 +903,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
        rcu_read_lock();
 restart:
        nr_found = radix_tree_gang_lookup_slot(&mapping->page_tree,
-                               (void ***)pages, index, nr_pages);
+                               (void ***)pages, NULL, index, nr_pages);
        ret = 0;
        for (i = 0; i < nr_found; i++) {
                struct page *page;