1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h>
7#include <linux/page-flags.h>
8#include <linux/module.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include <linux/swap.h>
12#include <linux/version.h>
13#include <linux/writeback.h>
14#include <linux/pagevec.h>
15#include "extent_io.h"
16#include "extent_map.h"
17#include "compat.h"
18
19/* temporary define until extent_map moves out of btrfs */
20struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
21 unsigned long extra_flags,
22 void (*ctor)(void *, struct kmem_cache *,
23 unsigned long));
24
25static struct kmem_cache *extent_state_cache;
26static struct kmem_cache *extent_buffer_cache;
27
28static LIST_HEAD(buffers);
29static LIST_HEAD(states);
30static DEFINE_SPINLOCK(leak_lock);
Chris Masond1310b22008-01-24 16:13:08 -050031
Chris Masond1310b22008-01-24 16:13:08 -050032#define BUFFER_LRU_MAX 64
33
34struct tree_entry {
35 u64 start;
36 u64 end;
Chris Masond1310b22008-01-24 16:13:08 -050037 struct rb_node rb_node;
38};
39
40struct extent_page_data {
41 struct bio *bio;
42 struct extent_io_tree *tree;
43 get_extent_t *get_extent;
44};
45
46int __init extent_io_init(void)
47{
48 extent_state_cache = btrfs_cache_create("extent_state",
49 sizeof(struct extent_state), 0,
50 NULL);
51 if (!extent_state_cache)
52 return -ENOMEM;
53
54 extent_buffer_cache = btrfs_cache_create("extent_buffers",
55 sizeof(struct extent_buffer), 0,
56 NULL);
57 if (!extent_buffer_cache)
58 goto free_state_cache;
59 return 0;
60
61free_state_cache:
62 kmem_cache_destroy(extent_state_cache);
63 return -ENOMEM;
64}
65
66void extent_io_exit(void)
67{
68 struct extent_state *state;
69 struct extent_buffer *eb;
70
71 while (!list_empty(&states)) {
72 state = list_entry(states.next, struct extent_state, leak_list);
73 printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
74 list_del(&state->leak_list);
75 kmem_cache_free(extent_state_cache, state);
76
77 }
78
79 while (!list_empty(&buffers)) {
80 eb = list_entry(buffers.next, struct extent_buffer, leak_list);
81 printk("buffer leak start %Lu len %lu refs %d\n", eb->start, eb->len, atomic_read(&eb->refs));
82 list_del(&eb->leak_list);
83 kmem_cache_free(extent_buffer_cache, eb);
84 }
Chris Masond1310b22008-01-24 16:13:08 -050085 if (extent_state_cache)
86 kmem_cache_destroy(extent_state_cache);
87 if (extent_buffer_cache)
88 kmem_cache_destroy(extent_buffer_cache);
89}
90
91void extent_io_tree_init(struct extent_io_tree *tree,
92 struct address_space *mapping, gfp_t mask)
93{
94 tree->state.rb_node = NULL;
Chris Mason6af118c2008-07-22 11:18:07 -040095 tree->buffer.rb_node = NULL;
Chris Masond1310b22008-01-24 16:13:08 -050096 tree->ops = NULL;
97 tree->dirty_bytes = 0;
Chris Mason70dec802008-01-29 09:59:12 -050098 spin_lock_init(&tree->lock);
Chris Mason6af118c2008-07-22 11:18:07 -040099 spin_lock_init(&tree->buffer_lock);
Chris Masond1310b22008-01-24 16:13:08 -0500100 tree->mapping = mapping;
Chris Masond1310b22008-01-24 16:13:08 -0500101}
102EXPORT_SYMBOL(extent_io_tree_init);
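/*
 * Illustrative sketch, not part of the original file: a typical caller
 * embeds an extent_io_tree in its own per-inode structure and initializes
 * it against that inode's page cache mapping.  The surrounding structure
 * name and GFP mask below are assumptions made only for the example:
 *
 *	struct my_inode_info {
 *		struct extent_io_tree io_tree;
 *	};
 *
 *	extent_io_tree_init(&info->io_tree, inode->i_mapping, GFP_NOFS);
 */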
103
Chris Masond1310b22008-01-24 16:13:08 -0500104struct extent_state *alloc_extent_state(gfp_t mask)
105{
106 struct extent_state *state;
Chris Mason2d2ae542008-03-26 16:24:23 -0400107 unsigned long flags;
Chris Masond1310b22008-01-24 16:13:08 -0500108
109 state = kmem_cache_alloc(extent_state_cache, mask);
Peter2b114d12008-04-01 11:21:40 -0400110 if (!state)
Chris Masond1310b22008-01-24 16:13:08 -0500111 return state;
112 state->state = 0;
Chris Masond1310b22008-01-24 16:13:08 -0500113 state->private = 0;
Chris Mason70dec802008-01-29 09:59:12 -0500114 state->tree = NULL;
Chris Mason2d2ae542008-03-26 16:24:23 -0400115 spin_lock_irqsave(&leak_lock, flags);
116 list_add(&state->leak_list, &states);
117 spin_unlock_irqrestore(&leak_lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500118
119 atomic_set(&state->refs, 1);
120 init_waitqueue_head(&state->wq);
121 return state;
122}
123EXPORT_SYMBOL(alloc_extent_state);
124
125void free_extent_state(struct extent_state *state)
126{
Chris Masond1310b22008-01-24 16:13:08 -0500127 if (!state)
128 return;
129 if (atomic_dec_and_test(&state->refs)) {
Chris Mason2d2ae542008-03-26 16:24:23 -0400130 unsigned long flags;
Chris Mason70dec802008-01-29 09:59:12 -0500131 WARN_ON(state->tree);
Chris Mason2d2ae542008-03-26 16:24:23 -0400132 spin_lock_irqsave(&leak_lock, flags);
133 list_del(&state->leak_list);
134 spin_unlock_irqrestore(&leak_lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500135 kmem_cache_free(extent_state_cache, state);
136 }
137}
138EXPORT_SYMBOL(free_extent_state);
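/*
 * Illustrative sketch, not part of the original file: extent_state objects
 * are reference counted, so a caller that needs one outside the tree lock
 * takes a reference first and drops it with free_extent_state() when done
 * (this is the pattern wait_extent_bit() below uses):
 *
 *	atomic_inc(&state->refs);
 *	... use the state after dropping the tree lock ...
 *	free_extent_state(state);	// frees on the last put
 */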
139
140static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
141 struct rb_node *node)
142{
143 struct rb_node ** p = &root->rb_node;
144 struct rb_node * parent = NULL;
145 struct tree_entry *entry;
146
147 while(*p) {
148 parent = *p;
149 entry = rb_entry(parent, struct tree_entry, rb_node);
150
151 if (offset < entry->start)
152 p = &(*p)->rb_left;
153 else if (offset > entry->end)
154 p = &(*p)->rb_right;
155 else
156 return parent;
157 }
158
159 entry = rb_entry(node, struct tree_entry, rb_node);
Chris Masond1310b22008-01-24 16:13:08 -0500160 rb_link_node(node, parent, p);
161 rb_insert_color(node, root);
162 return NULL;
163}
164
Chris Mason80ea96b2008-02-01 14:51:59 -0500165static struct rb_node *__etree_search(struct extent_io_tree *tree, u64 offset,
Chris Masond1310b22008-01-24 16:13:08 -0500166 struct rb_node **prev_ret,
167 struct rb_node **next_ret)
168{
Chris Mason80ea96b2008-02-01 14:51:59 -0500169 struct rb_root *root = &tree->state;
Chris Masond1310b22008-01-24 16:13:08 -0500170 struct rb_node * n = root->rb_node;
171 struct rb_node *prev = NULL;
172 struct rb_node *orig_prev = NULL;
173 struct tree_entry *entry;
174 struct tree_entry *prev_entry = NULL;
175
176 while(n) {
177 entry = rb_entry(n, struct tree_entry, rb_node);
178 prev = n;
179 prev_entry = entry;
180
181 if (offset < entry->start)
182 n = n->rb_left;
183 else if (offset > entry->end)
184 n = n->rb_right;
Chris Mason80ea96b2008-02-01 14:51:59 -0500185 else {
Chris Masond1310b22008-01-24 16:13:08 -0500186 return n;
Chris Mason80ea96b2008-02-01 14:51:59 -0500187 }
Chris Masond1310b22008-01-24 16:13:08 -0500188 }
189
190 if (prev_ret) {
191 orig_prev = prev;
192 while(prev && offset > prev_entry->end) {
193 prev = rb_next(prev);
194 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
195 }
196 *prev_ret = prev;
197 prev = orig_prev;
198 }
199
200 if (next_ret) {
201 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
202 while(prev && offset < prev_entry->start) {
203 prev = rb_prev(prev);
204 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
205 }
206 *next_ret = prev;
207 }
208 return NULL;
209}
210
Chris Mason80ea96b2008-02-01 14:51:59 -0500211static inline struct rb_node *tree_search(struct extent_io_tree *tree,
212 u64 offset)
Chris Masond1310b22008-01-24 16:13:08 -0500213{
Chris Mason70dec802008-01-29 09:59:12 -0500214 struct rb_node *prev = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500215 struct rb_node *ret;
Chris Mason70dec802008-01-29 09:59:12 -0500216
Chris Mason80ea96b2008-02-01 14:51:59 -0500217 ret = __etree_search(tree, offset, &prev, NULL);
218 if (!ret) {
Chris Masond1310b22008-01-24 16:13:08 -0500219 return prev;
Chris Mason80ea96b2008-02-01 14:51:59 -0500220 }
Chris Masond1310b22008-01-24 16:13:08 -0500221 return ret;
222}
223
Chris Mason6af118c2008-07-22 11:18:07 -0400224static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
225 u64 offset, struct rb_node *node)
226{
227 struct rb_root *root = &tree->buffer;
228 struct rb_node ** p = &root->rb_node;
229 struct rb_node * parent = NULL;
230 struct extent_buffer *eb;
231
232 while(*p) {
233 parent = *p;
234 eb = rb_entry(parent, struct extent_buffer, rb_node);
235
236 if (offset < eb->start)
237 p = &(*p)->rb_left;
238 else if (offset > eb->start)
239 p = &(*p)->rb_right;
240 else
241 return eb;
242 }
243
244 rb_link_node(node, parent, p);
245 rb_insert_color(node, root);
246 return NULL;
247}
248
249static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
250 u64 offset)
251{
252 struct rb_root *root = &tree->buffer;
253 struct rb_node * n = root->rb_node;
254 struct extent_buffer *eb;
255
256 while(n) {
257 eb = rb_entry(n, struct extent_buffer, rb_node);
258 if (offset < eb->start)
259 n = n->rb_left;
260 else if (offset > eb->start)
261 n = n->rb_right;
262 else
263 return eb;
264 }
265 return NULL;
266}
267
Chris Masond1310b22008-01-24 16:13:08 -0500268/*
269 * utility function to look for merge candidates inside a given range.
270 * Any extents with matching state are merged together into a single
271 * extent in the tree. Extents with EXTENT_IOBITS set in their state field
272 * are not merged because the end_io handlers need to be able to do
273 * operations on them without sleeping (or doing allocations/splits).
274 *
275 * This should be called with the tree lock held.
276 */
277static int merge_state(struct extent_io_tree *tree,
278 struct extent_state *state)
279{
280 struct extent_state *other;
281 struct rb_node *other_node;
282
283 if (state->state & EXTENT_IOBITS)
284 return 0;
285
286 other_node = rb_prev(&state->rb_node);
287 if (other_node) {
288 other = rb_entry(other_node, struct extent_state, rb_node);
289 if (other->end == state->start - 1 &&
290 other->state == state->state) {
291 state->start = other->start;
Chris Mason70dec802008-01-29 09:59:12 -0500292 other->tree = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500293 rb_erase(&other->rb_node, &tree->state);
294 free_extent_state(other);
295 }
296 }
297 other_node = rb_next(&state->rb_node);
298 if (other_node) {
299 other = rb_entry(other_node, struct extent_state, rb_node);
300 if (other->start == state->end + 1 &&
301 other->state == state->state) {
302 other->start = state->start;
Chris Mason70dec802008-01-29 09:59:12 -0500303 state->tree = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500304 rb_erase(&state->rb_node, &tree->state);
305 free_extent_state(state);
306 }
307 }
308 return 0;
309}
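/*
 * Illustrative sketch, not part of the original file: merging means that
 * setting the same bit on two back-to-back ranges leaves a single record
 * in the tree.  Assuming a 4K granularity for the example:
 *
 *	set_extent_dirty(tree, 0, 4095, GFP_NOFS);
 *	set_extent_dirty(tree, 4096, 8191, GFP_NOFS);
 *	// the tree now holds one EXTENT_DIRTY state covering [0, 8191]
 */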
310
Chris Mason291d6732008-01-29 15:55:23 -0500311static void set_state_cb(struct extent_io_tree *tree,
312 struct extent_state *state,
313 unsigned long bits)
314{
315 if (tree->ops && tree->ops->set_bit_hook) {
316 tree->ops->set_bit_hook(tree->mapping->host, state->start,
Chris Masonb0c68f82008-01-31 11:05:37 -0500317 state->end, state->state, bits);
Chris Mason291d6732008-01-29 15:55:23 -0500318 }
319}
320
321static void clear_state_cb(struct extent_io_tree *tree,
322 struct extent_state *state,
323 unsigned long bits)
324{
325 if (tree->ops && tree->ops->clear_bit_hook) {
326 tree->ops->clear_bit_hook(tree->mapping->host, state->start,
Chris Masonb0c68f82008-01-31 11:05:37 -0500327 state->end, state->state, bits);
Chris Mason291d6732008-01-29 15:55:23 -0500328 }
329}
330
Chris Masond1310b22008-01-24 16:13:08 -0500331/*
332 * insert an extent_state struct into the tree. 'bits' are set on the
333 * struct before it is inserted.
334 *
335 * This may return -EEXIST if the extent is already there, in which case the
336 * state struct is freed.
337 *
338 * The tree lock is not taken internally. This is a utility function and
339 * probably isn't what you want to call (see set/clear_extent_bit).
340 */
341static int insert_state(struct extent_io_tree *tree,
342 struct extent_state *state, u64 start, u64 end,
343 int bits)
344{
345 struct rb_node *node;
346
347 if (end < start) {
348 printk("end < start %Lu %Lu\n", end, start);
349 WARN_ON(1);
350 }
351 if (bits & EXTENT_DIRTY)
352 tree->dirty_bytes += end - start + 1;
Chris Masonb0c68f82008-01-31 11:05:37 -0500353 set_state_cb(tree, state, bits);
Chris Masond1310b22008-01-24 16:13:08 -0500354 state->state |= bits;
355 state->start = start;
356 state->end = end;
357 node = tree_insert(&tree->state, end, &state->rb_node);
358 if (node) {
359 struct extent_state *found;
360 found = rb_entry(node, struct extent_state, rb_node);
361 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
362 free_extent_state(state);
363 return -EEXIST;
364 }
Chris Mason70dec802008-01-29 09:59:12 -0500365 state->tree = tree;
Chris Masond1310b22008-01-24 16:13:08 -0500366 merge_state(tree, state);
367 return 0;
368}
369
370/*
371 * split a given extent state struct in two, inserting the preallocated
372 * struct 'prealloc' as the newly created second half. 'split' indicates an
373 * offset inside 'orig' where it should be split.
374 *
375 * Before calling,
376 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
377 * are two extent state structs in the tree:
378 * prealloc: [orig->start, split - 1]
379 * orig: [ split, orig->end ]
380 *
381 * The tree locks are not taken by this function. They need to be held
382 * by the caller.
383 */
384static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
385 struct extent_state *prealloc, u64 split)
386{
387 struct rb_node *node;
388 prealloc->start = orig->start;
389 prealloc->end = split - 1;
390 prealloc->state = orig->state;
391 orig->start = split;
392
393 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
394 if (node) {
395 struct extent_state *found;
396 found = rb_entry(node, struct extent_state, rb_node);
397 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
398 free_extent_state(prealloc);
399 return -EEXIST;
400 }
Chris Mason70dec802008-01-29 09:59:12 -0500401 prealloc->tree = tree;
Chris Masond1310b22008-01-24 16:13:08 -0500402 return 0;
403}
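/*
 * Illustrative sketch, not part of the original file: given a state
 * covering [0, 8191], splitting at 4096 leaves the preallocated struct
 * covering the front of the range and the original covering the back,
 * both with the same bits.  'prealloc' is allocated up front because this
 * runs with the tree spinlock held:
 *
 *	err = split_state(tree, orig, prealloc, 4096);
 *	// prealloc now spans [0, 4095], orig spans [4096, 8191]
 */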
404
405/*
406 * utility function to clear some bits in an extent state struct.
407 * it will optionally wake up anyone waiting on this state (wake == 1), or
408 * forcibly remove the state from the tree (delete == 1).
409 *
410 * If no bits are set on the state struct after clearing things, the
411 * struct is freed and removed from the tree
412 */
413static int clear_state_bit(struct extent_io_tree *tree,
414 struct extent_state *state, int bits, int wake,
415 int delete)
416{
417 int ret = state->state & bits;
418
419 if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
420 u64 range = state->end - state->start + 1;
421 WARN_ON(range > tree->dirty_bytes);
422 tree->dirty_bytes -= range;
423 }
Chris Mason291d6732008-01-29 15:55:23 -0500424 clear_state_cb(tree, state, bits);
Chris Masonb0c68f82008-01-31 11:05:37 -0500425 state->state &= ~bits;
Chris Masond1310b22008-01-24 16:13:08 -0500426 if (wake)
427 wake_up(&state->wq);
428 if (delete || state->state == 0) {
Chris Mason70dec802008-01-29 09:59:12 -0500429 if (state->tree) {
Chris Masonae9d1282008-02-01 15:42:15 -0500430 clear_state_cb(tree, state, state->state);
Chris Masond1310b22008-01-24 16:13:08 -0500431 rb_erase(&state->rb_node, &tree->state);
Chris Mason70dec802008-01-29 09:59:12 -0500432 state->tree = NULL;
Chris Masond1310b22008-01-24 16:13:08 -0500433 free_extent_state(state);
434 } else {
435 WARN_ON(1);
436 }
437 } else {
438 merge_state(tree, state);
439 }
440 return ret;
441}
442
443/*
444 * clear some bits on a range in the tree. This may require splitting
445 * or inserting elements in the tree, so the gfp mask is used to
446 * indicate which allocations or sleeping are allowed.
447 *
448 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
449 * the given range from the tree regardless of state (ie for truncate).
450 *
451 * the range [start, end] is inclusive.
452 *
453 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
454 * bits were already set, or zero if none of the bits were already set.
455 */
456int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
457 int bits, int wake, int delete, gfp_t mask)
458{
459 struct extent_state *state;
460 struct extent_state *prealloc = NULL;
461 struct rb_node *node;
462 unsigned long flags;
463 int err;
464 int set = 0;
465
466again:
467 if (!prealloc && (mask & __GFP_WAIT)) {
468 prealloc = alloc_extent_state(mask);
469 if (!prealloc)
470 return -ENOMEM;
471 }
472
Chris Mason70dec802008-01-29 09:59:12 -0500473 spin_lock_irqsave(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500474 /*
475 * this search will find the extents that end after
476 * our range starts
477 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500478 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -0500479 if (!node)
480 goto out;
481 state = rb_entry(node, struct extent_state, rb_node);
482 if (state->start > end)
483 goto out;
484 WARN_ON(state->end < start);
485
486 /*
487 * | ---- desired range ---- |
488 * | state | or
489 * | ------------- state -------------- |
490 *
491 * We need to split the extent we found, and may flip
492 * bits on second half.
493 *
494 * If the extent we found extends past our range, we
495 * just split and search again. It'll get split again
496 * the next time though.
497 *
498 * If the extent we found is inside our range, we clear
499 * the desired bit on it.
500 */
501
502 if (state->start < start) {
Chris Mason70dec802008-01-29 09:59:12 -0500503 if (!prealloc)
504 prealloc = alloc_extent_state(GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -0500505 err = split_state(tree, state, prealloc, start);
506 BUG_ON(err == -EEXIST);
507 prealloc = NULL;
508 if (err)
509 goto out;
510 if (state->end <= end) {
511 start = state->end + 1;
512 set |= clear_state_bit(tree, state, bits,
513 wake, delete);
514 } else {
515 start = state->start;
516 }
517 goto search_again;
518 }
519 /*
520 * | ---- desired range ---- |
521 * | state |
522 * We need to split the extent, and clear the bit
523 * on the first half
524 */
525 if (state->start <= end && state->end > end) {
Chris Mason70dec802008-01-29 09:59:12 -0500526 if (!prealloc)
527 prealloc = alloc_extent_state(GFP_ATOMIC);
Chris Masond1310b22008-01-24 16:13:08 -0500528 err = split_state(tree, state, prealloc, end + 1);
529 BUG_ON(err == -EEXIST);
530
531 if (wake)
532 wake_up(&state->wq);
533 set |= clear_state_bit(tree, prealloc, bits,
534 wake, delete);
535 prealloc = NULL;
536 goto out;
537 }
538
539 start = state->end + 1;
540 set |= clear_state_bit(tree, state, bits, wake, delete);
541 goto search_again;
542
543out:
Chris Mason70dec802008-01-29 09:59:12 -0500544 spin_unlock_irqrestore(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500545 if (prealloc)
546 free_extent_state(prealloc);
547
548 return set;
549
550search_again:
551 if (start > end)
552 goto out;
Chris Mason70dec802008-01-29 09:59:12 -0500553 spin_unlock_irqrestore(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500554 if (mask & __GFP_WAIT)
555 cond_resched();
556 goto again;
557}
558EXPORT_SYMBOL(clear_extent_bit);
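/*
 * Illustrative sketch, not part of the original file: a truncate-style
 * caller can drop the records for a range outright by passing delete == 1,
 * waking any sleepers at the same time, while routine clearing goes
 * through the thin wrappers such as clear_extent_dirty() below:
 *
 *	clear_extent_bit(tree, start, end, EXTENT_DIRTY, 1, 1, GFP_NOFS);
 *	// wake == 1 kicks waiters, delete == 1 removes the states
 *	// regardless of which bits remain set on them
 */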
559
560static int wait_on_state(struct extent_io_tree *tree,
561 struct extent_state *state)
562{
563 DEFINE_WAIT(wait);
564 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
Chris Mason70dec802008-01-29 09:59:12 -0500565 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500566 schedule();
Chris Mason70dec802008-01-29 09:59:12 -0500567 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500568 finish_wait(&state->wq, &wait);
569 return 0;
570}
571
572/*
573 * waits for one or more bits to clear on a range in the state tree.
574 * The range [start, end] is inclusive.
575 * The tree lock is taken by this function
576 */
577int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
578{
579 struct extent_state *state;
580 struct rb_node *node;
581
Chris Mason70dec802008-01-29 09:59:12 -0500582 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500583again:
584 while (1) {
585 /*
586 * this search will find all the extents that end after
587 * our range starts
588 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500589 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -0500590 if (!node)
591 break;
592
593 state = rb_entry(node, struct extent_state, rb_node);
594
595 if (state->start > end)
596 goto out;
597
598 if (state->state & bits) {
599 start = state->start;
600 atomic_inc(&state->refs);
601 wait_on_state(tree, state);
602 free_extent_state(state);
603 goto again;
604 }
605 start = state->end + 1;
606
607 if (start > end)
608 break;
609
610 if (need_resched()) {
Chris Mason70dec802008-01-29 09:59:12 -0500611 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500612 cond_resched();
Chris Mason70dec802008-01-29 09:59:12 -0500613 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500614 }
615 }
616out:
Chris Mason70dec802008-01-29 09:59:12 -0500617 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500618 return 0;
619}
620EXPORT_SYMBOL(wait_extent_bit);
621
622static void set_state_bits(struct extent_io_tree *tree,
623 struct extent_state *state,
624 int bits)
625{
626 if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) {
627 u64 range = state->end - state->start + 1;
628 tree->dirty_bytes += range;
629 }
Chris Mason291d6732008-01-29 15:55:23 -0500630 set_state_cb(tree, state, bits);
Chris Masonb0c68f82008-01-31 11:05:37 -0500631 state->state |= bits;
Chris Masond1310b22008-01-24 16:13:08 -0500632}
633
634/*
635 * set some bits on a range in the tree. This may require allocations
636 * or sleeping, so the gfp mask is used to indicate what is allowed.
637 *
638 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
639 * range already has the desired bits set. The start of the existing
640 * range is returned in failed_start in this case.
641 *
642 * [start, end] is inclusive
643 * This takes the tree lock.
644 */
645int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
646 int exclusive, u64 *failed_start, gfp_t mask)
647{
648 struct extent_state *state;
649 struct extent_state *prealloc = NULL;
650 struct rb_node *node;
651 unsigned long flags;
652 int err = 0;
653 int set;
654 u64 last_start;
655 u64 last_end;
656again:
657 if (!prealloc && (mask & __GFP_WAIT)) {
658 prealloc = alloc_extent_state(mask);
659 if (!prealloc)
660 return -ENOMEM;
661 }
662
Chris Mason70dec802008-01-29 09:59:12 -0500663 spin_lock_irqsave(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500664 /*
665 * this search will find all the extents that end after
666 * our range starts.
667 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500668 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -0500669 if (!node) {
670 err = insert_state(tree, prealloc, start, end, bits);
671 prealloc = NULL;
672 BUG_ON(err == -EEXIST);
673 goto out;
674 }
675
676 state = rb_entry(node, struct extent_state, rb_node);
677 last_start = state->start;
678 last_end = state->end;
679
680 /*
681 * | ---- desired range ---- |
682 * | state |
683 *
684 * Just lock what we found and keep going
685 */
686 if (state->start == start && state->end <= end) {
687 set = state->state & bits;
688 if (set && exclusive) {
689 *failed_start = state->start;
690 err = -EEXIST;
691 goto out;
692 }
693 set_state_bits(tree, state, bits);
694 start = state->end + 1;
695 merge_state(tree, state);
696 goto search_again;
697 }
698
699 /*
700 * | ---- desired range ---- |
701 * | state |
702 * or
703 * | ------------- state -------------- |
704 *
705 * We need to split the extent we found, and may flip bits on
706 * second half.
707 *
708 * If the extent we found extends past our
709 * range, we just split and search again. It'll get split
710 * again the next time though.
711 *
712 * If the extent we found is inside our range, we set the
713 * desired bit on it.
714 */
715 if (state->start < start) {
716 set = state->state & bits;
717 if (exclusive && set) {
718 *failed_start = start;
719 err = -EEXIST;
720 goto out;
721 }
722 err = split_state(tree, state, prealloc, start);
723 BUG_ON(err == -EEXIST);
724 prealloc = NULL;
725 if (err)
726 goto out;
727 if (state->end <= end) {
728 set_state_bits(tree, state, bits);
729 start = state->end + 1;
730 merge_state(tree, state);
731 } else {
732 start = state->start;
733 }
734 goto search_again;
735 }
736 /*
737 * | ---- desired range ---- |
738 * | state | or | state |
739 *
740 * There's a hole, we need to insert something in it and
741 * ignore the extent we found.
742 */
743 if (state->start > start) {
744 u64 this_end;
745 if (end < last_start)
746 this_end = end;
747 else
748 this_end = last_start - 1;
749 err = insert_state(tree, prealloc, start, this_end,
750 bits);
751 prealloc = NULL;
752 BUG_ON(err == -EEXIST);
753 if (err)
754 goto out;
755 start = this_end + 1;
756 goto search_again;
757 }
758 /*
759 * | ---- desired range ---- |
760 * | state |
761 * We need to split the extent, and set the bit
762 * on the first half
763 */
764 if (state->start <= end && state->end > end) {
765 set = state->state & bits;
766 if (exclusive && set) {
767 *failed_start = start;
768 err = -EEXIST;
769 goto out;
770 }
771 err = split_state(tree, state, prealloc, end + 1);
772 BUG_ON(err == -EEXIST);
773
774 set_state_bits(tree, prealloc, bits);
775 merge_state(tree, prealloc);
776 prealloc = NULL;
777 goto out;
778 }
779
780 goto search_again;
781
782out:
Chris Mason70dec802008-01-29 09:59:12 -0500783 spin_unlock_irqrestore(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500784 if (prealloc)
785 free_extent_state(prealloc);
786
787 return err;
788
789search_again:
790 if (start > end)
791 goto out;
Chris Mason70dec802008-01-29 09:59:12 -0500792 spin_unlock_irqrestore(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -0500793 if (mask & __GFP_WAIT)
794 cond_resched();
795 goto again;
796}
797EXPORT_SYMBOL(set_extent_bit);
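/*
 * Illustrative sketch, not part of the original file: exclusive callers use
 * failed_start to find out where the conflict begins, which is exactly how
 * lock_extent() below retries:
 *
 *	u64 failed_start;
 *	int err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
 *				 &failed_start, GFP_NOFS);
 *	if (err == -EEXIST) {
 *		// part of [start, end] already had EXTENT_LOCKED; the start
 *		// of the conflicting range came back in failed_start
 *	}
 */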
798
799/* wrappers around set/clear extent bit */
800int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
801 gfp_t mask)
802{
803 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
804 mask);
805}
806EXPORT_SYMBOL(set_extent_dirty);
807
Chris Masone6dcd2d2008-07-17 12:53:50 -0400808int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
809 gfp_t mask)
810{
811 return set_extent_bit(tree, start, end, EXTENT_ORDERED, 0, NULL, mask);
812}
813EXPORT_SYMBOL(set_extent_ordered);
814
Chris Masond1310b22008-01-24 16:13:08 -0500815int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
816 int bits, gfp_t mask)
817{
818 return set_extent_bit(tree, start, end, bits, 0, NULL,
819 mask);
820}
821EXPORT_SYMBOL(set_extent_bits);
822
823int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
824 int bits, gfp_t mask)
825{
826 return clear_extent_bit(tree, start, end, bits, 0, 0, mask);
827}
828EXPORT_SYMBOL(clear_extent_bits);
829
830int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
831 gfp_t mask)
832{
833 return set_extent_bit(tree, start, end,
Chris Masone6dcd2d2008-07-17 12:53:50 -0400834 EXTENT_DELALLOC | EXTENT_DIRTY,
835 0, NULL, mask);
Chris Masond1310b22008-01-24 16:13:08 -0500836}
837EXPORT_SYMBOL(set_extent_delalloc);
838
839int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
840 gfp_t mask)
841{
842 return clear_extent_bit(tree, start, end,
843 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
844}
845EXPORT_SYMBOL(clear_extent_dirty);
846
Chris Masone6dcd2d2008-07-17 12:53:50 -0400847int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
848 gfp_t mask)
849{
850 return clear_extent_bit(tree, start, end, EXTENT_ORDERED, 1, 0, mask);
851}
852EXPORT_SYMBOL(clear_extent_ordered);
853
Chris Masond1310b22008-01-24 16:13:08 -0500854int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
855 gfp_t mask)
856{
857 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
858 mask);
859}
860EXPORT_SYMBOL(set_extent_new);
861
862int clear_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
863 gfp_t mask)
864{
865 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
866}
867EXPORT_SYMBOL(clear_extent_new);
868
869int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
870 gfp_t mask)
871{
872 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
873 mask);
874}
875EXPORT_SYMBOL(set_extent_uptodate);
876
877int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
878 gfp_t mask)
879{
880 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
881}
882EXPORT_SYMBOL(clear_extent_uptodate);
883
884int set_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
885 gfp_t mask)
886{
887 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
888 0, NULL, mask);
889}
890EXPORT_SYMBOL(set_extent_writeback);
891
892int clear_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end,
893 gfp_t mask)
894{
895 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
896}
897EXPORT_SYMBOL(clear_extent_writeback);
898
899int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
900{
901 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
902}
903EXPORT_SYMBOL(wait_on_extent_writeback);
904
Chris Masond1310b22008-01-24 16:13:08 -0500905int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
906{
907 int err;
908 u64 failed_start;
909 while (1) {
910 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
911 &failed_start, mask);
912 if (err == -EEXIST && (mask & __GFP_WAIT)) {
913 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
914 start = failed_start;
915 } else {
916 break;
917 }
918 WARN_ON(start > end);
919 }
920 return err;
921}
922EXPORT_SYMBOL(lock_extent);
923
924int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
925 gfp_t mask)
926{
927 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
928}
929EXPORT_SYMBOL(unlock_extent);
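/*
 * Illustrative sketch, not part of the original file: the usual pattern is
 * to bracket IO against a byte range (end is inclusive) with these two
 * calls:
 *
 *	lock_extent(tree, start, start + len - 1, GFP_NOFS);
 *	... do the read or write against the range ...
 *	unlock_extent(tree, start, start + len - 1, GFP_NOFS);
 */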
930
931/*
932 * helper function to set pages and extents in the tree dirty
933 */
934int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end)
935{
936 unsigned long index = start >> PAGE_CACHE_SHIFT;
937 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
938 struct page *page;
939
940 while (index <= end_index) {
941 page = find_get_page(tree->mapping, index);
942 BUG_ON(!page);
943 __set_page_dirty_nobuffers(page);
944 page_cache_release(page);
945 index++;
946 }
947 set_extent_dirty(tree, start, end, GFP_NOFS);
948 return 0;
949}
950EXPORT_SYMBOL(set_range_dirty);
951
952/*
953 * helper function to set both pages and extents in the tree writeback
954 */
955int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
956{
957 unsigned long index = start >> PAGE_CACHE_SHIFT;
958 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
959 struct page *page;
960
961 while (index <= end_index) {
962 page = find_get_page(tree->mapping, index);
963 BUG_ON(!page);
964 set_page_writeback(page);
965 page_cache_release(page);
966 index++;
967 }
968 set_extent_writeback(tree, start, end, GFP_NOFS);
969 return 0;
970}
971EXPORT_SYMBOL(set_range_writeback);
972
973int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
974 u64 *start_ret, u64 *end_ret, int bits)
975{
976 struct rb_node *node;
977 struct extent_state *state;
978 int ret = 1;
979
Chris Mason70dec802008-01-29 09:59:12 -0500980 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -0500981 /*
982 * this search will find all the extents that end after
983 * our range starts.
984 */
Chris Mason80ea96b2008-02-01 14:51:59 -0500985 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -0400986 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -0500987 goto out;
988 }
989
990 while(1) {
991 state = rb_entry(node, struct extent_state, rb_node);
992 if (state->end >= start && (state->state & bits)) {
993 *start_ret = state->start;
994 *end_ret = state->end;
995 ret = 0;
996 break;
997 }
998 node = rb_next(node);
999 if (!node)
1000 break;
1001 }
1002out:
Chris Mason70dec802008-01-29 09:59:12 -05001003 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001004 return ret;
1005}
1006EXPORT_SYMBOL(find_first_extent_bit);
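/*
 * Illustrative sketch, not part of the original file: walking every dirty
 * range from offset 0 upward; the function returns non-zero once nothing
 * matching is left in the tree:
 *
 *	u64 found_start, found_end, cur = 0;
 *	while (!find_first_extent_bit(tree, cur, &found_start, &found_end,
 *				      EXTENT_DIRTY)) {
 *		... process [found_start, found_end] ...
 *		cur = found_end + 1;
 *	}
 */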
1007
Chris Masond7fc6402008-02-18 12:12:38 -05001008struct extent_state *find_first_extent_bit_state(struct extent_io_tree *tree,
1009 u64 start, int bits)
1010{
1011 struct rb_node *node;
1012 struct extent_state *state;
1013
1014 /*
1015 * this search will find all the extents that end after
1016 * our range starts.
1017 */
1018 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001019 if (!node) {
Chris Masond7fc6402008-02-18 12:12:38 -05001020 goto out;
1021 }
1022
1023 while(1) {
1024 state = rb_entry(node, struct extent_state, rb_node);
1025 if (state->end >= start && (state->state & bits)) {
1026 return state;
1027 }
1028 node = rb_next(node);
1029 if (!node)
1030 break;
1031 }
1032out:
1033 return NULL;
1034}
1035EXPORT_SYMBOL(find_first_extent_bit_state);
1036
Chris Masond1310b22008-01-24 16:13:08 -05001037u64 find_lock_delalloc_range(struct extent_io_tree *tree,
1038 u64 *start, u64 *end, u64 max_bytes)
1039{
1040 struct rb_node *node;
1041 struct extent_state *state;
1042 u64 cur_start = *start;
1043 u64 found = 0;
1044 u64 total_bytes = 0;
1045
Chris Mason70dec802008-01-29 09:59:12 -05001046 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001047 /*
1048 * this search will find all the extents that end after
1049 * our range starts.
1050 */
1051search_again:
Chris Mason80ea96b2008-02-01 14:51:59 -05001052 node = tree_search(tree, cur_start);
Peter2b114d12008-04-01 11:21:40 -04001053 if (!node) {
Chris Mason3b951512008-04-17 11:29:12 -04001054 if (!found)
1055 *end = (u64)-1;
Chris Masond1310b22008-01-24 16:13:08 -05001056 goto out;
1057 }
1058
1059 while(1) {
1060 state = rb_entry(node, struct extent_state, rb_node);
1061 if (found && state->start != cur_start) {
1062 goto out;
1063 }
1064 if (!(state->state & EXTENT_DELALLOC)) {
1065 if (!found)
1066 *end = state->end;
1067 goto out;
1068 }
1069 if (!found) {
1070 struct extent_state *prev_state;
1071 struct rb_node *prev_node = node;
1072 while(1) {
1073 prev_node = rb_prev(prev_node);
1074 if (!prev_node)
1075 break;
1076 prev_state = rb_entry(prev_node,
1077 struct extent_state,
1078 rb_node);
1079 if (!(prev_state->state & EXTENT_DELALLOC))
1080 break;
1081 state = prev_state;
1082 node = prev_node;
1083 }
1084 }
1085 if (state->state & EXTENT_LOCKED) {
1086 DEFINE_WAIT(wait);
1087 atomic_inc(&state->refs);
1088 prepare_to_wait(&state->wq, &wait,
1089 TASK_UNINTERRUPTIBLE);
Chris Mason70dec802008-01-29 09:59:12 -05001090 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001091 schedule();
Chris Mason70dec802008-01-29 09:59:12 -05001092 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001093 finish_wait(&state->wq, &wait);
1094 free_extent_state(state);
1095 goto search_again;
1096 }
Chris Mason291d6732008-01-29 15:55:23 -05001097 set_state_cb(tree, state, EXTENT_LOCKED);
Chris Masonb0c68f82008-01-31 11:05:37 -05001098 state->state |= EXTENT_LOCKED;
Chris Masond1310b22008-01-24 16:13:08 -05001099 if (!found)
1100 *start = state->start;
1101 found++;
1102 *end = state->end;
1103 cur_start = state->end + 1;
1104 node = rb_next(node);
1105 if (!node)
1106 break;
1107 total_bytes += state->end - state->start + 1;
1108 if (total_bytes >= max_bytes)
1109 break;
1110 }
1111out:
Chris Mason70dec802008-01-29 09:59:12 -05001112 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001113 return found;
1114}
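/*
 * Illustrative sketch, not part of the original file: a writepage-style
 * caller passes the page offset in and gets back a delalloc region that is
 * now EXTENT_LOCKED, capped at max_bytes (the cap below is an arbitrary
 * value for the example).  The return value is the number of extent_state
 * records that were found and locked, so zero means no delalloc here:
 *
 *	u64 delalloc_start = page_offset;
 *	u64 delalloc_end = 0;
 *	if (find_lock_delalloc_range(tree, &delalloc_start, &delalloc_end,
 *				     128 * 1024 * 1024)) {
 *		... [delalloc_start, delalloc_end] is locked for this caller ...
 *	}
 */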
1115
1116u64 count_range_bits(struct extent_io_tree *tree,
1117 u64 *start, u64 search_end, u64 max_bytes,
1118 unsigned long bits)
1119{
1120 struct rb_node *node;
1121 struct extent_state *state;
1122 u64 cur_start = *start;
1123 u64 total_bytes = 0;
1124 int found = 0;
1125
1126 if (search_end <= cur_start) {
1127 printk("search_end %Lu start %Lu\n", search_end, cur_start);
1128 WARN_ON(1);
1129 return 0;
1130 }
1131
Chris Mason70dec802008-01-29 09:59:12 -05001132 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001133 if (cur_start == 0 && bits == EXTENT_DIRTY) {
1134 total_bytes = tree->dirty_bytes;
1135 goto out;
1136 }
1137 /*
1138 * this search will find all the extents that end after
1139 * our range starts.
1140 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001141 node = tree_search(tree, cur_start);
Peter2b114d12008-04-01 11:21:40 -04001142 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001143 goto out;
1144 }
1145
1146 while(1) {
1147 state = rb_entry(node, struct extent_state, rb_node);
1148 if (state->start > search_end)
1149 break;
1150 if (state->end >= cur_start && (state->state & bits)) {
1151 total_bytes += min(search_end, state->end) + 1 -
1152 max(cur_start, state->start);
1153 if (total_bytes >= max_bytes)
1154 break;
1155 if (!found) {
1156 *start = state->start;
1157 found = 1;
1158 }
1159 }
1160 node = rb_next(node);
1161 if (!node)
1162 break;
1163 }
1164out:
Chris Mason70dec802008-01-29 09:59:12 -05001165 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001166 return total_bytes;
1167}
1168/*
1169 * helper function to lock both pages and extents in the tree.
1170 * pages must be locked first.
1171 */
1172int lock_range(struct extent_io_tree *tree, u64 start, u64 end)
1173{
1174 unsigned long index = start >> PAGE_CACHE_SHIFT;
1175 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1176 struct page *page;
1177 int err;
1178
1179 while (index <= end_index) {
1180 page = grab_cache_page(tree->mapping, index);
1181 if (!page) {
1182 err = -ENOMEM;
1183 goto failed;
1184 }
1185 if (IS_ERR(page)) {
1186 err = PTR_ERR(page);
1187 goto failed;
1188 }
1189 index++;
1190 }
1191 lock_extent(tree, start, end, GFP_NOFS);
1192 return 0;
1193
1194failed:
1195 /*
1196 * we failed above in getting the page at 'index', so we undo here
1197 * up to but not including the page at 'index'
1198 */
1199 end_index = index;
1200 index = start >> PAGE_CACHE_SHIFT;
1201 while (index < end_index) {
1202 page = find_get_page(tree->mapping, index);
1203 unlock_page(page);
1204 page_cache_release(page);
1205 index++;
1206 }
1207 return err;
1208}
1209EXPORT_SYMBOL(lock_range);
1210
1211/*
1212 * helper function to unlock both pages and extents in the tree.
1213 */
1214int unlock_range(struct extent_io_tree *tree, u64 start, u64 end)
1215{
1216 unsigned long index = start >> PAGE_CACHE_SHIFT;
1217 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1218 struct page *page;
1219
1220 while (index <= end_index) {
1221 page = find_get_page(tree->mapping, index);
1222 unlock_page(page);
1223 page_cache_release(page);
1224 index++;
1225 }
1226 unlock_extent(tree, start, end, GFP_NOFS);
1227 return 0;
1228}
1229EXPORT_SYMBOL(unlock_range);
1230
1231int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
1232{
1233 struct rb_node *node;
1234 struct extent_state *state;
1235 int ret = 0;
1236
Chris Mason70dec802008-01-29 09:59:12 -05001237 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001238 /*
1239 * this search will find all the extents that end after
1240 * our range starts.
1241 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001242 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001243 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001244 ret = -ENOENT;
1245 goto out;
1246 }
1247 state = rb_entry(node, struct extent_state, rb_node);
1248 if (state->start != start) {
1249 ret = -ENOENT;
1250 goto out;
1251 }
1252 state->private = private;
1253out:
Chris Mason70dec802008-01-29 09:59:12 -05001254 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001255 return ret;
1256}
1257
1258int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
1259{
1260 struct rb_node *node;
1261 struct extent_state *state;
1262 int ret = 0;
1263
Chris Mason70dec802008-01-29 09:59:12 -05001264 spin_lock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001265 /*
1266 * this search will find all the extents that end after
1267 * our range starts.
1268 */
Chris Mason80ea96b2008-02-01 14:51:59 -05001269 node = tree_search(tree, start);
Peter2b114d12008-04-01 11:21:40 -04001270 if (!node) {
Chris Masond1310b22008-01-24 16:13:08 -05001271 ret = -ENOENT;
1272 goto out;
1273 }
1274 state = rb_entry(node, struct extent_state, rb_node);
1275 if (state->start != start) {
1276 ret = -ENOENT;
1277 goto out;
1278 }
1279 *private = state->private;
1280out:
Chris Mason70dec802008-01-29 09:59:12 -05001281 spin_unlock_irq(&tree->lock);
Chris Masond1310b22008-01-24 16:13:08 -05001282 return ret;
1283}
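/*
 * Illustrative sketch, not part of the original file: the private field
 * stashes one u64 per state record, keyed by the exact start offset used
 * when it was stored (get_state_private() returns -ENOENT otherwise).
 * The stored value below is made up for the example:
 *
 *	set_state_private(tree, start, 0xdeadbeef);
 *	...
 *	u64 priv;
 *	if (!get_state_private(tree, start, &priv))
 *		// priv == 0xdeadbeef
 */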
1284
1285/*
1286 * searches a range in the state tree for a given mask.
1287 * If 'filled' == 1, this returns 1 only if every extent in the range
Chris Masond1310b22008-01-24 16:13:08 -05001288 * has the bits set. Otherwise, 1 is returned if any bit in the
1289 * range is found set.
1290 */
1291int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1292 int bits, int filled)
1293{
1294 struct extent_state *state = NULL;
1295 struct rb_node *node;
1296 int bitset = 0;
1297 unsigned long flags;
1298
Chris Mason70dec802008-01-29 09:59:12 -05001299 spin_lock_irqsave(&tree->lock, flags);
Chris Mason80ea96b2008-02-01 14:51:59 -05001300 node = tree_search(tree, start);
Chris Masond1310b22008-01-24 16:13:08 -05001301 while (node && start <= end) {
1302 state = rb_entry(node, struct extent_state, rb_node);
1303
1304 if (filled && state->start > start) {
1305 bitset = 0;
1306 break;
1307 }
1308
1309 if (state->start > end)
1310 break;
1311
1312 if (state->state & bits) {
1313 bitset = 1;
1314 if (!filled)
1315 break;
1316 } else if (filled) {
1317 bitset = 0;
1318 break;
1319 }
1320 start = state->end + 1;
1321 if (start > end)
1322 break;
1323 node = rb_next(node);
1324 if (!node) {
1325 if (filled)
1326 bitset = 0;
1327 break;
1328 }
1329 }
Chris Mason70dec802008-01-29 09:59:12 -05001330 spin_unlock_irqrestore(&tree->lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -05001331 return bitset;
1332}
1333EXPORT_SYMBOL(test_range_bit);
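/*
 * Illustrative sketch, not part of the original file: filled == 1 asks
 * "is every byte of the range covered by states carrying these bits",
 * which is how check_page_uptodate() below decides whether a whole page
 * can be marked uptodate:
 *
 *	if (test_range_bit(tree, start, start + PAGE_CACHE_SIZE - 1,
 *			   EXTENT_UPTODATE, 1))
 *		SetPageUptodate(page);
 */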
1334
1335/*
1336 * helper function to set a given page up to date if all the
1337 * extents in the tree for that page are up to date
1338 */
1339static int check_page_uptodate(struct extent_io_tree *tree,
1340 struct page *page)
1341{
1342 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1343 u64 end = start + PAGE_CACHE_SIZE - 1;
1344 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1345 SetPageUptodate(page);
1346 return 0;
1347}
1348
1349/*
1350 * helper function to unlock a page if all the extents in the tree
1351 * for that page are unlocked
1352 */
1353static int check_page_locked(struct extent_io_tree *tree,
1354 struct page *page)
1355{
1356 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1357 u64 end = start + PAGE_CACHE_SIZE - 1;
1358 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1359 unlock_page(page);
1360 return 0;
1361}
1362
1363/*
1364 * helper function to end page writeback if all the extents
1365 * in the tree for that page are done with writeback
1366 */
1367static int check_page_writeback(struct extent_io_tree *tree,
1368 struct page *page)
1369{
1370 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1371 u64 end = start + PAGE_CACHE_SIZE - 1;
1372 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1373 end_page_writeback(page);
1374 return 0;
1375}
1376
1377/* lots and lots of room for performance fixes in the end_bio funcs */
1378
1379/*
1380 * after a writepage IO is done, we need to:
1381 * clear the uptodate bits on error
1382 * clear the writeback bits in the extent tree for this IO
1383 * end_page_writeback if the page has no more pending IO
1384 *
1385 * Scheduling is not allowed, so the extent state tree is expected
1386 * to have one and only one object corresponding to this IO.
1387 */
1388#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1389static void end_bio_extent_writepage(struct bio *bio, int err)
1390#else
1391static int end_bio_extent_writepage(struct bio *bio,
1392 unsigned int bytes_done, int err)
1393#endif
1394{
Chris Mason1259ab72008-05-12 13:39:03 -04001395 int uptodate = err == 0;
Chris Masond1310b22008-01-24 16:13:08 -05001396 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
Chris Mason70dec802008-01-29 09:59:12 -05001397 struct extent_state *state = bio->bi_private;
1398 struct extent_io_tree *tree = state->tree;
1399 struct rb_node *node;
Chris Masond1310b22008-01-24 16:13:08 -05001400 u64 start;
1401 u64 end;
Chris Mason70dec802008-01-29 09:59:12 -05001402 u64 cur;
Chris Masond1310b22008-01-24 16:13:08 -05001403 int whole_page;
Chris Mason1259ab72008-05-12 13:39:03 -04001404 int ret;
Chris Mason70dec802008-01-29 09:59:12 -05001405 unsigned long flags;
Chris Masond1310b22008-01-24 16:13:08 -05001406
1407#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1408 if (bio->bi_size)
1409 return 1;
1410#endif
Chris Masond1310b22008-01-24 16:13:08 -05001411 do {
1412 struct page *page = bvec->bv_page;
1413 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1414 bvec->bv_offset;
1415 end = start + bvec->bv_len - 1;
1416
1417 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1418 whole_page = 1;
1419 else
1420 whole_page = 0;
1421
1422 if (--bvec >= bio->bi_io_vec)
1423 prefetchw(&bvec->bv_page->flags);
Chris Mason1259ab72008-05-12 13:39:03 -04001424 if (tree->ops && tree->ops->writepage_end_io_hook) {
1425 ret = tree->ops->writepage_end_io_hook(page, start,
Chris Masone6dcd2d2008-07-17 12:53:50 -04001426 end, state, uptodate);
Chris Mason1259ab72008-05-12 13:39:03 -04001427 if (ret)
1428 uptodate = 0;
1429 }
1430
1431 if (!uptodate && tree->ops &&
1432 tree->ops->writepage_io_failed_hook) {
1433 ret = tree->ops->writepage_io_failed_hook(bio, page,
1434 start, end, state);
1435 if (ret == 0) {
1436 state = NULL;
1437 uptodate = (err == 0);
1438 continue;
1439 }
1440 }
1441
Chris Masond1310b22008-01-24 16:13:08 -05001442 if (!uptodate) {
1443 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1444 ClearPageUptodate(page);
1445 SetPageError(page);
1446 }
Chris Mason70dec802008-01-29 09:59:12 -05001447
Chris Mason70dec802008-01-29 09:59:12 -05001448 /*
1449 * bios can get merged in funny ways, and so we need to
1450 * be careful with the state variable. We know the
1451 * state won't be merged with others because it has
1452 * WRITEBACK set, but we can't be sure each biovec is
1453 * sequential in the file. So, if our cached state
1454 * doesn't match the expected end, search the tree
1455 * for the correct one.
1456 */
1457
1458 spin_lock_irqsave(&tree->lock, flags);
1459 if (!state || state->end != end) {
1460 state = NULL;
Chris Mason80ea96b2008-02-01 14:51:59 -05001461 node = __etree_search(tree, start, NULL, NULL);
Chris Mason70dec802008-01-29 09:59:12 -05001462 if (node) {
1463 state = rb_entry(node, struct extent_state,
1464 rb_node);
1465 if (state->end != end ||
1466 !(state->state & EXTENT_WRITEBACK))
1467 state = NULL;
1468 }
1469 if (!state) {
1470 spin_unlock_irqrestore(&tree->lock, flags);
1471 clear_extent_writeback(tree, start,
1472 end, GFP_ATOMIC);
1473 goto next_io;
1474 }
1475 }
1476 cur = end;
1477 while(1) {
1478 struct extent_state *clear = state;
1479 cur = state->start;
1480 node = rb_prev(&state->rb_node);
1481 if (node) {
1482 state = rb_entry(node,
1483 struct extent_state,
1484 rb_node);
1485 } else {
1486 state = NULL;
1487 }
1488
1489 clear_state_bit(tree, clear, EXTENT_WRITEBACK,
1490 1, 0);
1491 if (cur == start)
1492 break;
1493 if (cur < start) {
1494 WARN_ON(1);
1495 break;
1496 }
1497 if (!node)
1498 break;
1499 }
1500 /* before releasing the lock, make sure the next state
1501 * variable has the expected bits set and corresponds
1502 * to the correct offsets in the file
1503 */
1504 if (state && (state->end + 1 != start ||
Yanc2e639f2008-02-04 08:57:25 -05001505 !(state->state & EXTENT_WRITEBACK))) {
Chris Mason70dec802008-01-29 09:59:12 -05001506 state = NULL;
1507 }
1508 spin_unlock_irqrestore(&tree->lock, flags);
1509next_io:
Chris Masond1310b22008-01-24 16:13:08 -05001510
1511 if (whole_page)
1512 end_page_writeback(page);
1513 else
1514 check_page_writeback(tree, page);
Chris Masond1310b22008-01-24 16:13:08 -05001515 } while (bvec >= bio->bi_io_vec);
Chris Masond1310b22008-01-24 16:13:08 -05001516 bio_put(bio);
1517#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1518 return 0;
1519#endif
1520}
1521
1522/*
1523 * after a readpage IO is done, we need to:
1524 * clear the uptodate bits on error
1525 * set the uptodate bits if things worked
1526 * set the page up to date if all extents in the tree are uptodate
1527 * clear the lock bit in the extent tree
1528 * unlock the page if there are no other extents locked for it
1529 *
1530 * Scheduling is not allowed, so the extent state tree is expected
1531 * to have one and only one object corresponding to this IO.
1532 */
1533#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1534static void end_bio_extent_readpage(struct bio *bio, int err)
1535#else
1536static int end_bio_extent_readpage(struct bio *bio,
1537 unsigned int bytes_done, int err)
1538#endif
1539{
1540 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1541 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
Chris Mason70dec802008-01-29 09:59:12 -05001542 struct extent_state *state = bio->bi_private;
1543 struct extent_io_tree *tree = state->tree;
1544 struct rb_node *node;
Chris Masond1310b22008-01-24 16:13:08 -05001545 u64 start;
1546 u64 end;
Chris Mason70dec802008-01-29 09:59:12 -05001547 u64 cur;
1548 unsigned long flags;
Chris Masond1310b22008-01-24 16:13:08 -05001549 int whole_page;
1550 int ret;
1551
1552#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1553 if (bio->bi_size)
1554 return 1;
1555#endif
1556
1557 do {
1558 struct page *page = bvec->bv_page;
1559 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1560 bvec->bv_offset;
1561 end = start + bvec->bv_len - 1;
1562
1563 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1564 whole_page = 1;
1565 else
1566 whole_page = 0;
1567
1568 if (--bvec >= bio->bi_io_vec)
1569 prefetchw(&bvec->bv_page->flags);
1570
1571 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
Chris Mason70dec802008-01-29 09:59:12 -05001572 ret = tree->ops->readpage_end_io_hook(page, start, end,
1573 state);
Chris Masond1310b22008-01-24 16:13:08 -05001574 if (ret)
1575 uptodate = 0;
1576 }
Chris Mason7e383262008-04-09 16:28:12 -04001577 if (!uptodate && tree->ops &&
1578 tree->ops->readpage_io_failed_hook) {
1579 ret = tree->ops->readpage_io_failed_hook(bio, page,
1580 start, end, state);
1581 if (ret == 0) {
1582 state = NULL;
Chris Mason3b951512008-04-17 11:29:12 -04001583 uptodate =
1584 test_bit(BIO_UPTODATE, &bio->bi_flags);
Chris Mason7e383262008-04-09 16:28:12 -04001585 continue;
1586 }
1587 }
Chris Mason70dec802008-01-29 09:59:12 -05001588
1589 spin_lock_irqsave(&tree->lock, flags);
1590 if (!state || state->end != end) {
1591 state = NULL;
Chris Mason80ea96b2008-02-01 14:51:59 -05001592 node = __etree_search(tree, start, NULL, NULL);
Chris Mason70dec802008-01-29 09:59:12 -05001593 if (node) {
1594 state = rb_entry(node, struct extent_state,
1595 rb_node);
1596 if (state->end != end ||
1597 !(state->state & EXTENT_LOCKED))
1598 state = NULL;
1599 }
Chris Mason3b951512008-04-17 11:29:12 -04001600 if (!state) {
Chris Mason70dec802008-01-29 09:59:12 -05001601 spin_unlock_irqrestore(&tree->lock, flags);
Chris Mason3b951512008-04-17 11:29:12 -04001602 if (uptodate)
1603 set_extent_uptodate(tree, start, end,
1604 GFP_ATOMIC);
Chris Mason70dec802008-01-29 09:59:12 -05001605 unlock_extent(tree, start, end, GFP_ATOMIC);
1606 goto next_io;
1607 }
Chris Masond1310b22008-01-24 16:13:08 -05001608 }
1609
Chris Mason70dec802008-01-29 09:59:12 -05001610 cur = end;
1611 while(1) {
1612 struct extent_state *clear = state;
1613 cur = state->start;
1614 node = rb_prev(&state->rb_node);
1615 if (node) {
1616 state = rb_entry(node,
1617 struct extent_state,
1618 rb_node);
1619 } else {
1620 state = NULL;
1621 }
Chris Masonf1885912008-04-09 16:28:12 -04001622 if (uptodate) {
1623 set_state_cb(tree, clear, EXTENT_UPTODATE);
1624 clear->state |= EXTENT_UPTODATE;
1625 }
Chris Mason70dec802008-01-29 09:59:12 -05001626 clear_state_bit(tree, clear, EXTENT_LOCKED,
1627 1, 0);
1628 if (cur == start)
1629 break;
1630 if (cur < start) {
1631 WARN_ON(1);
1632 break;
1633 }
1634 if (!node)
1635 break;
1636 }
1637 /* before releasing the lock, make sure the next state
1638 * variable has the expected bits set and corresponds
1639 * to the correct offsets in the file
1640 */
1641 if (state && (state->end + 1 != start ||
Yanc2e639f2008-02-04 08:57:25 -05001642 !(state->state & EXTENT_LOCKED))) {
Chris Mason70dec802008-01-29 09:59:12 -05001643 state = NULL;
1644 }
1645 spin_unlock_irqrestore(&tree->lock, flags);
1646next_io:
1647 if (whole_page) {
1648 if (uptodate) {
1649 SetPageUptodate(page);
1650 } else {
1651 ClearPageUptodate(page);
1652 SetPageError(page);
1653 }
Chris Masond1310b22008-01-24 16:13:08 -05001654 unlock_page(page);
Chris Mason70dec802008-01-29 09:59:12 -05001655 } else {
1656 if (uptodate) {
1657 check_page_uptodate(tree, page);
1658 } else {
1659 ClearPageUptodate(page);
1660 SetPageError(page);
1661 }
Chris Masond1310b22008-01-24 16:13:08 -05001662 check_page_locked(tree, page);
Chris Mason70dec802008-01-29 09:59:12 -05001663 }
Chris Masond1310b22008-01-24 16:13:08 -05001664 } while (bvec >= bio->bi_io_vec);
1665
1666 bio_put(bio);
1667#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1668 return 0;
1669#endif
1670}
1671
1672/*
1673 * IO done from prepare_write is pretty simple, we just unlock
1674 * the structs in the extent tree when done, and set the uptodate bits
1675 * as appropriate.
1676 */
1677#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1678static void end_bio_extent_preparewrite(struct bio *bio, int err)
1679#else
1680static int end_bio_extent_preparewrite(struct bio *bio,
1681 unsigned int bytes_done, int err)
1682#endif
1683{
1684 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1685 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
Chris Mason70dec802008-01-29 09:59:12 -05001686 struct extent_state *state = bio->bi_private;
1687 struct extent_io_tree *tree = state->tree;
Chris Masond1310b22008-01-24 16:13:08 -05001688 u64 start;
1689 u64 end;
1690
1691#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1692 if (bio->bi_size)
1693 return 1;
1694#endif
1695
1696 do {
1697 struct page *page = bvec->bv_page;
1698 start = ((u64)page->index << PAGE_CACHE_SHIFT) +
1699 bvec->bv_offset;
1700 end = start + bvec->bv_len - 1;
1701
1702 if (--bvec >= bio->bi_io_vec)
1703 prefetchw(&bvec->bv_page->flags);
1704
1705 if (uptodate) {
1706 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1707 } else {
1708 ClearPageUptodate(page);
1709 SetPageError(page);
1710 }
1711
1712 unlock_extent(tree, start, end, GFP_ATOMIC);
1713
1714 } while (bvec >= bio->bi_io_vec);
1715
1716 bio_put(bio);
1717#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1718 return 0;
1719#endif
1720}
1721
1722static struct bio *
1723extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
1724 gfp_t gfp_flags)
1725{
1726 struct bio *bio;
1727
1728 bio = bio_alloc(gfp_flags, nr_vecs);
1729
1730 if (bio == NULL && (current->flags & PF_MEMALLOC)) {
1731 while (!bio && (nr_vecs /= 2))
1732 bio = bio_alloc(gfp_flags, nr_vecs);
1733 }
1734
1735 if (bio) {
Chris Masone1c4b742008-04-22 13:26:46 -04001736 bio->bi_size = 0;
Chris Masond1310b22008-01-24 16:13:08 -05001737 bio->bi_bdev = bdev;
1738 bio->bi_sector = first_sector;
1739 }
1740 return bio;
1741}
1742
Chris Masonf1885912008-04-09 16:28:12 -04001743static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05001744{
Chris Masond1310b22008-01-24 16:13:08 -05001745 int ret = 0;
Chris Mason70dec802008-01-29 09:59:12 -05001746 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1747 struct page *page = bvec->bv_page;
1748 struct extent_io_tree *tree = bio->bi_private;
1749 struct rb_node *node;
1750 struct extent_state *state;
1751 u64 start;
1752 u64 end;
1753
1754 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1755 end = start + bvec->bv_len - 1;
1756
1757 spin_lock_irq(&tree->lock);
Chris Mason80ea96b2008-02-01 14:51:59 -05001758 node = __etree_search(tree, start, NULL, NULL);
Chris Mason70dec802008-01-29 09:59:12 -05001759 BUG_ON(!node);
1760 state = rb_entry(node, struct extent_state, rb_node);
1761 while(state->end < end) {
1762 node = rb_next(node);
1763 state = rb_entry(node, struct extent_state, rb_node);
1764 }
1765 BUG_ON(state->end != end);
1766 spin_unlock_irq(&tree->lock);
1767
1768 bio->bi_private = state;
Chris Masond1310b22008-01-24 16:13:08 -05001769
1770 bio_get(bio);
1771
Chris Mason065631f2008-02-20 12:07:25 -05001772 if (tree->ops && tree->ops->submit_bio_hook)
Chris Masonf1885912008-04-09 16:28:12 -04001773 tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
1774 mirror_num);
Chris Mason0b86a832008-03-24 15:01:56 -04001775 else
1776 submit_bio(rw, bio);
Chris Masond1310b22008-01-24 16:13:08 -05001777 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1778 ret = -EOPNOTSUPP;
1779 bio_put(bio);
1780 return ret;
1781}
1782
1783static int submit_extent_page(int rw, struct extent_io_tree *tree,
1784 struct page *page, sector_t sector,
1785 size_t size, unsigned long offset,
1786 struct block_device *bdev,
1787 struct bio **bio_ret,
1788 unsigned long max_pages,
Chris Masonf1885912008-04-09 16:28:12 -04001789 bio_end_io_t end_io_func,
1790 int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05001791{
1792 int ret = 0;
1793 struct bio *bio;
1794 int nr;
1795
1796 if (bio_ret && *bio_ret) {
1797 bio = *bio_ret;
1798 if (bio->bi_sector + (bio->bi_size >> 9) != sector ||
Chris Mason239b14b2008-03-24 15:02:07 -04001799 (tree->ops && tree->ops->merge_bio_hook &&
1800 tree->ops->merge_bio_hook(page, offset, size, bio)) ||
Chris Masond1310b22008-01-24 16:13:08 -05001801 bio_add_page(bio, page, size, offset) < size) {
Chris Masonf1885912008-04-09 16:28:12 -04001802 ret = submit_one_bio(rw, bio, mirror_num);
Chris Masond1310b22008-01-24 16:13:08 -05001803 bio = NULL;
1804 } else {
1805 return 0;
1806 }
1807 }
Chris Mason961d0232008-02-06 11:01:42 -05001808 nr = bio_get_nr_vecs(bdev);
Chris Masond1310b22008-01-24 16:13:08 -05001809 bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
1810 if (!bio) {
 1811 printk("failed to allocate bio nr %d\n", nr);
 return -ENOMEM; /* avoid dereferencing a NULL bio below */
 1812 }
Chris Mason70dec802008-01-29 09:59:12 -05001813
1814
Chris Masond1310b22008-01-24 16:13:08 -05001815 bio_add_page(bio, page, size, offset);
1816 bio->bi_end_io = end_io_func;
1817 bio->bi_private = tree;
Chris Mason70dec802008-01-29 09:59:12 -05001818
Chris Masond1310b22008-01-24 16:13:08 -05001819 if (bio_ret) {
1820 *bio_ret = bio;
1821 } else {
Chris Masonf1885912008-04-09 16:28:12 -04001822 ret = submit_one_bio(rw, bio, mirror_num);
Chris Masond1310b22008-01-24 16:13:08 -05001823 }
1824
1825 return ret;
1826}
1827
1828void set_page_extent_mapped(struct page *page)
1829{
1830 if (!PagePrivate(page)) {
1831 SetPagePrivate(page);
Chris Masond1310b22008-01-24 16:13:08 -05001832 page_cache_get(page);
Chris Mason6af118c2008-07-22 11:18:07 -04001833 set_page_private(page, EXTENT_PAGE_PRIVATE);
Chris Masond1310b22008-01-24 16:13:08 -05001834 }
1835}
1836
1837void set_page_extent_head(struct page *page, unsigned long len)
1838{
1839 set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
1840}
1841
1842/*
1843 * basic readpage implementation. Locked extent state structs are inserted
1844 * into the tree and are removed when the IO is done (by the end_io
1845 * handlers)
1846 */
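/*
 * Usage sketch (illustrative; my_readpage and my_get_extent are hypothetical
 * names): a filesystem ->readpage is normally just a thin wrapper around the
 * exported helper below, e.g.
 *
 *	static int my_readpage(struct file *file, struct page *page)
 *	{
 *		struct extent_io_tree *tree;	(the per-inode io tree)
 *		...
 *		return extent_read_full_page(tree, page, my_get_extent);
 *	}
 *
 * __extent_read_full_page() takes a struct bio ** so extent_readpages()
 * below can merge contiguous pages into a single submission.
 */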
1847static int __extent_read_full_page(struct extent_io_tree *tree,
1848 struct page *page,
1849 get_extent_t *get_extent,
Chris Masonf1885912008-04-09 16:28:12 -04001850 struct bio **bio, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05001851{
1852 struct inode *inode = page->mapping->host;
1853 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
1854 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1855 u64 end;
1856 u64 cur = start;
1857 u64 extent_offset;
1858 u64 last_byte = i_size_read(inode);
1859 u64 block_start;
1860 u64 cur_end;
1861 sector_t sector;
1862 struct extent_map *em;
1863 struct block_device *bdev;
1864 int ret;
1865 int nr = 0;
1866 size_t page_offset = 0;
1867 size_t iosize;
1868 size_t blocksize = inode->i_sb->s_blocksize;
1869
1870 set_page_extent_mapped(page);
1871
1872 end = page_end;
1873 lock_extent(tree, start, end, GFP_NOFS);
1874
1875 while (cur <= end) {
1876 if (cur >= last_byte) {
1877 char *userpage;
1878 iosize = PAGE_CACHE_SIZE - page_offset;
1879 userpage = kmap_atomic(page, KM_USER0);
1880 memset(userpage + page_offset, 0, iosize);
1881 flush_dcache_page(page);
1882 kunmap_atomic(userpage, KM_USER0);
1883 set_extent_uptodate(tree, cur, cur + iosize - 1,
1884 GFP_NOFS);
1885 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1886 break;
1887 }
1888 em = get_extent(inode, page, page_offset, cur,
1889 end - cur + 1, 0);
1890 if (IS_ERR(em) || !em) {
1891 SetPageError(page);
1892 unlock_extent(tree, cur, end, GFP_NOFS);
1893 break;
1894 }
Chris Masond1310b22008-01-24 16:13:08 -05001895 extent_offset = cur - em->start;
Chris Masone6dcd2d2008-07-17 12:53:50 -04001896 if (extent_map_end(em) <= cur) {
1897printk("bad mapping em [%Lu %Lu] cur %Lu\n", em->start, extent_map_end(em), cur);
1898 }
Chris Masond1310b22008-01-24 16:13:08 -05001899 BUG_ON(extent_map_end(em) <= cur);
Chris Masone6dcd2d2008-07-17 12:53:50 -04001900 if (end < cur) {
1901printk("2bad mapping end %Lu cur %Lu\n", end, cur);
1902 }
Chris Masond1310b22008-01-24 16:13:08 -05001903 BUG_ON(end < cur);
1904
1905 iosize = min(extent_map_end(em) - cur, end - cur + 1);
1906 cur_end = min(extent_map_end(em) - 1, end);
1907 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1908 sector = (em->block_start + extent_offset) >> 9;
1909 bdev = em->bdev;
1910 block_start = em->block_start;
1911 free_extent_map(em);
1912 em = NULL;
1913
1914 /* we've found a hole, just zero and go on */
1915 if (block_start == EXTENT_MAP_HOLE) {
1916 char *userpage;
1917 userpage = kmap_atomic(page, KM_USER0);
1918 memset(userpage + page_offset, 0, iosize);
1919 flush_dcache_page(page);
1920 kunmap_atomic(userpage, KM_USER0);
1921
1922 set_extent_uptodate(tree, cur, cur + iosize - 1,
1923 GFP_NOFS);
1924 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1925 cur = cur + iosize;
1926 page_offset += iosize;
1927 continue;
1928 }
1929 /* the get_extent function already copied into the page */
1930 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1931 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1932 cur = cur + iosize;
1933 page_offset += iosize;
1934 continue;
1935 }
Chris Mason70dec802008-01-29 09:59:12 -05001936 /* we have an inline extent but it didn't get marked up
1937 * to date. Error out
1938 */
1939 if (block_start == EXTENT_MAP_INLINE) {
1940 SetPageError(page);
1941 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1942 cur = cur + iosize;
1943 page_offset += iosize;
1944 continue;
1945 }
Chris Masond1310b22008-01-24 16:13:08 -05001946
1947 ret = 0;
1948 if (tree->ops && tree->ops->readpage_io_hook) {
1949 ret = tree->ops->readpage_io_hook(page, cur,
1950 cur + iosize - 1);
1951 }
1952 if (!ret) {
Chris Mason89642222008-07-24 09:41:53 -04001953 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
1954 pnr -= page->index;
Chris Masond1310b22008-01-24 16:13:08 -05001955 ret = submit_extent_page(READ, tree, page,
1956 sector, iosize, page_offset,
Chris Mason89642222008-07-24 09:41:53 -04001957 bdev, bio, pnr,
Chris Masonf1885912008-04-09 16:28:12 -04001958 end_bio_extent_readpage, mirror_num);
Chris Mason89642222008-07-24 09:41:53 -04001959 nr++;
Chris Masond1310b22008-01-24 16:13:08 -05001960 }
1961 if (ret)
1962 SetPageError(page);
1963 cur = cur + iosize;
1964 page_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05001965 }
1966 if (!nr) {
1967 if (!PageError(page))
1968 SetPageUptodate(page);
1969 unlock_page(page);
1970 }
1971 return 0;
1972}
1973
1974int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
1975 get_extent_t *get_extent)
1976{
1977 struct bio *bio = NULL;
1978 int ret;
1979
Chris Masonf1885912008-04-09 16:28:12 -04001980 ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05001981 if (bio)
Chris Masonf1885912008-04-09 16:28:12 -04001982 submit_one_bio(READ, bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05001983 return ret;
1984}
1985EXPORT_SYMBOL(extent_read_full_page);
1986
1987/*
1988 * the writepage semantics are similar to regular writepage. extent
1989 * records are inserted to lock ranges in the tree, and as dirty areas
1990 * are found, they are marked writeback. Then the lock bits are removed
1991 * and the end_io handler clears the writeback ranges
1992 */
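/*
 * Rough flow sketch (illustrative summary only) for one dirty page:
 *
 *	find_lock_delalloc_range() + ->fill_delalloc()	allocate delalloc areas
 *	lock_extent(tree, start, page_end)
 *	for each chunk mapped by ->get_extent():
 *		clear_extent_dirty(), set_range_writeback()
 *		submit_extent_page(WRITE, ..., end_bio_extent_writepage, 0)
 *	unlock_extent() for anything not handed to the end_io handler
 *
 * Callers normally get here through extent_write_full_page() or
 * extent_writepages() below, which carry the shared bio in extent_page_data.
 */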
1993static int __extent_writepage(struct page *page, struct writeback_control *wbc,
1994 void *data)
1995{
1996 struct inode *inode = page->mapping->host;
1997 struct extent_page_data *epd = data;
1998 struct extent_io_tree *tree = epd->tree;
1999 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2000 u64 delalloc_start;
2001 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2002 u64 end;
2003 u64 cur = start;
2004 u64 extent_offset;
2005 u64 last_byte = i_size_read(inode);
2006 u64 block_start;
2007 u64 iosize;
Chris Masone6dcd2d2008-07-17 12:53:50 -04002008 u64 unlock_start;
Chris Masond1310b22008-01-24 16:13:08 -05002009 sector_t sector;
2010 struct extent_map *em;
2011 struct block_device *bdev;
2012 int ret;
2013 int nr = 0;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002014 size_t pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002015 size_t blocksize;
2016 loff_t i_size = i_size_read(inode);
2017 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
2018 u64 nr_delalloc;
2019 u64 delalloc_end;
2020
2021 WARN_ON(!PageLocked(page));
Chris Mason7f3c74f2008-07-18 12:01:11 -04002022 pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
Chris Mason211c17f2008-05-15 09:13:45 -04002023 if (page->index > end_index ||
Chris Mason7f3c74f2008-07-18 12:01:11 -04002024 (page->index == end_index && !pg_offset)) {
Chris Mason211c17f2008-05-15 09:13:45 -04002025 page->mapping->a_ops->invalidatepage(page, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002026 unlock_page(page);
2027 return 0;
2028 }
2029
2030 if (page->index == end_index) {
2031 char *userpage;
2032
Chris Masond1310b22008-01-24 16:13:08 -05002033 userpage = kmap_atomic(page, KM_USER0);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002034 memset(userpage + pg_offset, 0,
2035 PAGE_CACHE_SIZE - pg_offset);
Chris Masond1310b22008-01-24 16:13:08 -05002036 kunmap_atomic(userpage, KM_USER0);
Chris Mason211c17f2008-05-15 09:13:45 -04002037 flush_dcache_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002038 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002039 pg_offset = 0;
Chris Masond1310b22008-01-24 16:13:08 -05002040
2041 set_page_extent_mapped(page);
2042
2043 delalloc_start = start;
2044 delalloc_end = 0;
2045 while(delalloc_end < page_end) {
2046 nr_delalloc = find_lock_delalloc_range(tree, &delalloc_start,
2047 &delalloc_end,
2048 128 * 1024 * 1024);
2049 if (nr_delalloc == 0) {
2050 delalloc_start = delalloc_end + 1;
2051 continue;
2052 }
2053 tree->ops->fill_delalloc(inode, delalloc_start,
2054 delalloc_end);
2055 clear_extent_bit(tree, delalloc_start,
2056 delalloc_end,
2057 EXTENT_LOCKED | EXTENT_DELALLOC,
2058 1, 0, GFP_NOFS);
2059 delalloc_start = delalloc_end + 1;
2060 }
2061 lock_extent(tree, start, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002062 unlock_start = start;
Chris Masond1310b22008-01-24 16:13:08 -05002063
Chris Mason247e7432008-07-17 12:53:51 -04002064 if (tree->ops && tree->ops->writepage_start_hook) {
2065 ret = tree->ops->writepage_start_hook(page, start, page_end);
2066 if (ret == -EAGAIN) {
2067 unlock_extent(tree, start, page_end, GFP_NOFS);
2068 redirty_page_for_writepage(wbc, page);
2069 unlock_page(page);
2070 return 0;
2071 }
2072 }
2073
Chris Masond1310b22008-01-24 16:13:08 -05002074 end = page_end;
2075 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
2076 printk("found delalloc bits after lock_extent\n");
2077 }
2078
2079 if (last_byte <= start) {
2080 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002081 unlock_extent(tree, start, page_end, GFP_NOFS);
2082 if (tree->ops && tree->ops->writepage_end_io_hook)
2083 tree->ops->writepage_end_io_hook(page, start,
2084 page_end, NULL, 1);
2085 unlock_start = page_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002086 goto done;
2087 }
2088
2089 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
2090 blocksize = inode->i_sb->s_blocksize;
2091
2092 while (cur <= end) {
2093 if (cur >= last_byte) {
2094 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002095 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
2096 if (tree->ops && tree->ops->writepage_end_io_hook)
2097 tree->ops->writepage_end_io_hook(page, cur,
2098 page_end, NULL, 1);
2099 unlock_start = page_end + 1;
Chris Masond1310b22008-01-24 16:13:08 -05002100 break;
2101 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002102 em = epd->get_extent(inode, page, pg_offset, cur,
Chris Masond1310b22008-01-24 16:13:08 -05002103 end - cur + 1, 1);
2104 if (IS_ERR(em) || !em) {
2105 SetPageError(page);
2106 break;
2107 }
2108
2109 extent_offset = cur - em->start;
2110 BUG_ON(extent_map_end(em) <= cur);
2111 BUG_ON(end < cur);
2112 iosize = min(extent_map_end(em) - cur, end - cur + 1);
2113 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
2114 sector = (em->block_start + extent_offset) >> 9;
2115 bdev = em->bdev;
2116 block_start = em->block_start;
2117 free_extent_map(em);
2118 em = NULL;
2119
2120 if (block_start == EXTENT_MAP_HOLE ||
2121 block_start == EXTENT_MAP_INLINE) {
2122 clear_extent_dirty(tree, cur,
2123 cur + iosize - 1, GFP_NOFS);
Chris Masone6dcd2d2008-07-17 12:53:50 -04002124
2125 unlock_extent(tree, unlock_start, cur + iosize -1,
2126 GFP_NOFS);
Chris Mason7f3c74f2008-07-18 12:01:11 -04002127
Chris Masone6dcd2d2008-07-17 12:53:50 -04002128 if (tree->ops && tree->ops->writepage_end_io_hook)
2129 tree->ops->writepage_end_io_hook(page, cur,
2130 cur + iosize - 1,
2131 NULL, 1);
Chris Masond1310b22008-01-24 16:13:08 -05002132 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002133 pg_offset += iosize;
Chris Masone6dcd2d2008-07-17 12:53:50 -04002134 unlock_start = cur;
Chris Masond1310b22008-01-24 16:13:08 -05002135 continue;
2136 }
2137
2138 /* leave this out until we have a page_mkwrite call */
2139 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
2140 EXTENT_DIRTY, 0)) {
2141 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002142 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002143 continue;
2144 }
2145 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
2146 if (tree->ops && tree->ops->writepage_io_hook) {
2147 ret = tree->ops->writepage_io_hook(page, cur,
2148 cur + iosize - 1);
2149 } else {
2150 ret = 0;
2151 }
Chris Mason1259ab72008-05-12 13:39:03 -04002152 if (ret) {
Chris Masond1310b22008-01-24 16:13:08 -05002153 SetPageError(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002154 } else {
Chris Masond1310b22008-01-24 16:13:08 -05002155 unsigned long max_nr = end_index + 1;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002156
Chris Masond1310b22008-01-24 16:13:08 -05002157 set_range_writeback(tree, cur, cur + iosize - 1);
2158 if (!PageWriteback(page)) {
2159 printk("warning page %lu not writeback, "
2160 "cur %llu end %llu\n", page->index,
2161 (unsigned long long)cur,
2162 (unsigned long long)end);
2163 }
2164
2165 ret = submit_extent_page(WRITE, tree, page, sector,
Chris Mason7f3c74f2008-07-18 12:01:11 -04002166 iosize, pg_offset, bdev,
Chris Masond1310b22008-01-24 16:13:08 -05002167 &epd->bio, max_nr,
Chris Masonf1885912008-04-09 16:28:12 -04002168 end_bio_extent_writepage, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002169 if (ret)
2170 SetPageError(page);
2171 }
2172 cur = cur + iosize;
Chris Mason7f3c74f2008-07-18 12:01:11 -04002173 pg_offset += iosize;
Chris Masond1310b22008-01-24 16:13:08 -05002174 nr++;
2175 }
2176done:
2177 if (nr == 0) {
2178 /* make sure the mapping tag for page dirty gets cleared */
2179 set_page_writeback(page);
2180 end_page_writeback(page);
2181 }
Chris Masone6dcd2d2008-07-17 12:53:50 -04002182 if (unlock_start <= page_end)
2183 unlock_extent(tree, unlock_start, page_end, GFP_NOFS);
Chris Masond1310b22008-01-24 16:13:08 -05002184 unlock_page(page);
2185 return 0;
2186}
2187
Chris Mason5e478dc2008-04-25 09:10:45 -04002188#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
Chris Masond1310b22008-01-24 16:13:08 -05002189/* Taken directly from 2.6.23 for 2.6.18 back port */
2190typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc,
2191 void *data);
2192
2193/**
2194 * write_cache_pages - walk the list of dirty pages of the given address space
2195 * and write all of them.
2196 * @mapping: address space structure to write
2197 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
2198 * @writepage: function called for each page
2199 * @data: data passed to writepage function
2200 *
2201 * If a page is already under I/O, write_cache_pages() skips it, even
2202 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
2203 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
2204 * and msync() need to guarantee that all the data which was dirty at the time
2205 * the call was made get new I/O started against them. If wbc->sync_mode is
2206 * WB_SYNC_ALL then we were called for data integrity and we must wait for
2207 * existing IO to complete.
2208 */
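/*
 * Usage sketch (illustrative): this backported helper is driven exactly like
 * the mainline one, with extent_page_data as the cookie so the bio can be
 * merged across pages, e.g.
 *
 *	struct extent_page_data epd = { .bio = NULL, .tree = tree,
 *					.get_extent = get_extent };
 *	ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
 *	if (epd.bio)
 *		submit_one_bio(WRITE, epd.bio, 0);
 */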
2209static int write_cache_pages(struct address_space *mapping,
2210 struct writeback_control *wbc, writepage_t writepage,
2211 void *data)
2212{
2213 struct backing_dev_info *bdi = mapping->backing_dev_info;
2214 int ret = 0;
2215 int done = 0;
2216 struct pagevec pvec;
2217 int nr_pages;
2218 pgoff_t index;
2219 pgoff_t end; /* Inclusive */
2220 int scanned = 0;
2221 int range_whole = 0;
2222
2223 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2224 wbc->encountered_congestion = 1;
2225 return 0;
2226 }
2227
2228 pagevec_init(&pvec, 0);
2229 if (wbc->range_cyclic) {
2230 index = mapping->writeback_index; /* Start from prev offset */
2231 end = -1;
2232 } else {
2233 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2234 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2235 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2236 range_whole = 1;
2237 scanned = 1;
2238 }
2239retry:
2240 while (!done && (index <= end) &&
2241 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
2242 PAGECACHE_TAG_DIRTY,
2243 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
2244 unsigned i;
2245
2246 scanned = 1;
2247 for (i = 0; i < nr_pages; i++) {
2248 struct page *page = pvec.pages[i];
2249
2250 /*
2251 * At this point we hold neither mapping->tree_lock nor
2252 * lock on the page itself: the page may be truncated or
2253 * invalidated (changing page->mapping to NULL), or even
2254 * swizzled back from swapper_space to tmpfs file
2255 * mapping
2256 */
2257 lock_page(page);
2258
2259 if (unlikely(page->mapping != mapping)) {
2260 unlock_page(page);
2261 continue;
2262 }
2263
2264 if (!wbc->range_cyclic && page->index > end) {
2265 done = 1;
2266 unlock_page(page);
2267 continue;
2268 }
2269
2270 if (wbc->sync_mode != WB_SYNC_NONE)
2271 wait_on_page_writeback(page);
2272
2273 if (PageWriteback(page) ||
2274 !clear_page_dirty_for_io(page)) {
2275 unlock_page(page);
2276 continue;
2277 }
2278
2279 ret = (*writepage)(page, wbc, data);
2280
2281 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) {
2282 unlock_page(page);
2283 ret = 0;
2284 }
2285 if (ret || (--(wbc->nr_to_write) <= 0))
2286 done = 1;
2287 if (wbc->nonblocking && bdi_write_congested(bdi)) {
2288 wbc->encountered_congestion = 1;
2289 done = 1;
2290 }
2291 }
2292 pagevec_release(&pvec);
2293 cond_resched();
2294 }
2295 if (!scanned && !done) {
2296 /*
2297 * We hit the last page and there is more work to be done: wrap
2298 * back to the start of the file
2299 */
2300 scanned = 1;
2301 index = 0;
2302 goto retry;
2303 }
2304 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2305 mapping->writeback_index = index;
2306 return ret;
2307}
2308#endif
2309
2310int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
2311 get_extent_t *get_extent,
2312 struct writeback_control *wbc)
2313{
2314 int ret;
2315 struct address_space *mapping = page->mapping;
2316 struct extent_page_data epd = {
2317 .bio = NULL,
2318 .tree = tree,
2319 .get_extent = get_extent,
2320 };
2321 struct writeback_control wbc_writepages = {
2322 .bdi = wbc->bdi,
2323 .sync_mode = WB_SYNC_NONE,
2324 .older_than_this = NULL,
2325 .nr_to_write = 64,
2326 .range_start = page_offset(page) + PAGE_CACHE_SIZE,
2327 .range_end = (loff_t)-1,
2328 };
2329
2330
2331 ret = __extent_writepage(page, wbc, &epd);
2332
2333 write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
2334 if (epd.bio) {
Chris Masonf1885912008-04-09 16:28:12 -04002335 submit_one_bio(WRITE, epd.bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002336 }
2337 return ret;
2338}
2339EXPORT_SYMBOL(extent_write_full_page);
2340
2341
2342int extent_writepages(struct extent_io_tree *tree,
2343 struct address_space *mapping,
2344 get_extent_t *get_extent,
2345 struct writeback_control *wbc)
2346{
2347 int ret = 0;
2348 struct extent_page_data epd = {
2349 .bio = NULL,
2350 .tree = tree,
2351 .get_extent = get_extent,
2352 };
2353
2354 ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
2355 if (epd.bio) {
Chris Masonf1885912008-04-09 16:28:12 -04002356 submit_one_bio(WRITE, epd.bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002357 }
2358 return ret;
2359}
2360EXPORT_SYMBOL(extent_writepages);
2361
2362int extent_readpages(struct extent_io_tree *tree,
2363 struct address_space *mapping,
2364 struct list_head *pages, unsigned nr_pages,
2365 get_extent_t get_extent)
2366{
2367 struct bio *bio = NULL;
2368 unsigned page_idx;
2369 struct pagevec pvec;
2370
2371 pagevec_init(&pvec, 0);
2372 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2373 struct page *page = list_entry(pages->prev, struct page, lru);
2374
2375 prefetchw(&page->flags);
2376 list_del(&page->lru);
2377 /*
2378 * what we want to do here is call add_to_page_cache_lru,
2379 * but that isn't exported, so we reproduce it here
2380 */
2381 if (!add_to_page_cache(page, mapping,
2382 page->index, GFP_KERNEL)) {
2383
2384 /* open coding of lru_cache_add, also not exported */
2385 page_cache_get(page);
2386 if (!pagevec_add(&pvec, page))
2387 __pagevec_lru_add(&pvec);
Chris Masonf1885912008-04-09 16:28:12 -04002388 __extent_read_full_page(tree, page, get_extent,
2389 &bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002390 }
2391 page_cache_release(page);
2392 }
2393 if (pagevec_count(&pvec))
2394 __pagevec_lru_add(&pvec);
2395 BUG_ON(!list_empty(pages));
2396 if (bio)
Chris Masonf1885912008-04-09 16:28:12 -04002397 submit_one_bio(READ, bio, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002398 return 0;
2399}
2400EXPORT_SYMBOL(extent_readpages);
2401
2402/*
2403 * basic invalidatepage code, this waits on any locked or writeback
2404 * ranges corresponding to the page, and then deletes any extent state
2405 * records from the tree
2406 */
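/*
 * Worked example (illustrative, assuming a 4K blocksize): for a partial
 * invalidate at offset 100, start advances by (100 + 4095) & ~4095 = 4096,
 * so the page's first block is left alone and only the blocks after it get
 * their LOCKED/DIRTY/DELALLOC state cleared; offset 0 rounds to 0 and the
 * whole page range is cleared.
 */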
2407int extent_invalidatepage(struct extent_io_tree *tree,
2408 struct page *page, unsigned long offset)
2409{
2410 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2411 u64 end = start + PAGE_CACHE_SIZE - 1;
2412 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
2413
2414 start += (offset + blocksize -1) & ~(blocksize - 1);
2415 if (start > end)
2416 return 0;
2417
2418 lock_extent(tree, start, end, GFP_NOFS);
2419 wait_on_extent_writeback(tree, start, end);
2420 clear_extent_bit(tree, start, end,
2421 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
2422 1, 1, GFP_NOFS);
2423 return 0;
2424}
2425EXPORT_SYMBOL(extent_invalidatepage);
2426
2427/*
2428 * simple commit_write call, set_page_dirty is used to mark the page
 2429 * dirty and i_size is updated when the write extends the file
2430 */
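/*
 * Worked example (illustrative, assuming 4K pages): committing bytes
 * [0, 512) of page index 2 gives pos = (2 << PAGE_CACHE_SHIFT) + 512 = 8704;
 * if i_size was smaller, it is bumped to 8704 and the inode marked dirty,
 * while set_page_dirty() leaves the page for a later writeback pass through
 * __extent_writepage().
 */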
2431int extent_commit_write(struct extent_io_tree *tree,
2432 struct inode *inode, struct page *page,
2433 unsigned from, unsigned to)
2434{
2435 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
2436
2437 set_page_extent_mapped(page);
2438 set_page_dirty(page);
2439
2440 if (pos > inode->i_size) {
2441 i_size_write(inode, pos);
2442 mark_inode_dirty(inode);
2443 }
2444 return 0;
2445}
2446EXPORT_SYMBOL(extent_commit_write);
2447
2448int extent_prepare_write(struct extent_io_tree *tree,
2449 struct inode *inode, struct page *page,
2450 unsigned from, unsigned to, get_extent_t *get_extent)
2451{
2452 u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2453 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
2454 u64 block_start;
2455 u64 orig_block_start;
2456 u64 block_end;
2457 u64 cur_end;
2458 struct extent_map *em;
2459 unsigned blocksize = 1 << inode->i_blkbits;
2460 size_t page_offset = 0;
2461 size_t block_off_start;
2462 size_t block_off_end;
2463 int err = 0;
2464 int iocount = 0;
2465 int ret = 0;
2466 int isnew;
2467
2468 set_page_extent_mapped(page);
2469
2470 block_start = (page_start + from) & ~((u64)blocksize - 1);
2471 block_end = (page_start + to - 1) | (blocksize - 1);
2472 orig_block_start = block_start;
2473
2474 lock_extent(tree, page_start, page_end, GFP_NOFS);
2475 while(block_start <= block_end) {
2476 em = get_extent(inode, page, page_offset, block_start,
2477 block_end - block_start + 1, 1);
2478 if (IS_ERR(em) || !em) {
2479 goto err;
2480 }
2481 cur_end = min(block_end, extent_map_end(em) - 1);
2482 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
2483 block_off_end = block_off_start + blocksize;
2484 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
2485
2486 if (!PageUptodate(page) && isnew &&
2487 (block_off_end > to || block_off_start < from)) {
2488 void *kaddr;
2489
2490 kaddr = kmap_atomic(page, KM_USER0);
2491 if (block_off_end > to)
2492 memset(kaddr + to, 0, block_off_end - to);
2493 if (block_off_start < from)
2494 memset(kaddr + block_off_start, 0,
2495 from - block_off_start);
2496 flush_dcache_page(page);
2497 kunmap_atomic(kaddr, KM_USER0);
2498 }
2499 if ((em->block_start != EXTENT_MAP_HOLE &&
2500 em->block_start != EXTENT_MAP_INLINE) &&
2501 !isnew && !PageUptodate(page) &&
2502 (block_off_end > to || block_off_start < from) &&
2503 !test_range_bit(tree, block_start, cur_end,
2504 EXTENT_UPTODATE, 1)) {
2505 u64 sector;
2506 u64 extent_offset = block_start - em->start;
2507 size_t iosize;
2508 sector = (em->block_start + extent_offset) >> 9;
2509 iosize = (cur_end - block_start + blocksize) &
2510 ~((u64)blocksize - 1);
2511 /*
2512 * we've already got the extent locked, but we
2513 * need to split the state such that our end_bio
2514 * handler can clear the lock.
2515 */
2516 set_extent_bit(tree, block_start,
2517 block_start + iosize - 1,
2518 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
2519 ret = submit_extent_page(READ, tree, page,
2520 sector, iosize, page_offset, em->bdev,
2521 NULL, 1,
Chris Masonf1885912008-04-09 16:28:12 -04002522 end_bio_extent_preparewrite, 0);
Chris Masond1310b22008-01-24 16:13:08 -05002523 iocount++;
2524 block_start = block_start + iosize;
2525 } else {
2526 set_extent_uptodate(tree, block_start, cur_end,
2527 GFP_NOFS);
2528 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
2529 block_start = cur_end + 1;
2530 }
2531 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
2532 free_extent_map(em);
2533 }
2534 if (iocount) {
2535 wait_extent_bit(tree, orig_block_start,
2536 block_end, EXTENT_LOCKED);
2537 }
2538 check_page_uptodate(tree, page);
2539err:
2540 /* FIXME, zero out newly allocated blocks on error */
2541 return err;
2542}
2543EXPORT_SYMBOL(extent_prepare_write);
2544
2545/*
Chris Mason7b13b7b2008-04-18 10:29:50 -04002546 * a helper for releasepage, this tests for areas of the page that
2547 * are locked or under IO and drops the related state bits if it is safe
2548 * to drop the page.
2549 */
2550int try_release_extent_state(struct extent_map_tree *map,
2551 struct extent_io_tree *tree, struct page *page,
2552 gfp_t mask)
2553{
2554 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2555 u64 end = start + PAGE_CACHE_SIZE - 1;
2556 int ret = 1;
2557
Chris Mason211f90e2008-07-18 11:56:15 -04002558 if (test_range_bit(tree, start, end,
2559 EXTENT_IOBITS | EXTENT_ORDERED, 0))
Chris Mason7b13b7b2008-04-18 10:29:50 -04002560 ret = 0;
2561 else {
2562 if ((mask & GFP_NOFS) == GFP_NOFS)
2563 mask = GFP_NOFS;
2564 clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
2565 1, 1, mask);
2566 }
2567 return ret;
2568}
2569EXPORT_SYMBOL(try_release_extent_state);
2570
2571/*
Chris Masond1310b22008-01-24 16:13:08 -05002572 * a helper for releasepage. As long as there are no locked extents
2573 * in the range corresponding to the page, both state records and extent
2574 * map records are removed
2575 */
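/*
 * Usage sketch (illustrative; my_releasepage is a hypothetical name): a
 * filesystem ->releasepage would typically be a thin wrapper, e.g.
 *
 *	static int my_releasepage(struct page *page, gfp_t gfp)
 *	{
 *		struct extent_io_tree *tree;	(per-inode io tree)
 *		struct extent_map_tree *map;	(per-inode extent map tree)
 *		...
 *		return try_release_extent_mapping(map, tree, page, gfp);
 *	}
 *
 * A non-zero return tells the VM the page's private state could be dropped;
 * locked or in-flight ranges make it return 0 via try_release_extent_state().
 */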
2576int try_release_extent_mapping(struct extent_map_tree *map,
Chris Mason70dec802008-01-29 09:59:12 -05002577 struct extent_io_tree *tree, struct page *page,
2578 gfp_t mask)
Chris Masond1310b22008-01-24 16:13:08 -05002579{
2580 struct extent_map *em;
2581 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
2582 u64 end = start + PAGE_CACHE_SIZE - 1;
Chris Mason7b13b7b2008-04-18 10:29:50 -04002583
Chris Mason70dec802008-01-29 09:59:12 -05002584 if ((mask & __GFP_WAIT) &&
2585 page->mapping->host->i_size > 16 * 1024 * 1024) {
Yan39b56372008-02-15 10:40:50 -05002586 u64 len;
Chris Mason70dec802008-01-29 09:59:12 -05002587 while (start <= end) {
Yan39b56372008-02-15 10:40:50 -05002588 len = end - start + 1;
Chris Mason70dec802008-01-29 09:59:12 -05002589 spin_lock(&map->lock);
Yan39b56372008-02-15 10:40:50 -05002590 em = lookup_extent_mapping(map, start, len);
Chris Mason70dec802008-01-29 09:59:12 -05002591 if (!em || IS_ERR(em)) {
2592 spin_unlock(&map->lock);
2593 break;
2594 }
Chris Mason7f3c74f2008-07-18 12:01:11 -04002595 if (test_bit(EXTENT_FLAG_PINNED, &em->flags) ||
2596 em->start != start) {
Chris Mason70dec802008-01-29 09:59:12 -05002597 spin_unlock(&map->lock);
2598 free_extent_map(em);
2599 break;
2600 }
2601 if (!test_range_bit(tree, em->start,
2602 extent_map_end(em) - 1,
2603 EXTENT_LOCKED, 0)) {
2604 remove_extent_mapping(map, em);
2605 /* once for the rb tree */
2606 free_extent_map(em);
2607 }
2608 start = extent_map_end(em);
Chris Masond1310b22008-01-24 16:13:08 -05002609 spin_unlock(&map->lock);
Chris Mason70dec802008-01-29 09:59:12 -05002610
2611 /* once for us */
Chris Masond1310b22008-01-24 16:13:08 -05002612 free_extent_map(em);
2613 }
Chris Masond1310b22008-01-24 16:13:08 -05002614 }
Chris Mason7b13b7b2008-04-18 10:29:50 -04002615 return try_release_extent_state(map, tree, page, mask);
Chris Masond1310b22008-01-24 16:13:08 -05002616}
2617EXPORT_SYMBOL(try_release_extent_mapping);
2618
2619sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2620 get_extent_t *get_extent)
2621{
2622 struct inode *inode = mapping->host;
2623 u64 start = iblock << inode->i_blkbits;
2624 sector_t sector = 0;
2625 struct extent_map *em;
2626
2627 em = get_extent(inode, NULL, 0, start, (1 << inode->i_blkbits), 0);
2628 if (!em || IS_ERR(em))
2629 return 0;
2630
2631 if (em->block_start == EXTENT_MAP_INLINE ||
2632 em->block_start == EXTENT_MAP_HOLE)
2633 goto out;
2634
2635 sector = (em->block_start + start - em->start) >> inode->i_blkbits;
Chris Masond1310b22008-01-24 16:13:08 -05002636out:
2637 free_extent_map(em);
2638 return sector;
2639}
2640
Chris Masond1310b22008-01-24 16:13:08 -05002641static inline struct page *extent_buffer_page(struct extent_buffer *eb,
2642 unsigned long i)
2643{
2644 struct page *p;
2645 struct address_space *mapping;
2646
2647 if (i == 0)
2648 return eb->first_page;
2649 i += eb->start >> PAGE_CACHE_SHIFT;
2650 mapping = eb->first_page->mapping;
Chris Mason33958dc2008-07-30 10:29:12 -04002651 if (!mapping)
2652 return NULL;
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002653
2654 /*
2655 * extent_buffer_page is only called after pinning the page
2656 * by increasing the reference count. So we know the page must
2657 * be in the radix tree.
2658 */
2659#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2660 rcu_read_lock();
2661#else
Chris Masond1310b22008-01-24 16:13:08 -05002662 read_lock_irq(&mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002663#endif
Chris Masond1310b22008-01-24 16:13:08 -05002664 p = radix_tree_lookup(&mapping->page_tree, i);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002665
2666#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2667 rcu_read_unlock();
2668#else
Chris Masond1310b22008-01-24 16:13:08 -05002669 read_unlock_irq(&mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002670#endif
Chris Masond1310b22008-01-24 16:13:08 -05002671 return p;
2672}
2673
Chris Mason6af118c2008-07-22 11:18:07 -04002674static inline unsigned long num_extent_pages(u64 start, u64 len)
Chris Masonce9adaa2008-04-09 16:28:12 -04002675{
Chris Mason6af118c2008-07-22 11:18:07 -04002676 return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
2677 (start >> PAGE_CACHE_SHIFT);
Chris Mason728131d2008-04-09 16:28:12 -04002678}
2679
Chris Masond1310b22008-01-24 16:13:08 -05002680static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
2681 u64 start,
2682 unsigned long len,
2683 gfp_t mask)
2684{
2685 struct extent_buffer *eb = NULL;
Chris Mason2d2ae542008-03-26 16:24:23 -04002686 unsigned long flags;
Chris Masond1310b22008-01-24 16:13:08 -05002687
Chris Masond1310b22008-01-24 16:13:08 -05002688 eb = kmem_cache_zalloc(extent_buffer_cache, mask);
 if (!eb)
 return NULL; /* callers already check for a NULL buffer */
Chris Masond1310b22008-01-24 16:13:08 -05002689 eb->start = start;
2690 eb->len = len;
Chris Masona61e6f22008-07-22 11:18:08 -04002691 mutex_init(&eb->mutex);
Chris Mason2d2ae542008-03-26 16:24:23 -04002692 spin_lock_irqsave(&leak_lock, flags);
2693 list_add(&eb->leak_list, &buffers);
2694 spin_unlock_irqrestore(&leak_lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -05002695 atomic_set(&eb->refs, 1);
2696
2697 return eb;
2698}
2699
2700static void __free_extent_buffer(struct extent_buffer *eb)
2701{
Chris Mason2d2ae542008-03-26 16:24:23 -04002702 unsigned long flags;
2703 spin_lock_irqsave(&leak_lock, flags);
2704 list_del(&eb->leak_list);
2705 spin_unlock_irqrestore(&leak_lock, flags);
Chris Masond1310b22008-01-24 16:13:08 -05002706 kmem_cache_free(extent_buffer_cache, eb);
2707}
2708
2709struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
2710 u64 start, unsigned long len,
2711 struct page *page0,
2712 gfp_t mask)
2713{
2714 unsigned long num_pages = num_extent_pages(start, len);
2715 unsigned long i;
2716 unsigned long index = start >> PAGE_CACHE_SHIFT;
2717 struct extent_buffer *eb;
Chris Mason6af118c2008-07-22 11:18:07 -04002718 struct extent_buffer *exists = NULL;
Chris Masond1310b22008-01-24 16:13:08 -05002719 struct page *p;
2720 struct address_space *mapping = tree->mapping;
2721 int uptodate = 1;
2722
Chris Mason6af118c2008-07-22 11:18:07 -04002723 spin_lock(&tree->buffer_lock);
2724 eb = buffer_search(tree, start);
2725 if (eb) {
2726 atomic_inc(&eb->refs);
2727 spin_unlock(&tree->buffer_lock);
2728 return eb;
2729 }
2730 spin_unlock(&tree->buffer_lock);
2731
Chris Masond1310b22008-01-24 16:13:08 -05002732 eb = __alloc_extent_buffer(tree, start, len, mask);
Peter2b114d12008-04-01 11:21:40 -04002733 if (!eb)
Chris Masond1310b22008-01-24 16:13:08 -05002734 return NULL;
2735
Chris Masond1310b22008-01-24 16:13:08 -05002736 if (page0) {
2737 eb->first_page = page0;
2738 i = 1;
2739 index++;
2740 page_cache_get(page0);
2741 mark_page_accessed(page0);
2742 set_page_extent_mapped(page0);
Chris Masond1310b22008-01-24 16:13:08 -05002743 set_page_extent_head(page0, len);
Chris Masonf1885912008-04-09 16:28:12 -04002744 uptodate = PageUptodate(page0);
Chris Masond1310b22008-01-24 16:13:08 -05002745 } else {
2746 i = 0;
2747 }
2748 for (; i < num_pages; i++, index++) {
2749 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
2750 if (!p) {
2751 WARN_ON(1);
Chris Mason6af118c2008-07-22 11:18:07 -04002752 goto free_eb;
Chris Masond1310b22008-01-24 16:13:08 -05002753 }
2754 set_page_extent_mapped(p);
2755 mark_page_accessed(p);
2756 if (i == 0) {
2757 eb->first_page = p;
2758 set_page_extent_head(p, len);
2759 } else {
2760 set_page_private(p, EXTENT_PAGE_PRIVATE);
2761 }
2762 if (!PageUptodate(p))
2763 uptodate = 0;
2764 unlock_page(p);
2765 }
2766 if (uptodate)
2767 eb->flags |= EXTENT_UPTODATE;
2768 eb->flags |= EXTENT_BUFFER_FILLED;
2769
Chris Mason6af118c2008-07-22 11:18:07 -04002770 spin_lock(&tree->buffer_lock);
2771 exists = buffer_tree_insert(tree, start, &eb->rb_node);
2772 if (exists) {
2773 /* add one reference for the caller */
2774 atomic_inc(&exists->refs);
2775 spin_unlock(&tree->buffer_lock);
2776 goto free_eb;
2777 }
2778 spin_unlock(&tree->buffer_lock);
2779
2780 /* add one reference for the tree */
2781 atomic_inc(&eb->refs);
Chris Masond1310b22008-01-24 16:13:08 -05002782 return eb;
2783
Chris Mason6af118c2008-07-22 11:18:07 -04002784free_eb:
Chris Masond1310b22008-01-24 16:13:08 -05002785 if (!atomic_dec_and_test(&eb->refs))
Chris Mason6af118c2008-07-22 11:18:07 -04002786 return exists;
2787 for (index = 1; index < i; index++)
Chris Masond1310b22008-01-24 16:13:08 -05002788 page_cache_release(extent_buffer_page(eb, index));
Chris Mason6af118c2008-07-22 11:18:07 -04002789 page_cache_release(extent_buffer_page(eb, 0));
Chris Masond1310b22008-01-24 16:13:08 -05002790 __free_extent_buffer(eb);
Chris Mason6af118c2008-07-22 11:18:07 -04002791 return exists;
Chris Masond1310b22008-01-24 16:13:08 -05002792}
2793EXPORT_SYMBOL(alloc_extent_buffer);
2794
2795struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
2796 u64 start, unsigned long len,
2797 gfp_t mask)
2798{
Chris Masond1310b22008-01-24 16:13:08 -05002799 struct extent_buffer *eb;
Chris Masond1310b22008-01-24 16:13:08 -05002800
Chris Mason6af118c2008-07-22 11:18:07 -04002801 spin_lock(&tree->buffer_lock);
2802 eb = buffer_search(tree, start);
2803 if (eb)
2804 atomic_inc(&eb->refs);
2805 spin_unlock(&tree->buffer_lock);
Chris Masond1310b22008-01-24 16:13:08 -05002806
Chris Masond1310b22008-01-24 16:13:08 -05002807 return eb;
Chris Masond1310b22008-01-24 16:13:08 -05002808}
2809EXPORT_SYMBOL(find_extent_buffer);
2810
2811void free_extent_buffer(struct extent_buffer *eb)
2812{
Chris Masond1310b22008-01-24 16:13:08 -05002813 if (!eb)
2814 return;
2815
2816 if (!atomic_dec_and_test(&eb->refs))
2817 return;
2818
Chris Mason6af118c2008-07-22 11:18:07 -04002819 WARN_ON(1);
Chris Masond1310b22008-01-24 16:13:08 -05002820}
2821EXPORT_SYMBOL(free_extent_buffer);
2822
2823int clear_extent_buffer_dirty(struct extent_io_tree *tree,
2824 struct extent_buffer *eb)
2825{
2826 int set;
2827 unsigned long i;
2828 unsigned long num_pages;
2829 struct page *page;
2830
2831 u64 start = eb->start;
2832 u64 end = start + eb->len - 1;
2833
2834 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
2835 num_pages = num_extent_pages(eb->start, eb->len);
2836
2837 for (i = 0; i < num_pages; i++) {
2838 page = extent_buffer_page(eb, i);
Chris Masona61e6f22008-07-22 11:18:08 -04002839 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002840 if (i == 0)
2841 set_page_extent_head(page, eb->len);
2842 else
2843 set_page_private(page, EXTENT_PAGE_PRIVATE);
2844
2845 /*
2846 * if we're on the last page or the first page and the
2847 * block isn't aligned on a page boundary, do extra checks
2848 * to make sure we don't clean a page that is partially dirty
2849 */
2850 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2851 ((i == num_pages - 1) &&
2852 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2853 start = (u64)page->index << PAGE_CACHE_SHIFT;
2854 end = start + PAGE_CACHE_SIZE - 1;
2855 if (test_range_bit(tree, start, end,
2856 EXTENT_DIRTY, 0)) {
Chris Masona61e6f22008-07-22 11:18:08 -04002857 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002858 continue;
2859 }
2860 }
2861 clear_page_dirty_for_io(page);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002862#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2863 spin_lock_irq(&page->mapping->tree_lock);
2864#else
Chris Mason70dec802008-01-29 09:59:12 -05002865 read_lock_irq(&page->mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002866#endif
Chris Masond1310b22008-01-24 16:13:08 -05002867 if (!PageDirty(page)) {
2868 radix_tree_tag_clear(&page->mapping->page_tree,
2869 page_index(page),
2870 PAGECACHE_TAG_DIRTY);
2871 }
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002872#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
2873 spin_unlock_irq(&page->mapping->tree_lock);
2874#else
Chris Mason70dec802008-01-29 09:59:12 -05002875 read_unlock_irq(&page->mapping->tree_lock);
Sven Wegener0ee0fda2008-07-30 16:54:26 -04002876#endif
Chris Masona61e6f22008-07-22 11:18:08 -04002877 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002878 }
2879 return 0;
2880}
2881EXPORT_SYMBOL(clear_extent_buffer_dirty);
2882
2883int wait_on_extent_buffer_writeback(struct extent_io_tree *tree,
2884 struct extent_buffer *eb)
2885{
2886 return wait_on_extent_writeback(tree, eb->start,
2887 eb->start + eb->len - 1);
2888}
2889EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2890
2891int set_extent_buffer_dirty(struct extent_io_tree *tree,
2892 struct extent_buffer *eb)
2893{
2894 unsigned long i;
2895 unsigned long num_pages;
2896
2897 num_pages = num_extent_pages(eb->start, eb->len);
2898 for (i = 0; i < num_pages; i++) {
2899 struct page *page = extent_buffer_page(eb, i);
2900 /* writepage may need to do something special for the
2901 * first page, so we have to make sure page->private is
2902 * properly set. releasepage may drop page->private
2903 * on us if the page isn't already dirty.
2904 */
2905 if (i == 0) {
Chris Masona61e6f22008-07-22 11:18:08 -04002906 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002907 set_page_extent_head(page, eb->len);
2908 } else if (PagePrivate(page) &&
2909 page->private != EXTENT_PAGE_PRIVATE) {
Chris Masona61e6f22008-07-22 11:18:08 -04002910 lock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002911 set_page_extent_mapped(page);
Chris Masona61e6f22008-07-22 11:18:08 -04002912 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002913 }
2914 __set_page_dirty_nobuffers(extent_buffer_page(eb, i));
Chris Masona61e6f22008-07-22 11:18:08 -04002915 if (i == 0)
2916 unlock_page(page);
Chris Masond1310b22008-01-24 16:13:08 -05002917 }
2918 return set_extent_dirty(tree, eb->start,
2919 eb->start + eb->len - 1, GFP_NOFS);
2920}
2921EXPORT_SYMBOL(set_extent_buffer_dirty);
2922
Chris Mason1259ab72008-05-12 13:39:03 -04002923int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
2924 struct extent_buffer *eb)
2925{
2926 unsigned long i;
2927 struct page *page;
2928 unsigned long num_pages;
2929
2930 num_pages = num_extent_pages(eb->start, eb->len);
2931 eb->flags &= ~EXTENT_UPTODATE;
2932
2933 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2934 GFP_NOFS);
2935 for (i = 0; i < num_pages; i++) {
2936 page = extent_buffer_page(eb, i);
Chris Mason33958dc2008-07-30 10:29:12 -04002937 if (page)
2938 ClearPageUptodate(page);
Chris Mason1259ab72008-05-12 13:39:03 -04002939 }
2940 return 0;
2941}
2942
Chris Masond1310b22008-01-24 16:13:08 -05002943int set_extent_buffer_uptodate(struct extent_io_tree *tree,
2944 struct extent_buffer *eb)
2945{
2946 unsigned long i;
2947 struct page *page;
2948 unsigned long num_pages;
2949
2950 num_pages = num_extent_pages(eb->start, eb->len);
2951
2952 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2953 GFP_NOFS);
2954 for (i = 0; i < num_pages; i++) {
2955 page = extent_buffer_page(eb, i);
2956 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2957 ((i == num_pages - 1) &&
2958 ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) {
2959 check_page_uptodate(tree, page);
2960 continue;
2961 }
2962 SetPageUptodate(page);
2963 }
2964 return 0;
2965}
2966EXPORT_SYMBOL(set_extent_buffer_uptodate);
2967
Chris Masonce9adaa2008-04-09 16:28:12 -04002968int extent_range_uptodate(struct extent_io_tree *tree,
2969 u64 start, u64 end)
2970{
2971 struct page *page;
2972 int ret;
2973 int pg_uptodate = 1;
2974 int uptodate;
2975 unsigned long index;
2976
2977 ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
2978 if (ret)
2979 return 1;
2980 while(start <= end) {
2981 index = start >> PAGE_CACHE_SHIFT;
2982 page = find_get_page(tree->mapping, index);
2983 uptodate = PageUptodate(page);
2984 page_cache_release(page);
2985 if (!uptodate) {
2986 pg_uptodate = 0;
2987 break;
2988 }
2989 start += PAGE_CACHE_SIZE;
2990 }
2991 return pg_uptodate;
2992}
2993
Chris Masond1310b22008-01-24 16:13:08 -05002994int extent_buffer_uptodate(struct extent_io_tree *tree,
Chris Masonce9adaa2008-04-09 16:28:12 -04002995 struct extent_buffer *eb)
Chris Masond1310b22008-01-24 16:13:08 -05002996{
Chris Mason728131d2008-04-09 16:28:12 -04002997 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04002998 unsigned long num_pages;
2999 unsigned long i;
Chris Mason728131d2008-04-09 16:28:12 -04003000 struct page *page;
3001 int pg_uptodate = 1;
3002
Chris Masond1310b22008-01-24 16:13:08 -05003003 if (eb->flags & EXTENT_UPTODATE)
Chris Mason42352982008-04-28 16:40:52 -04003004 return 1;
Chris Mason728131d2008-04-09 16:28:12 -04003005
Chris Mason42352982008-04-28 16:40:52 -04003006 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Masond1310b22008-01-24 16:13:08 -05003007 EXTENT_UPTODATE, 1);
Chris Mason42352982008-04-28 16:40:52 -04003008 if (ret)
3009 return ret;
Chris Mason728131d2008-04-09 16:28:12 -04003010
3011 num_pages = num_extent_pages(eb->start, eb->len);
3012 for (i = 0; i < num_pages; i++) {
3013 page = extent_buffer_page(eb, i);
3014 if (!PageUptodate(page)) {
3015 pg_uptodate = 0;
3016 break;
3017 }
3018 }
Chris Mason42352982008-04-28 16:40:52 -04003019 return pg_uptodate;
Chris Masond1310b22008-01-24 16:13:08 -05003020}
3021EXPORT_SYMBOL(extent_buffer_uptodate);
3022
3023int read_extent_buffer_pages(struct extent_io_tree *tree,
3024 struct extent_buffer *eb,
Chris Masona86c12c2008-02-07 10:50:54 -05003025 u64 start, int wait,
Chris Masonf1885912008-04-09 16:28:12 -04003026 get_extent_t *get_extent, int mirror_num)
Chris Masond1310b22008-01-24 16:13:08 -05003027{
3028 unsigned long i;
3029 unsigned long start_i;
3030 struct page *page;
3031 int err;
3032 int ret = 0;
Chris Masonce9adaa2008-04-09 16:28:12 -04003033 int locked_pages = 0;
3034 int all_uptodate = 1;
3035 int inc_all_pages = 0;
Chris Masond1310b22008-01-24 16:13:08 -05003036 unsigned long num_pages;
Chris Masona86c12c2008-02-07 10:50:54 -05003037 struct bio *bio = NULL;
3038
Chris Masond1310b22008-01-24 16:13:08 -05003039 if (eb->flags & EXTENT_UPTODATE)
3040 return 0;
3041
Chris Masonce9adaa2008-04-09 16:28:12 -04003042 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
Chris Masond1310b22008-01-24 16:13:08 -05003043 EXTENT_UPTODATE, 1)) {
3044 return 0;
3045 }
3046
3047 if (start) {
3048 WARN_ON(start < eb->start);
3049 start_i = (start >> PAGE_CACHE_SHIFT) -
3050 (eb->start >> PAGE_CACHE_SHIFT);
3051 } else {
3052 start_i = 0;
3053 }
3054
3055 num_pages = num_extent_pages(eb->start, eb->len);
3056 for (i = start_i; i < num_pages; i++) {
3057 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003058 if (!wait) {
David Woodhouse2db04962008-08-07 11:19:43 -04003059 if (!trylock_page(page))
Chris Masonce9adaa2008-04-09 16:28:12 -04003060 goto unlock_exit;
Chris Masond1310b22008-01-24 16:13:08 -05003061 } else {
3062 lock_page(page);
3063 }
Chris Masonce9adaa2008-04-09 16:28:12 -04003064 locked_pages++;
Chris Masond1310b22008-01-24 16:13:08 -05003065 if (!PageUptodate(page)) {
Chris Masonce9adaa2008-04-09 16:28:12 -04003066 all_uptodate = 0;
3067 }
3068 }
3069 if (all_uptodate) {
3070 if (start_i == 0)
3071 eb->flags |= EXTENT_UPTODATE;
3072 goto unlock_exit;
3073 }
3074
3075 for (i = start_i; i < num_pages; i++) {
3076 page = extent_buffer_page(eb, i);
3077 if (inc_all_pages)
3078 page_cache_get(page);
3079 if (!PageUptodate(page)) {
3080 if (start_i == 0)
3081 inc_all_pages = 1;
Chris Masonf1885912008-04-09 16:28:12 -04003082 ClearPageError(page);
Chris Masona86c12c2008-02-07 10:50:54 -05003083 err = __extent_read_full_page(tree, page,
Chris Masonf1885912008-04-09 16:28:12 -04003084 get_extent, &bio,
3085 mirror_num);
Chris Masond1310b22008-01-24 16:13:08 -05003086 if (err) {
3087 ret = err;
3088 }
3089 } else {
3090 unlock_page(page);
3091 }
3092 }
3093
Chris Masona86c12c2008-02-07 10:50:54 -05003094 if (bio)
Chris Masonf1885912008-04-09 16:28:12 -04003095 submit_one_bio(READ, bio, mirror_num);
Chris Masona86c12c2008-02-07 10:50:54 -05003096
Chris Masond1310b22008-01-24 16:13:08 -05003097 if (ret || !wait) {
3098 return ret;
3099 }
Chris Masond1310b22008-01-24 16:13:08 -05003100 for (i = start_i; i < num_pages; i++) {
3101 page = extent_buffer_page(eb, i);
3102 wait_on_page_locked(page);
3103 if (!PageUptodate(page)) {
3104 ret = -EIO;
3105 }
3106 }
3107 if (!ret)
3108 eb->flags |= EXTENT_UPTODATE;
3109 return ret;
Chris Masonce9adaa2008-04-09 16:28:12 -04003110
3111unlock_exit:
3112 i = start_i;
3113 while(locked_pages > 0) {
3114 page = extent_buffer_page(eb, i);
3115 i++;
3116 unlock_page(page);
3117 locked_pages--;
3118 }
3119 return ret;
Chris Masond1310b22008-01-24 16:13:08 -05003120}
3121EXPORT_SYMBOL(read_extent_buffer_pages);
3122
3123void read_extent_buffer(struct extent_buffer *eb, void *dstv,
3124 unsigned long start,
3125 unsigned long len)
3126{
3127 size_t cur;
3128 size_t offset;
3129 struct page *page;
3130 char *kaddr;
3131 char *dst = (char *)dstv;
3132 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3133 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
Chris Masond1310b22008-01-24 16:13:08 -05003134
3135 WARN_ON(start > eb->len);
3136 WARN_ON(start + len > eb->start + eb->len);
3137
3138 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3139
3140 while(len > 0) {
3141 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003142
3143 cur = min(len, (PAGE_CACHE_SIZE - offset));
3144 kaddr = kmap_atomic(page, KM_USER1);
3145 memcpy(dst, kaddr + offset, cur);
3146 kunmap_atomic(kaddr, KM_USER1);
3147
3148 dst += cur;
3149 len -= cur;
3150 offset = 0;
3151 i++;
3152 }
3153}
3154EXPORT_SYMBOL(read_extent_buffer);
3155
3156int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
3157 unsigned long min_len, char **token, char **map,
3158 unsigned long *map_start,
3159 unsigned long *map_len, int km)
3160{
3161 size_t offset = start & (PAGE_CACHE_SIZE - 1);
3162 char *kaddr;
3163 struct page *p;
3164 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3165 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3166 unsigned long end_i = (start_offset + start + min_len - 1) >>
3167 PAGE_CACHE_SHIFT;
3168
3169 if (i != end_i)
3170 return -EINVAL;
3171
3172 if (i == 0) {
3173 offset = start_offset;
3174 *map_start = 0;
3175 } else {
3176 offset = 0;
3177 *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
3178 }
3179 if (start + min_len > eb->len) {
3180printk("bad mapping eb start %Lu len %lu, wanted %lu %lu\n", eb->start, eb->len, start, min_len);
3181 WARN_ON(1);
3182 }
3183
3184 p = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003185 kaddr = kmap_atomic(p, km);
3186 *token = kaddr;
3187 *map = kaddr + offset;
3188 *map_len = PAGE_CACHE_SIZE - offset;
3189 return 0;
3190}
3191EXPORT_SYMBOL(map_private_extent_buffer);
3192
3193int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
3194 unsigned long min_len,
3195 char **token, char **map,
3196 unsigned long *map_start,
3197 unsigned long *map_len, int km)
3198{
3199 int err;
3200 int save = 0;
3201 if (eb->map_token) {
3202 unmap_extent_buffer(eb, eb->map_token, km);
3203 eb->map_token = NULL;
3204 save = 1;
3205 }
3206 err = map_private_extent_buffer(eb, start, min_len, token, map,
3207 map_start, map_len, km);
3208 if (!err && save) {
3209 eb->map_token = *token;
3210 eb->kaddr = *map;
3211 eb->map_start = *map_start;
3212 eb->map_len = *map_len;
3213 }
3214 return err;
3215}
3216EXPORT_SYMBOL(map_extent_buffer);
3217
3218void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
3219{
3220 kunmap_atomic(token, km);
3221}
3222EXPORT_SYMBOL(unmap_extent_buffer);
3223
3224int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
3225 unsigned long start,
3226 unsigned long len)
3227{
3228 size_t cur;
3229 size_t offset;
3230 struct page *page;
3231 char *kaddr;
3232 char *ptr = (char *)ptrv;
3233 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3234 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3235 int ret = 0;
3236
3237 WARN_ON(start > eb->len);
3238 WARN_ON(start + len > eb->start + eb->len);
3239
3240 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3241
3242 while(len > 0) {
3243 page = extent_buffer_page(eb, i);
Chris Masond1310b22008-01-24 16:13:08 -05003244
3245 cur = min(len, (PAGE_CACHE_SIZE - offset));
3246
3247 kaddr = kmap_atomic(page, KM_USER0);
3248 ret = memcmp(ptr, kaddr + offset, cur);
3249 kunmap_atomic(kaddr, KM_USER0);
3250 if (ret)
3251 break;
3252
3253 ptr += cur;
3254 len -= cur;
3255 offset = 0;
3256 i++;
3257 }
3258 return ret;
3259}
3260EXPORT_SYMBOL(memcmp_extent_buffer);
3261
3262void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
3263 unsigned long start, unsigned long len)
3264{
3265 size_t cur;
3266 size_t offset;
3267 struct page *page;
3268 char *kaddr;
3269 char *src = (char *)srcv;
3270 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3271 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3272
3273 WARN_ON(start > eb->len);
3274 WARN_ON(start + len > eb->start + eb->len);
3275
3276 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3277
3278 while(len > 0) {
3279 page = extent_buffer_page(eb, i);
3280 WARN_ON(!PageUptodate(page));
3281
3282 cur = min(len, PAGE_CACHE_SIZE - offset);
3283 kaddr = kmap_atomic(page, KM_USER1);
3284 memcpy(kaddr + offset, src, cur);
3285 kunmap_atomic(kaddr, KM_USER1);
3286
3287 src += cur;
3288 len -= cur;
3289 offset = 0;
3290 i++;
3291 }
3292}
3293EXPORT_SYMBOL(write_extent_buffer);
3294
3295void memset_extent_buffer(struct extent_buffer *eb, char c,
3296 unsigned long start, unsigned long len)
3297{
3298 size_t cur;
3299 size_t offset;
3300 struct page *page;
3301 char *kaddr;
3302 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
3303 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
3304
3305 WARN_ON(start > eb->len);
3306 WARN_ON(start + len > eb->start + eb->len);
3307
3308 offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
3309
3310 while(len > 0) {
3311 page = extent_buffer_page(eb, i);
3312 WARN_ON(!PageUptodate(page));
3313
3314 cur = min(len, PAGE_CACHE_SIZE - offset);
3315 kaddr = kmap_atomic(page, KM_USER0);
3316 memset(kaddr + offset, c, cur);
3317 kunmap_atomic(kaddr, KM_USER0);
3318
3319 len -= cur;
3320 offset = 0;
3321 i++;
3322 }
3323}
3324EXPORT_SYMBOL(memset_extent_buffer);
3325
3326void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
3327 unsigned long dst_offset, unsigned long src_offset,
3328 unsigned long len)
3329{
3330 u64 dst_len = dst->len;
3331 size_t cur;
3332 size_t offset;
3333 struct page *page;
3334 char *kaddr;
3335 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3336 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3337
3338 WARN_ON(src->len != dst_len);
3339
3340 offset = (start_offset + dst_offset) &
3341 ((unsigned long)PAGE_CACHE_SIZE - 1);
3342
3343 while(len > 0) {
3344 page = extent_buffer_page(dst, i);
3345 WARN_ON(!PageUptodate(page));
3346
3347 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
3348
3349 kaddr = kmap_atomic(page, KM_USER0);
3350 read_extent_buffer(src, kaddr + offset, src_offset, cur);
3351 kunmap_atomic(kaddr, KM_USER0);
3352
3353 src_offset += cur;
3354 len -= cur;
3355 offset = 0;
3356 i++;
3357 }
3358}
3359EXPORT_SYMBOL(copy_extent_buffer);
3360
3361static void move_pages(struct page *dst_page, struct page *src_page,
3362 unsigned long dst_off, unsigned long src_off,
3363 unsigned long len)
3364{
3365 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3366 if (dst_page == src_page) {
3367 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
3368 } else {
3369 char *src_kaddr = kmap_atomic(src_page, KM_USER1);
3370 char *p = dst_kaddr + dst_off + len;
3371 char *s = src_kaddr + src_off + len;
3372
3373 while (len--)
3374 *--p = *--s;
3375
3376 kunmap_atomic(src_kaddr, KM_USER1);
3377 }
3378 kunmap_atomic(dst_kaddr, KM_USER0);
3379}
3380
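/*
 * copy_pages - memcpy-style helper used by memcpy_extent_buffer()
 *
 * Copies @len bytes from @src_page/@src_off to @dst_page/@dst_off using
 * memcpy, so when both offsets land in the same page the caller must
 * ensure the two ranges do not overlap.
 */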
3381static void copy_pages(struct page *dst_page, struct page *src_page,
3382 unsigned long dst_off, unsigned long src_off,
3383 unsigned long len)
3384{
3385 char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
3386 char *src_kaddr;
3387
3388 if (dst_page != src_page)
3389 src_kaddr = kmap_atomic(src_page, KM_USER1);
3390 else
3391 src_kaddr = dst_kaddr;
3392
3393 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
3394 kunmap_atomic(dst_kaddr, KM_USER0);
3395 if (dst_page != src_page)
3396 kunmap_atomic(src_kaddr, KM_USER1);
3397}
3398
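/*
 * memcpy_extent_buffer - copy a range within a single extent buffer
 * @dst:        the extent buffer (both source and destination)
 * @dst_offset: byte offset to copy to
 * @src_offset: byte offset to copy from
 * @len:        number of bytes to copy
 *
 * The copy runs front to back and is chunked so that each step stays
 * inside one source page and one destination page.  Either range
 * running past the end of the buffer is a bug (BUG_ON).
 */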
3399void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3400 unsigned long src_offset, unsigned long len)
3401{
3402 size_t cur;
3403 size_t dst_off_in_page;
3404 size_t src_off_in_page;
3405 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3406 unsigned long dst_i;
3407 unsigned long src_i;
3408
3409 if (src_offset + len > dst->len) {
3410		printk(KERN_ERR "memcpy bogus src_offset %lu move len %lu dst len %lu\n",
3411		       src_offset, len, dst->len);
3412 BUG_ON(1);
3413 }
3414 if (dst_offset + len > dst->len) {
3415		printk(KERN_ERR "memcpy bogus dst_offset %lu move len %lu dst len %lu\n",
3416		       dst_offset, len, dst->len);
3417 BUG_ON(1);
3418 }
3419
3420	while (len > 0) {
3421 dst_off_in_page = (start_offset + dst_offset) &
3422 ((unsigned long)PAGE_CACHE_SIZE - 1);
3423 src_off_in_page = (start_offset + src_offset) &
3424 ((unsigned long)PAGE_CACHE_SIZE - 1);
3425
3426 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
3427 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
3428
3429 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
3430 src_off_in_page));
3431 cur = min_t(unsigned long, cur,
3432 (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
3433
3434 copy_pages(extent_buffer_page(dst, dst_i),
3435 extent_buffer_page(dst, src_i),
3436 dst_off_in_page, src_off_in_page, cur);
3437
3438 src_offset += cur;
3439 dst_offset += cur;
3440 len -= cur;
3441 }
3442}
3443EXPORT_SYMBOL(memcpy_extent_buffer);
3444
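/*
 * memmove_extent_buffer - overlap-safe move within a single extent buffer
 * @dst:        the extent buffer (both source and destination)
 * @dst_offset: byte offset to move to
 * @src_offset: byte offset to move from
 * @len:        number of bytes to move
 *
 * When the destination lies below the source, the move is delegated to
 * memcpy_extent_buffer(); otherwise it walks backwards from the end of
 * both ranges using move_pages().  Used, for example, when items are
 * shifted inside a btrfs leaf or node.
 */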
3445void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3446 unsigned long src_offset, unsigned long len)
3447{
3448 size_t cur;
3449 size_t dst_off_in_page;
3450 size_t src_off_in_page;
3451 unsigned long dst_end = dst_offset + len - 1;
3452 unsigned long src_end = src_offset + len - 1;
3453 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
3454 unsigned long dst_i;
3455 unsigned long src_i;
3456
3457 if (src_offset + len > dst->len) {
3458		printk(KERN_ERR "memmove bogus src_offset %lu move len %lu dst len %lu\n",
3459		       src_offset, len, dst->len);
3460 BUG_ON(1);
3461 }
3462 if (dst_offset + len > dst->len) {
3463		printk(KERN_ERR "memmove bogus dst_offset %lu move len %lu dst len %lu\n",
3464		       dst_offset, len, dst->len);
3465 BUG_ON(1);
3466 }
3467 if (dst_offset < src_offset) {
3468 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
3469 return;
3470 }
3471	while (len > 0) {
3472 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
3473 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
3474
3475 dst_off_in_page = (start_offset + dst_end) &
3476 ((unsigned long)PAGE_CACHE_SIZE - 1);
3477 src_off_in_page = (start_offset + src_end) &
3478 ((unsigned long)PAGE_CACHE_SIZE - 1);
3479
3480 cur = min_t(unsigned long, len, src_off_in_page + 1);
3481 cur = min(cur, dst_off_in_page + 1);
3482 move_pages(extent_buffer_page(dst, dst_i),
3483 extent_buffer_page(dst, src_i),
3484 dst_off_in_page - cur + 1,
3485 src_off_in_page - cur + 1, cur);
3486
3487 dst_end -= cur;
3488 src_end -= cur;
3489 len -= cur;
3490 }
3491}
3492EXPORT_SYMBOL(memmove_extent_buffer);
3493
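/*
 * try_release_extent_buffer - free the extent buffer backing @page if unused
 *
 * Looks up the extent buffer at the page's offset.  If no one else holds a
 * reference, the per-page references are dropped, the buffer is removed
 * from the tree's buffer rbtree and freed.  Returns 1 if the buffer was
 * freed (or none was found), 0 if it is still in use.
 */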
3494int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3495{
3496 u64 start = page_offset(page);
3497 struct extent_buffer *eb;
3498 int ret = 1;
3499 unsigned long i;
3500 unsigned long num_pages;
3501
3502 spin_lock(&tree->buffer_lock);
3503 eb = buffer_search(tree, start);
3504 if (!eb)
3505 goto out;
3506
3507 if (atomic_read(&eb->refs) > 1) {
3508 ret = 0;
3509 goto out;
3510 }
3511 /* at this point we can safely release the extent buffer */
3512 num_pages = num_extent_pages(eb->start, eb->len);
3513 for (i = 0; i < num_pages; i++) {
3514 struct page *page = extent_buffer_page(eb, i);
3515 page_cache_release(page);
3516 }
3517 rb_erase(&eb->rb_node, &tree->buffer);
3518 __free_extent_buffer(eb);
3519out:
3520 spin_unlock(&tree->buffer_lock);
3521 return ret;
3522}
3523EXPORT_SYMBOL(try_release_extent_buffer);