139b113e8338b5dd6c66ff1f8880eb07c13ef7ac
[linux-3.10.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 /**
35  * struct nilfs_btree_path - A path on which B-tree operations are executed
36  * @bp_bh: buffer head of node block
37  * @bp_sib_bh: buffer head of sibling node block
38  * @bp_index: index of child node
39  * @bp_oldreq: ptr end request for old ptr
40  * @bp_newreq: ptr alloc request for new ptr
41  * @bp_op: rebalance operation
42  */
43 struct nilfs_btree_path {
44         struct buffer_head *bp_bh;
45         struct buffer_head *bp_sib_bh;
46         int bp_index;
47         union nilfs_bmap_ptr_req bp_oldreq;
48         union nilfs_bmap_ptr_req bp_newreq;
49         struct nilfs_btnode_chkey_ctxt bp_ctxt;
50         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
51                       int, __u64 *, __u64 *);
52 };
53
54 /*
55  * B-tree path operations
56  */
57
58 static struct kmem_cache *nilfs_btree_path_cache;
59
60 int __init nilfs_btree_path_cache_init(void)
61 {
62         nilfs_btree_path_cache =
63                 kmem_cache_create("nilfs2_btree_path_cache",
64                                   sizeof(struct nilfs_btree_path) *
65                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
66         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
67 }
68
69 void nilfs_btree_path_cache_destroy(void)
70 {
71         kmem_cache_destroy(nilfs_btree_path_cache);
72 }
73
74 static inline struct nilfs_btree_path *nilfs_btree_alloc_path(void)
75 {
76         return kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
77 }
78
79 static inline void nilfs_btree_free_path(struct nilfs_btree_path *path)
80 {
81         kmem_cache_free(nilfs_btree_path_cache, path);
82 }
83
84 static void nilfs_btree_init_path(struct nilfs_btree_path *path)
85 {
86         int level;
87
88         for (level = NILFS_BTREE_LEVEL_DATA;
89              level < NILFS_BTREE_LEVEL_MAX;
90              level++) {
91                 path[level].bp_bh = NULL;
92                 path[level].bp_sib_bh = NULL;
93                 path[level].bp_index = 0;
94                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
95                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
96                 path[level].bp_op = NULL;
97         }
98 }
99
100 static void nilfs_btree_release_path(struct nilfs_btree_path *path)
101 {
102         int level;
103
104         for (level = NILFS_BTREE_LEVEL_DATA; level < NILFS_BTREE_LEVEL_MAX;
105              level++)
106                 brelse(path[level].bp_bh);
107 }
108
109 /*
110  * B-tree node operations
111  */
112 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
113                                  struct buffer_head **bhp)
114 {
115         struct address_space *btnc =
116                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
117         int err;
118
119         err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
120         if (err)
121                 return err == -EEXIST ? 0 : err;
122
123         wait_on_buffer(*bhp);
124         if (!buffer_uptodate(*bhp)) {
125                 brelse(*bhp);
126                 return -EIO;
127         }
128         return 0;
129 }
130
131 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
132                                      __u64 ptr, struct buffer_head **bhp)
133 {
134         struct address_space *btnc =
135                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
136         struct buffer_head *bh;
137
138         bh = nilfs_btnode_create_block(btnc, ptr);
139         if (!bh)
140                 return -ENOMEM;
141
142         set_buffer_nilfs_volatile(bh);
143         *bhp = bh;
144         return 0;
145 }
146
147 static inline int
148 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
149 {
150         return node->bn_flags;
151 }
152
153 static inline void
154 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
155 {
156         node->bn_flags = flags;
157 }
158
159 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
160 {
161         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
162 }
163
164 static inline int
165 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
166 {
167         return node->bn_level;
168 }
169
170 static inline void
171 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
172 {
173         node->bn_level = level;
174 }
175
176 static inline int
177 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
178 {
179         return le16_to_cpu(node->bn_nchildren);
180 }
181
182 static inline void
183 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
184 {
185         node->bn_nchildren = cpu_to_le16(nchildren);
186 }
187
188 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
189 {
190         return 1 << btree->bt_bmap.b_inode->i_blkbits;
191 }
192
193 static inline int
194 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
195                                const struct nilfs_btree *btree)
196 {
197         return nilfs_btree_node_root(node) ?
198                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
199                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
200 }
201
202 static inline int
203 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
204                                const struct nilfs_btree *btree)
205 {
206         return nilfs_btree_node_root(node) ?
207                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
208                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
209 }
210
211 static inline __le64 *
212 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
213 {
214         return (__le64 *)((char *)(node + 1) +
215                           (nilfs_btree_node_root(node) ?
216                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
217 }
218
219 static inline __le64 *
220 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
221                        const struct nilfs_btree *btree)
222 {
223         return (__le64 *)(nilfs_btree_node_dkeys(node) +
224                           nilfs_btree_node_nchildren_max(node, btree));
225 }
226
227 static inline __u64
228 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
229 {
230         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index));
231 }
232
233 static inline void
234 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
235 {
236         *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key);
237 }
238
239 static inline __u64
240 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
241                          const struct nilfs_btree_node *node, int index)
242 {
243         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) +
244                                         index));
245 }
246
247 static inline void
248 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
249                          struct nilfs_btree_node *node, int index, __u64 ptr)
250 {
251         *(nilfs_btree_node_dptrs(node, btree) + index) =
252                 nilfs_bmap_ptr_to_dptr(ptr);
253 }
254
255 static void nilfs_btree_node_init(struct nilfs_btree *btree,
256                                   struct nilfs_btree_node *node,
257                                   int flags, int level, int nchildren,
258                                   const __u64 *keys, const __u64 *ptrs)
259 {
260         __le64 *dkeys;
261         __le64 *dptrs;
262         int i;
263
264         nilfs_btree_node_set_flags(node, flags);
265         nilfs_btree_node_set_level(node, level);
266         nilfs_btree_node_set_nchildren(node, nchildren);
267
268         dkeys = nilfs_btree_node_dkeys(node);
269         dptrs = nilfs_btree_node_dptrs(node, btree);
270         for (i = 0; i < nchildren; i++) {
271                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
272                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
273         }
274 }
275
276 /* Assume the buffer heads corresponding to left and right are locked. */
277 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
278                                        struct nilfs_btree_node *left,
279                                        struct nilfs_btree_node *right,
280                                        int n)
281 {
282         __le64 *ldkeys, *rdkeys;
283         __le64 *ldptrs, *rdptrs;
284         int lnchildren, rnchildren;
285
286         ldkeys = nilfs_btree_node_dkeys(left);
287         ldptrs = nilfs_btree_node_dptrs(left, btree);
288         lnchildren = nilfs_btree_node_get_nchildren(left);
289
290         rdkeys = nilfs_btree_node_dkeys(right);
291         rdptrs = nilfs_btree_node_dptrs(right, btree);
292         rnchildren = nilfs_btree_node_get_nchildren(right);
293
294         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
295         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
296         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
297         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
298
299         lnchildren += n;
300         rnchildren -= n;
301         nilfs_btree_node_set_nchildren(left, lnchildren);
302         nilfs_btree_node_set_nchildren(right, rnchildren);
303 }
304
305 /* Assume that the buffer heads corresponding to left and right are locked. */
306 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
307                                         struct nilfs_btree_node *left,
308                                         struct nilfs_btree_node *right,
309                                         int n)
310 {
311         __le64 *ldkeys, *rdkeys;
312         __le64 *ldptrs, *rdptrs;
313         int lnchildren, rnchildren;
314
315         ldkeys = nilfs_btree_node_dkeys(left);
316         ldptrs = nilfs_btree_node_dptrs(left, btree);
317         lnchildren = nilfs_btree_node_get_nchildren(left);
318
319         rdkeys = nilfs_btree_node_dkeys(right);
320         rdptrs = nilfs_btree_node_dptrs(right, btree);
321         rnchildren = nilfs_btree_node_get_nchildren(right);
322
323         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
324         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
325         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
326         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
327
328         lnchildren -= n;
329         rnchildren += n;
330         nilfs_btree_node_set_nchildren(left, lnchildren);
331         nilfs_btree_node_set_nchildren(right, rnchildren);
332 }
333
334 /* Assume that the buffer head corresponding to node is locked. */
335 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
336                                     struct nilfs_btree_node *node,
337                                     __u64 key, __u64 ptr, int index)
338 {
339         __le64 *dkeys;
340         __le64 *dptrs;
341         int nchildren;
342
343         dkeys = nilfs_btree_node_dkeys(node);
344         dptrs = nilfs_btree_node_dptrs(node, btree);
345         nchildren = nilfs_btree_node_get_nchildren(node);
346         if (index < nchildren) {
347                 memmove(dkeys + index + 1, dkeys + index,
348                         (nchildren - index) * sizeof(*dkeys));
349                 memmove(dptrs + index + 1, dptrs + index,
350                         (nchildren - index) * sizeof(*dptrs));
351         }
352         dkeys[index] = nilfs_bmap_key_to_dkey(key);
353         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
354         nchildren++;
355         nilfs_btree_node_set_nchildren(node, nchildren);
356 }
357
358 /* Assume that the buffer head corresponding to node is locked. */
359 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
360                                     struct nilfs_btree_node *node,
361                                     __u64 *keyp, __u64 *ptrp, int index)
362 {
363         __u64 key;
364         __u64 ptr;
365         __le64 *dkeys;
366         __le64 *dptrs;
367         int nchildren;
368
369         dkeys = nilfs_btree_node_dkeys(node);
370         dptrs = nilfs_btree_node_dptrs(node, btree);
371         key = nilfs_bmap_dkey_to_key(dkeys[index]);
372         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
373         nchildren = nilfs_btree_node_get_nchildren(node);
374         if (keyp != NULL)
375                 *keyp = key;
376         if (ptrp != NULL)
377                 *ptrp = ptr;
378
379         if (index < nchildren - 1) {
380                 memmove(dkeys + index, dkeys + index + 1,
381                         (nchildren - index - 1) * sizeof(*dkeys));
382                 memmove(dptrs + index, dptrs + index + 1,
383                         (nchildren - index - 1) * sizeof(*dptrs));
384         }
385         nchildren--;
386         nilfs_btree_node_set_nchildren(node, nchildren);
387 }
388
389 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
390                                    __u64 key, int *indexp)
391 {
392         __u64 nkey;
393         int index, low, high, s;
394
395         /* binary search */
396         low = 0;
397         high = nilfs_btree_node_get_nchildren(node) - 1;
398         index = 0;
399         s = 0;
400         while (low <= high) {
401                 index = (low + high) / 2;
402                 nkey = nilfs_btree_node_get_key(node, index);
403                 if (nkey == key) {
404                         s = 0;
405                         goto out;
406                 } else if (nkey < key) {
407                         low = index + 1;
408                         s = -1;
409                 } else {
410                         high = index - 1;
411                         s = 1;
412                 }
413         }
414
415         /* adjust index */
416         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
417                 if (s > 0 && index > 0)
418                         index--;
419         } else if (s < 0)
420                 index++;
421
422  out:
423         *indexp = index;
424
425         return s == 0;
426 }
427
428 static inline struct nilfs_btree_node *
429 nilfs_btree_get_root(const struct nilfs_btree *btree)
430 {
431         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
432 }
433
434 static inline struct nilfs_btree_node *
435 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
436 {
437         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
438 }
439
440 static inline struct nilfs_btree_node *
441 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
442 {
443         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
444 }
445
446 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
447 {
448         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
449 }
450
451 static inline struct nilfs_btree_node *
452 nilfs_btree_get_node(const struct nilfs_btree *btree,
453                      const struct nilfs_btree_path *path,
454                      int level)
455 {
456         return (level == nilfs_btree_height(btree) - 1) ?
457                 nilfs_btree_get_root(btree) :
458                 nilfs_btree_get_nonroot_node(path, level);
459 }
460
461 static inline int
462 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
463 {
464         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
465                 dump_stack();
466                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
467                        nilfs_btree_node_get_level(node), level);
468                 return 1;
469         }
470         return 0;
471 }
472
473 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
474                                  struct nilfs_btree_path *path,
475                                  __u64 key, __u64 *ptrp, int minlevel)
476 {
477         struct nilfs_btree_node *node;
478         __u64 ptr;
479         int level, index, found, ret;
480
481         node = nilfs_btree_get_root(btree);
482         level = nilfs_btree_node_get_level(node);
483         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
484                 return -ENOENT;
485
486         found = nilfs_btree_node_lookup(node, key, &index);
487         ptr = nilfs_btree_node_get_ptr(btree, node, index);
488         path[level].bp_bh = NULL;
489         path[level].bp_index = index;
490
491         for (level--; level >= minlevel; level--) {
492                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
493                 if (ret < 0)
494                         return ret;
495                 node = nilfs_btree_get_nonroot_node(path, level);
496                 if (nilfs_btree_bad_node(node, level))
497                         return -EINVAL;
498                 if (!found)
499                         found = nilfs_btree_node_lookup(node, key, &index);
500                 else
501                         index = 0;
502                 if (index < nilfs_btree_node_nchildren_max(node, btree))
503                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
504                 else {
505                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
506                         /* insert */
507                         ptr = NILFS_BMAP_INVALID_PTR;
508                 }
509                 path[level].bp_index = index;
510         }
511         if (!found)
512                 return -ENOENT;
513
514         if (ptrp != NULL)
515                 *ptrp = ptr;
516
517         return 0;
518 }
519
520 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
521                                       struct nilfs_btree_path *path,
522                                       __u64 *keyp, __u64 *ptrp)
523 {
524         struct nilfs_btree_node *node;
525         __u64 ptr;
526         int index, level, ret;
527
528         node = nilfs_btree_get_root(btree);
529         index = nilfs_btree_node_get_nchildren(node) - 1;
530         if (index < 0)
531                 return -ENOENT;
532         level = nilfs_btree_node_get_level(node);
533         ptr = nilfs_btree_node_get_ptr(btree, node, index);
534         path[level].bp_bh = NULL;
535         path[level].bp_index = index;
536
537         for (level--; level > 0; level--) {
538                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
539                 if (ret < 0)
540                         return ret;
541                 node = nilfs_btree_get_nonroot_node(path, level);
542                 if (nilfs_btree_bad_node(node, level))
543                         return -EINVAL;
544                 index = nilfs_btree_node_get_nchildren(node) - 1;
545                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
546                 path[level].bp_index = index;
547         }
548
549         if (keyp != NULL)
550                 *keyp = nilfs_btree_node_get_key(node, index);
551         if (ptrp != NULL)
552                 *ptrp = ptr;
553
554         return 0;
555 }
556
557 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
558                               __u64 key, int level, __u64 *ptrp)
559 {
560         struct nilfs_btree *btree;
561         struct nilfs_btree_path *path;
562         __u64 ptr;
563         int ret;
564
565         btree = (struct nilfs_btree *)bmap;
566         path = nilfs_btree_alloc_path();
567         if (path == NULL)
568                 return -ENOMEM;
569         nilfs_btree_init_path(path);
570
571         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572
573         if (ptrp != NULL)
574                 *ptrp = ptr;
575
576         nilfs_btree_release_path(path);
577         nilfs_btree_free_path(path);
578
579         return ret;
580 }
581
582 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
583                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
584 {
585         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
586         struct nilfs_btree_path *path;
587         struct nilfs_btree_node *node;
588         struct inode *dat = NULL;
589         __u64 ptr, ptr2;
590         sector_t blocknr;
591         int level = NILFS_BTREE_LEVEL_NODE_MIN;
592         int ret, cnt, index, maxlevel;
593
594         path = nilfs_btree_alloc_path();
595         if (path == NULL)
596                 return -ENOMEM;
597         nilfs_btree_init_path(path);
598         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
599         if (ret < 0)
600                 goto out;
601
602         if (NILFS_BMAP_USE_VBN(bmap)) {
603                 dat = nilfs_bmap_get_dat(bmap);
604                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
605                 if (ret < 0)
606                         goto out;
607                 ptr = blocknr;
608         }
609         cnt = 1;
610         if (cnt == maxblocks)
611                 goto end;
612
613         maxlevel = nilfs_btree_height(btree) - 1;
614         node = nilfs_btree_get_node(btree, path, level);
615         index = path[level].bp_index + 1;
616         for (;;) {
617                 while (index < nilfs_btree_node_get_nchildren(node)) {
618                         if (nilfs_btree_node_get_key(node, index) !=
619                             key + cnt)
620                                 goto end;
621                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
622                         if (dat) {
623                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
624                                 if (ret < 0)
625                                         goto out;
626                                 ptr2 = blocknr;
627                         }
628                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
629                                 goto end;
630                         index++;
631                         continue;
632                 }
633                 if (level == maxlevel)
634                         break;
635
636                 /* look-up right sibling node */
637                 node = nilfs_btree_get_node(btree, path, level + 1);
638                 index = path[level + 1].bp_index + 1;
639                 if (index >= nilfs_btree_node_get_nchildren(node) ||
640                     nilfs_btree_node_get_key(node, index) != key + cnt)
641                         break;
642                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
643                 path[level + 1].bp_index = index;
644
645                 brelse(path[level].bp_bh);
646                 path[level].bp_bh = NULL;
647                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
648                 if (ret < 0)
649                         goto out;
650                 node = nilfs_btree_get_nonroot_node(path, level);
651                 index = 0;
652                 path[level].bp_index = index;
653         }
654  end:
655         *ptrp = ptr;
656         ret = cnt;
657  out:
658         nilfs_btree_release_path(path);
659         nilfs_btree_free_path(path);
660         return ret;
661 }
662
663 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
664                                     struct nilfs_btree_path *path,
665                                     int level, __u64 key)
666 {
667         if (level < nilfs_btree_height(btree) - 1) {
668                 do {
669                         nilfs_btree_node_set_key(
670                                 nilfs_btree_get_nonroot_node(path, level),
671                                 path[level].bp_index, key);
672                         if (!buffer_dirty(path[level].bp_bh))
673                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
674                 } while ((path[level].bp_index == 0) &&
675                          (++level < nilfs_btree_height(btree) - 1));
676         }
677
678         /* root */
679         if (level == nilfs_btree_height(btree) - 1) {
680                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
681                                          path[level].bp_index, key);
682         }
683 }
684
685 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
686                                   struct nilfs_btree_path *path,
687                                   int level, __u64 *keyp, __u64 *ptrp)
688 {
689         struct nilfs_btree_node *node;
690
691         if (level < nilfs_btree_height(btree) - 1) {
692                 node = nilfs_btree_get_nonroot_node(path, level);
693                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
694                                         path[level].bp_index);
695                 if (!buffer_dirty(path[level].bp_bh))
696                         nilfs_btnode_mark_dirty(path[level].bp_bh);
697
698                 if (path[level].bp_index == 0)
699                         nilfs_btree_promote_key(btree, path, level + 1,
700                                                 nilfs_btree_node_get_key(node,
701                                                                          0));
702         } else {
703                 node = nilfs_btree_get_root(btree);
704                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
705                                         path[level].bp_index);
706         }
707 }
708
709 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
710                                    struct nilfs_btree_path *path,
711                                    int level, __u64 *keyp, __u64 *ptrp)
712 {
713         struct nilfs_btree_node *node, *left;
714         int nchildren, lnchildren, n, move;
715
716         node = nilfs_btree_get_nonroot_node(path, level);
717         left = nilfs_btree_get_sib_node(path, level);
718         nchildren = nilfs_btree_node_get_nchildren(node);
719         lnchildren = nilfs_btree_node_get_nchildren(left);
720         move = 0;
721
722         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
723         if (n > path[level].bp_index) {
724                 /* move insert point */
725                 n--;
726                 move = 1;
727         }
728
729         nilfs_btree_node_move_left(btree, left, node, n);
730
731         if (!buffer_dirty(path[level].bp_bh))
732                 nilfs_btnode_mark_dirty(path[level].bp_bh);
733         if (!buffer_dirty(path[level].bp_sib_bh))
734                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
735
736         nilfs_btree_promote_key(btree, path, level + 1,
737                                 nilfs_btree_node_get_key(node, 0));
738
739         if (move) {
740                 brelse(path[level].bp_bh);
741                 path[level].bp_bh = path[level].bp_sib_bh;
742                 path[level].bp_sib_bh = NULL;
743                 path[level].bp_index += lnchildren;
744                 path[level + 1].bp_index--;
745         } else {
746                 brelse(path[level].bp_sib_bh);
747                 path[level].bp_sib_bh = NULL;
748                 path[level].bp_index -= n;
749         }
750
751         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
752 }
753
754 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
755                                     struct nilfs_btree_path *path,
756                                     int level, __u64 *keyp, __u64 *ptrp)
757 {
758         struct nilfs_btree_node *node, *right;
759         int nchildren, rnchildren, n, move;
760
761         node = nilfs_btree_get_nonroot_node(path, level);
762         right = nilfs_btree_get_sib_node(path, level);
763         nchildren = nilfs_btree_node_get_nchildren(node);
764         rnchildren = nilfs_btree_node_get_nchildren(right);
765         move = 0;
766
767         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
768         if (n > nchildren - path[level].bp_index) {
769                 /* move insert point */
770                 n--;
771                 move = 1;
772         }
773
774         nilfs_btree_node_move_right(btree, node, right, n);
775
776         if (!buffer_dirty(path[level].bp_bh))
777                 nilfs_btnode_mark_dirty(path[level].bp_bh);
778         if (!buffer_dirty(path[level].bp_sib_bh))
779                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
780
781         path[level + 1].bp_index++;
782         nilfs_btree_promote_key(btree, path, level + 1,
783                                 nilfs_btree_node_get_key(right, 0));
784         path[level + 1].bp_index--;
785
786         if (move) {
787                 brelse(path[level].bp_bh);
788                 path[level].bp_bh = path[level].bp_sib_bh;
789                 path[level].bp_sib_bh = NULL;
790                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
791                 path[level + 1].bp_index++;
792         } else {
793                 brelse(path[level].bp_sib_bh);
794                 path[level].bp_sib_bh = NULL;
795         }
796
797         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
798 }
799
800 static void nilfs_btree_split(struct nilfs_btree *btree,
801                               struct nilfs_btree_path *path,
802                               int level, __u64 *keyp, __u64 *ptrp)
803 {
804         struct nilfs_btree_node *node, *right;
805         __u64 newkey;
806         __u64 newptr;
807         int nchildren, n, move;
808
809         node = nilfs_btree_get_nonroot_node(path, level);
810         right = nilfs_btree_get_sib_node(path, level);
811         nchildren = nilfs_btree_node_get_nchildren(node);
812         move = 0;
813
814         n = (nchildren + 1) / 2;
815         if (n > nchildren - path[level].bp_index) {
816                 n--;
817                 move = 1;
818         }
819
820         nilfs_btree_node_move_right(btree, node, right, n);
821
822         if (!buffer_dirty(path[level].bp_bh))
823                 nilfs_btnode_mark_dirty(path[level].bp_bh);
824         if (!buffer_dirty(path[level].bp_sib_bh))
825                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
826
827         newkey = nilfs_btree_node_get_key(right, 0);
828         newptr = path[level].bp_newreq.bpr_ptr;
829
830         if (move) {
831                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
832                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
833                                         path[level].bp_index);
834
835                 *keyp = nilfs_btree_node_get_key(right, 0);
836                 *ptrp = path[level].bp_newreq.bpr_ptr;
837
838                 brelse(path[level].bp_bh);
839                 path[level].bp_bh = path[level].bp_sib_bh;
840                 path[level].bp_sib_bh = NULL;
841         } else {
842                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
843
844                 *keyp = nilfs_btree_node_get_key(right, 0);
845                 *ptrp = path[level].bp_newreq.bpr_ptr;
846
847                 brelse(path[level].bp_sib_bh);
848                 path[level].bp_sib_bh = NULL;
849         }
850
851         path[level + 1].bp_index++;
852 }
853
854 static void nilfs_btree_grow(struct nilfs_btree *btree,
855                              struct nilfs_btree_path *path,
856                              int level, __u64 *keyp, __u64 *ptrp)
857 {
858         struct nilfs_btree_node *root, *child;
859         int n;
860
861         root = nilfs_btree_get_root(btree);
862         child = nilfs_btree_get_sib_node(path, level);
863
864         n = nilfs_btree_node_get_nchildren(root);
865
866         nilfs_btree_node_move_right(btree, root, child, n);
867         nilfs_btree_node_set_level(root, level + 1);
868
869         if (!buffer_dirty(path[level].bp_sib_bh))
870                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
871
872         path[level].bp_bh = path[level].bp_sib_bh;
873         path[level].bp_sib_bh = NULL;
874
875         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
876
877         *keyp = nilfs_btree_node_get_key(child, 0);
878         *ptrp = path[level].bp_newreq.bpr_ptr;
879 }
880
881 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
882                                    const struct nilfs_btree_path *path)
883 {
884         struct nilfs_btree_node *node;
885         int level;
886
887         if (path == NULL)
888                 return NILFS_BMAP_INVALID_PTR;
889
890         /* left sibling */
891         level = NILFS_BTREE_LEVEL_NODE_MIN;
892         if (path[level].bp_index > 0) {
893                 node = nilfs_btree_get_node(btree, path, level);
894                 return nilfs_btree_node_get_ptr(btree, node,
895                                                 path[level].bp_index - 1);
896         }
897
898         /* parent */
899         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
900         if (level <= nilfs_btree_height(btree) - 1) {
901                 node = nilfs_btree_get_node(btree, path, level);
902                 return nilfs_btree_node_get_ptr(btree, node,
903                                                 path[level].bp_index);
904         }
905
906         return NILFS_BMAP_INVALID_PTR;
907 }
908
909 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
910                                        const struct nilfs_btree_path *path,
911                                        __u64 key)
912 {
913         __u64 ptr;
914
915         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
916         if (ptr != NILFS_BMAP_INVALID_PTR)
917                 /* sequential access */
918                 return ptr;
919         else {
920                 ptr = nilfs_btree_find_near(btree, path);
921                 if (ptr != NILFS_BMAP_INVALID_PTR)
922                         /* near */
923                         return ptr;
924         }
925         /* block group */
926         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
927 }
928
929 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
930                                      __u64 ptr)
931 {
932         btree->bt_bmap.b_last_allocated_key = key;
933         btree->bt_bmap.b_last_allocated_ptr = ptr;
934 }
935
936 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
937                                       struct nilfs_btree_path *path,
938                                       int *levelp, __u64 key, __u64 ptr,
939                                       struct nilfs_bmap_stats *stats)
940 {
941         struct buffer_head *bh;
942         struct nilfs_btree_node *node, *parent, *sib;
943         __u64 sibptr;
944         int pindex, level, ret;
945         struct inode *dat = NULL;
946
947         stats->bs_nblocks = 0;
948         level = NILFS_BTREE_LEVEL_DATA;
949
950         /* allocate a new ptr for data block */
951         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
952                 path[level].bp_newreq.bpr_ptr =
953                         nilfs_btree_find_target_v(btree, path, key);
954                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
955         }
956
957         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
958                                            &path[level].bp_newreq, dat);
959         if (ret < 0)
960                 goto err_out_data;
961
962         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
963              level < nilfs_btree_height(btree) - 1;
964              level++) {
965                 node = nilfs_btree_get_nonroot_node(path, level);
966                 if (nilfs_btree_node_get_nchildren(node) <
967                     nilfs_btree_node_nchildren_max(node, btree)) {
968                         path[level].bp_op = nilfs_btree_do_insert;
969                         stats->bs_nblocks++;
970                         goto out;
971                 }
972
973                 parent = nilfs_btree_get_node(btree, path, level + 1);
974                 pindex = path[level + 1].bp_index;
975
976                 /* left sibling */
977                 if (pindex > 0) {
978                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
979                                                           pindex - 1);
980                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
981                         if (ret < 0)
982                                 goto err_out_child_node;
983                         sib = (struct nilfs_btree_node *)bh->b_data;
984                         if (nilfs_btree_node_get_nchildren(sib) <
985                             nilfs_btree_node_nchildren_max(sib, btree)) {
986                                 path[level].bp_sib_bh = bh;
987                                 path[level].bp_op = nilfs_btree_carry_left;
988                                 stats->bs_nblocks++;
989                                 goto out;
990                         } else
991                                 brelse(bh);
992                 }
993
994                 /* right sibling */
995                 if (pindex <
996                     nilfs_btree_node_get_nchildren(parent) - 1) {
997                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
998                                                           pindex + 1);
999                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1000                         if (ret < 0)
1001                                 goto err_out_child_node;
1002                         sib = (struct nilfs_btree_node *)bh->b_data;
1003                         if (nilfs_btree_node_get_nchildren(sib) <
1004                             nilfs_btree_node_nchildren_max(sib, btree)) {
1005                                 path[level].bp_sib_bh = bh;
1006                                 path[level].bp_op = nilfs_btree_carry_right;
1007                                 stats->bs_nblocks++;
1008                                 goto out;
1009                         } else
1010                                 brelse(bh);
1011                 }
1012
1013                 /* split */
1014                 path[level].bp_newreq.bpr_ptr =
1015                         path[level - 1].bp_newreq.bpr_ptr + 1;
1016                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1017                                                    &path[level].bp_newreq, dat);
1018                 if (ret < 0)
1019                         goto err_out_child_node;
1020                 ret = nilfs_btree_get_new_block(btree,
1021                                                 path[level].bp_newreq.bpr_ptr,
1022                                                 &bh);
1023                 if (ret < 0)
1024                         goto err_out_curr_node;
1025
1026                 stats->bs_nblocks++;
1027
1028                 nilfs_btree_node_init(btree,
1029                                       (struct nilfs_btree_node *)bh->b_data,
1030                                       0, level, 0, NULL, NULL);
1031                 path[level].bp_sib_bh = bh;
1032                 path[level].bp_op = nilfs_btree_split;
1033         }
1034
1035         /* root */
1036         node = nilfs_btree_get_root(btree);
1037         if (nilfs_btree_node_get_nchildren(node) <
1038             nilfs_btree_node_nchildren_max(node, btree)) {
1039                 path[level].bp_op = nilfs_btree_do_insert;
1040                 stats->bs_nblocks++;
1041                 goto out;
1042         }
1043
1044         /* grow */
1045         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1046         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1047                                            &path[level].bp_newreq, dat);
1048         if (ret < 0)
1049                 goto err_out_child_node;
1050         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1051                                         &bh);
1052         if (ret < 0)
1053                 goto err_out_curr_node;
1054
1055         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1056                               0, level, 0, NULL, NULL);
1057         path[level].bp_sib_bh = bh;
1058         path[level].bp_op = nilfs_btree_grow;
1059
1060         level++;
1061         path[level].bp_op = nilfs_btree_do_insert;
1062
1063         /* a newly-created node block and a data block are added */
1064         stats->bs_nblocks += 2;
1065
1066         /* success */
1067  out:
1068         *levelp = level;
1069         return ret;
1070
1071         /* error */
1072  err_out_curr_node:
1073         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1074                                    dat);
1075  err_out_child_node:
1076         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1077                 nilfs_btnode_delete(path[level].bp_sib_bh);
1078                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1079                                            &path[level].bp_newreq, dat);
1080
1081         }
1082
1083         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1084                                    dat);
1085  err_out_data:
1086         *levelp = level;
1087         stats->bs_nblocks = 0;
1088         return ret;
1089 }
1090
1091 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1092                                       struct nilfs_btree_path *path,
1093                                       int maxlevel, __u64 key, __u64 ptr)
1094 {
1095         struct inode *dat = NULL;
1096         int level;
1097
1098         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1099         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1100         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1101                 nilfs_btree_set_target_v(btree, key, ptr);
1102                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1103         }
1104
1105         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1106                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1107                                             &path[level - 1].bp_newreq, dat);
1108                 path[level].bp_op(btree, path, level, &key, &ptr);
1109         }
1110
1111         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1112                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1113 }
1114
1115 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1116 {
1117         struct nilfs_btree *btree;
1118         struct nilfs_btree_path *path;
1119         struct nilfs_bmap_stats stats;
1120         int level, ret;
1121
1122         btree = (struct nilfs_btree *)bmap;
1123         path = nilfs_btree_alloc_path();
1124         if (path == NULL)
1125                 return -ENOMEM;
1126         nilfs_btree_init_path(path);
1127
1128         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1129                                     NILFS_BTREE_LEVEL_NODE_MIN);
1130         if (ret != -ENOENT) {
1131                 if (ret == 0)
1132                         ret = -EEXIST;
1133                 goto out;
1134         }
1135
1136         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1137         if (ret < 0)
1138                 goto out;
1139         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1140         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1141
1142  out:
1143         nilfs_btree_release_path(path);
1144         nilfs_btree_free_path(path);
1145         return ret;
1146 }
1147
1148 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1149                                   struct nilfs_btree_path *path,
1150                                   int level, __u64 *keyp, __u64 *ptrp)
1151 {
1152         struct nilfs_btree_node *node;
1153
1154         if (level < nilfs_btree_height(btree) - 1) {
1155                 node = nilfs_btree_get_nonroot_node(path, level);
1156                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1157                                         path[level].bp_index);
1158                 if (!buffer_dirty(path[level].bp_bh))
1159                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1160                 if (path[level].bp_index == 0)
1161                         nilfs_btree_promote_key(btree, path, level + 1,
1162                                 nilfs_btree_node_get_key(node, 0));
1163         } else {
1164                 node = nilfs_btree_get_root(btree);
1165                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1166                                         path[level].bp_index);
1167         }
1168 }
1169
1170 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1171                                     struct nilfs_btree_path *path,
1172                                     int level, __u64 *keyp, __u64 *ptrp)
1173 {
1174         struct nilfs_btree_node *node, *left;
1175         int nchildren, lnchildren, n;
1176
1177         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1178
1179         node = nilfs_btree_get_nonroot_node(path, level);
1180         left = nilfs_btree_get_sib_node(path, level);
1181         nchildren = nilfs_btree_node_get_nchildren(node);
1182         lnchildren = nilfs_btree_node_get_nchildren(left);
1183
1184         n = (nchildren + lnchildren) / 2 - nchildren;
1185
1186         nilfs_btree_node_move_right(btree, left, node, n);
1187
1188         if (!buffer_dirty(path[level].bp_bh))
1189                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1190         if (!buffer_dirty(path[level].bp_sib_bh))
1191                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1192
1193         nilfs_btree_promote_key(btree, path, level + 1,
1194                                 nilfs_btree_node_get_key(node, 0));
1195
1196         brelse(path[level].bp_sib_bh);
1197         path[level].bp_sib_bh = NULL;
1198         path[level].bp_index += n;
1199 }
1200
1201 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1202                                      struct nilfs_btree_path *path,
1203                                      int level, __u64 *keyp, __u64 *ptrp)
1204 {
1205         struct nilfs_btree_node *node, *right;
1206         int nchildren, rnchildren, n;
1207
1208         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1209
1210         node = nilfs_btree_get_nonroot_node(path, level);
1211         right = nilfs_btree_get_sib_node(path, level);
1212         nchildren = nilfs_btree_node_get_nchildren(node);
1213         rnchildren = nilfs_btree_node_get_nchildren(right);
1214
1215         n = (nchildren + rnchildren) / 2 - nchildren;
1216
1217         nilfs_btree_node_move_left(btree, node, right, n);
1218
1219         if (!buffer_dirty(path[level].bp_bh))
1220                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1221         if (!buffer_dirty(path[level].bp_sib_bh))
1222                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1223
1224         path[level + 1].bp_index++;
1225         nilfs_btree_promote_key(btree, path, level + 1,
1226                                 nilfs_btree_node_get_key(right, 0));
1227         path[level + 1].bp_index--;
1228
1229         brelse(path[level].bp_sib_bh);
1230         path[level].bp_sib_bh = NULL;
1231 }
1232
1233 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1234                                     struct nilfs_btree_path *path,
1235                                     int level, __u64 *keyp, __u64 *ptrp)
1236 {
1237         struct nilfs_btree_node *node, *left;
1238         int n;
1239
1240         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1241
1242         node = nilfs_btree_get_nonroot_node(path, level);
1243         left = nilfs_btree_get_sib_node(path, level);
1244
1245         n = nilfs_btree_node_get_nchildren(node);
1246
1247         nilfs_btree_node_move_left(btree, left, node, n);
1248
1249         if (!buffer_dirty(path[level].bp_sib_bh))
1250                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1251
1252         nilfs_btnode_delete(path[level].bp_bh);
1253         path[level].bp_bh = path[level].bp_sib_bh;
1254         path[level].bp_sib_bh = NULL;
1255         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1256 }
1257
1258 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1259                                      struct nilfs_btree_path *path,
1260                                      int level, __u64 *keyp, __u64 *ptrp)
1261 {
1262         struct nilfs_btree_node *node, *right;
1263         int n;
1264
1265         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1266
1267         node = nilfs_btree_get_nonroot_node(path, level);
1268         right = nilfs_btree_get_sib_node(path, level);
1269
1270         n = nilfs_btree_node_get_nchildren(right);
1271
1272         nilfs_btree_node_move_left(btree, node, right, n);
1273
1274         if (!buffer_dirty(path[level].bp_bh))
1275                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1276
1277         nilfs_btnode_delete(path[level].bp_sib_bh);
1278         path[level].bp_sib_bh = NULL;
1279         path[level + 1].bp_index++;
1280 }
1281
1282 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1283                                struct nilfs_btree_path *path,
1284                                int level, __u64 *keyp, __u64 *ptrp)
1285 {
1286         struct nilfs_btree_node *root, *child;
1287         int n;
1288
1289         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1290
1291         root = nilfs_btree_get_root(btree);
1292         child = nilfs_btree_get_nonroot_node(path, level);
1293
1294         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1295         nilfs_btree_node_set_level(root, level);
1296         n = nilfs_btree_node_get_nchildren(child);
1297         nilfs_btree_node_move_left(btree, root, child, n);
1298
1299         nilfs_btnode_delete(path[level].bp_bh);
1300         path[level].bp_bh = NULL;
1301 }
1302
1303
1304 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1305                                       struct nilfs_btree_path *path,
1306                                       int *levelp,
1307                                       struct nilfs_bmap_stats *stats,
1308                                       struct inode *dat)
1309 {
1310         struct buffer_head *bh;
1311         struct nilfs_btree_node *node, *parent, *sib;
1312         __u64 sibptr;
1313         int pindex, level, ret;
1314
1315         ret = 0;
1316         stats->bs_nblocks = 0;
1317         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1318              level < nilfs_btree_height(btree) - 1;
1319              level++) {
1320                 node = nilfs_btree_get_nonroot_node(path, level);
1321                 path[level].bp_oldreq.bpr_ptr =
1322                         nilfs_btree_node_get_ptr(btree, node,
1323                                                  path[level].bp_index);
1324                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1325                                                  &path[level].bp_oldreq, dat);
1326                 if (ret < 0)
1327                         goto err_out_child_node;
1328
1329                 if (nilfs_btree_node_get_nchildren(node) >
1330                     nilfs_btree_node_nchildren_min(node, btree)) {
1331                         path[level].bp_op = nilfs_btree_do_delete;
1332                         stats->bs_nblocks++;
1333                         goto out;
1334                 }
1335
1336                 parent = nilfs_btree_get_node(btree, path, level + 1);
1337                 pindex = path[level + 1].bp_index;
1338
1339                 if (pindex > 0) {
1340                         /* left sibling */
1341                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1342                                                           pindex - 1);
1343                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1344                         if (ret < 0)
1345                                 goto err_out_curr_node;
1346                         sib = (struct nilfs_btree_node *)bh->b_data;
1347                         if (nilfs_btree_node_get_nchildren(sib) >
1348                             nilfs_btree_node_nchildren_min(sib, btree)) {
1349                                 path[level].bp_sib_bh = bh;
1350                                 path[level].bp_op = nilfs_btree_borrow_left;
1351                                 stats->bs_nblocks++;
1352                                 goto out;
1353                         } else {
1354                                 path[level].bp_sib_bh = bh;
1355                                 path[level].bp_op = nilfs_btree_concat_left;
1356                                 stats->bs_nblocks++;
1357                                 /* continue; */
1358                         }
1359                 } else if (pindex <
1360                            nilfs_btree_node_get_nchildren(parent) - 1) {
1361                         /* right sibling */
1362                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1363                                                           pindex + 1);
1364                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1365                         if (ret < 0)
1366                                 goto err_out_curr_node;
1367                         sib = (struct nilfs_btree_node *)bh->b_data;
1368                         if (nilfs_btree_node_get_nchildren(sib) >
1369                             nilfs_btree_node_nchildren_min(sib, btree)) {
1370                                 path[level].bp_sib_bh = bh;
1371                                 path[level].bp_op = nilfs_btree_borrow_right;
1372                                 stats->bs_nblocks++;
1373                                 goto out;
1374                         } else {
1375                                 path[level].bp_sib_bh = bh;
1376                                 path[level].bp_op = nilfs_btree_concat_right;
1377                                 stats->bs_nblocks++;
1378                                 /* continue; */
1379                         }
1380                 } else {
1381                         /* no siblings */
1382                         /* the only child of the root node */
1383                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1384                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1385                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1386                                 path[level].bp_op = nilfs_btree_shrink;
1387                                 stats->bs_nblocks += 2;
1388                         } else {
1389                                 path[level].bp_op = nilfs_btree_do_delete;
1390                                 stats->bs_nblocks++;
1391                         }
1392
1393                         goto out;
1394
1395                 }
1396         }
1397
1398         node = nilfs_btree_get_root(btree);
1399         path[level].bp_oldreq.bpr_ptr =
1400                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1401
1402         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1403                                          &path[level].bp_oldreq, dat);
1404         if (ret < 0)
1405                 goto err_out_child_node;
1406
1407         /* child of the root node is deleted */
1408         path[level].bp_op = nilfs_btree_do_delete;
1409         stats->bs_nblocks++;
1410
1411         /* success */
1412  out:
1413         *levelp = level;
1414         return ret;
1415
1416         /* error */
1417  err_out_curr_node:
1418         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1419  err_out_child_node:
1420         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1421                 brelse(path[level].bp_sib_bh);
1422                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1423                                          &path[level].bp_oldreq, dat);
1424         }
1425         *levelp = level;
1426         stats->bs_nblocks = 0;
1427         return ret;
1428 }
1429
1430 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1431                                       struct nilfs_btree_path *path,
1432                                       int maxlevel, struct inode *dat)
1433 {
1434         int level;
1435
1436         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1437                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1438                                           &path[level].bp_oldreq, dat);
1439                 path[level].bp_op(btree, path, level, NULL, NULL);
1440         }
1441
1442         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1443                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1444 }
1445
1446 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1447
1448 {
1449         struct nilfs_btree *btree;
1450         struct nilfs_btree_path *path;
1451         struct nilfs_bmap_stats stats;
1452         struct inode *dat;
1453         int level, ret;
1454
1455         btree = (struct nilfs_btree *)bmap;
1456         path = nilfs_btree_alloc_path();
1457         if (path == NULL)
1458                 return -ENOMEM;
1459         nilfs_btree_init_path(path);
1460         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1461                                     NILFS_BTREE_LEVEL_NODE_MIN);
1462         if (ret < 0)
1463                 goto out;
1464
1465
1466         dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1467                 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1468
1469         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1470         if (ret < 0)
1471                 goto out;
1472         nilfs_btree_commit_delete(btree, path, level, dat);
1473         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1474
1475 out:
1476         nilfs_btree_release_path(path);
1477         nilfs_btree_free_path(path);
1478         return ret;
1479 }
1480
1481 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1482 {
1483         struct nilfs_btree *btree;
1484         struct nilfs_btree_path *path;
1485         int ret;
1486
1487         btree = (struct nilfs_btree *)bmap;
1488         path = nilfs_btree_alloc_path();
1489         if (path == NULL)
1490                 return -ENOMEM;
1491         nilfs_btree_init_path(path);
1492
1493         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1494
1495         nilfs_btree_release_path(path);
1496         nilfs_btree_free_path(path);
1497
1498         return ret;
1499 }
1500
1501 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1502 {
1503         struct buffer_head *bh;
1504         struct nilfs_btree *btree;
1505         struct nilfs_btree_node *root, *node;
1506         __u64 maxkey, nextmaxkey;
1507         __u64 ptr;
1508         int nchildren, ret;
1509
1510         btree = (struct nilfs_btree *)bmap;
1511         root = nilfs_btree_get_root(btree);
1512         switch (nilfs_btree_height(btree)) {
1513         case 2:
1514                 bh = NULL;
1515                 node = root;
1516                 break;
1517         case 3:
1518                 nchildren = nilfs_btree_node_get_nchildren(root);
1519                 if (nchildren > 1)
1520                         return 0;
1521                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1522                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1523                 if (ret < 0)
1524                         return ret;
1525                 node = (struct nilfs_btree_node *)bh->b_data;
1526                 break;
1527         default:
1528                 return 0;
1529         }
1530
1531         nchildren = nilfs_btree_node_get_nchildren(node);
1532         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1533         nextmaxkey = (nchildren > 1) ?
1534                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1535         if (bh != NULL)
1536                 brelse(bh);
1537
1538         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1539 }
1540
1541 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1542                                    __u64 *keys, __u64 *ptrs, int nitems)
1543 {
1544         struct buffer_head *bh;
1545         struct nilfs_btree *btree;
1546         struct nilfs_btree_node *node, *root;
1547         __le64 *dkeys;
1548         __le64 *dptrs;
1549         __u64 ptr;
1550         int nchildren, i, ret;
1551
1552         btree = (struct nilfs_btree *)bmap;
1553         root = nilfs_btree_get_root(btree);
1554         switch (nilfs_btree_height(btree)) {
1555         case 2:
1556                 bh = NULL;
1557                 node = root;
1558                 break;
1559         case 3:
1560                 nchildren = nilfs_btree_node_get_nchildren(root);
1561                 WARN_ON(nchildren > 1);
1562                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1563                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1564                 if (ret < 0)
1565                         return ret;
1566                 node = (struct nilfs_btree_node *)bh->b_data;
1567                 break;
1568         default:
1569                 node = NULL;
1570                 return -EINVAL;
1571         }
1572
1573         nchildren = nilfs_btree_node_get_nchildren(node);
1574         if (nchildren < nitems)
1575                 nitems = nchildren;
1576         dkeys = nilfs_btree_node_dkeys(node);
1577         dptrs = nilfs_btree_node_dptrs(node, btree);
1578         for (i = 0; i < nitems; i++) {
1579                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1580                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1581         }
1582
1583         if (bh != NULL)
1584                 brelse(bh);
1585
1586         return nitems;
1587 }
1588
1589 static int
1590 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1591                                        union nilfs_bmap_ptr_req *dreq,
1592                                        union nilfs_bmap_ptr_req *nreq,
1593                                        struct buffer_head **bhp,
1594                                        struct nilfs_bmap_stats *stats)
1595 {
1596         struct buffer_head *bh;
1597         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1598         struct inode *dat = NULL;
1599         int ret;
1600
1601         stats->bs_nblocks = 0;
1602
1603         /* for data */
1604         /* cannot find near ptr */
1605         if (NILFS_BMAP_USE_VBN(bmap)) {
1606                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1607                 dat = nilfs_bmap_get_dat(bmap);
1608         }
1609
1610         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1611         if (ret < 0)
1612                 return ret;
1613
1614         *bhp = NULL;
1615         stats->bs_nblocks++;
1616         if (nreq != NULL) {
1617                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1618                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1619                 if (ret < 0)
1620                         goto err_out_dreq;
1621
1622                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1623                 if (ret < 0)
1624                         goto err_out_nreq;
1625
1626                 *bhp = bh;
1627                 stats->bs_nblocks++;
1628         }
1629
1630         /* success */
1631         return 0;
1632
1633         /* error */
1634  err_out_nreq:
1635         nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1636  err_out_dreq:
1637         nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1638         stats->bs_nblocks = 0;
1639         return ret;
1640
1641 }
1642
1643 static void
1644 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1645                                       __u64 key, __u64 ptr,
1646                                       const __u64 *keys, const __u64 *ptrs,
1647                                       int n,
1648                                       union nilfs_bmap_ptr_req *dreq,
1649                                       union nilfs_bmap_ptr_req *nreq,
1650                                       struct buffer_head *bh)
1651 {
1652         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1653         struct nilfs_btree_node *node;
1654         struct inode *dat;
1655         __u64 tmpptr;
1656
1657         /* free resources */
1658         if (bmap->b_ops->bop_clear != NULL)
1659                 bmap->b_ops->bop_clear(bmap);
1660
1661         /* ptr must be a pointer to a buffer head. */
1662         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1663
1664         /* convert and insert */
1665         dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1666         nilfs_btree_init(bmap);
1667         if (nreq != NULL) {
1668                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1669                 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1670
1671                 /* create child node at level 1 */
1672                 node = (struct nilfs_btree_node *)bh->b_data;
1673                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1674                 nilfs_btree_node_insert(btree, node,
1675                                         key, dreq->bpr_ptr, n);
1676                 if (!buffer_dirty(bh))
1677                         nilfs_btnode_mark_dirty(bh);
1678                 if (!nilfs_bmap_dirty(bmap))
1679                         nilfs_bmap_set_dirty(bmap);
1680
1681                 brelse(bh);
1682
1683                 /* create root node at level 2 */
1684                 node = nilfs_btree_get_root(btree);
1685                 tmpptr = nreq->bpr_ptr;
1686                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1687                                       2, 1, &keys[0], &tmpptr);
1688         } else {
1689                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1690
1691                 /* create root node at level 1 */
1692                 node = nilfs_btree_get_root(btree);
1693                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1694                                       1, n, keys, ptrs);
1695                 nilfs_btree_node_insert(btree, node,
1696                                         key, dreq->bpr_ptr, n);
1697                 if (!nilfs_bmap_dirty(bmap))
1698                         nilfs_bmap_set_dirty(bmap);
1699         }
1700
1701         if (NILFS_BMAP_USE_VBN(bmap))
1702                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1703 }
1704
1705 /**
1706  * nilfs_btree_convert_and_insert -
1707  * @bmap:
1708  * @key:
1709  * @ptr:
1710  * @keys:
1711  * @ptrs:
1712  * @n:
1713  */
1714 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1715                                    __u64 key, __u64 ptr,
1716                                    const __u64 *keys, const __u64 *ptrs, int n)
1717 {
1718         struct buffer_head *bh;
1719         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1720         struct nilfs_bmap_stats stats;
1721         int ret;
1722
1723         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1724                 di = &dreq;
1725                 ni = NULL;
1726         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1727                            1 << bmap->b_inode->i_blkbits)) {
1728                 di = &dreq;
1729                 ni = &nreq;
1730         } else {
1731                 di = NULL;
1732                 ni = NULL;
1733                 BUG();
1734         }
1735
1736         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1737                                                      &stats);
1738         if (ret < 0)
1739                 return ret;
1740         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1741                                               di, ni, bh);
1742         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1743         return 0;
1744 }
1745
1746 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1747                                    struct nilfs_btree_path *path,
1748                                    int level,
1749                                    struct buffer_head *bh)
1750 {
1751         while ((++level < nilfs_btree_height(btree) - 1) &&
1752                !buffer_dirty(path[level].bp_bh))
1753                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1754
1755         return 0;
1756 }
1757
1758 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1759                                         struct nilfs_btree_path *path,
1760                                         int level, struct inode *dat)
1761 {
1762         struct nilfs_btree_node *parent;
1763         int ret;
1764
1765         parent = nilfs_btree_get_node(btree, path, level + 1);
1766         path[level].bp_oldreq.bpr_ptr =
1767                 nilfs_btree_node_get_ptr(btree, parent,
1768                                          path[level + 1].bp_index);
1769         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1770         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1771                                        &path[level].bp_newreq.bpr_req);
1772         if (ret < 0)
1773                 return ret;
1774
1775         if (buffer_nilfs_node(path[level].bp_bh)) {
1776                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1777                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1778                 path[level].bp_ctxt.bh = path[level].bp_bh;
1779                 ret = nilfs_btnode_prepare_change_key(
1780                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1781                         &path[level].bp_ctxt);
1782                 if (ret < 0) {
1783                         nilfs_dat_abort_update(dat,
1784                                                &path[level].bp_oldreq.bpr_req,
1785                                                &path[level].bp_newreq.bpr_req);
1786                         return ret;
1787                 }
1788         }
1789
1790         return 0;
1791 }
1792
1793 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1794                                         struct nilfs_btree_path *path,
1795                                         int level, struct inode *dat)
1796 {
1797         struct nilfs_btree_node *parent;
1798
1799         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1800                                 &path[level].bp_newreq.bpr_req,
1801                                 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1802
1803         if (buffer_nilfs_node(path[level].bp_bh)) {
1804                 nilfs_btnode_commit_change_key(
1805                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1806                         &path[level].bp_ctxt);
1807                 path[level].bp_bh = path[level].bp_ctxt.bh;
1808         }
1809         set_buffer_nilfs_volatile(path[level].bp_bh);
1810
1811         parent = nilfs_btree_get_node(btree, path, level + 1);
1812         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1813                                  path[level].bp_newreq.bpr_ptr);
1814 }
1815
1816 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1817                                        struct nilfs_btree_path *path,
1818                                        int level, struct inode *dat)
1819 {
1820         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1821                                &path[level].bp_newreq.bpr_req);
1822         if (buffer_nilfs_node(path[level].bp_bh))
1823                 nilfs_btnode_abort_change_key(
1824                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1825                         &path[level].bp_ctxt);
1826 }
1827
1828 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1829                                            struct nilfs_btree_path *path,
1830                                            int minlevel, int *maxlevelp,
1831                                            struct inode *dat)
1832 {
1833         int level, ret;
1834
1835         level = minlevel;
1836         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1837                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1838                 if (ret < 0)
1839                         return ret;
1840         }
1841         while ((++level < nilfs_btree_height(btree) - 1) &&
1842                !buffer_dirty(path[level].bp_bh)) {
1843
1844                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1845                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1846                 if (ret < 0)
1847                         goto out;
1848         }
1849
1850         /* success */
1851         *maxlevelp = level - 1;
1852         return 0;
1853
1854         /* error */
1855  out:
1856         while (--level > minlevel)
1857                 nilfs_btree_abort_update_v(btree, path, level, dat);
1858         if (!buffer_nilfs_volatile(path[level].bp_bh))
1859                 nilfs_btree_abort_update_v(btree, path, level, dat);
1860         return ret;
1861 }
1862
1863 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1864                                            struct nilfs_btree_path *path,
1865                                            int minlevel, int maxlevel,
1866                                            struct buffer_head *bh,
1867                                            struct inode *dat)
1868 {
1869         int level;
1870
1871         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1872                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1873
1874         for (level = minlevel + 1; level <= maxlevel; level++)
1875                 nilfs_btree_commit_update_v(btree, path, level, dat);
1876 }
1877
1878 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1879                                    struct nilfs_btree_path *path,
1880                                    int level, struct buffer_head *bh)
1881 {
1882         int maxlevel, ret;
1883         struct nilfs_btree_node *parent;
1884         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1885         __u64 ptr;
1886
1887         get_bh(bh);
1888         path[level].bp_bh = bh;
1889         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1890                                               dat);
1891         if (ret < 0)
1892                 goto out;
1893
1894         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1895                 parent = nilfs_btree_get_node(btree, path, level + 1);
1896                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1897                                                path[level + 1].bp_index);
1898                 ret = nilfs_dat_mark_dirty(dat, ptr);
1899                 if (ret < 0)
1900                         goto out;
1901         }
1902
1903         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1904
1905  out:
1906         brelse(path[level].bp_bh);
1907         path[level].bp_bh = NULL;
1908         return ret;
1909 }
1910
1911 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1912                                  struct buffer_head *bh)
1913 {
1914         struct nilfs_btree *btree;
1915         struct nilfs_btree_path *path;
1916         struct nilfs_btree_node *node;
1917         __u64 key;
1918         int level, ret;
1919
1920         WARN_ON(!buffer_dirty(bh));
1921
1922         btree = (struct nilfs_btree *)bmap;
1923         path = nilfs_btree_alloc_path();
1924         if (path == NULL)
1925                 return -ENOMEM;
1926         nilfs_btree_init_path(path);
1927
1928         if (buffer_nilfs_node(bh)) {
1929                 node = (struct nilfs_btree_node *)bh->b_data;
1930                 key = nilfs_btree_node_get_key(node, 0);
1931                 level = nilfs_btree_node_get_level(node);
1932         } else {
1933                 key = nilfs_bmap_data_get_key(bmap, bh);
1934                 level = NILFS_BTREE_LEVEL_DATA;
1935         }
1936
1937         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1938         if (ret < 0) {
1939                 if (unlikely(ret == -ENOENT))
1940                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1941                                __func__, (unsigned long long)key, level);
1942                 goto out;
1943         }
1944
1945         ret = NILFS_BMAP_USE_VBN(bmap) ?
1946                 nilfs_btree_propagate_v(btree, path, level, bh) :
1947                 nilfs_btree_propagate_p(btree, path, level, bh);
1948
1949  out:
1950         nilfs_btree_release_path(path);
1951         nilfs_btree_free_path(path);
1952
1953         return ret;
1954 }
1955
1956 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1957                                     struct buffer_head *bh)
1958 {
1959         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1960 }
1961
1962 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1963                                          struct list_head *lists,
1964                                          struct buffer_head *bh)
1965 {
1966         struct list_head *head;
1967         struct buffer_head *cbh;
1968         struct nilfs_btree_node *node, *cnode;
1969         __u64 key, ckey;
1970         int level;
1971
1972         get_bh(bh);
1973         node = (struct nilfs_btree_node *)bh->b_data;
1974         key = nilfs_btree_node_get_key(node, 0);
1975         level = nilfs_btree_node_get_level(node);
1976         list_for_each(head, &lists[level]) {
1977                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1978                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1979                 ckey = nilfs_btree_node_get_key(cnode, 0);
1980                 if (key < ckey)
1981                         break;
1982         }
1983         list_add_tail(&bh->b_assoc_buffers, head);
1984 }
1985
1986 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1987                                              struct list_head *listp)
1988 {
1989         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1990         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1991         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1992         struct pagevec pvec;
1993         struct buffer_head *bh, *head;
1994         pgoff_t index = 0;
1995         int level, i;
1996
1997         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1998              level < NILFS_BTREE_LEVEL_MAX;
1999              level++)
2000                 INIT_LIST_HEAD(&lists[level]);
2001
2002         pagevec_init(&pvec, 0);
2003
2004         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2005                                   PAGEVEC_SIZE)) {
2006                 for (i = 0; i < pagevec_count(&pvec); i++) {
2007                         bh = head = page_buffers(pvec.pages[i]);
2008                         do {
2009                                 if (buffer_dirty(bh))
2010                                         nilfs_btree_add_dirty_buffer(btree,
2011                                                                      lists, bh);
2012                         } while ((bh = bh->b_this_page) != head);
2013                 }
2014                 pagevec_release(&pvec);
2015                 cond_resched();
2016         }
2017
2018         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2019              level < NILFS_BTREE_LEVEL_MAX;
2020              level++)
2021                 list_splice(&lists[level], listp->prev);
2022 }
2023
2024 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2025                                 struct nilfs_btree_path *path,
2026                                 int level,
2027                                 struct buffer_head **bh,
2028                                 sector_t blocknr,
2029                                 union nilfs_binfo *binfo)
2030 {
2031         struct nilfs_btree_node *parent;
2032         __u64 key;
2033         __u64 ptr;
2034         int ret;
2035
2036         parent = nilfs_btree_get_node(btree, path, level + 1);
2037         ptr = nilfs_btree_node_get_ptr(btree, parent,
2038                                        path[level + 1].bp_index);
2039         if (buffer_nilfs_node(*bh)) {
2040                 path[level].bp_ctxt.oldkey = ptr;
2041                 path[level].bp_ctxt.newkey = blocknr;
2042                 path[level].bp_ctxt.bh = *bh;
2043                 ret = nilfs_btnode_prepare_change_key(
2044                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2045                         &path[level].bp_ctxt);
2046                 if (ret < 0)
2047                         return ret;
2048                 nilfs_btnode_commit_change_key(
2049                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2050                         &path[level].bp_ctxt);
2051                 *bh = path[level].bp_ctxt.bh;
2052         }
2053
2054         nilfs_btree_node_set_ptr(btree, parent,
2055                                  path[level + 1].bp_index, blocknr);
2056
2057         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2058         /* on-disk format */
2059         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2060         binfo->bi_dat.bi_level = level;
2061
2062         return 0;
2063 }
2064
2065 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2066                                 struct nilfs_btree_path *path,
2067                                 int level,
2068                                 struct buffer_head **bh,
2069                                 sector_t blocknr,
2070                                 union nilfs_binfo *binfo)
2071 {
2072         struct nilfs_btree_node *parent;
2073         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2074         __u64 key;
2075         __u64 ptr;
2076         union nilfs_bmap_ptr_req req;
2077         int ret;
2078
2079         parent = nilfs_btree_get_node(btree, path, level + 1);
2080         ptr = nilfs_btree_node_get_ptr(btree, parent,
2081                                        path[level + 1].bp_index);
2082         req.bpr_ptr = ptr;
2083         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2084         if (ret < 0)
2085                 return ret;
2086         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2087
2088         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2089         /* on-disk format */
2090         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2091         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2092
2093         return 0;
2094 }
2095
2096 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2097                               struct buffer_head **bh,
2098                               sector_t blocknr,
2099                               union nilfs_binfo *binfo)
2100 {
2101         struct nilfs_btree *btree;
2102         struct nilfs_btree_path *path;
2103         struct nilfs_btree_node *node;
2104         __u64 key;
2105         int level, ret;
2106
2107         btree = (struct nilfs_btree *)bmap;
2108         path = nilfs_btree_alloc_path();
2109         if (path == NULL)
2110                 return -ENOMEM;
2111         nilfs_btree_init_path(path);
2112
2113         if (buffer_nilfs_node(*bh)) {
2114                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2115                 key = nilfs_btree_node_get_key(node, 0);
2116                 level = nilfs_btree_node_get_level(node);
2117         } else {
2118                 key = nilfs_bmap_data_get_key(bmap, *bh);
2119                 level = NILFS_BTREE_LEVEL_DATA;
2120         }
2121
2122         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2123         if (ret < 0) {
2124                 WARN_ON(ret == -ENOENT);
2125                 goto out;
2126         }
2127
2128         ret = NILFS_BMAP_USE_VBN(bmap) ?
2129                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2130                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2131
2132  out:
2133         nilfs_btree_release_path(path);
2134         nilfs_btree_free_path(path);
2135
2136         return ret;
2137 }
2138
2139 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2140                                  struct buffer_head **bh,
2141                                  sector_t blocknr,
2142                                  union nilfs_binfo *binfo)
2143 {
2144         struct nilfs_btree_node *node;
2145         __u64 key;
2146         int ret;
2147
2148         ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2149                              blocknr);
2150         if (ret < 0)
2151                 return ret;
2152
2153         if (buffer_nilfs_node(*bh)) {
2154                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2155                 key = nilfs_btree_node_get_key(node, 0);
2156         } else
2157                 key = nilfs_bmap_data_get_key(bmap, *bh);
2158
2159         /* on-disk format */
2160         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2161         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2162
2163         return 0;
2164 }
2165
2166 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2167 {
2168         struct buffer_head *bh;
2169         struct nilfs_btree *btree;
2170         struct nilfs_btree_path *path;
2171         __u64 ptr;
2172         int ret;
2173
2174         btree = (struct nilfs_btree *)bmap;
2175         path = nilfs_btree_alloc_path();
2176         if (path == NULL)
2177                 return -ENOMEM;
2178         nilfs_btree_init_path(path);
2179
2180         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2181         if (ret < 0) {
2182                 WARN_ON(ret == -ENOENT);
2183                 goto out;
2184         }
2185         ret = nilfs_btree_get_block(btree, ptr, &bh);
2186         if (ret < 0) {
2187                 WARN_ON(ret == -ENOENT);
2188                 goto out;
2189         }
2190
2191         if (!buffer_dirty(bh))
2192                 nilfs_btnode_mark_dirty(bh);
2193         brelse(bh);
2194         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2195                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2196
2197  out:
2198         nilfs_btree_release_path(path);
2199         nilfs_btree_free_path(path);
2200         return ret;
2201 }
2202
2203 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2204         .bop_lookup             =       nilfs_btree_lookup,
2205         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2206         .bop_insert             =       nilfs_btree_insert,
2207         .bop_delete             =       nilfs_btree_delete,
2208         .bop_clear              =       NULL,
2209
2210         .bop_propagate          =       nilfs_btree_propagate,
2211
2212         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2213
2214         .bop_assign             =       nilfs_btree_assign,
2215         .bop_mark               =       nilfs_btree_mark,
2216
2217         .bop_last_key           =       nilfs_btree_last_key,
2218         .bop_check_insert       =       NULL,
2219         .bop_check_delete       =       nilfs_btree_check_delete,
2220         .bop_gather_data        =       nilfs_btree_gather_data,
2221 };
2222
2223 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2224         .bop_lookup             =       NULL,
2225         .bop_lookup_contig      =       NULL,
2226         .bop_insert             =       NULL,
2227         .bop_delete             =       NULL,
2228         .bop_clear              =       NULL,
2229
2230         .bop_propagate          =       nilfs_btree_propagate_gc,
2231
2232         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2233
2234         .bop_assign             =       nilfs_btree_assign_gc,
2235         .bop_mark               =       NULL,
2236
2237         .bop_last_key           =       NULL,
2238         .bop_check_insert       =       NULL,
2239         .bop_check_delete       =       NULL,
2240         .bop_gather_data        =       NULL,
2241 };
2242
2243 int nilfs_btree_init(struct nilfs_bmap *bmap)
2244 {
2245         bmap->b_ops = &nilfs_btree_ops;
2246         return 0;
2247 }
2248
2249 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2250 {
2251         bmap->b_ops = &nilfs_btree_ops_gc;
2252 }