nilfs2: remove constant qualifier from argument of bmap propagate
[linux-2.6.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36         struct nilfs_btree_path *path;
37         int level = NILFS_BTREE_LEVEL_DATA;
38
39         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40         if (path == NULL)
41                 goto out;
42
43         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44                 path[level].bp_bh = NULL;
45                 path[level].bp_sib_bh = NULL;
46                 path[level].bp_index = 0;
47                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49                 path[level].bp_op = NULL;
50         }
51
52 out:
53         return path;
54 }
55
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58         int level = NILFS_BTREE_LEVEL_DATA;
59
60         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61                 brelse(path[level].bp_bh);
62
63         kmem_cache_free(nilfs_btree_path_cache, path);
64 }
65
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
70                                  struct buffer_head **bhp)
71 {
72         struct address_space *btnc =
73                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
74         struct buffer_head *bh;
75         int err;
76
77         err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp);
78         if (err)
79                 return err == -EEXIST ? 0 : err;
80
81         bh = *bhp;
82         wait_on_buffer(bh);
83         if (!buffer_uptodate(bh)) {
84                 brelse(bh);
85                 return -EIO;
86         }
87         if (nilfs_btree_broken_node_block(bh)) {
88                 clear_buffer_uptodate(bh);
89                 brelse(bh);
90                 return -EINVAL;
91         }
92         return 0;
93 }
94
95 static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
96                                      __u64 ptr, struct buffer_head **bhp)
97 {
98         struct address_space *btnc =
99                 &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
100         struct buffer_head *bh;
101
102         bh = nilfs_btnode_create_block(btnc, ptr);
103         if (!bh)
104                 return -ENOMEM;
105
106         set_buffer_nilfs_volatile(bh);
107         *bhp = bh;
108         return 0;
109 }
110
111 static inline int
112 nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
113 {
114         return node->bn_flags;
115 }
116
117 static inline void
118 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
119 {
120         node->bn_flags = flags;
121 }
122
123 static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node)
124 {
125         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
126 }
127
128 static inline int
129 nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
130 {
131         return node->bn_level;
132 }
133
134 static inline void
135 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
136 {
137         node->bn_level = level;
138 }
139
140 static inline int
141 nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
142 {
143         return le16_to_cpu(node->bn_nchildren);
144 }
145
146 static inline void
147 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
148 {
149         node->bn_nchildren = cpu_to_le16(nchildren);
150 }
151
152 static inline int nilfs_btree_node_size(const struct nilfs_btree *btree)
153 {
154         return 1 << btree->bt_bmap.b_inode->i_blkbits;
155 }
156
157 static inline int
158 nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node,
159                                const struct nilfs_btree *btree)
160 {
161         return nilfs_btree_node_root(node) ?
162                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
163                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
164 }
165
166 static inline int
167 nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node,
168                                const struct nilfs_btree *btree)
169 {
170         return nilfs_btree_node_root(node) ?
171                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
172                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
173 }
174
175 static inline __le64 *
176 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
177 {
178         return (__le64 *)((char *)(node + 1) +
179                           (nilfs_btree_node_root(node) ?
180                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
181 }
182
183 static inline __le64 *
184 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node,
185                        const struct nilfs_btree *btree)
186 {
187         return (__le64 *)(nilfs_btree_node_dkeys(node) +
188                           nilfs_btree_node_nchildren_max(node, btree));
189 }
190
191 static inline __u64
192 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
193 {
194         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
195 }
196
197 static inline void
198 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
199 {
200         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
201 }
202
203 static inline __u64
204 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
205                          const struct nilfs_btree_node *node, int index)
206 {
207         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, btree) + index));
208 }
209
210 static inline void
211 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
212                          struct nilfs_btree_node *node, int index, __u64 ptr)
213 {
214         *(nilfs_btree_node_dptrs(node, btree) + index) = cpu_to_le64(ptr);
215 }
216
217 static void nilfs_btree_node_init(struct nilfs_btree *btree,
218                                   struct nilfs_btree_node *node,
219                                   int flags, int level, int nchildren,
220                                   const __u64 *keys, const __u64 *ptrs)
221 {
222         __le64 *dkeys;
223         __le64 *dptrs;
224         int i;
225
226         nilfs_btree_node_set_flags(node, flags);
227         nilfs_btree_node_set_level(node, level);
228         nilfs_btree_node_set_nchildren(node, nchildren);
229
230         dkeys = nilfs_btree_node_dkeys(node);
231         dptrs = nilfs_btree_node_dptrs(node, btree);
232         for (i = 0; i < nchildren; i++) {
233                 dkeys[i] = cpu_to_le64(keys[i]);
234                 dptrs[i] = cpu_to_le64(ptrs[i]);
235         }
236 }
237
238 /* Assume the buffer heads corresponding to left and right are locked. */
239 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
240                                        struct nilfs_btree_node *left,
241                                        struct nilfs_btree_node *right,
242                                        int n)
243 {
244         __le64 *ldkeys, *rdkeys;
245         __le64 *ldptrs, *rdptrs;
246         int lnchildren, rnchildren;
247
248         ldkeys = nilfs_btree_node_dkeys(left);
249         ldptrs = nilfs_btree_node_dptrs(left, btree);
250         lnchildren = nilfs_btree_node_get_nchildren(left);
251
252         rdkeys = nilfs_btree_node_dkeys(right);
253         rdptrs = nilfs_btree_node_dptrs(right, btree);
254         rnchildren = nilfs_btree_node_get_nchildren(right);
255
256         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
257         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
258         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
259         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
260
261         lnchildren += n;
262         rnchildren -= n;
263         nilfs_btree_node_set_nchildren(left, lnchildren);
264         nilfs_btree_node_set_nchildren(right, rnchildren);
265 }
266
267 /* Assume that the buffer heads corresponding to left and right are locked. */
268 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
269                                         struct nilfs_btree_node *left,
270                                         struct nilfs_btree_node *right,
271                                         int n)
272 {
273         __le64 *ldkeys, *rdkeys;
274         __le64 *ldptrs, *rdptrs;
275         int lnchildren, rnchildren;
276
277         ldkeys = nilfs_btree_node_dkeys(left);
278         ldptrs = nilfs_btree_node_dptrs(left, btree);
279         lnchildren = nilfs_btree_node_get_nchildren(left);
280
281         rdkeys = nilfs_btree_node_dkeys(right);
282         rdptrs = nilfs_btree_node_dptrs(right, btree);
283         rnchildren = nilfs_btree_node_get_nchildren(right);
284
285         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
286         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
287         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
288         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
289
290         lnchildren -= n;
291         rnchildren += n;
292         nilfs_btree_node_set_nchildren(left, lnchildren);
293         nilfs_btree_node_set_nchildren(right, rnchildren);
294 }
295
296 /* Assume that the buffer head corresponding to node is locked. */
297 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
298                                     struct nilfs_btree_node *node,
299                                     __u64 key, __u64 ptr, int index)
300 {
301         __le64 *dkeys;
302         __le64 *dptrs;
303         int nchildren;
304
305         dkeys = nilfs_btree_node_dkeys(node);
306         dptrs = nilfs_btree_node_dptrs(node, btree);
307         nchildren = nilfs_btree_node_get_nchildren(node);
308         if (index < nchildren) {
309                 memmove(dkeys + index + 1, dkeys + index,
310                         (nchildren - index) * sizeof(*dkeys));
311                 memmove(dptrs + index + 1, dptrs + index,
312                         (nchildren - index) * sizeof(*dptrs));
313         }
314         dkeys[index] = cpu_to_le64(key);
315         dptrs[index] = cpu_to_le64(ptr);
316         nchildren++;
317         nilfs_btree_node_set_nchildren(node, nchildren);
318 }
319
320 /* Assume that the buffer head corresponding to node is locked. */
321 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
322                                     struct nilfs_btree_node *node,
323                                     __u64 *keyp, __u64 *ptrp, int index)
324 {
325         __u64 key;
326         __u64 ptr;
327         __le64 *dkeys;
328         __le64 *dptrs;
329         int nchildren;
330
331         dkeys = nilfs_btree_node_dkeys(node);
332         dptrs = nilfs_btree_node_dptrs(node, btree);
333         key = le64_to_cpu(dkeys[index]);
334         ptr = le64_to_cpu(dptrs[index]);
335         nchildren = nilfs_btree_node_get_nchildren(node);
336         if (keyp != NULL)
337                 *keyp = key;
338         if (ptrp != NULL)
339                 *ptrp = ptr;
340
341         if (index < nchildren - 1) {
342                 memmove(dkeys + index, dkeys + index + 1,
343                         (nchildren - index - 1) * sizeof(*dkeys));
344                 memmove(dptrs + index, dptrs + index + 1,
345                         (nchildren - index - 1) * sizeof(*dptrs));
346         }
347         nchildren--;
348         nilfs_btree_node_set_nchildren(node, nchildren);
349 }
350
351 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
352                                    __u64 key, int *indexp)
353 {
354         __u64 nkey;
355         int index, low, high, s;
356
357         /* binary search */
358         low = 0;
359         high = nilfs_btree_node_get_nchildren(node) - 1;
360         index = 0;
361         s = 0;
362         while (low <= high) {
363                 index = (low + high) / 2;
364                 nkey = nilfs_btree_node_get_key(node, index);
365                 if (nkey == key) {
366                         s = 0;
367                         goto out;
368                 } else if (nkey < key) {
369                         low = index + 1;
370                         s = -1;
371                 } else {
372                         high = index - 1;
373                         s = 1;
374                 }
375         }
376
377         /* adjust index */
378         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
379                 if (s > 0 && index > 0)
380                         index--;
381         } else if (s < 0)
382                 index++;
383
384  out:
385         *indexp = index;
386
387         return s == 0;
388 }
389
390 /**
391  * nilfs_btree_node_broken - verify consistency of btree node
392  * @node: btree node block to be examined
393  * @size: node size (in bytes)
394  * @blocknr: block number
395  *
396  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
397  */
398 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
399                                    size_t size, sector_t blocknr)
400 {
401         int level, flags, nchildren;
402         int ret = 0;
403
404         level = nilfs_btree_node_get_level(node);
405         flags = nilfs_btree_node_get_flags(node);
406         nchildren = nilfs_btree_node_get_nchildren(node);
407
408         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
409                      level >= NILFS_BTREE_LEVEL_MAX ||
410                      (flags & NILFS_BTREE_NODE_ROOT) ||
411                      nchildren < 0 ||
412                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
413                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
414                        "level = %d, flags = 0x%x, nchildren = %d\n",
415                        (unsigned long long)blocknr, level, flags, nchildren);
416                 ret = 1;
417         }
418         return ret;
419 }
420
421 int nilfs_btree_broken_node_block(struct buffer_head *bh)
422 {
423         return nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
424                                        bh->b_size, bh->b_blocknr);
425 }
426
427 static inline struct nilfs_btree_node *
428 nilfs_btree_get_root(const struct nilfs_btree *btree)
429 {
430         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
431 }
432
433 static inline struct nilfs_btree_node *
434 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
435 {
436         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
437 }
438
439 static inline struct nilfs_btree_node *
440 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
441 {
442         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
443 }
444
445 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
446 {
447         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
448 }
449
450 static inline struct nilfs_btree_node *
451 nilfs_btree_get_node(const struct nilfs_btree *btree,
452                      const struct nilfs_btree_path *path,
453                      int level)
454 {
455         return (level == nilfs_btree_height(btree) - 1) ?
456                 nilfs_btree_get_root(btree) :
457                 nilfs_btree_get_nonroot_node(path, level);
458 }
459
460 static inline int
461 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
462 {
463         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
464                 dump_stack();
465                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
466                        nilfs_btree_node_get_level(node), level);
467                 return 1;
468         }
469         return 0;
470 }
471
472 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
473                                  struct nilfs_btree_path *path,
474                                  __u64 key, __u64 *ptrp, int minlevel)
475 {
476         struct nilfs_btree_node *node;
477         __u64 ptr;
478         int level, index, found, ret;
479
480         node = nilfs_btree_get_root(btree);
481         level = nilfs_btree_node_get_level(node);
482         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
483                 return -ENOENT;
484
485         found = nilfs_btree_node_lookup(node, key, &index);
486         ptr = nilfs_btree_node_get_ptr(btree, node, index);
487         path[level].bp_bh = NULL;
488         path[level].bp_index = index;
489
490         for (level--; level >= minlevel; level--) {
491                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
492                 if (ret < 0)
493                         return ret;
494                 node = nilfs_btree_get_nonroot_node(path, level);
495                 if (nilfs_btree_bad_node(node, level))
496                         return -EINVAL;
497                 if (!found)
498                         found = nilfs_btree_node_lookup(node, key, &index);
499                 else
500                         index = 0;
501                 if (index < nilfs_btree_node_nchildren_max(node, btree))
502                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
503                 else {
504                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
505                         /* insert */
506                         ptr = NILFS_BMAP_INVALID_PTR;
507                 }
508                 path[level].bp_index = index;
509         }
510         if (!found)
511                 return -ENOENT;
512
513         if (ptrp != NULL)
514                 *ptrp = ptr;
515
516         return 0;
517 }
518
519 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
520                                       struct nilfs_btree_path *path,
521                                       __u64 *keyp, __u64 *ptrp)
522 {
523         struct nilfs_btree_node *node;
524         __u64 ptr;
525         int index, level, ret;
526
527         node = nilfs_btree_get_root(btree);
528         index = nilfs_btree_node_get_nchildren(node) - 1;
529         if (index < 0)
530                 return -ENOENT;
531         level = nilfs_btree_node_get_level(node);
532         ptr = nilfs_btree_node_get_ptr(btree, node, index);
533         path[level].bp_bh = NULL;
534         path[level].bp_index = index;
535
536         for (level--; level > 0; level--) {
537                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
538                 if (ret < 0)
539                         return ret;
540                 node = nilfs_btree_get_nonroot_node(path, level);
541                 if (nilfs_btree_bad_node(node, level))
542                         return -EINVAL;
543                 index = nilfs_btree_node_get_nchildren(node) - 1;
544                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
545                 path[level].bp_index = index;
546         }
547
548         if (keyp != NULL)
549                 *keyp = nilfs_btree_node_get_key(node, index);
550         if (ptrp != NULL)
551                 *ptrp = ptr;
552
553         return 0;
554 }
555
556 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
557                               __u64 key, int level, __u64 *ptrp)
558 {
559         struct nilfs_btree *btree;
560         struct nilfs_btree_path *path;
561         __u64 ptr;
562         int ret;
563
564         btree = (struct nilfs_btree *)bmap;
565         path = nilfs_btree_alloc_path();
566         if (path == NULL)
567                 return -ENOMEM;
568
569         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
570
571         if (ptrp != NULL)
572                 *ptrp = ptr;
573
574         nilfs_btree_free_path(path);
575
576         return ret;
577 }
578
579 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
580                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
581 {
582         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
583         struct nilfs_btree_path *path;
584         struct nilfs_btree_node *node;
585         struct inode *dat = NULL;
586         __u64 ptr, ptr2;
587         sector_t blocknr;
588         int level = NILFS_BTREE_LEVEL_NODE_MIN;
589         int ret, cnt, index, maxlevel;
590
591         path = nilfs_btree_alloc_path();
592         if (path == NULL)
593                 return -ENOMEM;
594
595         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
596         if (ret < 0)
597                 goto out;
598
599         if (NILFS_BMAP_USE_VBN(bmap)) {
600                 dat = nilfs_bmap_get_dat(bmap);
601                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
602                 if (ret < 0)
603                         goto out;
604                 ptr = blocknr;
605         }
606         cnt = 1;
607         if (cnt == maxblocks)
608                 goto end;
609
610         maxlevel = nilfs_btree_height(btree) - 1;
611         node = nilfs_btree_get_node(btree, path, level);
612         index = path[level].bp_index + 1;
613         for (;;) {
614                 while (index < nilfs_btree_node_get_nchildren(node)) {
615                         if (nilfs_btree_node_get_key(node, index) !=
616                             key + cnt)
617                                 goto end;
618                         ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
619                         if (dat) {
620                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
621                                 if (ret < 0)
622                                         goto out;
623                                 ptr2 = blocknr;
624                         }
625                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
626                                 goto end;
627                         index++;
628                         continue;
629                 }
630                 if (level == maxlevel)
631                         break;
632
633                 /* look-up right sibling node */
634                 node = nilfs_btree_get_node(btree, path, level + 1);
635                 index = path[level + 1].bp_index + 1;
636                 if (index >= nilfs_btree_node_get_nchildren(node) ||
637                     nilfs_btree_node_get_key(node, index) != key + cnt)
638                         break;
639                 ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
640                 path[level + 1].bp_index = index;
641
642                 brelse(path[level].bp_bh);
643                 path[level].bp_bh = NULL;
644                 ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
645                 if (ret < 0)
646                         goto out;
647                 node = nilfs_btree_get_nonroot_node(path, level);
648                 index = 0;
649                 path[level].bp_index = index;
650         }
651  end:
652         *ptrp = ptr;
653         ret = cnt;
654  out:
655         nilfs_btree_free_path(path);
656         return ret;
657 }
658
659 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
660                                     struct nilfs_btree_path *path,
661                                     int level, __u64 key)
662 {
663         if (level < nilfs_btree_height(btree) - 1) {
664                 do {
665                         nilfs_btree_node_set_key(
666                                 nilfs_btree_get_nonroot_node(path, level),
667                                 path[level].bp_index, key);
668                         if (!buffer_dirty(path[level].bp_bh))
669                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
670                 } while ((path[level].bp_index == 0) &&
671                          (++level < nilfs_btree_height(btree) - 1));
672         }
673
674         /* root */
675         if (level == nilfs_btree_height(btree) - 1) {
676                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
677                                          path[level].bp_index, key);
678         }
679 }
680
681 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
682                                   struct nilfs_btree_path *path,
683                                   int level, __u64 *keyp, __u64 *ptrp)
684 {
685         struct nilfs_btree_node *node;
686
687         if (level < nilfs_btree_height(btree) - 1) {
688                 node = nilfs_btree_get_nonroot_node(path, level);
689                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
690                                         path[level].bp_index);
691                 if (!buffer_dirty(path[level].bp_bh))
692                         nilfs_btnode_mark_dirty(path[level].bp_bh);
693
694                 if (path[level].bp_index == 0)
695                         nilfs_btree_promote_key(btree, path, level + 1,
696                                                 nilfs_btree_node_get_key(node,
697                                                                          0));
698         } else {
699                 node = nilfs_btree_get_root(btree);
700                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
701                                         path[level].bp_index);
702         }
703 }
704
705 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
706                                    struct nilfs_btree_path *path,
707                                    int level, __u64 *keyp, __u64 *ptrp)
708 {
709         struct nilfs_btree_node *node, *left;
710         int nchildren, lnchildren, n, move;
711
712         node = nilfs_btree_get_nonroot_node(path, level);
713         left = nilfs_btree_get_sib_node(path, level);
714         nchildren = nilfs_btree_node_get_nchildren(node);
715         lnchildren = nilfs_btree_node_get_nchildren(left);
716         move = 0;
717
718         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
719         if (n > path[level].bp_index) {
720                 /* move insert point */
721                 n--;
722                 move = 1;
723         }
724
725         nilfs_btree_node_move_left(btree, left, node, n);
726
727         if (!buffer_dirty(path[level].bp_bh))
728                 nilfs_btnode_mark_dirty(path[level].bp_bh);
729         if (!buffer_dirty(path[level].bp_sib_bh))
730                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
731
732         nilfs_btree_promote_key(btree, path, level + 1,
733                                 nilfs_btree_node_get_key(node, 0));
734
735         if (move) {
736                 brelse(path[level].bp_bh);
737                 path[level].bp_bh = path[level].bp_sib_bh;
738                 path[level].bp_sib_bh = NULL;
739                 path[level].bp_index += lnchildren;
740                 path[level + 1].bp_index--;
741         } else {
742                 brelse(path[level].bp_sib_bh);
743                 path[level].bp_sib_bh = NULL;
744                 path[level].bp_index -= n;
745         }
746
747         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
748 }
749
750 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
751                                     struct nilfs_btree_path *path,
752                                     int level, __u64 *keyp, __u64 *ptrp)
753 {
754         struct nilfs_btree_node *node, *right;
755         int nchildren, rnchildren, n, move;
756
757         node = nilfs_btree_get_nonroot_node(path, level);
758         right = nilfs_btree_get_sib_node(path, level);
759         nchildren = nilfs_btree_node_get_nchildren(node);
760         rnchildren = nilfs_btree_node_get_nchildren(right);
761         move = 0;
762
763         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
764         if (n > nchildren - path[level].bp_index) {
765                 /* move insert point */
766                 n--;
767                 move = 1;
768         }
769
770         nilfs_btree_node_move_right(btree, node, right, n);
771
772         if (!buffer_dirty(path[level].bp_bh))
773                 nilfs_btnode_mark_dirty(path[level].bp_bh);
774         if (!buffer_dirty(path[level].bp_sib_bh))
775                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
776
777         path[level + 1].bp_index++;
778         nilfs_btree_promote_key(btree, path, level + 1,
779                                 nilfs_btree_node_get_key(right, 0));
780         path[level + 1].bp_index--;
781
782         if (move) {
783                 brelse(path[level].bp_bh);
784                 path[level].bp_bh = path[level].bp_sib_bh;
785                 path[level].bp_sib_bh = NULL;
786                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
787                 path[level + 1].bp_index++;
788         } else {
789                 brelse(path[level].bp_sib_bh);
790                 path[level].bp_sib_bh = NULL;
791         }
792
793         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
794 }
795
796 static void nilfs_btree_split(struct nilfs_btree *btree,
797                               struct nilfs_btree_path *path,
798                               int level, __u64 *keyp, __u64 *ptrp)
799 {
800         struct nilfs_btree_node *node, *right;
801         __u64 newkey;
802         __u64 newptr;
803         int nchildren, n, move;
804
805         node = nilfs_btree_get_nonroot_node(path, level);
806         right = nilfs_btree_get_sib_node(path, level);
807         nchildren = nilfs_btree_node_get_nchildren(node);
808         move = 0;
809
810         n = (nchildren + 1) / 2;
811         if (n > nchildren - path[level].bp_index) {
812                 n--;
813                 move = 1;
814         }
815
816         nilfs_btree_node_move_right(btree, node, right, n);
817
818         if (!buffer_dirty(path[level].bp_bh))
819                 nilfs_btnode_mark_dirty(path[level].bp_bh);
820         if (!buffer_dirty(path[level].bp_sib_bh))
821                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
822
823         newkey = nilfs_btree_node_get_key(right, 0);
824         newptr = path[level].bp_newreq.bpr_ptr;
825
826         if (move) {
827                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
828                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
829                                         path[level].bp_index);
830
831                 *keyp = nilfs_btree_node_get_key(right, 0);
832                 *ptrp = path[level].bp_newreq.bpr_ptr;
833
834                 brelse(path[level].bp_bh);
835                 path[level].bp_bh = path[level].bp_sib_bh;
836                 path[level].bp_sib_bh = NULL;
837         } else {
838                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
839
840                 *keyp = nilfs_btree_node_get_key(right, 0);
841                 *ptrp = path[level].bp_newreq.bpr_ptr;
842
843                 brelse(path[level].bp_sib_bh);
844                 path[level].bp_sib_bh = NULL;
845         }
846
847         path[level + 1].bp_index++;
848 }
849
850 static void nilfs_btree_grow(struct nilfs_btree *btree,
851                              struct nilfs_btree_path *path,
852                              int level, __u64 *keyp, __u64 *ptrp)
853 {
854         struct nilfs_btree_node *root, *child;
855         int n;
856
857         root = nilfs_btree_get_root(btree);
858         child = nilfs_btree_get_sib_node(path, level);
859
860         n = nilfs_btree_node_get_nchildren(root);
861
862         nilfs_btree_node_move_right(btree, root, child, n);
863         nilfs_btree_node_set_level(root, level + 1);
864
865         if (!buffer_dirty(path[level].bp_sib_bh))
866                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
867
868         path[level].bp_bh = path[level].bp_sib_bh;
869         path[level].bp_sib_bh = NULL;
870
871         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
872
873         *keyp = nilfs_btree_node_get_key(child, 0);
874         *ptrp = path[level].bp_newreq.bpr_ptr;
875 }
876
877 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
878                                    const struct nilfs_btree_path *path)
879 {
880         struct nilfs_btree_node *node;
881         int level;
882
883         if (path == NULL)
884                 return NILFS_BMAP_INVALID_PTR;
885
886         /* left sibling */
887         level = NILFS_BTREE_LEVEL_NODE_MIN;
888         if (path[level].bp_index > 0) {
889                 node = nilfs_btree_get_node(btree, path, level);
890                 return nilfs_btree_node_get_ptr(btree, node,
891                                                 path[level].bp_index - 1);
892         }
893
894         /* parent */
895         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
896         if (level <= nilfs_btree_height(btree) - 1) {
897                 node = nilfs_btree_get_node(btree, path, level);
898                 return nilfs_btree_node_get_ptr(btree, node,
899                                                 path[level].bp_index);
900         }
901
902         return NILFS_BMAP_INVALID_PTR;
903 }
904
905 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
906                                        const struct nilfs_btree_path *path,
907                                        __u64 key)
908 {
909         __u64 ptr;
910
911         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
912         if (ptr != NILFS_BMAP_INVALID_PTR)
913                 /* sequential access */
914                 return ptr;
915         else {
916                 ptr = nilfs_btree_find_near(btree, path);
917                 if (ptr != NILFS_BMAP_INVALID_PTR)
918                         /* near */
919                         return ptr;
920         }
921         /* block group */
922         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
923 }
924
925 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
926                                      __u64 ptr)
927 {
928         btree->bt_bmap.b_last_allocated_key = key;
929         btree->bt_bmap.b_last_allocated_ptr = ptr;
930 }
931
932 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
933                                       struct nilfs_btree_path *path,
934                                       int *levelp, __u64 key, __u64 ptr,
935                                       struct nilfs_bmap_stats *stats)
936 {
937         struct buffer_head *bh;
938         struct nilfs_btree_node *node, *parent, *sib;
939         __u64 sibptr;
940         int pindex, level, ret;
941         struct inode *dat = NULL;
942
943         stats->bs_nblocks = 0;
944         level = NILFS_BTREE_LEVEL_DATA;
945
946         /* allocate a new ptr for data block */
947         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
948                 path[level].bp_newreq.bpr_ptr =
949                         nilfs_btree_find_target_v(btree, path, key);
950                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
951         }
952
953         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
954                                            &path[level].bp_newreq, dat);
955         if (ret < 0)
956                 goto err_out_data;
957
958         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
959              level < nilfs_btree_height(btree) - 1;
960              level++) {
961                 node = nilfs_btree_get_nonroot_node(path, level);
962                 if (nilfs_btree_node_get_nchildren(node) <
963                     nilfs_btree_node_nchildren_max(node, btree)) {
964                         path[level].bp_op = nilfs_btree_do_insert;
965                         stats->bs_nblocks++;
966                         goto out;
967                 }
968
969                 parent = nilfs_btree_get_node(btree, path, level + 1);
970                 pindex = path[level + 1].bp_index;
971
972                 /* left sibling */
973                 if (pindex > 0) {
974                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
975                                                           pindex - 1);
976                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
977                         if (ret < 0)
978                                 goto err_out_child_node;
979                         sib = (struct nilfs_btree_node *)bh->b_data;
980                         if (nilfs_btree_node_get_nchildren(sib) <
981                             nilfs_btree_node_nchildren_max(sib, btree)) {
982                                 path[level].bp_sib_bh = bh;
983                                 path[level].bp_op = nilfs_btree_carry_left;
984                                 stats->bs_nblocks++;
985                                 goto out;
986                         } else
987                                 brelse(bh);
988                 }
989
990                 /* right sibling */
991                 if (pindex <
992                     nilfs_btree_node_get_nchildren(parent) - 1) {
993                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
994                                                           pindex + 1);
995                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
996                         if (ret < 0)
997                                 goto err_out_child_node;
998                         sib = (struct nilfs_btree_node *)bh->b_data;
999                         if (nilfs_btree_node_get_nchildren(sib) <
1000                             nilfs_btree_node_nchildren_max(sib, btree)) {
1001                                 path[level].bp_sib_bh = bh;
1002                                 path[level].bp_op = nilfs_btree_carry_right;
1003                                 stats->bs_nblocks++;
1004                                 goto out;
1005                         } else
1006                                 brelse(bh);
1007                 }
1008
1009                 /* split */
1010                 path[level].bp_newreq.bpr_ptr =
1011                         path[level - 1].bp_newreq.bpr_ptr + 1;
1012                 ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1013                                                    &path[level].bp_newreq, dat);
1014                 if (ret < 0)
1015                         goto err_out_child_node;
1016                 ret = nilfs_btree_get_new_block(btree,
1017                                                 path[level].bp_newreq.bpr_ptr,
1018                                                 &bh);
1019                 if (ret < 0)
1020                         goto err_out_curr_node;
1021
1022                 stats->bs_nblocks++;
1023
1024                 nilfs_btree_node_init(btree,
1025                                       (struct nilfs_btree_node *)bh->b_data,
1026                                       0, level, 0, NULL, NULL);
1027                 path[level].bp_sib_bh = bh;
1028                 path[level].bp_op = nilfs_btree_split;
1029         }
1030
1031         /* root */
1032         node = nilfs_btree_get_root(btree);
1033         if (nilfs_btree_node_get_nchildren(node) <
1034             nilfs_btree_node_nchildren_max(node, btree)) {
1035                 path[level].bp_op = nilfs_btree_do_insert;
1036                 stats->bs_nblocks++;
1037                 goto out;
1038         }
1039
1040         /* grow */
1041         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1042         ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
1043                                            &path[level].bp_newreq, dat);
1044         if (ret < 0)
1045                 goto err_out_child_node;
1046         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1047                                         &bh);
1048         if (ret < 0)
1049                 goto err_out_curr_node;
1050
1051         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1052                               0, level, 0, NULL, NULL);
1053         path[level].bp_sib_bh = bh;
1054         path[level].bp_op = nilfs_btree_grow;
1055
1056         level++;
1057         path[level].bp_op = nilfs_btree_do_insert;
1058
1059         /* a newly-created node block and a data block are added */
1060         stats->bs_nblocks += 2;
1061
1062         /* success */
1063  out:
1064         *levelp = level;
1065         return ret;
1066
1067         /* error */
1068  err_out_curr_node:
1069         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1070                                    dat);
1071  err_out_child_node:
1072         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1073                 nilfs_btnode_delete(path[level].bp_sib_bh);
1074                 nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
1075                                            &path[level].bp_newreq, dat);
1076
1077         }
1078
1079         nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq,
1080                                    dat);
1081  err_out_data:
1082         *levelp = level;
1083         stats->bs_nblocks = 0;
1084         return ret;
1085 }
1086
1087 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1088                                       struct nilfs_btree_path *path,
1089                                       int maxlevel, __u64 key, __u64 ptr)
1090 {
1091         struct inode *dat = NULL;
1092         int level;
1093
1094         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1095         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1096         if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) {
1097                 nilfs_btree_set_target_v(btree, key, ptr);
1098                 dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1099         }
1100
1101         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1102                 nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
1103                                             &path[level - 1].bp_newreq, dat);
1104                 path[level].bp_op(btree, path, level, &key, &ptr);
1105         }
1106
1107         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1108                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1109 }
1110
1111 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1112 {
1113         struct nilfs_btree *btree;
1114         struct nilfs_btree_path *path;
1115         struct nilfs_bmap_stats stats;
1116         int level, ret;
1117
1118         btree = (struct nilfs_btree *)bmap;
1119         path = nilfs_btree_alloc_path();
1120         if (path == NULL)
1121                 return -ENOMEM;
1122
1123         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1124                                     NILFS_BTREE_LEVEL_NODE_MIN);
1125         if (ret != -ENOENT) {
1126                 if (ret == 0)
1127                         ret = -EEXIST;
1128                 goto out;
1129         }
1130
1131         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1132         if (ret < 0)
1133                 goto out;
1134         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1135         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1136
1137  out:
1138         nilfs_btree_free_path(path);
1139         return ret;
1140 }
1141
1142 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1143                                   struct nilfs_btree_path *path,
1144                                   int level, __u64 *keyp, __u64 *ptrp)
1145 {
1146         struct nilfs_btree_node *node;
1147
1148         if (level < nilfs_btree_height(btree) - 1) {
1149                 node = nilfs_btree_get_nonroot_node(path, level);
1150                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1151                                         path[level].bp_index);
1152                 if (!buffer_dirty(path[level].bp_bh))
1153                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1154                 if (path[level].bp_index == 0)
1155                         nilfs_btree_promote_key(btree, path, level + 1,
1156                                 nilfs_btree_node_get_key(node, 0));
1157         } else {
1158                 node = nilfs_btree_get_root(btree);
1159                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1160                                         path[level].bp_index);
1161         }
1162 }
1163
1164 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1165                                     struct nilfs_btree_path *path,
1166                                     int level, __u64 *keyp, __u64 *ptrp)
1167 {
1168         struct nilfs_btree_node *node, *left;
1169         int nchildren, lnchildren, n;
1170
1171         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1172
1173         node = nilfs_btree_get_nonroot_node(path, level);
1174         left = nilfs_btree_get_sib_node(path, level);
1175         nchildren = nilfs_btree_node_get_nchildren(node);
1176         lnchildren = nilfs_btree_node_get_nchildren(left);
1177
1178         n = (nchildren + lnchildren) / 2 - nchildren;
1179
1180         nilfs_btree_node_move_right(btree, left, node, n);
1181
1182         if (!buffer_dirty(path[level].bp_bh))
1183                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1184         if (!buffer_dirty(path[level].bp_sib_bh))
1185                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1186
1187         nilfs_btree_promote_key(btree, path, level + 1,
1188                                 nilfs_btree_node_get_key(node, 0));
1189
1190         brelse(path[level].bp_sib_bh);
1191         path[level].bp_sib_bh = NULL;
1192         path[level].bp_index += n;
1193 }
1194
1195 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1196                                      struct nilfs_btree_path *path,
1197                                      int level, __u64 *keyp, __u64 *ptrp)
1198 {
1199         struct nilfs_btree_node *node, *right;
1200         int nchildren, rnchildren, n;
1201
1202         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1203
1204         node = nilfs_btree_get_nonroot_node(path, level);
1205         right = nilfs_btree_get_sib_node(path, level);
1206         nchildren = nilfs_btree_node_get_nchildren(node);
1207         rnchildren = nilfs_btree_node_get_nchildren(right);
1208
1209         n = (nchildren + rnchildren) / 2 - nchildren;
1210
1211         nilfs_btree_node_move_left(btree, node, right, n);
1212
1213         if (!buffer_dirty(path[level].bp_bh))
1214                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1215         if (!buffer_dirty(path[level].bp_sib_bh))
1216                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1217
1218         path[level + 1].bp_index++;
1219         nilfs_btree_promote_key(btree, path, level + 1,
1220                                 nilfs_btree_node_get_key(right, 0));
1221         path[level + 1].bp_index--;
1222
1223         brelse(path[level].bp_sib_bh);
1224         path[level].bp_sib_bh = NULL;
1225 }
1226
1227 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1228                                     struct nilfs_btree_path *path,
1229                                     int level, __u64 *keyp, __u64 *ptrp)
1230 {
1231         struct nilfs_btree_node *node, *left;
1232         int n;
1233
1234         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1235
1236         node = nilfs_btree_get_nonroot_node(path, level);
1237         left = nilfs_btree_get_sib_node(path, level);
1238
1239         n = nilfs_btree_node_get_nchildren(node);
1240
1241         nilfs_btree_node_move_left(btree, left, node, n);
1242
1243         if (!buffer_dirty(path[level].bp_sib_bh))
1244                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1245
1246         nilfs_btnode_delete(path[level].bp_bh);
1247         path[level].bp_bh = path[level].bp_sib_bh;
1248         path[level].bp_sib_bh = NULL;
1249         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1250 }
1251
1252 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1253                                      struct nilfs_btree_path *path,
1254                                      int level, __u64 *keyp, __u64 *ptrp)
1255 {
1256         struct nilfs_btree_node *node, *right;
1257         int n;
1258
1259         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1260
1261         node = nilfs_btree_get_nonroot_node(path, level);
1262         right = nilfs_btree_get_sib_node(path, level);
1263
1264         n = nilfs_btree_node_get_nchildren(right);
1265
1266         nilfs_btree_node_move_left(btree, node, right, n);
1267
1268         if (!buffer_dirty(path[level].bp_bh))
1269                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1270
1271         nilfs_btnode_delete(path[level].bp_sib_bh);
1272         path[level].bp_sib_bh = NULL;
1273         path[level + 1].bp_index++;
1274 }
1275
1276 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1277                                struct nilfs_btree_path *path,
1278                                int level, __u64 *keyp, __u64 *ptrp)
1279 {
1280         struct nilfs_btree_node *root, *child;
1281         int n;
1282
1283         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1284
1285         root = nilfs_btree_get_root(btree);
1286         child = nilfs_btree_get_nonroot_node(path, level);
1287
1288         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1289         nilfs_btree_node_set_level(root, level);
1290         n = nilfs_btree_node_get_nchildren(child);
1291         nilfs_btree_node_move_left(btree, root, child, n);
1292
1293         nilfs_btnode_delete(path[level].bp_bh);
1294         path[level].bp_bh = NULL;
1295 }
1296
1297
1298 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1299                                       struct nilfs_btree_path *path,
1300                                       int *levelp,
1301                                       struct nilfs_bmap_stats *stats,
1302                                       struct inode *dat)
1303 {
1304         struct buffer_head *bh;
1305         struct nilfs_btree_node *node, *parent, *sib;
1306         __u64 sibptr;
1307         int pindex, level, ret;
1308
1309         ret = 0;
1310         stats->bs_nblocks = 0;
1311         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1312              level < nilfs_btree_height(btree) - 1;
1313              level++) {
1314                 node = nilfs_btree_get_nonroot_node(path, level);
1315                 path[level].bp_oldreq.bpr_ptr =
1316                         nilfs_btree_node_get_ptr(btree, node,
1317                                                  path[level].bp_index);
1318                 ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1319                                                  &path[level].bp_oldreq, dat);
1320                 if (ret < 0)
1321                         goto err_out_child_node;
1322
1323                 if (nilfs_btree_node_get_nchildren(node) >
1324                     nilfs_btree_node_nchildren_min(node, btree)) {
1325                         path[level].bp_op = nilfs_btree_do_delete;
1326                         stats->bs_nblocks++;
1327                         goto out;
1328                 }
1329
1330                 parent = nilfs_btree_get_node(btree, path, level + 1);
1331                 pindex = path[level + 1].bp_index;
1332
1333                 if (pindex > 0) {
1334                         /* left sibling */
1335                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1336                                                           pindex - 1);
1337                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1338                         if (ret < 0)
1339                                 goto err_out_curr_node;
1340                         sib = (struct nilfs_btree_node *)bh->b_data;
1341                         if (nilfs_btree_node_get_nchildren(sib) >
1342                             nilfs_btree_node_nchildren_min(sib, btree)) {
1343                                 path[level].bp_sib_bh = bh;
1344                                 path[level].bp_op = nilfs_btree_borrow_left;
1345                                 stats->bs_nblocks++;
1346                                 goto out;
1347                         } else {
1348                                 path[level].bp_sib_bh = bh;
1349                                 path[level].bp_op = nilfs_btree_concat_left;
1350                                 stats->bs_nblocks++;
1351                                 /* continue; */
1352                         }
1353                 } else if (pindex <
1354                            nilfs_btree_node_get_nchildren(parent) - 1) {
1355                         /* right sibling */
1356                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1357                                                           pindex + 1);
1358                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1359                         if (ret < 0)
1360                                 goto err_out_curr_node;
1361                         sib = (struct nilfs_btree_node *)bh->b_data;
1362                         if (nilfs_btree_node_get_nchildren(sib) >
1363                             nilfs_btree_node_nchildren_min(sib, btree)) {
1364                                 path[level].bp_sib_bh = bh;
1365                                 path[level].bp_op = nilfs_btree_borrow_right;
1366                                 stats->bs_nblocks++;
1367                                 goto out;
1368                         } else {
1369                                 path[level].bp_sib_bh = bh;
1370                                 path[level].bp_op = nilfs_btree_concat_right;
1371                                 stats->bs_nblocks++;
1372                                 /* continue; */
1373                         }
1374                 } else {
1375                         /* no siblings */
1376                         /* the only child of the root node */
1377                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1378                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1379                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1380                                 path[level].bp_op = nilfs_btree_shrink;
1381                                 stats->bs_nblocks += 2;
1382                         } else {
1383                                 path[level].bp_op = nilfs_btree_do_delete;
1384                                 stats->bs_nblocks++;
1385                         }
1386
1387                         goto out;
1388
1389                 }
1390         }
1391
1392         node = nilfs_btree_get_root(btree);
1393         path[level].bp_oldreq.bpr_ptr =
1394                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1395
1396         ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
1397                                          &path[level].bp_oldreq, dat);
1398         if (ret < 0)
1399                 goto err_out_child_node;
1400
1401         /* child of the root node is deleted */
1402         path[level].bp_op = nilfs_btree_do_delete;
1403         stats->bs_nblocks++;
1404
1405         /* success */
1406  out:
1407         *levelp = level;
1408         return ret;
1409
1410         /* error */
1411  err_out_curr_node:
1412         nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat);
1413  err_out_child_node:
1414         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1415                 brelse(path[level].bp_sib_bh);
1416                 nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
1417                                          &path[level].bp_oldreq, dat);
1418         }
1419         *levelp = level;
1420         stats->bs_nblocks = 0;
1421         return ret;
1422 }
1423
1424 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1425                                       struct nilfs_btree_path *path,
1426                                       int maxlevel, struct inode *dat)
1427 {
1428         int level;
1429
1430         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1431                 nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
1432                                           &path[level].bp_oldreq, dat);
1433                 path[level].bp_op(btree, path, level, NULL, NULL);
1434         }
1435
1436         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1437                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1438 }
1439
1440 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1441
1442 {
1443         struct nilfs_btree *btree;
1444         struct nilfs_btree_path *path;
1445         struct nilfs_bmap_stats stats;
1446         struct inode *dat;
1447         int level, ret;
1448
1449         btree = (struct nilfs_btree *)bmap;
1450         path = nilfs_btree_alloc_path();
1451         if (path == NULL)
1452                 return -ENOMEM;
1453
1454         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1455                                     NILFS_BTREE_LEVEL_NODE_MIN);
1456         if (ret < 0)
1457                 goto out;
1458
1459
1460         dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ?
1461                 nilfs_bmap_get_dat(&btree->bt_bmap) : NULL;
1462
1463         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1464         if (ret < 0)
1465                 goto out;
1466         nilfs_btree_commit_delete(btree, path, level, dat);
1467         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1468
1469 out:
1470         nilfs_btree_free_path(path);
1471         return ret;
1472 }
1473
1474 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1475 {
1476         struct nilfs_btree *btree;
1477         struct nilfs_btree_path *path;
1478         int ret;
1479
1480         btree = (struct nilfs_btree *)bmap;
1481         path = nilfs_btree_alloc_path();
1482         if (path == NULL)
1483                 return -ENOMEM;
1484
1485         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1486
1487         nilfs_btree_free_path(path);
1488
1489         return ret;
1490 }
1491
1492 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1493 {
1494         struct buffer_head *bh;
1495         struct nilfs_btree *btree;
1496         struct nilfs_btree_node *root, *node;
1497         __u64 maxkey, nextmaxkey;
1498         __u64 ptr;
1499         int nchildren, ret;
1500
1501         btree = (struct nilfs_btree *)bmap;
1502         root = nilfs_btree_get_root(btree);
1503         switch (nilfs_btree_height(btree)) {
1504         case 2:
1505                 bh = NULL;
1506                 node = root;
1507                 break;
1508         case 3:
1509                 nchildren = nilfs_btree_node_get_nchildren(root);
1510                 if (nchildren > 1)
1511                         return 0;
1512                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1513                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1514                 if (ret < 0)
1515                         return ret;
1516                 node = (struct nilfs_btree_node *)bh->b_data;
1517                 break;
1518         default:
1519                 return 0;
1520         }
1521
1522         nchildren = nilfs_btree_node_get_nchildren(node);
1523         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1524         nextmaxkey = (nchildren > 1) ?
1525                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1526         if (bh != NULL)
1527                 brelse(bh);
1528
1529         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1530 }
1531
1532 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1533                                    __u64 *keys, __u64 *ptrs, int nitems)
1534 {
1535         struct buffer_head *bh;
1536         struct nilfs_btree *btree;
1537         struct nilfs_btree_node *node, *root;
1538         __le64 *dkeys;
1539         __le64 *dptrs;
1540         __u64 ptr;
1541         int nchildren, i, ret;
1542
1543         btree = (struct nilfs_btree *)bmap;
1544         root = nilfs_btree_get_root(btree);
1545         switch (nilfs_btree_height(btree)) {
1546         case 2:
1547                 bh = NULL;
1548                 node = root;
1549                 break;
1550         case 3:
1551                 nchildren = nilfs_btree_node_get_nchildren(root);
1552                 WARN_ON(nchildren > 1);
1553                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1554                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1555                 if (ret < 0)
1556                         return ret;
1557                 node = (struct nilfs_btree_node *)bh->b_data;
1558                 break;
1559         default:
1560                 node = NULL;
1561                 return -EINVAL;
1562         }
1563
1564         nchildren = nilfs_btree_node_get_nchildren(node);
1565         if (nchildren < nitems)
1566                 nitems = nchildren;
1567         dkeys = nilfs_btree_node_dkeys(node);
1568         dptrs = nilfs_btree_node_dptrs(node, btree);
1569         for (i = 0; i < nitems; i++) {
1570                 keys[i] = le64_to_cpu(dkeys[i]);
1571                 ptrs[i] = le64_to_cpu(dptrs[i]);
1572         }
1573
1574         if (bh != NULL)
1575                 brelse(bh);
1576
1577         return nitems;
1578 }
1579
1580 static int
1581 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1582                                        union nilfs_bmap_ptr_req *dreq,
1583                                        union nilfs_bmap_ptr_req *nreq,
1584                                        struct buffer_head **bhp,
1585                                        struct nilfs_bmap_stats *stats)
1586 {
1587         struct buffer_head *bh;
1588         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1589         struct inode *dat = NULL;
1590         int ret;
1591
1592         stats->bs_nblocks = 0;
1593
1594         /* for data */
1595         /* cannot find near ptr */
1596         if (NILFS_BMAP_USE_VBN(bmap)) {
1597                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1598                 dat = nilfs_bmap_get_dat(bmap);
1599         }
1600
1601         ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat);
1602         if (ret < 0)
1603                 return ret;
1604
1605         *bhp = NULL;
1606         stats->bs_nblocks++;
1607         if (nreq != NULL) {
1608                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1609                 ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat);
1610                 if (ret < 0)
1611                         goto err_out_dreq;
1612
1613                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1614                 if (ret < 0)
1615                         goto err_out_nreq;
1616
1617                 *bhp = bh;
1618                 stats->bs_nblocks++;
1619         }
1620
1621         /* success */
1622         return 0;
1623
1624         /* error */
1625  err_out_nreq:
1626         nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat);
1627  err_out_dreq:
1628         nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat);
1629         stats->bs_nblocks = 0;
1630         return ret;
1631
1632 }
1633
1634 static void
1635 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1636                                       __u64 key, __u64 ptr,
1637                                       const __u64 *keys, const __u64 *ptrs,
1638                                       int n,
1639                                       union nilfs_bmap_ptr_req *dreq,
1640                                       union nilfs_bmap_ptr_req *nreq,
1641                                       struct buffer_head *bh)
1642 {
1643         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1644         struct nilfs_btree_node *node;
1645         struct inode *dat;
1646         __u64 tmpptr;
1647
1648         /* free resources */
1649         if (bmap->b_ops->bop_clear != NULL)
1650                 bmap->b_ops->bop_clear(bmap);
1651
1652         /* ptr must be a pointer to a buffer head. */
1653         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1654
1655         /* convert and insert */
1656         dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL;
1657         nilfs_btree_init(bmap);
1658         if (nreq != NULL) {
1659                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1660                 nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat);
1661
1662                 /* create child node at level 1 */
1663                 node = (struct nilfs_btree_node *)bh->b_data;
1664                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1665                 nilfs_btree_node_insert(btree, node,
1666                                         key, dreq->bpr_ptr, n);
1667                 if (!buffer_dirty(bh))
1668                         nilfs_btnode_mark_dirty(bh);
1669                 if (!nilfs_bmap_dirty(bmap))
1670                         nilfs_bmap_set_dirty(bmap);
1671
1672                 brelse(bh);
1673
1674                 /* create root node at level 2 */
1675                 node = nilfs_btree_get_root(btree);
1676                 tmpptr = nreq->bpr_ptr;
1677                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1678                                       2, 1, &keys[0], &tmpptr);
1679         } else {
1680                 nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat);
1681
1682                 /* create root node at level 1 */
1683                 node = nilfs_btree_get_root(btree);
1684                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1685                                       1, n, keys, ptrs);
1686                 nilfs_btree_node_insert(btree, node,
1687                                         key, dreq->bpr_ptr, n);
1688                 if (!nilfs_bmap_dirty(bmap))
1689                         nilfs_bmap_set_dirty(bmap);
1690         }
1691
1692         if (NILFS_BMAP_USE_VBN(bmap))
1693                 nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
1694 }
1695
1696 /**
1697  * nilfs_btree_convert_and_insert -
1698  * @bmap:
1699  * @key:
1700  * @ptr:
1701  * @keys:
1702  * @ptrs:
1703  * @n:
1704  */
1705 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1706                                    __u64 key, __u64 ptr,
1707                                    const __u64 *keys, const __u64 *ptrs, int n)
1708 {
1709         struct buffer_head *bh;
1710         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1711         struct nilfs_bmap_stats stats;
1712         int ret;
1713
1714         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1715                 di = &dreq;
1716                 ni = NULL;
1717         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1718                            1 << bmap->b_inode->i_blkbits)) {
1719                 di = &dreq;
1720                 ni = &nreq;
1721         } else {
1722                 di = NULL;
1723                 ni = NULL;
1724                 BUG();
1725         }
1726
1727         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1728                                                      &stats);
1729         if (ret < 0)
1730                 return ret;
1731         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1732                                               di, ni, bh);
1733         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1734         return 0;
1735 }
1736
1737 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1738                                    struct nilfs_btree_path *path,
1739                                    int level,
1740                                    struct buffer_head *bh)
1741 {
1742         while ((++level < nilfs_btree_height(btree) - 1) &&
1743                !buffer_dirty(path[level].bp_bh))
1744                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1745
1746         return 0;
1747 }
1748
1749 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1750                                         struct nilfs_btree_path *path,
1751                                         int level, struct inode *dat)
1752 {
1753         struct nilfs_btree_node *parent;
1754         int ret;
1755
1756         parent = nilfs_btree_get_node(btree, path, level + 1);
1757         path[level].bp_oldreq.bpr_ptr =
1758                 nilfs_btree_node_get_ptr(btree, parent,
1759                                          path[level + 1].bp_index);
1760         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1761         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1762                                        &path[level].bp_newreq.bpr_req);
1763         if (ret < 0)
1764                 return ret;
1765
1766         if (buffer_nilfs_node(path[level].bp_bh)) {
1767                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1768                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1769                 path[level].bp_ctxt.bh = path[level].bp_bh;
1770                 ret = nilfs_btnode_prepare_change_key(
1771                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1772                         &path[level].bp_ctxt);
1773                 if (ret < 0) {
1774                         nilfs_dat_abort_update(dat,
1775                                                &path[level].bp_oldreq.bpr_req,
1776                                                &path[level].bp_newreq.bpr_req);
1777                         return ret;
1778                 }
1779         }
1780
1781         return 0;
1782 }
1783
1784 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1785                                         struct nilfs_btree_path *path,
1786                                         int level, struct inode *dat)
1787 {
1788         struct nilfs_btree_node *parent;
1789
1790         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1791                                 &path[level].bp_newreq.bpr_req,
1792                                 btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS);
1793
1794         if (buffer_nilfs_node(path[level].bp_bh)) {
1795                 nilfs_btnode_commit_change_key(
1796                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1797                         &path[level].bp_ctxt);
1798                 path[level].bp_bh = path[level].bp_ctxt.bh;
1799         }
1800         set_buffer_nilfs_volatile(path[level].bp_bh);
1801
1802         parent = nilfs_btree_get_node(btree, path, level + 1);
1803         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1804                                  path[level].bp_newreq.bpr_ptr);
1805 }
1806
1807 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1808                                        struct nilfs_btree_path *path,
1809                                        int level, struct inode *dat)
1810 {
1811         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1812                                &path[level].bp_newreq.bpr_req);
1813         if (buffer_nilfs_node(path[level].bp_bh))
1814                 nilfs_btnode_abort_change_key(
1815                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1816                         &path[level].bp_ctxt);
1817 }
1818
1819 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1820                                            struct nilfs_btree_path *path,
1821                                            int minlevel, int *maxlevelp,
1822                                            struct inode *dat)
1823 {
1824         int level, ret;
1825
1826         level = minlevel;
1827         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1828                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1829                 if (ret < 0)
1830                         return ret;
1831         }
1832         while ((++level < nilfs_btree_height(btree) - 1) &&
1833                !buffer_dirty(path[level].bp_bh)) {
1834
1835                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1836                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1837                 if (ret < 0)
1838                         goto out;
1839         }
1840
1841         /* success */
1842         *maxlevelp = level - 1;
1843         return 0;
1844
1845         /* error */
1846  out:
1847         while (--level > minlevel)
1848                 nilfs_btree_abort_update_v(btree, path, level, dat);
1849         if (!buffer_nilfs_volatile(path[level].bp_bh))
1850                 nilfs_btree_abort_update_v(btree, path, level, dat);
1851         return ret;
1852 }
1853
1854 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1855                                            struct nilfs_btree_path *path,
1856                                            int minlevel, int maxlevel,
1857                                            struct buffer_head *bh,
1858                                            struct inode *dat)
1859 {
1860         int level;
1861
1862         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1863                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1864
1865         for (level = minlevel + 1; level <= maxlevel; level++)
1866                 nilfs_btree_commit_update_v(btree, path, level, dat);
1867 }
1868
1869 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1870                                    struct nilfs_btree_path *path,
1871                                    int level, struct buffer_head *bh)
1872 {
1873         int maxlevel = 0, ret;
1874         struct nilfs_btree_node *parent;
1875         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
1876         __u64 ptr;
1877
1878         get_bh(bh);
1879         path[level].bp_bh = bh;
1880         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1881                                               dat);
1882         if (ret < 0)
1883                 goto out;
1884
1885         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1886                 parent = nilfs_btree_get_node(btree, path, level + 1);
1887                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1888                                                path[level + 1].bp_index);
1889                 ret = nilfs_dat_mark_dirty(dat, ptr);
1890                 if (ret < 0)
1891                         goto out;
1892         }
1893
1894         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1895
1896  out:
1897         brelse(path[level].bp_bh);
1898         path[level].bp_bh = NULL;
1899         return ret;
1900 }
1901
1902 static int nilfs_btree_propagate(struct nilfs_bmap *bmap,
1903                                  struct buffer_head *bh)
1904 {
1905         struct nilfs_btree *btree;
1906         struct nilfs_btree_path *path;
1907         struct nilfs_btree_node *node;
1908         __u64 key;
1909         int level, ret;
1910
1911         WARN_ON(!buffer_dirty(bh));
1912
1913         btree = (struct nilfs_btree *)bmap;
1914         path = nilfs_btree_alloc_path();
1915         if (path == NULL)
1916                 return -ENOMEM;
1917
1918         if (buffer_nilfs_node(bh)) {
1919                 node = (struct nilfs_btree_node *)bh->b_data;
1920                 key = nilfs_btree_node_get_key(node, 0);
1921                 level = nilfs_btree_node_get_level(node);
1922         } else {
1923                 key = nilfs_bmap_data_get_key(bmap, bh);
1924                 level = NILFS_BTREE_LEVEL_DATA;
1925         }
1926
1927         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1928         if (ret < 0) {
1929                 if (unlikely(ret == -ENOENT))
1930                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1931                                __func__, (unsigned long long)key, level);
1932                 goto out;
1933         }
1934
1935         ret = NILFS_BMAP_USE_VBN(bmap) ?
1936                 nilfs_btree_propagate_v(btree, path, level, bh) :
1937                 nilfs_btree_propagate_p(btree, path, level, bh);
1938
1939  out:
1940         nilfs_btree_free_path(path);
1941
1942         return ret;
1943 }
1944
1945 static int nilfs_btree_propagate_gc(struct nilfs_bmap *bmap,
1946                                     struct buffer_head *bh)
1947 {
1948         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr);
1949 }
1950
1951 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1952                                          struct list_head *lists,
1953                                          struct buffer_head *bh)
1954 {
1955         struct list_head *head;
1956         struct buffer_head *cbh;
1957         struct nilfs_btree_node *node, *cnode;
1958         __u64 key, ckey;
1959         int level;
1960
1961         get_bh(bh);
1962         node = (struct nilfs_btree_node *)bh->b_data;
1963         key = nilfs_btree_node_get_key(node, 0);
1964         level = nilfs_btree_node_get_level(node);
1965         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
1966             level >= NILFS_BTREE_LEVEL_MAX) {
1967                 dump_stack();
1968                 printk(KERN_WARNING
1969                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
1970                        "blocknr=%llu)\n",
1971                        __func__, level, (unsigned long long)key,
1972                        NILFS_BMAP_I(&btree->bt_bmap)->vfs_inode.i_ino,
1973                        (unsigned long long)bh->b_blocknr);
1974                 return;
1975         }
1976
1977         list_for_each(head, &lists[level]) {
1978                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1979                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1980                 ckey = nilfs_btree_node_get_key(cnode, 0);
1981                 if (key < ckey)
1982                         break;
1983         }
1984         list_add_tail(&bh->b_assoc_buffers, head);
1985 }
1986
1987 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1988                                              struct list_head *listp)
1989 {
1990         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1991         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1992         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1993         struct pagevec pvec;
1994         struct buffer_head *bh, *head;
1995         pgoff_t index = 0;
1996         int level, i;
1997
1998         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1999              level < NILFS_BTREE_LEVEL_MAX;
2000              level++)
2001                 INIT_LIST_HEAD(&lists[level]);
2002
2003         pagevec_init(&pvec, 0);
2004
2005         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2006                                   PAGEVEC_SIZE)) {
2007                 for (i = 0; i < pagevec_count(&pvec); i++) {
2008                         bh = head = page_buffers(pvec.pages[i]);
2009                         do {
2010                                 if (buffer_dirty(bh))
2011                                         nilfs_btree_add_dirty_buffer(btree,
2012                                                                      lists, bh);
2013                         } while ((bh = bh->b_this_page) != head);
2014                 }
2015                 pagevec_release(&pvec);
2016                 cond_resched();
2017         }
2018
2019         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2020              level < NILFS_BTREE_LEVEL_MAX;
2021              level++)
2022                 list_splice_tail(&lists[level], listp);
2023 }
2024
2025 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2026                                 struct nilfs_btree_path *path,
2027                                 int level,
2028                                 struct buffer_head **bh,
2029                                 sector_t blocknr,
2030                                 union nilfs_binfo *binfo)
2031 {
2032         struct nilfs_btree_node *parent;
2033         __u64 key;
2034         __u64 ptr;
2035         int ret;
2036
2037         parent = nilfs_btree_get_node(btree, path, level + 1);
2038         ptr = nilfs_btree_node_get_ptr(btree, parent,
2039                                        path[level + 1].bp_index);
2040         if (buffer_nilfs_node(*bh)) {
2041                 path[level].bp_ctxt.oldkey = ptr;
2042                 path[level].bp_ctxt.newkey = blocknr;
2043                 path[level].bp_ctxt.bh = *bh;
2044                 ret = nilfs_btnode_prepare_change_key(
2045                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2046                         &path[level].bp_ctxt);
2047                 if (ret < 0)
2048                         return ret;
2049                 nilfs_btnode_commit_change_key(
2050                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2051                         &path[level].bp_ctxt);
2052                 *bh = path[level].bp_ctxt.bh;
2053         }
2054
2055         nilfs_btree_node_set_ptr(btree, parent,
2056                                  path[level + 1].bp_index, blocknr);
2057
2058         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2059         /* on-disk format */
2060         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2061         binfo->bi_dat.bi_level = level;
2062
2063         return 0;
2064 }
2065
2066 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2067                                 struct nilfs_btree_path *path,
2068                                 int level,
2069                                 struct buffer_head **bh,
2070                                 sector_t blocknr,
2071                                 union nilfs_binfo *binfo)
2072 {
2073         struct nilfs_btree_node *parent;
2074         struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap);
2075         __u64 key;
2076         __u64 ptr;
2077         union nilfs_bmap_ptr_req req;
2078         int ret;
2079
2080         parent = nilfs_btree_get_node(btree, path, level + 1);
2081         ptr = nilfs_btree_node_get_ptr(btree, parent,
2082                                        path[level + 1].bp_index);
2083         req.bpr_ptr = ptr;
2084         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2085         if (ret < 0)
2086                 return ret;
2087         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2088
2089         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2090         /* on-disk format */
2091         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2092         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2093
2094         return 0;
2095 }
2096
2097 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2098                               struct buffer_head **bh,
2099                               sector_t blocknr,
2100                               union nilfs_binfo *binfo)
2101 {
2102         struct nilfs_btree *btree;
2103         struct nilfs_btree_path *path;
2104         struct nilfs_btree_node *node;
2105         __u64 key;
2106         int level, ret;
2107
2108         btree = (struct nilfs_btree *)bmap;
2109         path = nilfs_btree_alloc_path();
2110         if (path == NULL)
2111                 return -ENOMEM;
2112
2113         if (buffer_nilfs_node(*bh)) {
2114                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2115                 key = nilfs_btree_node_get_key(node, 0);
2116                 level = nilfs_btree_node_get_level(node);
2117         } else {
2118                 key = nilfs_bmap_data_get_key(bmap, *bh);
2119                 level = NILFS_BTREE_LEVEL_DATA;
2120         }
2121
2122         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2123         if (ret < 0) {
2124                 WARN_ON(ret == -ENOENT);
2125                 goto out;
2126         }
2127
2128         ret = NILFS_BMAP_USE_VBN(bmap) ?
2129                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2130                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2131
2132  out:
2133         nilfs_btree_free_path(path);
2134
2135         return ret;
2136 }
2137
2138 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2139                                  struct buffer_head **bh,
2140                                  sector_t blocknr,
2141                                  union nilfs_binfo *binfo)
2142 {
2143         struct nilfs_btree_node *node;
2144         __u64 key;
2145         int ret;
2146
2147         ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr,
2148                              blocknr);
2149         if (ret < 0)
2150                 return ret;
2151
2152         if (buffer_nilfs_node(*bh)) {
2153                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2154                 key = nilfs_btree_node_get_key(node, 0);
2155         } else
2156                 key = nilfs_bmap_data_get_key(bmap, *bh);
2157
2158         /* on-disk format */
2159         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2160         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2161
2162         return 0;
2163 }
2164
2165 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2166 {
2167         struct buffer_head *bh;
2168         struct nilfs_btree *btree;
2169         struct nilfs_btree_path *path;
2170         __u64 ptr;
2171         int ret;
2172
2173         btree = (struct nilfs_btree *)bmap;
2174         path = nilfs_btree_alloc_path();
2175         if (path == NULL)
2176                 return -ENOMEM;
2177
2178         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2179         if (ret < 0) {
2180                 WARN_ON(ret == -ENOENT);
2181                 goto out;
2182         }
2183         ret = nilfs_btree_get_block(btree, ptr, &bh);
2184         if (ret < 0) {
2185                 WARN_ON(ret == -ENOENT);
2186                 goto out;
2187         }
2188
2189         if (!buffer_dirty(bh))
2190                 nilfs_btnode_mark_dirty(bh);
2191         brelse(bh);
2192         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2193                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2194
2195  out:
2196         nilfs_btree_free_path(path);
2197         return ret;
2198 }
2199
2200 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2201         .bop_lookup             =       nilfs_btree_lookup,
2202         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2203         .bop_insert             =       nilfs_btree_insert,
2204         .bop_delete             =       nilfs_btree_delete,
2205         .bop_clear              =       NULL,
2206
2207         .bop_propagate          =       nilfs_btree_propagate,
2208
2209         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2210
2211         .bop_assign             =       nilfs_btree_assign,
2212         .bop_mark               =       nilfs_btree_mark,
2213
2214         .bop_last_key           =       nilfs_btree_last_key,
2215         .bop_check_insert       =       NULL,
2216         .bop_check_delete       =       nilfs_btree_check_delete,
2217         .bop_gather_data        =       nilfs_btree_gather_data,
2218 };
2219
2220 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2221         .bop_lookup             =       NULL,
2222         .bop_lookup_contig      =       NULL,
2223         .bop_insert             =       NULL,
2224         .bop_delete             =       NULL,
2225         .bop_clear              =       NULL,
2226
2227         .bop_propagate          =       nilfs_btree_propagate_gc,
2228
2229         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2230
2231         .bop_assign             =       nilfs_btree_assign_gc,
2232         .bop_mark               =       NULL,
2233
2234         .bop_last_key           =       NULL,
2235         .bop_check_insert       =       NULL,
2236         .bop_check_delete       =       NULL,
2237         .bop_gather_data        =       NULL,
2238 };
2239
2240 int nilfs_btree_init(struct nilfs_bmap *bmap)
2241 {
2242         bmap->b_ops = &nilfs_btree_ops;
2243         return 0;
2244 }
2245
2246 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2247 {
2248         bmap->b_ops = &nilfs_btree_ops_gc;
2249 }