nilfs2: fiemap support
[linux-2.6.git] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32 #include "dat.h"
33
34 static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
35 {
36         struct nilfs_btree_path *path;
37         int level = NILFS_BTREE_LEVEL_DATA;
38
39         path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
40         if (path == NULL)
41                 goto out;
42
43         for (; level < NILFS_BTREE_LEVEL_MAX; level++) {
44                 path[level].bp_bh = NULL;
45                 path[level].bp_sib_bh = NULL;
46                 path[level].bp_index = 0;
47                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
48                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
49                 path[level].bp_op = NULL;
50         }
51
52 out:
53         return path;
54 }
55
56 static void nilfs_btree_free_path(struct nilfs_btree_path *path)
57 {
58         int level = NILFS_BTREE_LEVEL_DATA;
59
60         for (; level < NILFS_BTREE_LEVEL_MAX; level++)
61                 brelse(path[level].bp_bh);
62
63         kmem_cache_free(nilfs_btree_path_cache, path);
64 }
65
66 /*
67  * B-tree node operations
68  */
69 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree,
70                                      __u64 ptr, struct buffer_head **bhp)
71 {
72         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
73         struct buffer_head *bh;
74
75         bh = nilfs_btnode_create_block(btnc, ptr);
76         if (!bh)
77                 return -ENOMEM;
78
79         set_buffer_nilfs_volatile(bh);
80         *bhp = bh;
81         return 0;
82 }
83
84 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node)
85 {
86         return node->bn_flags;
87 }
88
89 static void
90 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags)
91 {
92         node->bn_flags = flags;
93 }
94
95 static int nilfs_btree_node_root(const struct nilfs_btree_node *node)
96 {
97         return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT;
98 }
99
100 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node)
101 {
102         return node->bn_level;
103 }
104
105 static void
106 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level)
107 {
108         node->bn_level = level;
109 }
110
111 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node)
112 {
113         return le16_to_cpu(node->bn_nchildren);
114 }
115
116 static void
117 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren)
118 {
119         node->bn_nchildren = cpu_to_le16(nchildren);
120 }
121
122 static int nilfs_btree_node_size(const struct nilfs_bmap *btree)
123 {
124         return 1 << btree->b_inode->i_blkbits;
125 }
126
127 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree)
128 {
129         return btree->b_nchildren_per_block;
130 }
131
132 static __le64 *
133 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node)
134 {
135         return (__le64 *)((char *)(node + 1) +
136                           (nilfs_btree_node_root(node) ?
137                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
138 }
139
140 static __le64 *
141 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax)
142 {
143         return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax);
144 }
145
146 static __u64
147 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index)
148 {
149         return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index));
150 }
151
152 static void
153 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key)
154 {
155         *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key);
156 }
157
158 static __u64
159 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index,
160                          int ncmax)
161 {
162         return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index));
163 }
164
165 static void
166 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr,
167                          int ncmax)
168 {
169         *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr);
170 }
171
172 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags,
173                                   int level, int nchildren, int ncmax,
174                                   const __u64 *keys, const __u64 *ptrs)
175 {
176         __le64 *dkeys;
177         __le64 *dptrs;
178         int i;
179
180         nilfs_btree_node_set_flags(node, flags);
181         nilfs_btree_node_set_level(node, level);
182         nilfs_btree_node_set_nchildren(node, nchildren);
183
184         dkeys = nilfs_btree_node_dkeys(node);
185         dptrs = nilfs_btree_node_dptrs(node, ncmax);
186         for (i = 0; i < nchildren; i++) {
187                 dkeys[i] = cpu_to_le64(keys[i]);
188                 dptrs[i] = cpu_to_le64(ptrs[i]);
189         }
190 }
191
192 /* Assume the buffer heads corresponding to left and right are locked. */
193 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left,
194                                        struct nilfs_btree_node *right,
195                                        int n, int lncmax, int rncmax)
196 {
197         __le64 *ldkeys, *rdkeys;
198         __le64 *ldptrs, *rdptrs;
199         int lnchildren, rnchildren;
200
201         ldkeys = nilfs_btree_node_dkeys(left);
202         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
203         lnchildren = nilfs_btree_node_get_nchildren(left);
204
205         rdkeys = nilfs_btree_node_dkeys(right);
206         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
207         rnchildren = nilfs_btree_node_get_nchildren(right);
208
209         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
210         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
211         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
212         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
213
214         lnchildren += n;
215         rnchildren -= n;
216         nilfs_btree_node_set_nchildren(left, lnchildren);
217         nilfs_btree_node_set_nchildren(right, rnchildren);
218 }
219
220 /* Assume that the buffer heads corresponding to left and right are locked. */
221 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left,
222                                         struct nilfs_btree_node *right,
223                                         int n, int lncmax, int rncmax)
224 {
225         __le64 *ldkeys, *rdkeys;
226         __le64 *ldptrs, *rdptrs;
227         int lnchildren, rnchildren;
228
229         ldkeys = nilfs_btree_node_dkeys(left);
230         ldptrs = nilfs_btree_node_dptrs(left, lncmax);
231         lnchildren = nilfs_btree_node_get_nchildren(left);
232
233         rdkeys = nilfs_btree_node_dkeys(right);
234         rdptrs = nilfs_btree_node_dptrs(right, rncmax);
235         rnchildren = nilfs_btree_node_get_nchildren(right);
236
237         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
238         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
239         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
240         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
241
242         lnchildren -= n;
243         rnchildren += n;
244         nilfs_btree_node_set_nchildren(left, lnchildren);
245         nilfs_btree_node_set_nchildren(right, rnchildren);
246 }
247
248 /* Assume that the buffer head corresponding to node is locked. */
249 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index,
250                                     __u64 key, __u64 ptr, int ncmax)
251 {
252         __le64 *dkeys;
253         __le64 *dptrs;
254         int nchildren;
255
256         dkeys = nilfs_btree_node_dkeys(node);
257         dptrs = nilfs_btree_node_dptrs(node, ncmax);
258         nchildren = nilfs_btree_node_get_nchildren(node);
259         if (index < nchildren) {
260                 memmove(dkeys + index + 1, dkeys + index,
261                         (nchildren - index) * sizeof(*dkeys));
262                 memmove(dptrs + index + 1, dptrs + index,
263                         (nchildren - index) * sizeof(*dptrs));
264         }
265         dkeys[index] = cpu_to_le64(key);
266         dptrs[index] = cpu_to_le64(ptr);
267         nchildren++;
268         nilfs_btree_node_set_nchildren(node, nchildren);
269 }
270
271 /* Assume that the buffer head corresponding to node is locked. */
272 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index,
273                                     __u64 *keyp, __u64 *ptrp, int ncmax)
274 {
275         __u64 key;
276         __u64 ptr;
277         __le64 *dkeys;
278         __le64 *dptrs;
279         int nchildren;
280
281         dkeys = nilfs_btree_node_dkeys(node);
282         dptrs = nilfs_btree_node_dptrs(node, ncmax);
283         key = le64_to_cpu(dkeys[index]);
284         ptr = le64_to_cpu(dptrs[index]);
285         nchildren = nilfs_btree_node_get_nchildren(node);
286         if (keyp != NULL)
287                 *keyp = key;
288         if (ptrp != NULL)
289                 *ptrp = ptr;
290
291         if (index < nchildren - 1) {
292                 memmove(dkeys + index, dkeys + index + 1,
293                         (nchildren - index - 1) * sizeof(*dkeys));
294                 memmove(dptrs + index, dptrs + index + 1,
295                         (nchildren - index - 1) * sizeof(*dptrs));
296         }
297         nchildren--;
298         nilfs_btree_node_set_nchildren(node, nchildren);
299 }
300
301 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node,
302                                    __u64 key, int *indexp)
303 {
304         __u64 nkey;
305         int index, low, high, s;
306
307         /* binary search */
308         low = 0;
309         high = nilfs_btree_node_get_nchildren(node) - 1;
310         index = 0;
311         s = 0;
312         while (low <= high) {
313                 index = (low + high) / 2;
314                 nkey = nilfs_btree_node_get_key(node, index);
315                 if (nkey == key) {
316                         s = 0;
317                         goto out;
318                 } else if (nkey < key) {
319                         low = index + 1;
320                         s = -1;
321                 } else {
322                         high = index - 1;
323                         s = 1;
324                 }
325         }
326
327         /* adjust index */
328         if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) {
329                 if (s > 0 && index > 0)
330                         index--;
331         } else if (s < 0)
332                 index++;
333
334  out:
335         *indexp = index;
336
337         return s == 0;
338 }
339
340 /**
341  * nilfs_btree_node_broken - verify consistency of btree node
342  * @node: btree node block to be examined
343  * @size: node size (in bytes)
344  * @blocknr: block number
345  *
346  * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
347  */
348 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
349                                    size_t size, sector_t blocknr)
350 {
351         int level, flags, nchildren;
352         int ret = 0;
353
354         level = nilfs_btree_node_get_level(node);
355         flags = nilfs_btree_node_get_flags(node);
356         nchildren = nilfs_btree_node_get_nchildren(node);
357
358         if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
359                      level >= NILFS_BTREE_LEVEL_MAX ||
360                      (flags & NILFS_BTREE_NODE_ROOT) ||
361                      nchildren < 0 ||
362                      nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
363                 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): "
364                        "level = %d, flags = 0x%x, nchildren = %d\n",
365                        (unsigned long long)blocknr, level, flags, nchildren);
366                 ret = 1;
367         }
368         return ret;
369 }
370
371 int nilfs_btree_broken_node_block(struct buffer_head *bh)
372 {
373         int ret;
374
375         if (buffer_nilfs_checked(bh))
376                 return 0;
377
378         ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data,
379                                        bh->b_size, bh->b_blocknr);
380         if (likely(!ret))
381                 set_buffer_nilfs_checked(bh);
382         return ret;
383 }
384
385 static struct nilfs_btree_node *
386 nilfs_btree_get_root(const struct nilfs_bmap *btree)
387 {
388         return (struct nilfs_btree_node *)btree->b_u.u_data;
389 }
390
391 static struct nilfs_btree_node *
392 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level)
393 {
394         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
395 }
396
397 static struct nilfs_btree_node *
398 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level)
399 {
400         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
401 }
402
403 static int nilfs_btree_height(const struct nilfs_bmap *btree)
404 {
405         return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1;
406 }
407
408 static struct nilfs_btree_node *
409 nilfs_btree_get_node(const struct nilfs_bmap *btree,
410                      const struct nilfs_btree_path *path,
411                      int level, int *ncmaxp)
412 {
413         struct nilfs_btree_node *node;
414
415         if (level == nilfs_btree_height(btree) - 1) {
416                 node = nilfs_btree_get_root(btree);
417                 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX;
418         } else {
419                 node = nilfs_btree_get_nonroot_node(path, level);
420                 *ncmaxp = nilfs_btree_nchildren_per_block(btree);
421         }
422         return node;
423 }
424
425 static int
426 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level)
427 {
428         if (unlikely(nilfs_btree_node_get_level(node) != level)) {
429                 dump_stack();
430                 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n",
431                        nilfs_btree_node_get_level(node), level);
432                 return 1;
433         }
434         return 0;
435 }
436
437 struct nilfs_btree_readahead_info {
438         struct nilfs_btree_node *node;  /* parent node */
439         int max_ra_blocks;              /* max nof blocks to read ahead */
440         int index;                      /* current index on the parent node */
441         int ncmax;                      /* nof children in the parent node */
442 };
443
444 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
445                                    struct buffer_head **bhp,
446                                    const struct nilfs_btree_readahead_info *ra)
447 {
448         struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache;
449         struct buffer_head *bh, *ra_bh;
450         sector_t submit_ptr = 0;
451         int ret;
452
453         ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr);
454         if (ret) {
455                 if (ret != -EEXIST)
456                         return ret;
457                 goto out_check;
458         }
459
460         if (ra) {
461                 int i, n;
462                 __u64 ptr2;
463
464                 /* read ahead sibling nodes */
465                 for (n = ra->max_ra_blocks, i = ra->index + 1;
466                      n > 0 && i < ra->ncmax; n--, i++) {
467                         ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax);
468
469                         ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA,
470                                                         &ra_bh, &submit_ptr);
471                         if (likely(!ret || ret == -EEXIST))
472                                 brelse(ra_bh);
473                         else if (ret != -EBUSY)
474                                 break;
475                         if (!buffer_locked(bh))
476                                 goto out_no_wait;
477                 }
478         }
479
480         wait_on_buffer(bh);
481
482  out_no_wait:
483         if (!buffer_uptodate(bh)) {
484                 brelse(bh);
485                 return -EIO;
486         }
487
488  out_check:
489         if (nilfs_btree_broken_node_block(bh)) {
490                 clear_buffer_uptodate(bh);
491                 brelse(bh);
492                 return -EINVAL;
493         }
494
495         *bhp = bh;
496         return 0;
497 }
498
499 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,
500                                    struct buffer_head **bhp)
501 {
502         return __nilfs_btree_get_block(btree, ptr, bhp, NULL);
503 }
504
505 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree,
506                                  struct nilfs_btree_path *path,
507                                  __u64 key, __u64 *ptrp, int minlevel,
508                                  int readahead)
509 {
510         struct nilfs_btree_node *node;
511         struct nilfs_btree_readahead_info p, *ra;
512         __u64 ptr;
513         int level, index, found, ncmax, ret;
514
515         node = nilfs_btree_get_root(btree);
516         level = nilfs_btree_node_get_level(node);
517         if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0)
518                 return -ENOENT;
519
520         found = nilfs_btree_node_lookup(node, key, &index);
521         ptr = nilfs_btree_node_get_ptr(node, index,
522                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
523         path[level].bp_bh = NULL;
524         path[level].bp_index = index;
525
526         ncmax = nilfs_btree_nchildren_per_block(btree);
527
528         while (--level >= minlevel) {
529                 ra = NULL;
530                 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) {
531                         p.node = nilfs_btree_get_node(btree, path, level + 1,
532                                                       &p.ncmax);
533                         p.index = index;
534                         p.max_ra_blocks = 7;
535                         ra = &p;
536                 }
537                 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh,
538                                               ra);
539                 if (ret < 0)
540                         return ret;
541
542                 node = nilfs_btree_get_nonroot_node(path, level);
543                 if (nilfs_btree_bad_node(node, level))
544                         return -EINVAL;
545                 if (!found)
546                         found = nilfs_btree_node_lookup(node, key, &index);
547                 else
548                         index = 0;
549                 if (index < ncmax) {
550                         ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
551                 } else {
552                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
553                         /* insert */
554                         ptr = NILFS_BMAP_INVALID_PTR;
555                 }
556                 path[level].bp_index = index;
557         }
558         if (!found)
559                 return -ENOENT;
560
561         if (ptrp != NULL)
562                 *ptrp = ptr;
563
564         return 0;
565 }
566
567 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree,
568                                       struct nilfs_btree_path *path,
569                                       __u64 *keyp, __u64 *ptrp)
570 {
571         struct nilfs_btree_node *node;
572         __u64 ptr;
573         int index, level, ncmax, ret;
574
575         node = nilfs_btree_get_root(btree);
576         index = nilfs_btree_node_get_nchildren(node) - 1;
577         if (index < 0)
578                 return -ENOENT;
579         level = nilfs_btree_node_get_level(node);
580         ptr = nilfs_btree_node_get_ptr(node, index,
581                                        NILFS_BTREE_ROOT_NCHILDREN_MAX);
582         path[level].bp_bh = NULL;
583         path[level].bp_index = index;
584         ncmax = nilfs_btree_nchildren_per_block(btree);
585
586         for (level--; level > 0; level--) {
587                 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
588                 if (ret < 0)
589                         return ret;
590                 node = nilfs_btree_get_nonroot_node(path, level);
591                 if (nilfs_btree_bad_node(node, level))
592                         return -EINVAL;
593                 index = nilfs_btree_node_get_nchildren(node) - 1;
594                 ptr = nilfs_btree_node_get_ptr(node, index, ncmax);
595                 path[level].bp_index = index;
596         }
597
598         if (keyp != NULL)
599                 *keyp = nilfs_btree_node_get_key(node, index);
600         if (ptrp != NULL)
601                 *ptrp = ptr;
602
603         return 0;
604 }
605
606 static int nilfs_btree_lookup(const struct nilfs_bmap *btree,
607                               __u64 key, int level, __u64 *ptrp)
608 {
609         struct nilfs_btree_path *path;
610         int ret;
611
612         path = nilfs_btree_alloc_path();
613         if (path == NULL)
614                 return -ENOMEM;
615
616         ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0);
617
618         nilfs_btree_free_path(path);
619
620         return ret;
621 }
622
623 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
624                                      __u64 key, __u64 *ptrp, unsigned maxblocks)
625 {
626         struct nilfs_btree_path *path;
627         struct nilfs_btree_node *node;
628         struct inode *dat = NULL;
629         __u64 ptr, ptr2;
630         sector_t blocknr;
631         int level = NILFS_BTREE_LEVEL_NODE_MIN;
632         int ret, cnt, index, maxlevel, ncmax;
633         struct nilfs_btree_readahead_info p;
634
635         path = nilfs_btree_alloc_path();
636         if (path == NULL)
637                 return -ENOMEM;
638
639         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1);
640         if (ret < 0)
641                 goto out;
642
643         if (NILFS_BMAP_USE_VBN(btree)) {
644                 dat = nilfs_bmap_get_dat(btree);
645                 ret = nilfs_dat_translate(dat, ptr, &blocknr);
646                 if (ret < 0)
647                         goto out;
648                 ptr = blocknr;
649         }
650         cnt = 1;
651         if (cnt == maxblocks)
652                 goto end;
653
654         maxlevel = nilfs_btree_height(btree) - 1;
655         node = nilfs_btree_get_node(btree, path, level, &ncmax);
656         index = path[level].bp_index + 1;
657         for (;;) {
658                 while (index < nilfs_btree_node_get_nchildren(node)) {
659                         if (nilfs_btree_node_get_key(node, index) !=
660                             key + cnt)
661                                 goto end;
662                         ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax);
663                         if (dat) {
664                                 ret = nilfs_dat_translate(dat, ptr2, &blocknr);
665                                 if (ret < 0)
666                                         goto out;
667                                 ptr2 = blocknr;
668                         }
669                         if (ptr2 != ptr + cnt || ++cnt == maxblocks)
670                                 goto end;
671                         index++;
672                         continue;
673                 }
674                 if (level == maxlevel)
675                         break;
676
677                 /* look-up right sibling node */
678                 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax);
679                 p.index = path[level + 1].bp_index + 1;
680                 p.max_ra_blocks = 7;
681                 if (p.index >= nilfs_btree_node_get_nchildren(p.node) ||
682                     nilfs_btree_node_get_key(p.node, p.index) != key + cnt)
683                         break;
684                 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax);
685                 path[level + 1].bp_index = p.index;
686
687                 brelse(path[level].bp_bh);
688                 path[level].bp_bh = NULL;
689
690                 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh,
691                                               &p);
692                 if (ret < 0)
693                         goto out;
694                 node = nilfs_btree_get_nonroot_node(path, level);
695                 ncmax = nilfs_btree_nchildren_per_block(btree);
696                 index = 0;
697                 path[level].bp_index = index;
698         }
699  end:
700         *ptrp = ptr;
701         ret = cnt;
702  out:
703         nilfs_btree_free_path(path);
704         return ret;
705 }
706
707 static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
708                                     struct nilfs_btree_path *path,
709                                     int level, __u64 key)
710 {
711         if (level < nilfs_btree_height(btree) - 1) {
712                 do {
713                         nilfs_btree_node_set_key(
714                                 nilfs_btree_get_nonroot_node(path, level),
715                                 path[level].bp_index, key);
716                         if (!buffer_dirty(path[level].bp_bh))
717                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
718                 } while ((path[level].bp_index == 0) &&
719                          (++level < nilfs_btree_height(btree) - 1));
720         }
721
722         /* root */
723         if (level == nilfs_btree_height(btree) - 1) {
724                 nilfs_btree_node_set_key(nilfs_btree_get_root(btree),
725                                          path[level].bp_index, key);
726         }
727 }
728
729 static void nilfs_btree_do_insert(struct nilfs_bmap *btree,
730                                   struct nilfs_btree_path *path,
731                                   int level, __u64 *keyp, __u64 *ptrp)
732 {
733         struct nilfs_btree_node *node;
734         int ncblk;
735
736         if (level < nilfs_btree_height(btree) - 1) {
737                 node = nilfs_btree_get_nonroot_node(path, level);
738                 ncblk = nilfs_btree_nchildren_per_block(btree);
739                 nilfs_btree_node_insert(node, path[level].bp_index,
740                                         *keyp, *ptrp, ncblk);
741                 if (!buffer_dirty(path[level].bp_bh))
742                         nilfs_btnode_mark_dirty(path[level].bp_bh);
743
744                 if (path[level].bp_index == 0)
745                         nilfs_btree_promote_key(btree, path, level + 1,
746                                                 nilfs_btree_node_get_key(node,
747                                                                          0));
748         } else {
749                 node = nilfs_btree_get_root(btree);
750                 nilfs_btree_node_insert(node, path[level].bp_index,
751                                         *keyp, *ptrp,
752                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
753         }
754 }
755
756 static void nilfs_btree_carry_left(struct nilfs_bmap *btree,
757                                    struct nilfs_btree_path *path,
758                                    int level, __u64 *keyp, __u64 *ptrp)
759 {
760         struct nilfs_btree_node *node, *left;
761         int nchildren, lnchildren, n, move, ncblk;
762
763         node = nilfs_btree_get_nonroot_node(path, level);
764         left = nilfs_btree_get_sib_node(path, level);
765         nchildren = nilfs_btree_node_get_nchildren(node);
766         lnchildren = nilfs_btree_node_get_nchildren(left);
767         ncblk = nilfs_btree_nchildren_per_block(btree);
768         move = 0;
769
770         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
771         if (n > path[level].bp_index) {
772                 /* move insert point */
773                 n--;
774                 move = 1;
775         }
776
777         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
778
779         if (!buffer_dirty(path[level].bp_bh))
780                 nilfs_btnode_mark_dirty(path[level].bp_bh);
781         if (!buffer_dirty(path[level].bp_sib_bh))
782                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
783
784         nilfs_btree_promote_key(btree, path, level + 1,
785                                 nilfs_btree_node_get_key(node, 0));
786
787         if (move) {
788                 brelse(path[level].bp_bh);
789                 path[level].bp_bh = path[level].bp_sib_bh;
790                 path[level].bp_sib_bh = NULL;
791                 path[level].bp_index += lnchildren;
792                 path[level + 1].bp_index--;
793         } else {
794                 brelse(path[level].bp_sib_bh);
795                 path[level].bp_sib_bh = NULL;
796                 path[level].bp_index -= n;
797         }
798
799         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
800 }
801
802 static void nilfs_btree_carry_right(struct nilfs_bmap *btree,
803                                     struct nilfs_btree_path *path,
804                                     int level, __u64 *keyp, __u64 *ptrp)
805 {
806         struct nilfs_btree_node *node, *right;
807         int nchildren, rnchildren, n, move, ncblk;
808
809         node = nilfs_btree_get_nonroot_node(path, level);
810         right = nilfs_btree_get_sib_node(path, level);
811         nchildren = nilfs_btree_node_get_nchildren(node);
812         rnchildren = nilfs_btree_node_get_nchildren(right);
813         ncblk = nilfs_btree_nchildren_per_block(btree);
814         move = 0;
815
816         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
817         if (n > nchildren - path[level].bp_index) {
818                 /* move insert point */
819                 n--;
820                 move = 1;
821         }
822
823         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
824
825         if (!buffer_dirty(path[level].bp_bh))
826                 nilfs_btnode_mark_dirty(path[level].bp_bh);
827         if (!buffer_dirty(path[level].bp_sib_bh))
828                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
829
830         path[level + 1].bp_index++;
831         nilfs_btree_promote_key(btree, path, level + 1,
832                                 nilfs_btree_node_get_key(right, 0));
833         path[level + 1].bp_index--;
834
835         if (move) {
836                 brelse(path[level].bp_bh);
837                 path[level].bp_bh = path[level].bp_sib_bh;
838                 path[level].bp_sib_bh = NULL;
839                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
840                 path[level + 1].bp_index++;
841         } else {
842                 brelse(path[level].bp_sib_bh);
843                 path[level].bp_sib_bh = NULL;
844         }
845
846         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
847 }
848
849 static void nilfs_btree_split(struct nilfs_bmap *btree,
850                               struct nilfs_btree_path *path,
851                               int level, __u64 *keyp, __u64 *ptrp)
852 {
853         struct nilfs_btree_node *node, *right;
854         __u64 newkey;
855         __u64 newptr;
856         int nchildren, n, move, ncblk;
857
858         node = nilfs_btree_get_nonroot_node(path, level);
859         right = nilfs_btree_get_sib_node(path, level);
860         nchildren = nilfs_btree_node_get_nchildren(node);
861         ncblk = nilfs_btree_nchildren_per_block(btree);
862         move = 0;
863
864         n = (nchildren + 1) / 2;
865         if (n > nchildren - path[level].bp_index) {
866                 n--;
867                 move = 1;
868         }
869
870         nilfs_btree_node_move_right(node, right, n, ncblk, ncblk);
871
872         if (!buffer_dirty(path[level].bp_bh))
873                 nilfs_btnode_mark_dirty(path[level].bp_bh);
874         if (!buffer_dirty(path[level].bp_sib_bh))
875                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
876
877         newkey = nilfs_btree_node_get_key(right, 0);
878         newptr = path[level].bp_newreq.bpr_ptr;
879
880         if (move) {
881                 path[level].bp_index -= nilfs_btree_node_get_nchildren(node);
882                 nilfs_btree_node_insert(right, path[level].bp_index,
883                                         *keyp, *ptrp, ncblk);
884
885                 *keyp = nilfs_btree_node_get_key(right, 0);
886                 *ptrp = path[level].bp_newreq.bpr_ptr;
887
888                 brelse(path[level].bp_bh);
889                 path[level].bp_bh = path[level].bp_sib_bh;
890                 path[level].bp_sib_bh = NULL;
891         } else {
892                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
893
894                 *keyp = nilfs_btree_node_get_key(right, 0);
895                 *ptrp = path[level].bp_newreq.bpr_ptr;
896
897                 brelse(path[level].bp_sib_bh);
898                 path[level].bp_sib_bh = NULL;
899         }
900
901         path[level + 1].bp_index++;
902 }
903
904 static void nilfs_btree_grow(struct nilfs_bmap *btree,
905                              struct nilfs_btree_path *path,
906                              int level, __u64 *keyp, __u64 *ptrp)
907 {
908         struct nilfs_btree_node *root, *child;
909         int n, ncblk;
910
911         root = nilfs_btree_get_root(btree);
912         child = nilfs_btree_get_sib_node(path, level);
913         ncblk = nilfs_btree_nchildren_per_block(btree);
914
915         n = nilfs_btree_node_get_nchildren(root);
916
917         nilfs_btree_node_move_right(root, child, n,
918                                     NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
919         nilfs_btree_node_set_level(root, level + 1);
920
921         if (!buffer_dirty(path[level].bp_sib_bh))
922                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
923
924         path[level].bp_bh = path[level].bp_sib_bh;
925         path[level].bp_sib_bh = NULL;
926
927         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
928
929         *keyp = nilfs_btree_node_get_key(child, 0);
930         *ptrp = path[level].bp_newreq.bpr_ptr;
931 }
932
933 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree,
934                                    const struct nilfs_btree_path *path)
935 {
936         struct nilfs_btree_node *node;
937         int level, ncmax;
938
939         if (path == NULL)
940                 return NILFS_BMAP_INVALID_PTR;
941
942         /* left sibling */
943         level = NILFS_BTREE_LEVEL_NODE_MIN;
944         if (path[level].bp_index > 0) {
945                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
946                 return nilfs_btree_node_get_ptr(node,
947                                                 path[level].bp_index - 1,
948                                                 ncmax);
949         }
950
951         /* parent */
952         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
953         if (level <= nilfs_btree_height(btree) - 1) {
954                 node = nilfs_btree_get_node(btree, path, level, &ncmax);
955                 return nilfs_btree_node_get_ptr(node, path[level].bp_index,
956                                                 ncmax);
957         }
958
959         return NILFS_BMAP_INVALID_PTR;
960 }
961
962 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree,
963                                        const struct nilfs_btree_path *path,
964                                        __u64 key)
965 {
966         __u64 ptr;
967
968         ptr = nilfs_bmap_find_target_seq(btree, key);
969         if (ptr != NILFS_BMAP_INVALID_PTR)
970                 /* sequential access */
971                 return ptr;
972         else {
973                 ptr = nilfs_btree_find_near(btree, path);
974                 if (ptr != NILFS_BMAP_INVALID_PTR)
975                         /* near */
976                         return ptr;
977         }
978         /* block group */
979         return nilfs_bmap_find_target_in_group(btree);
980 }
981
982 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree,
983                                       struct nilfs_btree_path *path,
984                                       int *levelp, __u64 key, __u64 ptr,
985                                       struct nilfs_bmap_stats *stats)
986 {
987         struct buffer_head *bh;
988         struct nilfs_btree_node *node, *parent, *sib;
989         __u64 sibptr;
990         int pindex, level, ncmax, ncblk, ret;
991         struct inode *dat = NULL;
992
993         stats->bs_nblocks = 0;
994         level = NILFS_BTREE_LEVEL_DATA;
995
996         /* allocate a new ptr for data block */
997         if (NILFS_BMAP_USE_VBN(btree)) {
998                 path[level].bp_newreq.bpr_ptr =
999                         nilfs_btree_find_target_v(btree, path, key);
1000                 dat = nilfs_bmap_get_dat(btree);
1001         }
1002
1003         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1004         if (ret < 0)
1005                 goto err_out_data;
1006
1007         ncblk = nilfs_btree_nchildren_per_block(btree);
1008
1009         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1010              level < nilfs_btree_height(btree) - 1;
1011              level++) {
1012                 node = nilfs_btree_get_nonroot_node(path, level);
1013                 if (nilfs_btree_node_get_nchildren(node) < ncblk) {
1014                         path[level].bp_op = nilfs_btree_do_insert;
1015                         stats->bs_nblocks++;
1016                         goto out;
1017                 }
1018
1019                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1020                 pindex = path[level + 1].bp_index;
1021
1022                 /* left sibling */
1023                 if (pindex > 0) {
1024                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1025                                                           ncmax);
1026                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1027                         if (ret < 0)
1028                                 goto err_out_child_node;
1029                         sib = (struct nilfs_btree_node *)bh->b_data;
1030                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1031                                 path[level].bp_sib_bh = bh;
1032                                 path[level].bp_op = nilfs_btree_carry_left;
1033                                 stats->bs_nblocks++;
1034                                 goto out;
1035                         } else {
1036                                 brelse(bh);
1037                         }
1038                 }
1039
1040                 /* right sibling */
1041                 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) {
1042                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1043                                                           ncmax);
1044                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1045                         if (ret < 0)
1046                                 goto err_out_child_node;
1047                         sib = (struct nilfs_btree_node *)bh->b_data;
1048                         if (nilfs_btree_node_get_nchildren(sib) < ncblk) {
1049                                 path[level].bp_sib_bh = bh;
1050                                 path[level].bp_op = nilfs_btree_carry_right;
1051                                 stats->bs_nblocks++;
1052                                 goto out;
1053                         } else {
1054                                 brelse(bh);
1055                         }
1056                 }
1057
1058                 /* split */
1059                 path[level].bp_newreq.bpr_ptr =
1060                         path[level - 1].bp_newreq.bpr_ptr + 1;
1061                 ret = nilfs_bmap_prepare_alloc_ptr(btree,
1062                                                    &path[level].bp_newreq, dat);
1063                 if (ret < 0)
1064                         goto err_out_child_node;
1065                 ret = nilfs_btree_get_new_block(btree,
1066                                                 path[level].bp_newreq.bpr_ptr,
1067                                                 &bh);
1068                 if (ret < 0)
1069                         goto err_out_curr_node;
1070
1071                 stats->bs_nblocks++;
1072
1073                 sib = (struct nilfs_btree_node *)bh->b_data;
1074                 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL);
1075                 path[level].bp_sib_bh = bh;
1076                 path[level].bp_op = nilfs_btree_split;
1077         }
1078
1079         /* root */
1080         node = nilfs_btree_get_root(btree);
1081         if (nilfs_btree_node_get_nchildren(node) <
1082             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1083                 path[level].bp_op = nilfs_btree_do_insert;
1084                 stats->bs_nblocks++;
1085                 goto out;
1086         }
1087
1088         /* grow */
1089         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
1090         ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat);
1091         if (ret < 0)
1092                 goto err_out_child_node;
1093         ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
1094                                         &bh);
1095         if (ret < 0)
1096                 goto err_out_curr_node;
1097
1098         nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data,
1099                               0, level, 0, ncblk, NULL, NULL);
1100         path[level].bp_sib_bh = bh;
1101         path[level].bp_op = nilfs_btree_grow;
1102
1103         level++;
1104         path[level].bp_op = nilfs_btree_do_insert;
1105
1106         /* a newly-created node block and a data block are added */
1107         stats->bs_nblocks += 2;
1108
1109         /* success */
1110  out:
1111         *levelp = level;
1112         return ret;
1113
1114         /* error */
1115  err_out_curr_node:
1116         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1117  err_out_child_node:
1118         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1119                 nilfs_btnode_delete(path[level].bp_sib_bh);
1120                 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1121
1122         }
1123
1124         nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat);
1125  err_out_data:
1126         *levelp = level;
1127         stats->bs_nblocks = 0;
1128         return ret;
1129 }
1130
1131 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree,
1132                                       struct nilfs_btree_path *path,
1133                                       int maxlevel, __u64 key, __u64 ptr)
1134 {
1135         struct inode *dat = NULL;
1136         int level;
1137
1138         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1139         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1140         if (NILFS_BMAP_USE_VBN(btree)) {
1141                 nilfs_bmap_set_target_v(btree, key, ptr);
1142                 dat = nilfs_bmap_get_dat(btree);
1143         }
1144
1145         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1146                 nilfs_bmap_commit_alloc_ptr(btree,
1147                                             &path[level - 1].bp_newreq, dat);
1148                 path[level].bp_op(btree, path, level, &key, &ptr);
1149         }
1150
1151         if (!nilfs_bmap_dirty(btree))
1152                 nilfs_bmap_set_dirty(btree);
1153 }
1154
1155 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr)
1156 {
1157         struct nilfs_btree_path *path;
1158         struct nilfs_bmap_stats stats;
1159         int level, ret;
1160
1161         path = nilfs_btree_alloc_path();
1162         if (path == NULL)
1163                 return -ENOMEM;
1164
1165         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1166                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1167         if (ret != -ENOENT) {
1168                 if (ret == 0)
1169                         ret = -EEXIST;
1170                 goto out;
1171         }
1172
1173         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1174         if (ret < 0)
1175                 goto out;
1176         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1177         nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1178
1179  out:
1180         nilfs_btree_free_path(path);
1181         return ret;
1182 }
1183
1184 static void nilfs_btree_do_delete(struct nilfs_bmap *btree,
1185                                   struct nilfs_btree_path *path,
1186                                   int level, __u64 *keyp, __u64 *ptrp)
1187 {
1188         struct nilfs_btree_node *node;
1189         int ncblk;
1190
1191         if (level < nilfs_btree_height(btree) - 1) {
1192                 node = nilfs_btree_get_nonroot_node(path, level);
1193                 ncblk = nilfs_btree_nchildren_per_block(btree);
1194                 nilfs_btree_node_delete(node, path[level].bp_index,
1195                                         keyp, ptrp, ncblk);
1196                 if (!buffer_dirty(path[level].bp_bh))
1197                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1198                 if (path[level].bp_index == 0)
1199                         nilfs_btree_promote_key(btree, path, level + 1,
1200                                 nilfs_btree_node_get_key(node, 0));
1201         } else {
1202                 node = nilfs_btree_get_root(btree);
1203                 nilfs_btree_node_delete(node, path[level].bp_index,
1204                                         keyp, ptrp,
1205                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1206         }
1207 }
1208
1209 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree,
1210                                     struct nilfs_btree_path *path,
1211                                     int level, __u64 *keyp, __u64 *ptrp)
1212 {
1213         struct nilfs_btree_node *node, *left;
1214         int nchildren, lnchildren, n, ncblk;
1215
1216         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1217
1218         node = nilfs_btree_get_nonroot_node(path, level);
1219         left = nilfs_btree_get_sib_node(path, level);
1220         nchildren = nilfs_btree_node_get_nchildren(node);
1221         lnchildren = nilfs_btree_node_get_nchildren(left);
1222         ncblk = nilfs_btree_nchildren_per_block(btree);
1223
1224         n = (nchildren + lnchildren) / 2 - nchildren;
1225
1226         nilfs_btree_node_move_right(left, node, n, ncblk, ncblk);
1227
1228         if (!buffer_dirty(path[level].bp_bh))
1229                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1230         if (!buffer_dirty(path[level].bp_sib_bh))
1231                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1232
1233         nilfs_btree_promote_key(btree, path, level + 1,
1234                                 nilfs_btree_node_get_key(node, 0));
1235
1236         brelse(path[level].bp_sib_bh);
1237         path[level].bp_sib_bh = NULL;
1238         path[level].bp_index += n;
1239 }
1240
1241 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree,
1242                                      struct nilfs_btree_path *path,
1243                                      int level, __u64 *keyp, __u64 *ptrp)
1244 {
1245         struct nilfs_btree_node *node, *right;
1246         int nchildren, rnchildren, n, ncblk;
1247
1248         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1249
1250         node = nilfs_btree_get_nonroot_node(path, level);
1251         right = nilfs_btree_get_sib_node(path, level);
1252         nchildren = nilfs_btree_node_get_nchildren(node);
1253         rnchildren = nilfs_btree_node_get_nchildren(right);
1254         ncblk = nilfs_btree_nchildren_per_block(btree);
1255
1256         n = (nchildren + rnchildren) / 2 - nchildren;
1257
1258         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1259
1260         if (!buffer_dirty(path[level].bp_bh))
1261                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1262         if (!buffer_dirty(path[level].bp_sib_bh))
1263                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1264
1265         path[level + 1].bp_index++;
1266         nilfs_btree_promote_key(btree, path, level + 1,
1267                                 nilfs_btree_node_get_key(right, 0));
1268         path[level + 1].bp_index--;
1269
1270         brelse(path[level].bp_sib_bh);
1271         path[level].bp_sib_bh = NULL;
1272 }
1273
1274 static void nilfs_btree_concat_left(struct nilfs_bmap *btree,
1275                                     struct nilfs_btree_path *path,
1276                                     int level, __u64 *keyp, __u64 *ptrp)
1277 {
1278         struct nilfs_btree_node *node, *left;
1279         int n, ncblk;
1280
1281         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1282
1283         node = nilfs_btree_get_nonroot_node(path, level);
1284         left = nilfs_btree_get_sib_node(path, level);
1285         ncblk = nilfs_btree_nchildren_per_block(btree);
1286
1287         n = nilfs_btree_node_get_nchildren(node);
1288
1289         nilfs_btree_node_move_left(left, node, n, ncblk, ncblk);
1290
1291         if (!buffer_dirty(path[level].bp_sib_bh))
1292                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1293
1294         nilfs_btnode_delete(path[level].bp_bh);
1295         path[level].bp_bh = path[level].bp_sib_bh;
1296         path[level].bp_sib_bh = NULL;
1297         path[level].bp_index += nilfs_btree_node_get_nchildren(left);
1298 }
1299
1300 static void nilfs_btree_concat_right(struct nilfs_bmap *btree,
1301                                      struct nilfs_btree_path *path,
1302                                      int level, __u64 *keyp, __u64 *ptrp)
1303 {
1304         struct nilfs_btree_node *node, *right;
1305         int n, ncblk;
1306
1307         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1308
1309         node = nilfs_btree_get_nonroot_node(path, level);
1310         right = nilfs_btree_get_sib_node(path, level);
1311         ncblk = nilfs_btree_nchildren_per_block(btree);
1312
1313         n = nilfs_btree_node_get_nchildren(right);
1314
1315         nilfs_btree_node_move_left(node, right, n, ncblk, ncblk);
1316
1317         if (!buffer_dirty(path[level].bp_bh))
1318                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1319
1320         nilfs_btnode_delete(path[level].bp_sib_bh);
1321         path[level].bp_sib_bh = NULL;
1322         path[level + 1].bp_index++;
1323 }
1324
1325 static void nilfs_btree_shrink(struct nilfs_bmap *btree,
1326                                struct nilfs_btree_path *path,
1327                                int level, __u64 *keyp, __u64 *ptrp)
1328 {
1329         struct nilfs_btree_node *root, *child;
1330         int n, ncblk;
1331
1332         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1333
1334         root = nilfs_btree_get_root(btree);
1335         child = nilfs_btree_get_nonroot_node(path, level);
1336         ncblk = nilfs_btree_nchildren_per_block(btree);
1337
1338         nilfs_btree_node_delete(root, 0, NULL, NULL,
1339                                 NILFS_BTREE_ROOT_NCHILDREN_MAX);
1340         nilfs_btree_node_set_level(root, level);
1341         n = nilfs_btree_node_get_nchildren(child);
1342         nilfs_btree_node_move_left(root, child, n,
1343                                    NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk);
1344
1345         nilfs_btnode_delete(path[level].bp_bh);
1346         path[level].bp_bh = NULL;
1347 }
1348
1349
1350 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree,
1351                                       struct nilfs_btree_path *path,
1352                                       int *levelp,
1353                                       struct nilfs_bmap_stats *stats,
1354                                       struct inode *dat)
1355 {
1356         struct buffer_head *bh;
1357         struct nilfs_btree_node *node, *parent, *sib;
1358         __u64 sibptr;
1359         int pindex, level, ncmin, ncmax, ncblk, ret;
1360
1361         ret = 0;
1362         stats->bs_nblocks = 0;
1363         ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
1364         ncblk = nilfs_btree_nchildren_per_block(btree);
1365
1366         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1367              level < nilfs_btree_height(btree) - 1;
1368              level++) {
1369                 node = nilfs_btree_get_nonroot_node(path, level);
1370                 path[level].bp_oldreq.bpr_ptr =
1371                         nilfs_btree_node_get_ptr(node, path[level].bp_index,
1372                                                  ncblk);
1373                 ret = nilfs_bmap_prepare_end_ptr(btree,
1374                                                  &path[level].bp_oldreq, dat);
1375                 if (ret < 0)
1376                         goto err_out_child_node;
1377
1378                 if (nilfs_btree_node_get_nchildren(node) > ncmin) {
1379                         path[level].bp_op = nilfs_btree_do_delete;
1380                         stats->bs_nblocks++;
1381                         goto out;
1382                 }
1383
1384                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1385                 pindex = path[level + 1].bp_index;
1386
1387                 if (pindex > 0) {
1388                         /* left sibling */
1389                         sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1,
1390                                                           ncmax);
1391                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1392                         if (ret < 0)
1393                                 goto err_out_curr_node;
1394                         sib = (struct nilfs_btree_node *)bh->b_data;
1395                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1396                                 path[level].bp_sib_bh = bh;
1397                                 path[level].bp_op = nilfs_btree_borrow_left;
1398                                 stats->bs_nblocks++;
1399                                 goto out;
1400                         } else {
1401                                 path[level].bp_sib_bh = bh;
1402                                 path[level].bp_op = nilfs_btree_concat_left;
1403                                 stats->bs_nblocks++;
1404                                 /* continue; */
1405                         }
1406                 } else if (pindex <
1407                            nilfs_btree_node_get_nchildren(parent) - 1) {
1408                         /* right sibling */
1409                         sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1,
1410                                                           ncmax);
1411                         ret = nilfs_btree_get_block(btree, sibptr, &bh);
1412                         if (ret < 0)
1413                                 goto err_out_curr_node;
1414                         sib = (struct nilfs_btree_node *)bh->b_data;
1415                         if (nilfs_btree_node_get_nchildren(sib) > ncmin) {
1416                                 path[level].bp_sib_bh = bh;
1417                                 path[level].bp_op = nilfs_btree_borrow_right;
1418                                 stats->bs_nblocks++;
1419                                 goto out;
1420                         } else {
1421                                 path[level].bp_sib_bh = bh;
1422                                 path[level].bp_op = nilfs_btree_concat_right;
1423                                 stats->bs_nblocks++;
1424                                 /* continue; */
1425                         }
1426                 } else {
1427                         /* no siblings */
1428                         /* the only child of the root node */
1429                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1430                         if (nilfs_btree_node_get_nchildren(node) - 1 <=
1431                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1432                                 path[level].bp_op = nilfs_btree_shrink;
1433                                 stats->bs_nblocks += 2;
1434                         } else {
1435                                 path[level].bp_op = nilfs_btree_do_delete;
1436                                 stats->bs_nblocks++;
1437                         }
1438
1439                         goto out;
1440
1441                 }
1442         }
1443
1444         node = nilfs_btree_get_root(btree);
1445         path[level].bp_oldreq.bpr_ptr =
1446                 nilfs_btree_node_get_ptr(node, path[level].bp_index,
1447                                          NILFS_BTREE_ROOT_NCHILDREN_MAX);
1448
1449         ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat);
1450         if (ret < 0)
1451                 goto err_out_child_node;
1452
1453         /* child of the root node is deleted */
1454         path[level].bp_op = nilfs_btree_do_delete;
1455         stats->bs_nblocks++;
1456
1457         /* success */
1458  out:
1459         *levelp = level;
1460         return ret;
1461
1462         /* error */
1463  err_out_curr_node:
1464         nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1465  err_out_child_node:
1466         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1467                 brelse(path[level].bp_sib_bh);
1468                 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat);
1469         }
1470         *levelp = level;
1471         stats->bs_nblocks = 0;
1472         return ret;
1473 }
1474
1475 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree,
1476                                       struct nilfs_btree_path *path,
1477                                       int maxlevel, struct inode *dat)
1478 {
1479         int level;
1480
1481         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1482                 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat);
1483                 path[level].bp_op(btree, path, level, NULL, NULL);
1484         }
1485
1486         if (!nilfs_bmap_dirty(btree))
1487                 nilfs_bmap_set_dirty(btree);
1488 }
1489
1490 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key)
1491
1492 {
1493         struct nilfs_btree_path *path;
1494         struct nilfs_bmap_stats stats;
1495         struct inode *dat;
1496         int level, ret;
1497
1498         path = nilfs_btree_alloc_path();
1499         if (path == NULL)
1500                 return -ENOMEM;
1501
1502         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1503                                     NILFS_BTREE_LEVEL_NODE_MIN, 0);
1504         if (ret < 0)
1505                 goto out;
1506
1507
1508         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1509
1510         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat);
1511         if (ret < 0)
1512                 goto out;
1513         nilfs_btree_commit_delete(btree, path, level, dat);
1514         nilfs_bmap_sub_blocks(btree, stats.bs_nblocks);
1515
1516 out:
1517         nilfs_btree_free_path(path);
1518         return ret;
1519 }
1520
1521 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp)
1522 {
1523         struct nilfs_btree_path *path;
1524         int ret;
1525
1526         path = nilfs_btree_alloc_path();
1527         if (path == NULL)
1528                 return -ENOMEM;
1529
1530         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1531
1532         nilfs_btree_free_path(path);
1533
1534         return ret;
1535 }
1536
1537 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key)
1538 {
1539         struct buffer_head *bh;
1540         struct nilfs_btree_node *root, *node;
1541         __u64 maxkey, nextmaxkey;
1542         __u64 ptr;
1543         int nchildren, ret;
1544
1545         root = nilfs_btree_get_root(btree);
1546         switch (nilfs_btree_height(btree)) {
1547         case 2:
1548                 bh = NULL;
1549                 node = root;
1550                 break;
1551         case 3:
1552                 nchildren = nilfs_btree_node_get_nchildren(root);
1553                 if (nchildren > 1)
1554                         return 0;
1555                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1556                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1557                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1558                 if (ret < 0)
1559                         return ret;
1560                 node = (struct nilfs_btree_node *)bh->b_data;
1561                 break;
1562         default:
1563                 return 0;
1564         }
1565
1566         nchildren = nilfs_btree_node_get_nchildren(node);
1567         maxkey = nilfs_btree_node_get_key(node, nchildren - 1);
1568         nextmaxkey = (nchildren > 1) ?
1569                 nilfs_btree_node_get_key(node, nchildren - 2) : 0;
1570         if (bh != NULL)
1571                 brelse(bh);
1572
1573         return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
1574 }
1575
1576 static int nilfs_btree_gather_data(struct nilfs_bmap *btree,
1577                                    __u64 *keys, __u64 *ptrs, int nitems)
1578 {
1579         struct buffer_head *bh;
1580         struct nilfs_btree_node *node, *root;
1581         __le64 *dkeys;
1582         __le64 *dptrs;
1583         __u64 ptr;
1584         int nchildren, ncmax, i, ret;
1585
1586         root = nilfs_btree_get_root(btree);
1587         switch (nilfs_btree_height(btree)) {
1588         case 2:
1589                 bh = NULL;
1590                 node = root;
1591                 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX;
1592                 break;
1593         case 3:
1594                 nchildren = nilfs_btree_node_get_nchildren(root);
1595                 WARN_ON(nchildren > 1);
1596                 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1,
1597                                                NILFS_BTREE_ROOT_NCHILDREN_MAX);
1598                 ret = nilfs_btree_get_block(btree, ptr, &bh);
1599                 if (ret < 0)
1600                         return ret;
1601                 node = (struct nilfs_btree_node *)bh->b_data;
1602                 ncmax = nilfs_btree_nchildren_per_block(btree);
1603                 break;
1604         default:
1605                 node = NULL;
1606                 return -EINVAL;
1607         }
1608
1609         nchildren = nilfs_btree_node_get_nchildren(node);
1610         if (nchildren < nitems)
1611                 nitems = nchildren;
1612         dkeys = nilfs_btree_node_dkeys(node);
1613         dptrs = nilfs_btree_node_dptrs(node, ncmax);
1614         for (i = 0; i < nitems; i++) {
1615                 keys[i] = le64_to_cpu(dkeys[i]);
1616                 ptrs[i] = le64_to_cpu(dptrs[i]);
1617         }
1618
1619         if (bh != NULL)
1620                 brelse(bh);
1621
1622         return nitems;
1623 }
1624
1625 static int
1626 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key,
1627                                        union nilfs_bmap_ptr_req *dreq,
1628                                        union nilfs_bmap_ptr_req *nreq,
1629                                        struct buffer_head **bhp,
1630                                        struct nilfs_bmap_stats *stats)
1631 {
1632         struct buffer_head *bh;
1633         struct inode *dat = NULL;
1634         int ret;
1635
1636         stats->bs_nblocks = 0;
1637
1638         /* for data */
1639         /* cannot find near ptr */
1640         if (NILFS_BMAP_USE_VBN(btree)) {
1641                 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
1642                 dat = nilfs_bmap_get_dat(btree);
1643         }
1644
1645         ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat);
1646         if (ret < 0)
1647                 return ret;
1648
1649         *bhp = NULL;
1650         stats->bs_nblocks++;
1651         if (nreq != NULL) {
1652                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1653                 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat);
1654                 if (ret < 0)
1655                         goto err_out_dreq;
1656
1657                 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
1658                 if (ret < 0)
1659                         goto err_out_nreq;
1660
1661                 *bhp = bh;
1662                 stats->bs_nblocks++;
1663         }
1664
1665         /* success */
1666         return 0;
1667
1668         /* error */
1669  err_out_nreq:
1670         nilfs_bmap_abort_alloc_ptr(btree, nreq, dat);
1671  err_out_dreq:
1672         nilfs_bmap_abort_alloc_ptr(btree, dreq, dat);
1673         stats->bs_nblocks = 0;
1674         return ret;
1675
1676 }
1677
1678 static void
1679 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
1680                                       __u64 key, __u64 ptr,
1681                                       const __u64 *keys, const __u64 *ptrs,
1682                                       int n,
1683                                       union nilfs_bmap_ptr_req *dreq,
1684                                       union nilfs_bmap_ptr_req *nreq,
1685                                       struct buffer_head *bh)
1686 {
1687         struct nilfs_btree_node *node;
1688         struct inode *dat;
1689         __u64 tmpptr;
1690         int ncblk;
1691
1692         /* free resources */
1693         if (btree->b_ops->bop_clear != NULL)
1694                 btree->b_ops->bop_clear(btree);
1695
1696         /* ptr must be a pointer to a buffer head. */
1697         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1698
1699         /* convert and insert */
1700         dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
1701         nilfs_btree_init(btree);
1702         if (nreq != NULL) {
1703                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1704                 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
1705
1706                 /* create child node at level 1 */
1707                 node = (struct nilfs_btree_node *)bh->b_data;
1708                 ncblk = nilfs_btree_nchildren_per_block(btree);
1709                 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
1710                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
1711                 if (!buffer_dirty(bh))
1712                         nilfs_btnode_mark_dirty(bh);
1713                 if (!nilfs_bmap_dirty(btree))
1714                         nilfs_bmap_set_dirty(btree);
1715
1716                 brelse(bh);
1717
1718                 /* create root node at level 2 */
1719                 node = nilfs_btree_get_root(btree);
1720                 tmpptr = nreq->bpr_ptr;
1721                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
1722                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1723                                       &keys[0], &tmpptr);
1724         } else {
1725                 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
1726
1727                 /* create root node at level 1 */
1728                 node = nilfs_btree_get_root(btree);
1729                 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
1730                                       NILFS_BTREE_ROOT_NCHILDREN_MAX,
1731                                       keys, ptrs);
1732                 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
1733                                         NILFS_BTREE_ROOT_NCHILDREN_MAX);
1734                 if (!nilfs_bmap_dirty(btree))
1735                         nilfs_bmap_set_dirty(btree);
1736         }
1737
1738         if (NILFS_BMAP_USE_VBN(btree))
1739                 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
1740 }
1741
1742 /**
1743  * nilfs_btree_convert_and_insert -
1744  * @bmap:
1745  * @key:
1746  * @ptr:
1747  * @keys:
1748  * @ptrs:
1749  * @n:
1750  */
1751 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
1752                                    __u64 key, __u64 ptr,
1753                                    const __u64 *keys, const __u64 *ptrs, int n)
1754 {
1755         struct buffer_head *bh;
1756         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1757         struct nilfs_bmap_stats stats;
1758         int ret;
1759
1760         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1761                 di = &dreq;
1762                 ni = NULL;
1763         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1764                            1 << btree->b_inode->i_blkbits)) {
1765                 di = &dreq;
1766                 ni = &nreq;
1767         } else {
1768                 di = NULL;
1769                 ni = NULL;
1770                 BUG();
1771         }
1772
1773         ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
1774                                                      &stats);
1775         if (ret < 0)
1776                 return ret;
1777         nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
1778                                               di, ni, bh);
1779         nilfs_bmap_add_blocks(btree, stats.bs_nblocks);
1780         return 0;
1781 }
1782
1783 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
1784                                    struct nilfs_btree_path *path,
1785                                    int level,
1786                                    struct buffer_head *bh)
1787 {
1788         while ((++level < nilfs_btree_height(btree) - 1) &&
1789                !buffer_dirty(path[level].bp_bh))
1790                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1791
1792         return 0;
1793 }
1794
1795 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
1796                                         struct nilfs_btree_path *path,
1797                                         int level, struct inode *dat)
1798 {
1799         struct nilfs_btree_node *parent;
1800         int ncmax, ret;
1801
1802         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1803         path[level].bp_oldreq.bpr_ptr =
1804                 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
1805                                          ncmax);
1806         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1807         ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
1808                                        &path[level].bp_newreq.bpr_req);
1809         if (ret < 0)
1810                 return ret;
1811
1812         if (buffer_nilfs_node(path[level].bp_bh)) {
1813                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1814                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1815                 path[level].bp_ctxt.bh = path[level].bp_bh;
1816                 ret = nilfs_btnode_prepare_change_key(
1817                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1818                         &path[level].bp_ctxt);
1819                 if (ret < 0) {
1820                         nilfs_dat_abort_update(dat,
1821                                                &path[level].bp_oldreq.bpr_req,
1822                                                &path[level].bp_newreq.bpr_req);
1823                         return ret;
1824                 }
1825         }
1826
1827         return 0;
1828 }
1829
1830 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree,
1831                                         struct nilfs_btree_path *path,
1832                                         int level, struct inode *dat)
1833 {
1834         struct nilfs_btree_node *parent;
1835         int ncmax;
1836
1837         nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req,
1838                                 &path[level].bp_newreq.bpr_req,
1839                                 btree->b_ptr_type == NILFS_BMAP_PTR_VS);
1840
1841         if (buffer_nilfs_node(path[level].bp_bh)) {
1842                 nilfs_btnode_commit_change_key(
1843                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1844                         &path[level].bp_ctxt);
1845                 path[level].bp_bh = path[level].bp_ctxt.bh;
1846         }
1847         set_buffer_nilfs_volatile(path[level].bp_bh);
1848
1849         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1850         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index,
1851                                  path[level].bp_newreq.bpr_ptr, ncmax);
1852 }
1853
1854 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree,
1855                                        struct nilfs_btree_path *path,
1856                                        int level, struct inode *dat)
1857 {
1858         nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req,
1859                                &path[level].bp_newreq.bpr_req);
1860         if (buffer_nilfs_node(path[level].bp_bh))
1861                 nilfs_btnode_abort_change_key(
1862                         &NILFS_BMAP_I(btree)->i_btnode_cache,
1863                         &path[level].bp_ctxt);
1864 }
1865
1866 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree,
1867                                            struct nilfs_btree_path *path,
1868                                            int minlevel, int *maxlevelp,
1869                                            struct inode *dat)
1870 {
1871         int level, ret;
1872
1873         level = minlevel;
1874         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1875                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1876                 if (ret < 0)
1877                         return ret;
1878         }
1879         while ((++level < nilfs_btree_height(btree) - 1) &&
1880                !buffer_dirty(path[level].bp_bh)) {
1881
1882                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1883                 ret = nilfs_btree_prepare_update_v(btree, path, level, dat);
1884                 if (ret < 0)
1885                         goto out;
1886         }
1887
1888         /* success */
1889         *maxlevelp = level - 1;
1890         return 0;
1891
1892         /* error */
1893  out:
1894         while (--level > minlevel)
1895                 nilfs_btree_abort_update_v(btree, path, level, dat);
1896         if (!buffer_nilfs_volatile(path[level].bp_bh))
1897                 nilfs_btree_abort_update_v(btree, path, level, dat);
1898         return ret;
1899 }
1900
1901 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree,
1902                                            struct nilfs_btree_path *path,
1903                                            int minlevel, int maxlevel,
1904                                            struct buffer_head *bh,
1905                                            struct inode *dat)
1906 {
1907         int level;
1908
1909         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1910                 nilfs_btree_commit_update_v(btree, path, minlevel, dat);
1911
1912         for (level = minlevel + 1; level <= maxlevel; level++)
1913                 nilfs_btree_commit_update_v(btree, path, level, dat);
1914 }
1915
1916 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
1917                                    struct nilfs_btree_path *path,
1918                                    int level, struct buffer_head *bh)
1919 {
1920         int maxlevel = 0, ret;
1921         struct nilfs_btree_node *parent;
1922         struct inode *dat = nilfs_bmap_get_dat(btree);
1923         __u64 ptr;
1924         int ncmax;
1925
1926         get_bh(bh);
1927         path[level].bp_bh = bh;
1928         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
1929                                               dat);
1930         if (ret < 0)
1931                 goto out;
1932
1933         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1934                 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
1935                 ptr = nilfs_btree_node_get_ptr(parent,
1936                                                path[level + 1].bp_index,
1937                                                ncmax);
1938                 ret = nilfs_dat_mark_dirty(dat, ptr);
1939                 if (ret < 0)
1940                         goto out;
1941         }
1942
1943         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);
1944
1945  out:
1946         brelse(path[level].bp_bh);
1947         path[level].bp_bh = NULL;
1948         return ret;
1949 }
1950
1951 static int nilfs_btree_propagate(struct nilfs_bmap *btree,
1952                                  struct buffer_head *bh)
1953 {
1954         struct nilfs_btree_path *path;
1955         struct nilfs_btree_node *node;
1956         __u64 key;
1957         int level, ret;
1958
1959         WARN_ON(!buffer_dirty(bh));
1960
1961         path = nilfs_btree_alloc_path();
1962         if (path == NULL)
1963                 return -ENOMEM;
1964
1965         if (buffer_nilfs_node(bh)) {
1966                 node = (struct nilfs_btree_node *)bh->b_data;
1967                 key = nilfs_btree_node_get_key(node, 0);
1968                 level = nilfs_btree_node_get_level(node);
1969         } else {
1970                 key = nilfs_bmap_data_get_key(btree, bh);
1971                 level = NILFS_BTREE_LEVEL_DATA;
1972         }
1973
1974         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
1975         if (ret < 0) {
1976                 if (unlikely(ret == -ENOENT))
1977                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1978                                __func__, (unsigned long long)key, level);
1979                 goto out;
1980         }
1981
1982         ret = NILFS_BMAP_USE_VBN(btree) ?
1983                 nilfs_btree_propagate_v(btree, path, level, bh) :
1984                 nilfs_btree_propagate_p(btree, path, level, bh);
1985
1986  out:
1987         nilfs_btree_free_path(path);
1988
1989         return ret;
1990 }
1991
1992 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
1993                                     struct buffer_head *bh)
1994 {
1995         return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
1996 }
1997
1998 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
1999                                          struct list_head *lists,
2000                                          struct buffer_head *bh)
2001 {
2002         struct list_head *head;
2003         struct buffer_head *cbh;
2004         struct nilfs_btree_node *node, *cnode;
2005         __u64 key, ckey;
2006         int level;
2007
2008         get_bh(bh);
2009         node = (struct nilfs_btree_node *)bh->b_data;
2010         key = nilfs_btree_node_get_key(node, 0);
2011         level = nilfs_btree_node_get_level(node);
2012         if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
2013             level >= NILFS_BTREE_LEVEL_MAX) {
2014                 dump_stack();
2015                 printk(KERN_WARNING
2016                        "%s: invalid btree level: %d (key=%llu, ino=%lu, "
2017                        "blocknr=%llu)\n",
2018                        __func__, level, (unsigned long long)key,
2019                        NILFS_BMAP_I(btree)->vfs_inode.i_ino,
2020                        (unsigned long long)bh->b_blocknr);
2021                 return;
2022         }
2023
2024         list_for_each(head, &lists[level]) {
2025                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
2026                 cnode = (struct nilfs_btree_node *)cbh->b_data;
2027                 ckey = nilfs_btree_node_get_key(cnode, 0);
2028                 if (key < ckey)
2029                         break;
2030         }
2031         list_add_tail(&bh->b_assoc_buffers, head);
2032 }
2033
2034 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
2035                                              struct list_head *listp)
2036 {
2037         struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
2038         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
2039         struct pagevec pvec;
2040         struct buffer_head *bh, *head;
2041         pgoff_t index = 0;
2042         int level, i;
2043
2044         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2045              level < NILFS_BTREE_LEVEL_MAX;
2046              level++)
2047                 INIT_LIST_HEAD(&lists[level]);
2048
2049         pagevec_init(&pvec, 0);
2050
2051         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
2052                                   PAGEVEC_SIZE)) {
2053                 for (i = 0; i < pagevec_count(&pvec); i++) {
2054                         bh = head = page_buffers(pvec.pages[i]);
2055                         do {
2056                                 if (buffer_dirty(bh))
2057                                         nilfs_btree_add_dirty_buffer(btree,
2058                                                                      lists, bh);
2059                         } while ((bh = bh->b_this_page) != head);
2060                 }
2061                 pagevec_release(&pvec);
2062                 cond_resched();
2063         }
2064
2065         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2066              level < NILFS_BTREE_LEVEL_MAX;
2067              level++)
2068                 list_splice_tail(&lists[level], listp);
2069 }
2070
2071 static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
2072                                 struct nilfs_btree_path *path,
2073                                 int level,
2074                                 struct buffer_head **bh,
2075                                 sector_t blocknr,
2076                                 union nilfs_binfo *binfo)
2077 {
2078         struct nilfs_btree_node *parent;
2079         __u64 key;
2080         __u64 ptr;
2081         int ncmax, ret;
2082
2083         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2084         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2085                                        ncmax);
2086         if (buffer_nilfs_node(*bh)) {
2087                 path[level].bp_ctxt.oldkey = ptr;
2088                 path[level].bp_ctxt.newkey = blocknr;
2089                 path[level].bp_ctxt.bh = *bh;
2090                 ret = nilfs_btnode_prepare_change_key(
2091                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2092                         &path[level].bp_ctxt);
2093                 if (ret < 0)
2094                         return ret;
2095                 nilfs_btnode_commit_change_key(
2096                         &NILFS_BMAP_I(btree)->i_btnode_cache,
2097                         &path[level].bp_ctxt);
2098                 *bh = path[level].bp_ctxt.bh;
2099         }
2100
2101         nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
2102                                  ncmax);
2103
2104         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2105         /* on-disk format */
2106         binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
2107         binfo->bi_dat.bi_level = level;
2108
2109         return 0;
2110 }
2111
2112 static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
2113                                 struct nilfs_btree_path *path,
2114                                 int level,
2115                                 struct buffer_head **bh,
2116                                 sector_t blocknr,
2117                                 union nilfs_binfo *binfo)
2118 {
2119         struct nilfs_btree_node *parent;
2120         struct inode *dat = nilfs_bmap_get_dat(btree);
2121         __u64 key;
2122         __u64 ptr;
2123         union nilfs_bmap_ptr_req req;
2124         int ncmax, ret;
2125
2126         parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
2127         ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
2128                                        ncmax);
2129         req.bpr_ptr = ptr;
2130         ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
2131         if (ret < 0)
2132                 return ret;
2133         nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);
2134
2135         key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
2136         /* on-disk format */
2137         binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
2138         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2139
2140         return 0;
2141 }
2142
2143 static int nilfs_btree_assign(struct nilfs_bmap *btree,
2144                               struct buffer_head **bh,
2145                               sector_t blocknr,
2146                               union nilfs_binfo *binfo)
2147 {
2148         struct nilfs_btree_path *path;
2149         struct nilfs_btree_node *node;
2150         __u64 key;
2151         int level, ret;
2152
2153         path = nilfs_btree_alloc_path();
2154         if (path == NULL)
2155                 return -ENOMEM;
2156
2157         if (buffer_nilfs_node(*bh)) {
2158                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2159                 key = nilfs_btree_node_get_key(node, 0);
2160                 level = nilfs_btree_node_get_level(node);
2161         } else {
2162                 key = nilfs_bmap_data_get_key(btree, *bh);
2163                 level = NILFS_BTREE_LEVEL_DATA;
2164         }
2165
2166         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
2167         if (ret < 0) {
2168                 WARN_ON(ret == -ENOENT);
2169                 goto out;
2170         }
2171
2172         ret = NILFS_BMAP_USE_VBN(btree) ?
2173                 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
2174                 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
2175
2176  out:
2177         nilfs_btree_free_path(path);
2178
2179         return ret;
2180 }
2181
2182 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
2183                                  struct buffer_head **bh,
2184                                  sector_t blocknr,
2185                                  union nilfs_binfo *binfo)
2186 {
2187         struct nilfs_btree_node *node;
2188         __u64 key;
2189         int ret;
2190
2191         ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
2192                              blocknr);
2193         if (ret < 0)
2194                 return ret;
2195
2196         if (buffer_nilfs_node(*bh)) {
2197                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2198                 key = nilfs_btree_node_get_key(node, 0);
2199         } else
2200                 key = nilfs_bmap_data_get_key(btree, *bh);
2201
2202         /* on-disk format */
2203         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2204         binfo->bi_v.bi_blkoff = cpu_to_le64(key);
2205
2206         return 0;
2207 }
2208
2209 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
2210 {
2211         struct buffer_head *bh;
2212         struct nilfs_btree_path *path;
2213         __u64 ptr;
2214         int ret;
2215
2216         path = nilfs_btree_alloc_path();
2217         if (path == NULL)
2218                 return -ENOMEM;
2219
2220         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
2221         if (ret < 0) {
2222                 WARN_ON(ret == -ENOENT);
2223                 goto out;
2224         }
2225         ret = nilfs_btree_get_block(btree, ptr, &bh);
2226         if (ret < 0) {
2227                 WARN_ON(ret == -ENOENT);
2228                 goto out;
2229         }
2230
2231         if (!buffer_dirty(bh))
2232                 nilfs_btnode_mark_dirty(bh);
2233         brelse(bh);
2234         if (!nilfs_bmap_dirty(btree))
2235                 nilfs_bmap_set_dirty(btree);
2236
2237  out:
2238         nilfs_btree_free_path(path);
2239         return ret;
2240 }
2241
2242 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2243         .bop_lookup             =       nilfs_btree_lookup,
2244         .bop_lookup_contig      =       nilfs_btree_lookup_contig,
2245         .bop_insert             =       nilfs_btree_insert,
2246         .bop_delete             =       nilfs_btree_delete,
2247         .bop_clear              =       NULL,
2248
2249         .bop_propagate          =       nilfs_btree_propagate,
2250
2251         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2252
2253         .bop_assign             =       nilfs_btree_assign,
2254         .bop_mark               =       nilfs_btree_mark,
2255
2256         .bop_last_key           =       nilfs_btree_last_key,
2257         .bop_check_insert       =       NULL,
2258         .bop_check_delete       =       nilfs_btree_check_delete,
2259         .bop_gather_data        =       nilfs_btree_gather_data,
2260 };
2261
2262 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2263         .bop_lookup             =       NULL,
2264         .bop_lookup_contig      =       NULL,
2265         .bop_insert             =       NULL,
2266         .bop_delete             =       NULL,
2267         .bop_clear              =       NULL,
2268
2269         .bop_propagate          =       nilfs_btree_propagate_gc,
2270
2271         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2272
2273         .bop_assign             =       nilfs_btree_assign_gc,
2274         .bop_mark               =       NULL,
2275
2276         .bop_last_key           =       NULL,
2277         .bop_check_insert       =       NULL,
2278         .bop_check_delete       =       NULL,
2279         .bop_gather_data        =       NULL,
2280 };
2281
2282 int nilfs_btree_init(struct nilfs_bmap *bmap)
2283 {
2284         bmap->b_ops = &nilfs_btree_ops;
2285         bmap->b_nchildren_per_block =
2286                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2287         return 0;
2288 }
2289
2290 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2291 {
2292         bmap->b_ops = &nilfs_btree_ops_gc;
2293         bmap->b_nchildren_per_block =
2294                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
2295 }