Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
[linux-3.10.git] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that bring the file
22  * system into an inconsistent state. In such a state, a power-loss
23  * or kernel panic event would cause the data on disk to be
24  * lost or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and a FLUSH request to the device where
41  *        these blocks are located was received and completed.
42  *    2b. All referenced blocks need to have a generation
43  *        number which is equal to the parent's number.
44  *
45  * One issue that was found using this module was that the log
46  * tree on disk became temporarily corrupted because disk blocks
47  * that had been in use for the log tree had been freed and
48  * reused too early, while being referenced by the written super
49  * block.
50  *
51  * The search term in the kernel log that can be used to filter
52  * on the existence of detected integrity issues is
53  * "btrfs: attempt".
54  *
55  * The integrity check is enabled via mount options. These
56  * mount options are only supported if the integrity check
57  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
58  *
59  * Example #1, apply integrity checks to all metadata:
60  * mount /dev/sdb1 /mnt -o check_int
61  *
62  * Example #2, apply integrity checks to all metadata and
63  * to data extents:
64  * mount /dev/sdb1 /mnt -o check_int_data
65  *
66  * Example #3, apply integrity checks to all metadata and dump
67  * the tree that the super block references to kernel messages
68  * each time after a super block was written:
69  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
70  *
71  * If the integrity check tool is included and activated in
72  * the mount options, plenty of kernel memory is used, and
73  * plenty of additional CPU cycles are spent. Enabling this
74  * functionality is not intended for normal use. In most
75  * cases, unless you are a btrfs developer who needs to verify
76  * the integrity of (super)-block write requests, do not
77  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
78  * include and compile the integrity check tool.
79  */
80
81 #include <linux/sched.h>
82 #include <linux/slab.h>
83 #include <linux/buffer_head.h>
84 #include <linux/mutex.h>
85 #include <linux/crc32c.h>
86 #include <linux/genhd.h>
87 #include <linux/blkdev.h>
88 #include "ctree.h"
89 #include "disk-io.h"
90 #include "transaction.h"
91 #include "extent_io.h"
92 #include "volumes.h"
93 #include "print-tree.h"
94 #include "locking.h"
95 #include "check-integrity.h"
96
/*
 * Number of buckets of each hashtable. The hash functions in this file
 * reduce their hash value with "& (SIZE - 1)", so every size has to be
 * a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE		0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE	0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE	0x100

/* Values stored in the magic fields of the objects (debugging aid) */
#define BTRFSIC_BLOCK_MAGIC_NUMBER		0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER		0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER		0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER	20111300

/* in characters, excluding " [...]" */
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL	(200 - 6)

/* Initial value of btrfsic_block::generation, see btrfsic_block_init() */
#define BTRFSIC_GENERATION_UNKNOWN		((u64)-1)
107
/*
 * Bit definitions for the print_mask. The mask is passed with the
 * mount option check_int_print_mask; the individual bits are or'ed
 * together. Example: check_int_print_mask=263 (0x107) selects
 * SUPERBLOCK_WRITE, ROOT_CHUNK_LOG_TREE_LOCATION, TREE_AFTER_SB_WRITE
 * and INITIAL_TREE.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000
125
126 struct btrfsic_dev_state;
127 struct btrfsic_state;
128
129 struct btrfsic_block {
130         u32 magic_num;          /* only used for debug purposes */
131         unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
132         unsigned int is_superblock:1;   /* if it is one of the superblocks */
133         unsigned int is_iodone:1;       /* if is done by lower subsystem */
134         unsigned int iodone_w_error:1;  /* error was indicated to endio */
135         unsigned int never_written:1;   /* block was added because it was
136                                          * referenced, not because it was
137                                          * written */
138         unsigned int mirror_num:2;      /* large enough to hold
139                                          * BTRFS_SUPER_MIRROR_MAX */
140         struct btrfsic_dev_state *dev_state;
141         u64 dev_bytenr;         /* key, physical byte num on disk */
142         u64 logical_bytenr;     /* logical byte num on disk */
143         u64 generation;
144         struct btrfs_disk_key disk_key; /* extra info to print in case of
145                                          * issues, will not always be correct */
146         struct list_head collision_resolving_node;      /* list node */
147         struct list_head all_blocks_node;       /* list node */
148
149         /* the following two lists contain block_link items */
150         struct list_head ref_to_list;   /* list */
151         struct list_head ref_from_list; /* list */
152         struct btrfsic_block *next_in_same_bio;
153         void *orig_bio_bh_private;
154         union {
155                 bio_end_io_t *bio;
156                 bh_end_io_t *bh;
157         } orig_bio_bh_end_io;
158         int submit_bio_bh_rw;
159         u64 flush_gen; /* only valid if !never_written */
160 };
161
162 /*
163  * Elements of this type are allocated dynamically and required because
164  * each block object can refer to and can be ref from multiple blocks.
165  * The key to lookup them in the hashtable is the dev_bytenr of
166  * the block ref to plus the one from the block refered from.
167  * The fact that they are searchable via a hashtable and that a
168  * ref_cnt is maintained is not required for the btrfs integrity
169  * check algorithm itself, it is only used to make the output more
170  * beautiful in case that an error is detected (an error is defined
171  * as a write operation to a block while that block is still referenced).
172  */
173 struct btrfsic_block_link {
174         u32 magic_num;          /* only used for debug purposes */
175         u32 ref_cnt;
176         struct list_head node_ref_to;   /* list node */
177         struct list_head node_ref_from; /* list node */
178         struct list_head collision_resolving_node;      /* list node */
179         struct btrfsic_block *block_ref_to;
180         struct btrfsic_block *block_ref_from;
181         u64 parent_generation;
182 };
183
184 struct btrfsic_dev_state {
185         u32 magic_num;          /* only used for debug purposes */
186         struct block_device *bdev;
187         struct btrfsic_state *state;
188         struct list_head collision_resolving_node;      /* list node */
189         struct btrfsic_block dummy_block_for_bio_bh_flush;
190         u64 last_flush_gen;
191         char name[BDEVNAME_SIZE];
192 };
193
194 struct btrfsic_block_hashtable {
195         struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
196 };
197
198 struct btrfsic_block_link_hashtable {
199         struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
200 };
201
202 struct btrfsic_dev_state_hashtable {
203         struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
204 };
205
206 struct btrfsic_block_data_ctx {
207         u64 start;              /* virtual bytenr */
208         u64 dev_bytenr;         /* physical bytenr on device */
209         u32 len;
210         struct btrfsic_dev_state *dev;
211         char **datav;
212         struct page **pagev;
213         void *mem_to_free;
214 };
215
216 /* This structure is used to implement recursion without occupying
217  * any stack space, refer to btrfsic_process_metablock() */
218 struct btrfsic_stack_frame {
219         u32 magic;
220         u32 nr;
221         int error;
222         int i;
223         int limit_nesting;
224         int num_copies;
225         int mirror_num;
226         struct btrfsic_block *block;
227         struct btrfsic_block_data_ctx *block_ctx;
228         struct btrfsic_block *next_block;
229         struct btrfsic_block_data_ctx next_block_ctx;
230         struct btrfs_header *hdr;
231         struct btrfsic_stack_frame *prev;
232 };
233
234 /* Some state per mounted filesystem */
235 struct btrfsic_state {
236         u32 print_mask;
237         int include_extent_data;
238         int csum_size;
239         struct list_head all_blocks_list;
240         struct btrfsic_block_hashtable block_hashtable;
241         struct btrfsic_block_link_hashtable block_link_hashtable;
242         struct btrfs_root *root;
243         u64 max_superblock_generation;
244         struct btrfsic_block *latest_superblock;
245         u32 metablock_size;
246         u32 datablock_size;
247 };
248
249 static void btrfsic_block_init(struct btrfsic_block *b);
250 static struct btrfsic_block *btrfsic_block_alloc(void);
251 static void btrfsic_block_free(struct btrfsic_block *b);
252 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
253 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
254 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
255 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
256 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
257 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
258 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
259 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
260                                         struct btrfsic_block_hashtable *h);
261 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
262 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
263                 struct block_device *bdev,
264                 u64 dev_bytenr,
265                 struct btrfsic_block_hashtable *h);
266 static void btrfsic_block_link_hashtable_init(
267                 struct btrfsic_block_link_hashtable *h);
268 static void btrfsic_block_link_hashtable_add(
269                 struct btrfsic_block_link *l,
270                 struct btrfsic_block_link_hashtable *h);
271 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
272 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
273                 struct block_device *bdev_ref_to,
274                 u64 dev_bytenr_ref_to,
275                 struct block_device *bdev_ref_from,
276                 u64 dev_bytenr_ref_from,
277                 struct btrfsic_block_link_hashtable *h);
278 static void btrfsic_dev_state_hashtable_init(
279                 struct btrfsic_dev_state_hashtable *h);
280 static void btrfsic_dev_state_hashtable_add(
281                 struct btrfsic_dev_state *ds,
282                 struct btrfsic_dev_state_hashtable *h);
283 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
284 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
285                 struct block_device *bdev,
286                 struct btrfsic_dev_state_hashtable *h);
287 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
288 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
289 static int btrfsic_process_superblock(struct btrfsic_state *state,
290                                       struct btrfs_fs_devices *fs_devices);
291 static int btrfsic_process_metablock(struct btrfsic_state *state,
292                                      struct btrfsic_block *block,
293                                      struct btrfsic_block_data_ctx *block_ctx,
294                                      int limit_nesting, int force_iodone_flag);
295 static void btrfsic_read_from_block_data(
296         struct btrfsic_block_data_ctx *block_ctx,
297         void *dst, u32 offset, size_t len);
298 static int btrfsic_create_link_to_next_block(
299                 struct btrfsic_state *state,
300                 struct btrfsic_block *block,
301                 struct btrfsic_block_data_ctx
302                 *block_ctx, u64 next_bytenr,
303                 int limit_nesting,
304                 struct btrfsic_block_data_ctx *next_block_ctx,
305                 struct btrfsic_block **next_blockp,
306                 int force_iodone_flag,
307                 int *num_copiesp, int *mirror_nump,
308                 struct btrfs_disk_key *disk_key,
309                 u64 parent_generation);
310 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
311                                       struct btrfsic_block *block,
312                                       struct btrfsic_block_data_ctx *block_ctx,
313                                       u32 item_offset, int force_iodone_flag);
314 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
315                              struct btrfsic_block_data_ctx *block_ctx_out,
316                              int mirror_num);
317 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
318                                   u32 len, struct block_device *bdev,
319                                   struct btrfsic_block_data_ctx *block_ctx_out);
320 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
321 static int btrfsic_read_block(struct btrfsic_state *state,
322                               struct btrfsic_block_data_ctx *block_ctx);
323 static void btrfsic_dump_database(struct btrfsic_state *state);
324 static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
325 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
326                                      char **datav, unsigned int num_pages);
327 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
328                                           u64 dev_bytenr, char **mapped_datav,
329                                           unsigned int num_pages,
330                                           struct bio *bio, int *bio_is_patched,
331                                           struct buffer_head *bh,
332                                           int submit_bio_bh_rw);
333 static int btrfsic_process_written_superblock(
334                 struct btrfsic_state *state,
335                 struct btrfsic_block *const block,
336                 struct btrfs_super_block *const super_hdr);
337 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
338 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
339 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
340                                               const struct btrfsic_block *block,
341                                               int recursion_level);
342 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
343                                         struct btrfsic_block *const block,
344                                         int recursion_level);
345 static void btrfsic_print_add_link(const struct btrfsic_state *state,
346                                    const struct btrfsic_block_link *l);
347 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
348                                    const struct btrfsic_block_link *l);
349 static char btrfsic_get_block_type(const struct btrfsic_state *state,
350                                    const struct btrfsic_block *block);
351 static void btrfsic_dump_tree(const struct btrfsic_state *state);
352 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
353                                   const struct btrfsic_block *block,
354                                   int indent_level);
355 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
356                 struct btrfsic_state *state,
357                 struct btrfsic_block_data_ctx *next_block_ctx,
358                 struct btrfsic_block *next_block,
359                 struct btrfsic_block *from_block,
360                 u64 parent_generation);
361 static struct btrfsic_block *btrfsic_block_lookup_or_add(
362                 struct btrfsic_state *state,
363                 struct btrfsic_block_data_ctx *block_ctx,
364                 const char *additional_string,
365                 int is_metadata,
366                 int is_iodone,
367                 int never_written,
368                 int mirror_num,
369                 int *was_created);
370 static int btrfsic_process_superblock_dev_mirror(
371                 struct btrfsic_state *state,
372                 struct btrfsic_dev_state *dev_state,
373                 struct btrfs_device *device,
374                 int superblock_mirror_num,
375                 struct btrfsic_dev_state **selected_dev_state,
376                 struct btrfs_super_block *selected_super);
377 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
378                 struct block_device *bdev);
379 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
380                                            u64 bytenr,
381                                            struct btrfsic_dev_state *dev_state,
382                                            u64 dev_bytenr);
383
384 static struct mutex btrfsic_mutex;
385 static int btrfsic_is_initialized;
386 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
387
388
389 static void btrfsic_block_init(struct btrfsic_block *b)
390 {
391         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
392         b->dev_state = NULL;
393         b->dev_bytenr = 0;
394         b->logical_bytenr = 0;
395         b->generation = BTRFSIC_GENERATION_UNKNOWN;
396         b->disk_key.objectid = 0;
397         b->disk_key.type = 0;
398         b->disk_key.offset = 0;
399         b->is_metadata = 0;
400         b->is_superblock = 0;
401         b->is_iodone = 0;
402         b->iodone_w_error = 0;
403         b->never_written = 0;
404         b->mirror_num = 0;
405         b->next_in_same_bio = NULL;
406         b->orig_bio_bh_private = NULL;
407         b->orig_bio_bh_end_io.bio = NULL;
408         INIT_LIST_HEAD(&b->collision_resolving_node);
409         INIT_LIST_HEAD(&b->all_blocks_node);
410         INIT_LIST_HEAD(&b->ref_to_list);
411         INIT_LIST_HEAD(&b->ref_from_list);
412         b->submit_bio_bh_rw = 0;
413         b->flush_gen = 0;
414 }
415
416 static struct btrfsic_block *btrfsic_block_alloc(void)
417 {
418         struct btrfsic_block *b;
419
420         b = kzalloc(sizeof(*b), GFP_NOFS);
421         if (NULL != b)
422                 btrfsic_block_init(b);
423
424         return b;
425 }
426
427 static void btrfsic_block_free(struct btrfsic_block *b)
428 {
429         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
430         kfree(b);
431 }
432
433 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
434 {
435         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
436         l->ref_cnt = 1;
437         INIT_LIST_HEAD(&l->node_ref_to);
438         INIT_LIST_HEAD(&l->node_ref_from);
439         INIT_LIST_HEAD(&l->collision_resolving_node);
440         l->block_ref_to = NULL;
441         l->block_ref_from = NULL;
442 }
443
444 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
445 {
446         struct btrfsic_block_link *l;
447
448         l = kzalloc(sizeof(*l), GFP_NOFS);
449         if (NULL != l)
450                 btrfsic_block_link_init(l);
451
452         return l;
453 }
454
455 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
456 {
457         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
458         kfree(l);
459 }
460
461 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
462 {
463         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
464         ds->bdev = NULL;
465         ds->state = NULL;
466         ds->name[0] = '\0';
467         INIT_LIST_HEAD(&ds->collision_resolving_node);
468         ds->last_flush_gen = 0;
469         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
470         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
471         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
472 }
473
474 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
475 {
476         struct btrfsic_dev_state *ds;
477
478         ds = kzalloc(sizeof(*ds), GFP_NOFS);
479         if (NULL != ds)
480                 btrfsic_dev_state_init(ds);
481
482         return ds;
483 }
484
485 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
486 {
487         BUG_ON(!(NULL == ds ||
488                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
489         kfree(ds);
490 }
491
492 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
493 {
494         int i;
495
496         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
497                 INIT_LIST_HEAD(h->table + i);
498 }
499
500 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
501                                         struct btrfsic_block_hashtable *h)
502 {
503         const unsigned int hashval =
504             (((unsigned int)(b->dev_bytenr >> 16)) ^
505              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
506              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
507
508         list_add(&b->collision_resolving_node, h->table + hashval);
509 }
510
511 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
512 {
513         list_del(&b->collision_resolving_node);
514 }
515
516 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
517                 struct block_device *bdev,
518                 u64 dev_bytenr,
519                 struct btrfsic_block_hashtable *h)
520 {
521         const unsigned int hashval =
522             (((unsigned int)(dev_bytenr >> 16)) ^
523              ((unsigned int)((uintptr_t)bdev))) &
524              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
525         struct list_head *elem;
526
527         list_for_each(elem, h->table + hashval) {
528                 struct btrfsic_block *const b =
529                     list_entry(elem, struct btrfsic_block,
530                                collision_resolving_node);
531
532                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
533                         return b;
534         }
535
536         return NULL;
537 }
538
539 static void btrfsic_block_link_hashtable_init(
540                 struct btrfsic_block_link_hashtable *h)
541 {
542         int i;
543
544         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
545                 INIT_LIST_HEAD(h->table + i);
546 }
547
548 static void btrfsic_block_link_hashtable_add(
549                 struct btrfsic_block_link *l,
550                 struct btrfsic_block_link_hashtable *h)
551 {
552         const unsigned int hashval =
553             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
554              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
555              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
556              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
557              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
558
559         BUG_ON(NULL == l->block_ref_to);
560         BUG_ON(NULL == l->block_ref_from);
561         list_add(&l->collision_resolving_node, h->table + hashval);
562 }
563
564 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
565 {
566         list_del(&l->collision_resolving_node);
567 }
568
569 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
570                 struct block_device *bdev_ref_to,
571                 u64 dev_bytenr_ref_to,
572                 struct block_device *bdev_ref_from,
573                 u64 dev_bytenr_ref_from,
574                 struct btrfsic_block_link_hashtable *h)
575 {
576         const unsigned int hashval =
577             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
578              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
579              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
580              ((unsigned int)((uintptr_t)bdev_ref_from))) &
581              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
582         struct list_head *elem;
583
584         list_for_each(elem, h->table + hashval) {
585                 struct btrfsic_block_link *const l =
586                     list_entry(elem, struct btrfsic_block_link,
587                                collision_resolving_node);
588
589                 BUG_ON(NULL == l->block_ref_to);
590                 BUG_ON(NULL == l->block_ref_from);
591                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
592                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
593                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
594                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
595                         return l;
596         }
597
598         return NULL;
599 }
600
601 static void btrfsic_dev_state_hashtable_init(
602                 struct btrfsic_dev_state_hashtable *h)
603 {
604         int i;
605
606         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
607                 INIT_LIST_HEAD(h->table + i);
608 }
609
610 static void btrfsic_dev_state_hashtable_add(
611                 struct btrfsic_dev_state *ds,
612                 struct btrfsic_dev_state_hashtable *h)
613 {
614         const unsigned int hashval =
615             (((unsigned int)((uintptr_t)ds->bdev)) &
616              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
617
618         list_add(&ds->collision_resolving_node, h->table + hashval);
619 }
620
621 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
622 {
623         list_del(&ds->collision_resolving_node);
624 }
625
626 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
627                 struct block_device *bdev,
628                 struct btrfsic_dev_state_hashtable *h)
629 {
630         const unsigned int hashval =
631             (((unsigned int)((uintptr_t)bdev)) &
632              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
633         struct list_head *elem;
634
635         list_for_each(elem, h->table + hashval) {
636                 struct btrfsic_dev_state *const ds =
637                     list_entry(elem, struct btrfsic_dev_state,
638                                collision_resolving_node);
639
640                 if (ds->bdev == bdev)
641                         return ds;
642         }
643
644         return NULL;
645 }
646
647 static int btrfsic_process_superblock(struct btrfsic_state *state,
648                                       struct btrfs_fs_devices *fs_devices)
649 {
650         int ret = 0;
651         struct btrfs_super_block *selected_super;
652         struct list_head *dev_head = &fs_devices->devices;
653         struct btrfs_device *device;
654         struct btrfsic_dev_state *selected_dev_state = NULL;
655         int pass;
656
657         BUG_ON(NULL == state);
658         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
659         if (NULL == selected_super) {
660                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
661                 return -1;
662         }
663
664         list_for_each_entry(device, dev_head, dev_list) {
665                 int i;
666                 struct btrfsic_dev_state *dev_state;
667
668                 if (!device->bdev || !device->name)
669                         continue;
670
671                 dev_state = btrfsic_dev_state_lookup(device->bdev);
672                 BUG_ON(NULL == dev_state);
673                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
674                         ret = btrfsic_process_superblock_dev_mirror(
675                                         state, dev_state, device, i,
676                                         &selected_dev_state, selected_super);
677                         if (0 != ret && 0 == i) {
678                                 kfree(selected_super);
679                                 return ret;
680                         }
681                 }
682         }
683
684         if (NULL == state->latest_superblock) {
685                 printk(KERN_INFO "btrfsic: no superblock found!\n");
686                 kfree(selected_super);
687                 return -1;
688         }
689
690         state->csum_size = btrfs_super_csum_size(selected_super);
691
692         for (pass = 0; pass < 3; pass++) {
693                 int num_copies;
694                 int mirror_num;
695                 u64 next_bytenr;
696
697                 switch (pass) {
698                 case 0:
699                         next_bytenr = btrfs_super_root(selected_super);
700                         if (state->print_mask &
701                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
702                                 printk(KERN_INFO "root@%llu\n",
703                                        (unsigned long long)next_bytenr);
704                         break;
705                 case 1:
706                         next_bytenr = btrfs_super_chunk_root(selected_super);
707                         if (state->print_mask &
708                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
709                                 printk(KERN_INFO "chunk@%llu\n",
710                                        (unsigned long long)next_bytenr);
711                         break;
712                 case 2:
713                         next_bytenr = btrfs_super_log_root(selected_super);
714                         if (0 == next_bytenr)
715                                 continue;
716                         if (state->print_mask &
717                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
718                                 printk(KERN_INFO "log@%llu\n",
719                                        (unsigned long long)next_bytenr);
720                         break;
721                 }
722
723                 num_copies =
724                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
725                                      next_bytenr, state->metablock_size);
726                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
727                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
728                                (unsigned long long)next_bytenr, num_copies);
729
730                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
731                         struct btrfsic_block *next_block;
732                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
733                         struct btrfsic_block_link *l;
734
735                         ret = btrfsic_map_block(state, next_bytenr,
736                                                 state->metablock_size,
737                                                 &tmp_next_block_ctx,
738                                                 mirror_num);
739                         if (ret) {
740                                 printk(KERN_INFO "btrfsic:"
741                                        " btrfsic_map_block(root @%llu,"
742                                        " mirror %d) failed!\n",
743                                        (unsigned long long)next_bytenr,
744                                        mirror_num);
745                                 kfree(selected_super);
746                                 return -1;
747                         }
748
749                         next_block = btrfsic_block_hashtable_lookup(
750                                         tmp_next_block_ctx.dev->bdev,
751                                         tmp_next_block_ctx.dev_bytenr,
752                                         &state->block_hashtable);
753                         BUG_ON(NULL == next_block);
754
755                         l = btrfsic_block_link_hashtable_lookup(
756                                         tmp_next_block_ctx.dev->bdev,
757                                         tmp_next_block_ctx.dev_bytenr,
758                                         state->latest_superblock->dev_state->
759                                         bdev,
760                                         state->latest_superblock->dev_bytenr,
761                                         &state->block_link_hashtable);
762                         BUG_ON(NULL == l);
763
764                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
765                         if (ret < (int)PAGE_CACHE_SIZE) {
766                                 printk(KERN_INFO
767                                        "btrfsic: read @logical %llu failed!\n",
768                                        (unsigned long long)
769                                        tmp_next_block_ctx.start);
770                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
771                                 kfree(selected_super);
772                                 return -1;
773                         }
774
775                         ret = btrfsic_process_metablock(state,
776                                                         next_block,
777                                                         &tmp_next_block_ctx,
778                                                         BTRFS_MAX_LEVEL + 3, 1);
779                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
780                 }
781         }
782
783         kfree(selected_super);
784         return ret;
785 }
786
787 static int btrfsic_process_superblock_dev_mirror(
788                 struct btrfsic_state *state,
789                 struct btrfsic_dev_state *dev_state,
790                 struct btrfs_device *device,
791                 int superblock_mirror_num,
792                 struct btrfsic_dev_state **selected_dev_state,
793                 struct btrfs_super_block *selected_super)
794 {
795         struct btrfs_super_block *super_tmp;
796         u64 dev_bytenr;
797         struct buffer_head *bh;
798         struct btrfsic_block *superblock_tmp;
799         int pass;
800         struct block_device *const superblock_bdev = device->bdev;
801
802         /* super block bytenr is always the unmapped device bytenr */
803         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
804         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
805                 return -1;
806         bh = __bread(superblock_bdev, dev_bytenr / 4096,
807                      BTRFS_SUPER_INFO_SIZE);
808         if (NULL == bh)
809                 return -1;
810         super_tmp = (struct btrfs_super_block *)
811             (bh->b_data + (dev_bytenr & 4095));
812
813         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
814             strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
815                     sizeof(super_tmp->magic)) ||
816             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
817             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
818             btrfs_super_leafsize(super_tmp) != state->metablock_size ||
819             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
820                 brelse(bh);
821                 return 0;
822         }
823
824         superblock_tmp =
825             btrfsic_block_hashtable_lookup(superblock_bdev,
826                                            dev_bytenr,
827                                            &state->block_hashtable);
828         if (NULL == superblock_tmp) {
829                 superblock_tmp = btrfsic_block_alloc();
830                 if (NULL == superblock_tmp) {
831                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
832                         brelse(bh);
833                         return -1;
834                 }
835                 /* for superblock, only the dev_bytenr makes sense */
836                 superblock_tmp->dev_bytenr = dev_bytenr;
837                 superblock_tmp->dev_state = dev_state;
838                 superblock_tmp->logical_bytenr = dev_bytenr;
839                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
840                 superblock_tmp->is_metadata = 1;
841                 superblock_tmp->is_superblock = 1;
842                 superblock_tmp->is_iodone = 1;
843                 superblock_tmp->never_written = 0;
844                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
845                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
846                         printk(KERN_INFO "New initial S-block (bdev %p, %s)"
847                                " @%llu (%s/%llu/%d)\n",
848                                superblock_bdev, device->name,
849                                (unsigned long long)dev_bytenr,
850                                dev_state->name,
851                                (unsigned long long)dev_bytenr,
852                                superblock_mirror_num);
853                 list_add(&superblock_tmp->all_blocks_node,
854                          &state->all_blocks_list);
855                 btrfsic_block_hashtable_add(superblock_tmp,
856                                             &state->block_hashtable);
857         }
858
859         /* select the one with the highest generation field */
860         if (btrfs_super_generation(super_tmp) >
861             state->max_superblock_generation ||
862             0 == state->max_superblock_generation) {
863                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
864                 *selected_dev_state = dev_state;
865                 state->max_superblock_generation =
866                     btrfs_super_generation(super_tmp);
867                 state->latest_superblock = superblock_tmp;
868         }
869
870         for (pass = 0; pass < 3; pass++) {
871                 u64 next_bytenr;
872                 int num_copies;
873                 int mirror_num;
874                 const char *additional_string = NULL;
875                 struct btrfs_disk_key tmp_disk_key;
876
877                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
878                 tmp_disk_key.offset = 0;
879                 switch (pass) {
880                 case 0:
881                         tmp_disk_key.objectid =
882                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
883                         additional_string = "initial root ";
884                         next_bytenr = btrfs_super_root(super_tmp);
885                         break;
886                 case 1:
887                         tmp_disk_key.objectid =
888                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
889                         additional_string = "initial chunk ";
890                         next_bytenr = btrfs_super_chunk_root(super_tmp);
891                         break;
892                 case 2:
893                         tmp_disk_key.objectid =
894                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
895                         additional_string = "initial log ";
896                         next_bytenr = btrfs_super_log_root(super_tmp);
897                         if (0 == next_bytenr)
898                                 continue;
899                         break;
900                 }
901
902                 num_copies =
903                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
904                                      next_bytenr, state->metablock_size);
905                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
906                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
907                                (unsigned long long)next_bytenr, num_copies);
908                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
909                         struct btrfsic_block *next_block;
910                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
911                         struct btrfsic_block_link *l;
912
913                         if (btrfsic_map_block(state, next_bytenr,
914                                               state->metablock_size,
915                                               &tmp_next_block_ctx,
916                                               mirror_num)) {
917                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
918                                        "bytenr @%llu, mirror %d) failed!\n",
919                                        (unsigned long long)next_bytenr,
920                                        mirror_num);
921                                 brelse(bh);
922                                 return -1;
923                         }
924
925                         next_block = btrfsic_block_lookup_or_add(
926                                         state, &tmp_next_block_ctx,
927                                         additional_string, 1, 1, 0,
928                                         mirror_num, NULL);
929                         if (NULL == next_block) {
930                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
931                                 brelse(bh);
932                                 return -1;
933                         }
934
935                         next_block->disk_key = tmp_disk_key;
936                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
937                         l = btrfsic_block_link_lookup_or_add(
938                                         state, &tmp_next_block_ctx,
939                                         next_block, superblock_tmp,
940                                         BTRFSIC_GENERATION_UNKNOWN);
941                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
942                         if (NULL == l) {
943                                 brelse(bh);
944                                 return -1;
945                         }
946                 }
947         }
948         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
949                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
950
951         brelse(bh);
952         return 0;
953 }
954
955 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
956 {
957         struct btrfsic_stack_frame *sf;
958
959         sf = kzalloc(sizeof(*sf), GFP_NOFS);
960         if (NULL == sf)
961                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
962         else
963                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
964         return sf;
965 }
966
967 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
968 {
969         BUG_ON(!(NULL == sf ||
970                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
971         kfree(sf);
972 }
973
974 static int btrfsic_process_metablock(
975                 struct btrfsic_state *state,
976                 struct btrfsic_block *const first_block,
977                 struct btrfsic_block_data_ctx *const first_block_ctx,
978                 int first_limit_nesting, int force_iodone_flag)
979 {
980         struct btrfsic_stack_frame initial_stack_frame = { 0 };
981         struct btrfsic_stack_frame *sf;
982         struct btrfsic_stack_frame *next_stack;
983         struct btrfs_header *const first_hdr =
984                 (struct btrfs_header *)first_block_ctx->datav[0];
985
986         BUG_ON(!first_hdr);
987         sf = &initial_stack_frame;
988         sf->error = 0;
989         sf->i = -1;
990         sf->limit_nesting = first_limit_nesting;
991         sf->block = first_block;
992         sf->block_ctx = first_block_ctx;
993         sf->next_block = NULL;
994         sf->hdr = first_hdr;
995         sf->prev = NULL;
996
997 continue_with_new_stack_frame:
998         sf->block->generation = le64_to_cpu(sf->hdr->generation);
999         if (0 == sf->hdr->level) {
1000                 struct btrfs_leaf *const leafhdr =
1001                     (struct btrfs_leaf *)sf->hdr;
1002
1003                 if (-1 == sf->i) {
1004                         sf->nr = le32_to_cpu(leafhdr->header.nritems);
1005
1006                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1007                                 printk(KERN_INFO
1008                                        "leaf %llu items %d generation %llu"
1009                                        " owner %llu\n",
1010                                        (unsigned long long)
1011                                        sf->block_ctx->start,
1012                                        sf->nr,
1013                                        (unsigned long long)
1014                                        le64_to_cpu(leafhdr->header.generation),
1015                                        (unsigned long long)
1016                                        le64_to_cpu(leafhdr->header.owner));
1017                 }
1018
1019 continue_with_current_leaf_stack_frame:
1020                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1021                         sf->i++;
1022                         sf->num_copies = 0;
1023                 }
1024
1025                 if (sf->i < sf->nr) {
1026                         struct btrfs_item disk_item;
1027                         u32 disk_item_offset =
1028                                 (uintptr_t)(leafhdr->items + sf->i) -
1029                                 (uintptr_t)leafhdr;
1030                         struct btrfs_disk_key *disk_key;
1031                         u8 type;
1032                         u32 item_offset;
1033
1034                         if (disk_item_offset + sizeof(struct btrfs_item) >
1035                             sf->block_ctx->len) {
1036 leaf_item_out_of_bounce_error:
1037                                 printk(KERN_INFO
1038                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1039                                        sf->block_ctx->start,
1040                                        sf->block_ctx->dev->name);
1041                                 goto one_stack_frame_backwards;
1042                         }
1043                         btrfsic_read_from_block_data(sf->block_ctx,
1044                                                      &disk_item,
1045                                                      disk_item_offset,
1046                                                      sizeof(struct btrfs_item));
1047                         item_offset = le32_to_cpu(disk_item.offset);
1048                         disk_key = &disk_item.key;
1049                         type = disk_key->type;
1050
1051                         if (BTRFS_ROOT_ITEM_KEY == type) {
1052                                 struct btrfs_root_item root_item;
1053                                 u32 root_item_offset;
1054                                 u64 next_bytenr;
1055
1056                                 root_item_offset = item_offset +
1057                                         offsetof(struct btrfs_leaf, items);
1058                                 if (root_item_offset +
1059                                     sizeof(struct btrfs_root_item) >
1060                                     sf->block_ctx->len)
1061                                         goto leaf_item_out_of_bounce_error;
1062                                 btrfsic_read_from_block_data(
1063                                         sf->block_ctx, &root_item,
1064                                         root_item_offset,
1065                                         sizeof(struct btrfs_root_item));
1066                                 next_bytenr = le64_to_cpu(root_item.bytenr);
1067
1068                                 sf->error =
1069                                     btrfsic_create_link_to_next_block(
1070                                                 state,
1071                                                 sf->block,
1072                                                 sf->block_ctx,
1073                                                 next_bytenr,
1074                                                 sf->limit_nesting,
1075                                                 &sf->next_block_ctx,
1076                                                 &sf->next_block,
1077                                                 force_iodone_flag,
1078                                                 &sf->num_copies,
1079                                                 &sf->mirror_num,
1080                                                 disk_key,
1081                                                 le64_to_cpu(root_item.
1082                                                 generation));
1083                                 if (sf->error)
1084                                         goto one_stack_frame_backwards;
1085
1086                                 if (NULL != sf->next_block) {
1087                                         struct btrfs_header *const next_hdr =
1088                                             (struct btrfs_header *)
1089                                             sf->next_block_ctx.datav[0];
1090
1091                                         next_stack =
1092                                             btrfsic_stack_frame_alloc();
1093                                         if (NULL == next_stack) {
1094                                                 btrfsic_release_block_ctx(
1095                                                                 &sf->
1096                                                                 next_block_ctx);
1097                                                 goto one_stack_frame_backwards;
1098                                         }
1099
1100                                         next_stack->i = -1;
1101                                         next_stack->block = sf->next_block;
1102                                         next_stack->block_ctx =
1103                                             &sf->next_block_ctx;
1104                                         next_stack->next_block = NULL;
1105                                         next_stack->hdr = next_hdr;
1106                                         next_stack->limit_nesting =
1107                                             sf->limit_nesting - 1;
1108                                         next_stack->prev = sf;
1109                                         sf = next_stack;
1110                                         goto continue_with_new_stack_frame;
1111                                 }
1112                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1113                                    state->include_extent_data) {
1114                                 sf->error = btrfsic_handle_extent_data(
1115                                                 state,
1116                                                 sf->block,
1117                                                 sf->block_ctx,
1118                                                 item_offset,
1119                                                 force_iodone_flag);
1120                                 if (sf->error)
1121                                         goto one_stack_frame_backwards;
1122                         }
1123
1124                         goto continue_with_current_leaf_stack_frame;
1125                 }
1126         } else {
1127                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1128
1129                 if (-1 == sf->i) {
1130                         sf->nr = le32_to_cpu(nodehdr->header.nritems);
1131
1132                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1133                                 printk(KERN_INFO "node %llu level %d items %d"
1134                                        " generation %llu owner %llu\n",
1135                                        (unsigned long long)
1136                                        sf->block_ctx->start,
1137                                        nodehdr->header.level, sf->nr,
1138                                        (unsigned long long)
1139                                        le64_to_cpu(nodehdr->header.generation),
1140                                        (unsigned long long)
1141                                        le64_to_cpu(nodehdr->header.owner));
1142                 }
1143
1144 continue_with_current_node_stack_frame:
1145                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1146                         sf->i++;
1147                         sf->num_copies = 0;
1148                 }
1149
1150                 if (sf->i < sf->nr) {
1151                         struct btrfs_key_ptr key_ptr;
1152                         u32 key_ptr_offset;
1153                         u64 next_bytenr;
1154
1155                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1156                                           (uintptr_t)nodehdr;
1157                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1158                             sf->block_ctx->len) {
1159                                 printk(KERN_INFO
1160                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1161                                        sf->block_ctx->start,
1162                                        sf->block_ctx->dev->name);
1163                                 goto one_stack_frame_backwards;
1164                         }
1165                         btrfsic_read_from_block_data(
1166                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1167                                 sizeof(struct btrfs_key_ptr));
1168                         next_bytenr = le64_to_cpu(key_ptr.blockptr);
1169
1170                         sf->error = btrfsic_create_link_to_next_block(
1171                                         state,
1172                                         sf->block,
1173                                         sf->block_ctx,
1174                                         next_bytenr,
1175                                         sf->limit_nesting,
1176                                         &sf->next_block_ctx,
1177                                         &sf->next_block,
1178                                         force_iodone_flag,
1179                                         &sf->num_copies,
1180                                         &sf->mirror_num,
1181                                         &key_ptr.key,
1182                                         le64_to_cpu(key_ptr.generation));
1183                         if (sf->error)
1184                                 goto one_stack_frame_backwards;
1185
1186                         if (NULL != sf->next_block) {
1187                                 struct btrfs_header *const next_hdr =
1188                                     (struct btrfs_header *)
1189                                     sf->next_block_ctx.datav[0];
1190
1191                                 next_stack = btrfsic_stack_frame_alloc();
1192                                 if (NULL == next_stack)
1193                                         goto one_stack_frame_backwards;
1194
1195                                 next_stack->i = -1;
1196                                 next_stack->block = sf->next_block;
1197                                 next_stack->block_ctx = &sf->next_block_ctx;
1198                                 next_stack->next_block = NULL;
1199                                 next_stack->hdr = next_hdr;
1200                                 next_stack->limit_nesting =
1201                                     sf->limit_nesting - 1;
1202                                 next_stack->prev = sf;
1203                                 sf = next_stack;
1204                                 goto continue_with_new_stack_frame;
1205                         }
1206
1207                         goto continue_with_current_node_stack_frame;
1208                 }
1209         }
1210
1211 one_stack_frame_backwards:
1212         if (NULL != sf->prev) {
1213                 struct btrfsic_stack_frame *const prev = sf->prev;
1214
1215                 /* the one for the initial block is freed in the caller */
1216                 btrfsic_release_block_ctx(sf->block_ctx);
1217
1218                 if (sf->error) {
1219                         prev->error = sf->error;
1220                         btrfsic_stack_frame_free(sf);
1221                         sf = prev;
1222                         goto one_stack_frame_backwards;
1223                 }
1224
1225                 btrfsic_stack_frame_free(sf);
1226                 sf = prev;
1227                 goto continue_with_new_stack_frame;
1228         } else {
1229                 BUG_ON(&initial_stack_frame != sf);
1230         }
1231
1232         return sf->error;
1233 }
1234
1235 static void btrfsic_read_from_block_data(
1236         struct btrfsic_block_data_ctx *block_ctx,
1237         void *dstv, u32 offset, size_t len)
1238 {
1239         size_t cur;
1240         size_t offset_in_page;
1241         char *kaddr;
1242         char *dst = (char *)dstv;
1243         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1244         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1245
1246         WARN_ON(offset + len > block_ctx->len);
1247         offset_in_page = (start_offset + offset) &
1248                          ((unsigned long)PAGE_CACHE_SIZE - 1);
1249
1250         while (len > 0) {
1251                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1252                 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1253                             PAGE_CACHE_SHIFT);
1254                 kaddr = block_ctx->datav[i];
1255                 memcpy(dst, kaddr + offset_in_page, cur);
1256
1257                 dst += cur;
1258                 len -= cur;
1259                 offset_in_page = 0;
1260                 i++;
1261         }
1262 }
1263
1264 static int btrfsic_create_link_to_next_block(
1265                 struct btrfsic_state *state,
1266                 struct btrfsic_block *block,
1267                 struct btrfsic_block_data_ctx *block_ctx,
1268                 u64 next_bytenr,
1269                 int limit_nesting,
1270                 struct btrfsic_block_data_ctx *next_block_ctx,
1271                 struct btrfsic_block **next_blockp,
1272                 int force_iodone_flag,
1273                 int *num_copiesp, int *mirror_nump,
1274                 struct btrfs_disk_key *disk_key,
1275                 u64 parent_generation)
1276 {
1277         struct btrfsic_block *next_block = NULL;
1278         int ret;
1279         struct btrfsic_block_link *l;
1280         int did_alloc_block_link;
1281         int block_was_created;
1282
1283         *next_blockp = NULL;
1284         if (0 == *num_copiesp) {
1285                 *num_copiesp =
1286                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1287                                      next_bytenr, state->metablock_size);
1288                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1289                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1290                                (unsigned long long)next_bytenr, *num_copiesp);
1291                 *mirror_nump = 1;
1292         }
1293
1294         if (*mirror_nump > *num_copiesp)
1295                 return 0;
1296
1297         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1298                 printk(KERN_INFO
1299                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1300                        *mirror_nump);
1301         ret = btrfsic_map_block(state, next_bytenr,
1302                                 state->metablock_size,
1303                                 next_block_ctx, *mirror_nump);
1304         if (ret) {
1305                 printk(KERN_INFO
1306                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1307                        (unsigned long long)next_bytenr, *mirror_nump);
1308                 btrfsic_release_block_ctx(next_block_ctx);
1309                 *next_blockp = NULL;
1310                 return -1;
1311         }
1312
1313         next_block = btrfsic_block_lookup_or_add(state,
1314                                                  next_block_ctx, "referenced ",
1315                                                  1, force_iodone_flag,
1316                                                  !force_iodone_flag,
1317                                                  *mirror_nump,
1318                                                  &block_was_created);
1319         if (NULL == next_block) {
1320                 btrfsic_release_block_ctx(next_block_ctx);
1321                 *next_blockp = NULL;
1322                 return -1;
1323         }
1324         if (block_was_created) {
1325                 l = NULL;
1326                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1327         } else {
1328                 if (next_block->logical_bytenr != next_bytenr &&
1329                     !(!next_block->is_metadata &&
1330                       0 == next_block->logical_bytenr)) {
1331                         printk(KERN_INFO
1332                                "Referenced block @%llu (%s/%llu/%d)"
1333                                " found in hash table, %c,"
1334                                " bytenr mismatch (!= stored %llu).\n",
1335                                (unsigned long long)next_bytenr,
1336                                next_block_ctx->dev->name,
1337                                (unsigned long long)next_block_ctx->dev_bytenr,
1338                                *mirror_nump,
1339                                btrfsic_get_block_type(state, next_block),
1340                                (unsigned long long)next_block->logical_bytenr);
1341                 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1342                         printk(KERN_INFO
1343                                "Referenced block @%llu (%s/%llu/%d)"
1344                                " found in hash table, %c.\n",
1345                                (unsigned long long)next_bytenr,
1346                                next_block_ctx->dev->name,
1347                                (unsigned long long)next_block_ctx->dev_bytenr,
1348                                *mirror_nump,
1349                                btrfsic_get_block_type(state, next_block));
1350                 next_block->logical_bytenr = next_bytenr;
1351
1352                 next_block->mirror_num = *mirror_nump;
1353                 l = btrfsic_block_link_hashtable_lookup(
1354                                 next_block_ctx->dev->bdev,
1355                                 next_block_ctx->dev_bytenr,
1356                                 block_ctx->dev->bdev,
1357                                 block_ctx->dev_bytenr,
1358                                 &state->block_link_hashtable);
1359         }
1360
1361         next_block->disk_key = *disk_key;
1362         if (NULL == l) {
1363                 l = btrfsic_block_link_alloc();
1364                 if (NULL == l) {
1365                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1366                         btrfsic_release_block_ctx(next_block_ctx);
1367                         *next_blockp = NULL;
1368                         return -1;
1369                 }
1370
1371                 did_alloc_block_link = 1;
1372                 l->block_ref_to = next_block;
1373                 l->block_ref_from = block;
1374                 l->ref_cnt = 1;
1375                 l->parent_generation = parent_generation;
1376
1377                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1378                         btrfsic_print_add_link(state, l);
1379
1380                 list_add(&l->node_ref_to, &block->ref_to_list);
1381                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1382
1383                 btrfsic_block_link_hashtable_add(l,
1384                                                  &state->block_link_hashtable);
1385         } else {
1386                 did_alloc_block_link = 0;
1387                 if (0 == limit_nesting) {
1388                         l->ref_cnt++;
1389                         l->parent_generation = parent_generation;
1390                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1391                                 btrfsic_print_add_link(state, l);
1392                 }
1393         }
1394
1395         if (limit_nesting > 0 && did_alloc_block_link) {
1396                 ret = btrfsic_read_block(state, next_block_ctx);
1397                 if (ret < (int)next_block_ctx->len) {
1398                         printk(KERN_INFO
1399                                "btrfsic: read block @logical %llu failed!\n",
1400                                (unsigned long long)next_bytenr);
1401                         btrfsic_release_block_ctx(next_block_ctx);
1402                         *next_blockp = NULL;
1403                         return -1;
1404                 }
1405
1406                 *next_blockp = next_block;
1407         } else {
1408                 *next_blockp = NULL;
1409         }
1410         (*mirror_nump)++;
1411
1412         return 0;
1413 }
1414
1415 static int btrfsic_handle_extent_data(
1416                 struct btrfsic_state *state,
1417                 struct btrfsic_block *block,
1418                 struct btrfsic_block_data_ctx *block_ctx,
1419                 u32 item_offset, int force_iodone_flag)
1420 {
1421         int ret;
1422         struct btrfs_file_extent_item file_extent_item;
1423         u64 file_extent_item_offset;
1424         u64 next_bytenr;
1425         u64 num_bytes;
1426         u64 generation;
1427         struct btrfsic_block_link *l;
1428
1429         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1430                                   item_offset;
1431         if (file_extent_item_offset +
1432             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1433             block_ctx->len) {
1434                 printk(KERN_INFO
1435                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1436                        block_ctx->start, block_ctx->dev->name);
1437                 return -1;
1438         }
1439
1440         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1441                 file_extent_item_offset,
1442                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1443         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1444             ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1445                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1446                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1447                                file_extent_item.type,
1448                                (unsigned long long)
1449                                le64_to_cpu(file_extent_item.disk_bytenr));
1450                 return 0;
1451         }
1452
1453         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1454             block_ctx->len) {
1455                 printk(KERN_INFO
1456                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1457                        block_ctx->start, block_ctx->dev->name);
1458                 return -1;
1459         }
1460         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1461                                      file_extent_item_offset,
1462                                      sizeof(struct btrfs_file_extent_item));
1463         next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1464                       le64_to_cpu(file_extent_item.offset);
1465         generation = le64_to_cpu(file_extent_item.generation);
1466         num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1467         generation = le64_to_cpu(file_extent_item.generation);
1468
1469         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1470                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1471                        " offset = %llu, num_bytes = %llu\n",
1472                        file_extent_item.type,
1473                        (unsigned long long)
1474                        le64_to_cpu(file_extent_item.disk_bytenr),
1475                        (unsigned long long)le64_to_cpu(file_extent_item.offset),
1476                        (unsigned long long)num_bytes);
1477         while (num_bytes > 0) {
1478                 u32 chunk_len;
1479                 int num_copies;
1480                 int mirror_num;
1481
1482                 if (num_bytes > state->datablock_size)
1483                         chunk_len = state->datablock_size;
1484                 else
1485                         chunk_len = num_bytes;
1486
1487                 num_copies =
1488                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1489                                      next_bytenr, state->datablock_size);
1490                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1491                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1492                                (unsigned long long)next_bytenr, num_copies);
1493                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1494                         struct btrfsic_block_data_ctx next_block_ctx;
1495                         struct btrfsic_block *next_block;
1496                         int block_was_created;
1497
1498                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1499                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1500                                        "mirror_num=%d)\n", mirror_num);
1501                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1502                                 printk(KERN_INFO
1503                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1504                                        (unsigned long long)next_bytenr,
1505                                        chunk_len);
1506                         ret = btrfsic_map_block(state, next_bytenr,
1507                                                 chunk_len, &next_block_ctx,
1508                                                 mirror_num);
1509                         if (ret) {
1510                                 printk(KERN_INFO
1511                                        "btrfsic: btrfsic_map_block(@%llu,"
1512                                        " mirror=%d) failed!\n",
1513                                        (unsigned long long)next_bytenr,
1514                                        mirror_num);
1515                                 return -1;
1516                         }
1517
1518                         next_block = btrfsic_block_lookup_or_add(
1519                                         state,
1520                                         &next_block_ctx,
1521                                         "referenced ",
1522                                         0,
1523                                         force_iodone_flag,
1524                                         !force_iodone_flag,
1525                                         mirror_num,
1526                                         &block_was_created);
1527                         if (NULL == next_block) {
1528                                 printk(KERN_INFO
1529                                        "btrfsic: error, kmalloc failed!\n");
1530                                 btrfsic_release_block_ctx(&next_block_ctx);
1531                                 return -1;
1532                         }
1533                         if (!block_was_created) {
1534                                 if (next_block->logical_bytenr != next_bytenr &&
1535                                     !(!next_block->is_metadata &&
1536                                       0 == next_block->logical_bytenr)) {
1537                                         printk(KERN_INFO
1538                                                "Referenced block"
1539                                                " @%llu (%s/%llu/%d)"
1540                                                " found in hash table, D,"
1541                                                " bytenr mismatch"
1542                                                " (!= stored %llu).\n",
1543                                                (unsigned long long)next_bytenr,
1544                                                next_block_ctx.dev->name,
1545                                                (unsigned long long)
1546                                                next_block_ctx.dev_bytenr,
1547                                                mirror_num,
1548                                                (unsigned long long)
1549                                                next_block->logical_bytenr);
1550                                 }
1551                                 next_block->logical_bytenr = next_bytenr;
1552                                 next_block->mirror_num = mirror_num;
1553                         }
1554
1555                         l = btrfsic_block_link_lookup_or_add(state,
1556                                                              &next_block_ctx,
1557                                                              next_block, block,
1558                                                              generation);
1559                         btrfsic_release_block_ctx(&next_block_ctx);
1560                         if (NULL == l)
1561                                 return -1;
1562                 }
1563
1564                 next_bytenr += chunk_len;
1565                 num_bytes -= chunk_len;
1566         }
1567
1568         return 0;
1569 }
1570
1571 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1572                              struct btrfsic_block_data_ctx *block_ctx_out,
1573                              int mirror_num)
1574 {
1575         int ret;
1576         u64 length;
1577         struct btrfs_bio *multi = NULL;
1578         struct btrfs_device *device;
1579
1580         length = len;
1581         ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1582                               bytenr, &length, &multi, mirror_num);
1583
1584         device = multi->stripes[0].dev;
1585         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1586         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1587         block_ctx_out->start = bytenr;
1588         block_ctx_out->len = len;
1589         block_ctx_out->datav = NULL;
1590         block_ctx_out->pagev = NULL;
1591         block_ctx_out->mem_to_free = NULL;
1592
1593         if (0 == ret)
1594                 kfree(multi);
1595         if (NULL == block_ctx_out->dev) {
1596                 ret = -ENXIO;
1597                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1598         }
1599
1600         return ret;
1601 }
1602
1603 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1604                                   u32 len, struct block_device *bdev,
1605                                   struct btrfsic_block_data_ctx *block_ctx_out)
1606 {
1607         block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1608         block_ctx_out->dev_bytenr = bytenr;
1609         block_ctx_out->start = bytenr;
1610         block_ctx_out->len = len;
1611         block_ctx_out->datav = NULL;
1612         block_ctx_out->pagev = NULL;
1613         block_ctx_out->mem_to_free = NULL;
1614         if (NULL != block_ctx_out->dev) {
1615                 return 0;
1616         } else {
1617                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1618                 return -ENXIO;
1619         }
1620 }
1621
1622 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1623 {
1624         if (block_ctx->mem_to_free) {
1625                 unsigned int num_pages;
1626
1627                 BUG_ON(!block_ctx->datav);
1628                 BUG_ON(!block_ctx->pagev);
1629                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1630                             PAGE_CACHE_SHIFT;
1631                 while (num_pages > 0) {
1632                         num_pages--;
1633                         if (block_ctx->datav[num_pages]) {
1634                                 kunmap(block_ctx->pagev[num_pages]);
1635                                 block_ctx->datav[num_pages] = NULL;
1636                         }
1637                         if (block_ctx->pagev[num_pages]) {
1638                                 __free_page(block_ctx->pagev[num_pages]);
1639                                 block_ctx->pagev[num_pages] = NULL;
1640                         }
1641                 }
1642
1643                 kfree(block_ctx->mem_to_free);
1644                 block_ctx->mem_to_free = NULL;
1645                 block_ctx->pagev = NULL;
1646                 block_ctx->datav = NULL;
1647         }
1648 }
1649
1650 static int btrfsic_read_block(struct btrfsic_state *state,
1651                               struct btrfsic_block_data_ctx *block_ctx)
1652 {
1653         unsigned int num_pages;
1654         unsigned int i;
1655         u64 dev_bytenr;
1656         int ret;
1657
1658         BUG_ON(block_ctx->datav);
1659         BUG_ON(block_ctx->pagev);
1660         BUG_ON(block_ctx->mem_to_free);
1661         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1662                 printk(KERN_INFO
1663                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1664                        (unsigned long long)block_ctx->dev_bytenr);
1665                 return -1;
1666         }
1667
1668         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1669                     PAGE_CACHE_SHIFT;
1670         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1671                                           sizeof(*block_ctx->pagev)) *
1672                                          num_pages, GFP_NOFS);
1673         if (!block_ctx->mem_to_free)
1674                 return -1;
1675         block_ctx->datav = block_ctx->mem_to_free;
1676         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1677         for (i = 0; i < num_pages; i++) {
1678                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1679                 if (!block_ctx->pagev[i])
1680                         return -1;
1681         }
1682
1683         dev_bytenr = block_ctx->dev_bytenr;
1684         for (i = 0; i < num_pages;) {
1685                 struct bio *bio;
1686                 unsigned int j;
1687                 DECLARE_COMPLETION_ONSTACK(complete);
1688
1689                 bio = bio_alloc(GFP_NOFS, num_pages - i);
1690                 if (!bio) {
1691                         printk(KERN_INFO
1692                                "btrfsic: bio_alloc() for %u pages failed!\n",
1693                                num_pages - i);
1694                         return -1;
1695                 }
1696                 bio->bi_bdev = block_ctx->dev->bdev;
1697                 bio->bi_sector = dev_bytenr >> 9;
1698                 bio->bi_end_io = btrfsic_complete_bio_end_io;
1699                 bio->bi_private = &complete;
1700
1701                 for (j = i; j < num_pages; j++) {
1702                         ret = bio_add_page(bio, block_ctx->pagev[j],
1703                                            PAGE_CACHE_SIZE, 0);
1704                         if (PAGE_CACHE_SIZE != ret)
1705                                 break;
1706                 }
1707                 if (j == i) {
1708                         printk(KERN_INFO
1709                                "btrfsic: error, failed to add a single page!\n");
1710                         return -1;
1711                 }
1712                 submit_bio(READ, bio);
1713
1714                 /* this will also unplug the queue */
1715                 wait_for_completion(&complete);
1716
1717                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1718                         printk(KERN_INFO
1719                                "btrfsic: read error at logical %llu dev %s!\n",
1720                                block_ctx->start, block_ctx->dev->name);
1721                         bio_put(bio);
1722                         return -1;
1723                 }
1724                 bio_put(bio);
1725                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1726                 i = j;
1727         }
1728         for (i = 0; i < num_pages; i++) {
1729                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1730                 if (!block_ctx->datav[i]) {
1731                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1732                                block_ctx->dev->name);
1733                         return -1;
1734                 }
1735         }
1736
1737         return block_ctx->len;
1738 }
1739
1740 static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1741 {
1742         complete((struct completion *)bio->bi_private);
1743 }
1744
1745 static void btrfsic_dump_database(struct btrfsic_state *state)
1746 {
1747         struct list_head *elem_all;
1748
1749         BUG_ON(NULL == state);
1750
1751         printk(KERN_INFO "all_blocks_list:\n");
1752         list_for_each(elem_all, &state->all_blocks_list) {
1753                 const struct btrfsic_block *const b_all =
1754                     list_entry(elem_all, struct btrfsic_block,
1755                                all_blocks_node);
1756                 struct list_head *elem_ref_to;
1757                 struct list_head *elem_ref_from;
1758
1759                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1760                        btrfsic_get_block_type(state, b_all),
1761                        (unsigned long long)b_all->logical_bytenr,
1762                        b_all->dev_state->name,
1763                        (unsigned long long)b_all->dev_bytenr,
1764                        b_all->mirror_num);
1765
1766                 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1767                         const struct btrfsic_block_link *const l =
1768                             list_entry(elem_ref_to,
1769                                        struct btrfsic_block_link,
1770                                        node_ref_to);
1771
1772                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1773                                " refers %u* to"
1774                                " %c @%llu (%s/%llu/%d)\n",
1775                                btrfsic_get_block_type(state, b_all),
1776                                (unsigned long long)b_all->logical_bytenr,
1777                                b_all->dev_state->name,
1778                                (unsigned long long)b_all->dev_bytenr,
1779                                b_all->mirror_num,
1780                                l->ref_cnt,
1781                                btrfsic_get_block_type(state, l->block_ref_to),
1782                                (unsigned long long)
1783                                l->block_ref_to->logical_bytenr,
1784                                l->block_ref_to->dev_state->name,
1785                                (unsigned long long)l->block_ref_to->dev_bytenr,
1786                                l->block_ref_to->mirror_num);
1787                 }
1788
1789                 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1790                         const struct btrfsic_block_link *const l =
1791                             list_entry(elem_ref_from,
1792                                        struct btrfsic_block_link,
1793                                        node_ref_from);
1794
1795                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1796                                " is ref %u* from"
1797                                " %c @%llu (%s/%llu/%d)\n",
1798                                btrfsic_get_block_type(state, b_all),
1799                                (unsigned long long)b_all->logical_bytenr,
1800                                b_all->dev_state->name,
1801                                (unsigned long long)b_all->dev_bytenr,
1802                                b_all->mirror_num,
1803                                l->ref_cnt,
1804                                btrfsic_get_block_type(state, l->block_ref_from),
1805                                (unsigned long long)
1806                                l->block_ref_from->logical_bytenr,
1807                                l->block_ref_from->dev_state->name,
1808                                (unsigned long long)
1809                                l->block_ref_from->dev_bytenr,
1810                                l->block_ref_from->mirror_num);
1811                 }
1812
1813                 printk(KERN_INFO "\n");
1814         }
1815 }
1816
1817 /*
1818  * Test whether the disk block contains a tree block (leaf or node)
1819  * (note that this test fails for the super block)
1820  */
1821 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1822                                      char **datav, unsigned int num_pages)
1823 {
1824         struct btrfs_header *h;
1825         u8 csum[BTRFS_CSUM_SIZE];
1826         u32 crc = ~(u32)0;
1827         unsigned int i;
1828
1829         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1830                 return 1; /* not metadata */
1831         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1832         h = (struct btrfs_header *)datav[0];
1833
1834         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1835                 return 1;
1836
1837         for (i = 0; i < num_pages; i++) {
1838                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1839                 size_t sublen = i ? PAGE_CACHE_SIZE :
1840                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1841
1842                 crc = crc32c(crc, data, sublen);
1843         }
1844         btrfs_csum_final(crc, csum);
1845         if (memcmp(csum, h->csum, state->csum_size))
1846                 return 1;
1847
1848         return 0; /* is metadata */
1849 }
1850
1851 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1852                                           u64 dev_bytenr, char **mapped_datav,
1853                                           unsigned int num_pages,
1854                                           struct bio *bio, int *bio_is_patched,
1855                                           struct buffer_head *bh,
1856                                           int submit_bio_bh_rw)
1857 {
1858         int is_metadata;
1859         struct btrfsic_block *block;
1860         struct btrfsic_block_data_ctx block_ctx;
1861         int ret;
1862         struct btrfsic_state *state = dev_state->state;
1863         struct block_device *bdev = dev_state->bdev;
1864         unsigned int processed_len;
1865
1866         if (NULL != bio_is_patched)
1867                 *bio_is_patched = 0;
1868
1869 again:
1870         if (num_pages == 0)
1871                 return;
1872
1873         processed_len = 0;
1874         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1875                                                       num_pages));
1876
1877         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1878                                                &state->block_hashtable);
1879         if (NULL != block) {
1880                 u64 bytenr = 0;
1881                 struct list_head *elem_ref_to;
1882                 struct list_head *tmp_ref_to;
1883
1884                 if (block->is_superblock) {
1885                         bytenr = le64_to_cpu(((struct btrfs_super_block *)
1886                                               mapped_datav[0])->bytenr);
1887                         if (num_pages * PAGE_CACHE_SIZE <
1888                             BTRFS_SUPER_INFO_SIZE) {
1889                                 printk(KERN_INFO
1890                                        "btrfsic: cannot work with too short bios!\n");
1891                                 return;
1892                         }
1893                         is_metadata = 1;
1894                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1895                         processed_len = BTRFS_SUPER_INFO_SIZE;
1896                         if (state->print_mask &
1897                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1898                                 printk(KERN_INFO
1899                                        "[before new superblock is written]:\n");
1900                                 btrfsic_dump_tree_sub(state, block, 0);
1901                         }
1902                 }
1903                 if (is_metadata) {
1904                         if (!block->is_superblock) {
1905                                 if (num_pages * PAGE_CACHE_SIZE <
1906                                     state->metablock_size) {
1907                                         printk(KERN_INFO
1908                                                "btrfsic: cannot work with too short bios!\n");
1909                                         return;
1910                                 }
1911                                 processed_len = state->metablock_size;
1912                                 bytenr = le64_to_cpu(((struct btrfs_header *)
1913                                                       mapped_datav[0])->bytenr);
1914                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1915                                                                dev_state,
1916                                                                dev_bytenr);
1917                         }
1918                         if (block->logical_bytenr != bytenr) {
1919                                 printk(KERN_INFO
1920                                        "Written block @%llu (%s/%llu/%d)"
1921                                        " found in hash table, %c,"
1922                                        " bytenr mismatch"
1923                                        " (!= stored %llu).\n",
1924                                        (unsigned long long)bytenr,
1925                                        dev_state->name,
1926                                        (unsigned long long)dev_bytenr,
1927                                        block->mirror_num,
1928                                        btrfsic_get_block_type(state, block),
1929                                        (unsigned long long)
1930                                        block->logical_bytenr);
1931                                 block->logical_bytenr = bytenr;
1932                         } else if (state->print_mask &
1933                                    BTRFSIC_PRINT_MASK_VERBOSE)
1934                                 printk(KERN_INFO
1935                                        "Written block @%llu (%s/%llu/%d)"
1936                                        " found in hash table, %c.\n",
1937                                        (unsigned long long)bytenr,
1938                                        dev_state->name,
1939                                        (unsigned long long)dev_bytenr,
1940                                        block->mirror_num,
1941                                        btrfsic_get_block_type(state, block));
1942                 } else {
1943                         if (num_pages * PAGE_CACHE_SIZE <
1944                             state->datablock_size) {
1945                                 printk(KERN_INFO
1946                                        "btrfsic: cannot work with too short bios!\n");
1947                                 return;
1948                         }
1949                         processed_len = state->datablock_size;
1950                         bytenr = block->logical_bytenr;
1951                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1952                                 printk(KERN_INFO
1953                                        "Written block @%llu (%s/%llu/%d)"
1954                                        " found in hash table, %c.\n",
1955                                        (unsigned long long)bytenr,
1956                                        dev_state->name,
1957                                        (unsigned long long)dev_bytenr,
1958                                        block->mirror_num,
1959                                        btrfsic_get_block_type(state, block));
1960                 }
1961
1962                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1963                         printk(KERN_INFO
1964                                "ref_to_list: %cE, ref_from_list: %cE\n",
1965                                list_empty(&block->ref_to_list) ? ' ' : '!',
1966                                list_empty(&block->ref_from_list) ? ' ' : '!');
1967                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1968                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1969                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1970                                " objectid=%llu, type=%d, offset=%llu),"
1971                                " new(gen=%llu),"
1972                                " which is referenced by most recent superblock"
1973                                " (superblockgen=%llu)!\n",
1974                                btrfsic_get_block_type(state, block),
1975                                (unsigned long long)bytenr,
1976                                dev_state->name,
1977                                (unsigned long long)dev_bytenr,
1978                                block->mirror_num,
1979                                (unsigned long long)block->generation,
1980                                (unsigned long long)
1981                                le64_to_cpu(block->disk_key.objectid),
1982                                block->disk_key.type,
1983                                (unsigned long long)
1984                                le64_to_cpu(block->disk_key.offset),
1985                                (unsigned long long)
1986                                le64_to_cpu(((struct btrfs_header *)
1987                                             mapped_datav[0])->generation),
1988                                (unsigned long long)
1989                                state->max_superblock_generation);
1990                         btrfsic_dump_tree(state);
1991                 }
1992
1993                 if (!block->is_iodone && !block->never_written) {
1994                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1995                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1996                                " which is not yet iodone!\n",
1997                                btrfsic_get_block_type(state, block),
1998                                (unsigned long long)bytenr,
1999                                dev_state->name,
2000                                (unsigned long long)dev_bytenr,
2001                                block->mirror_num,
2002                                (unsigned long long)block->generation,
2003                                (unsigned long long)
2004                                le64_to_cpu(((struct btrfs_header *)
2005                                             mapped_datav[0])->generation));
2006                         /* it would not be safe to go on */
2007                         btrfsic_dump_tree(state);
2008                         goto continue_loop;
2009                 }
2010
2011                 /*
2012                  * Clear all references of this block. Do not free
2013                  * the block itself even if it is not referenced anymore
2014                  * because it still carries valuable information
2015                  * like whether it was ever written and IO completed.
2016                  */
2017                 list_for_each_safe(elem_ref_to, tmp_ref_to,
2018                                    &block->ref_to_list) {
2019                         struct btrfsic_block_link *const l =
2020                             list_entry(elem_ref_to,
2021                                        struct btrfsic_block_link,
2022                                        node_ref_to);
2023
2024                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2025                                 btrfsic_print_rem_link(state, l);
2026                         l->ref_cnt--;
2027                         if (0 == l->ref_cnt) {
2028                                 list_del(&l->node_ref_to);
2029                                 list_del(&l->node_ref_from);
2030                                 btrfsic_block_link_hashtable_remove(l);
2031                                 btrfsic_block_link_free(l);
2032                         }
2033                 }
2034
2035                 if (block->is_superblock)
2036                         ret = btrfsic_map_superblock(state, bytenr,
2037                                                      processed_len,
2038                                                      bdev, &block_ctx);
2039                 else
2040                         ret = btrfsic_map_block(state, bytenr, processed_len,
2041                                                 &block_ctx, 0);
2042                 if (ret) {
2043                         printk(KERN_INFO
2044                                "btrfsic: btrfsic_map_block(root @%llu)"
2045                                " failed!\n", (unsigned long long)bytenr);
2046                         goto continue_loop;
2047                 }
2048                 block_ctx.datav = mapped_datav;
2049                 /* the following is required in case of writes to mirrors,
2050                  * use the same that was used for the lookup */
2051                 block_ctx.dev = dev_state;
2052                 block_ctx.dev_bytenr = dev_bytenr;
2053
2054                 if (is_metadata || state->include_extent_data) {
2055                         block->never_written = 0;
2056                         block->iodone_w_error = 0;
2057                         if (NULL != bio) {
2058                                 block->is_iodone = 0;
2059                                 BUG_ON(NULL == bio_is_patched);
2060                                 if (!*bio_is_patched) {
2061                                         block->orig_bio_bh_private =
2062                                             bio->bi_private;
2063                                         block->orig_bio_bh_end_io.bio =
2064                                             bio->bi_end_io;
2065                                         block->next_in_same_bio = NULL;
2066                                         bio->bi_private = block;
2067                                         bio->bi_end_io = btrfsic_bio_end_io;
2068                                         *bio_is_patched = 1;
2069                                 } else {
2070                                         struct btrfsic_block *chained_block =
2071                                             (struct btrfsic_block *)
2072                                             bio->bi_private;
2073
2074                                         BUG_ON(NULL == chained_block);
2075                                         block->orig_bio_bh_private =
2076                                             chained_block->orig_bio_bh_private;
2077                                         block->orig_bio_bh_end_io.bio =
2078                                             chained_block->orig_bio_bh_end_io.
2079                                             bio;
2080                                         block->next_in_same_bio = chained_block;
2081                                         bio->bi_private = block;
2082                                 }
2083                         } else if (NULL != bh) {
2084                                 block->is_iodone = 0;
2085                                 block->orig_bio_bh_private = bh->b_private;
2086                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2087                                 block->next_in_same_bio = NULL;
2088                                 bh->b_private = block;
2089                                 bh->b_end_io = btrfsic_bh_end_io;
2090                         } else {
2091                                 block->is_iodone = 1;
2092                                 block->orig_bio_bh_private = NULL;
2093                                 block->orig_bio_bh_end_io.bio = NULL;
2094                                 block->next_in_same_bio = NULL;
2095                         }
2096                 }
2097
2098                 block->flush_gen = dev_state->last_flush_gen + 1;
2099                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2100                 if (is_metadata) {
2101                         block->logical_bytenr = bytenr;
2102                         block->is_metadata = 1;
2103                         if (block->is_superblock) {
2104                                 BUG_ON(PAGE_CACHE_SIZE !=
2105                                        BTRFS_SUPER_INFO_SIZE);
2106                                 ret = btrfsic_process_written_superblock(
2107                                                 state,
2108                                                 block,
2109                                                 (struct btrfs_super_block *)
2110                                                 mapped_datav[0]);
2111                                 if (state->print_mask &
2112                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2113                                         printk(KERN_INFO
2114                                         "[after new superblock is written]:\n");
2115                                         btrfsic_dump_tree_sub(state, block, 0);
2116                                 }
2117                         } else {
2118                                 block->mirror_num = 0;  /* unknown */
2119                                 ret = btrfsic_process_metablock(
2120                                                 state,
2121                                                 block,
2122                                                 &block_ctx,
2123                                                 0, 0);
2124                         }
2125                         if (ret)
2126                                 printk(KERN_INFO
2127                                        "btrfsic: btrfsic_process_metablock"
2128                                        "(root @%llu) failed!\n",
2129                                        (unsigned long long)dev_bytenr);
2130                 } else {
2131                         block->is_metadata = 0;
2132                         block->mirror_num = 0;  /* unknown */
2133                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2134                         if (!state->include_extent_data
2135                             && list_empty(&block->ref_from_list)) {
2136                                 /*
2137                                  * disk block is overwritten with extent
2138                                  * data (not meta data) and we are configured
2139                                  * to not include extent data: take the
2140                                  * chance and free the block's memory
2141                                  */
2142                                 btrfsic_block_hashtable_remove(block);
2143                                 list_del(&block->all_blocks_node);
2144                                 btrfsic_block_free(block);
2145                         }
2146                 }
2147                 btrfsic_release_block_ctx(&block_ctx);
2148         } else {
2149                 /* block has not been found in hash table */
2150                 u64 bytenr;
2151
2152                 if (!is_metadata) {
2153                         processed_len = state->datablock_size;
2154                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2155                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2156                                        " !found in hash table, D.\n",
2157                                        dev_state->name,
2158                                        (unsigned long long)dev_bytenr);
2159                         if (!state->include_extent_data) {
2160                                 /* ignore that written D block */
2161                                 goto continue_loop;
2162                         }
2163
2164                         /* this is getting ugly for the
2165                          * include_extent_data case... */
2166                         bytenr = 0;     /* unknown */
2167                         block_ctx.start = bytenr;
2168                         block_ctx.len = processed_len;
2169                         block_ctx.mem_to_free = NULL;
2170                         block_ctx.pagev = NULL;
2171                 } else {
2172                         processed_len = state->metablock_size;
2173                         bytenr = le64_to_cpu(((struct btrfs_header *)
2174                                               mapped_datav[0])->bytenr);
2175                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2176                                                        dev_bytenr);
2177                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2178                                 printk(KERN_INFO
2179                                        "Written block @%llu (%s/%llu/?)"
2180                                        " !found in hash table, M.\n",
2181                                        (unsigned long long)bytenr,
2182                                        dev_state->name,
2183                                        (unsigned long long)dev_bytenr);
2184
2185                         ret = btrfsic_map_block(state, bytenr, processed_len,
2186                                                 &block_ctx, 0);
2187                         if (ret) {
2188                                 printk(KERN_INFO
2189                                        "btrfsic: btrfsic_map_block(root @%llu)"
2190                                        " failed!\n",
2191                                        (unsigned long long)dev_bytenr);
2192                                 goto continue_loop;
2193                         }
2194                 }
2195                 block_ctx.datav = mapped_datav;
2196                 /* the following is required in case of writes to mirrors,
2197                  * use the same that was used for the lookup */
2198                 block_ctx.dev = dev_state;
2199                 block_ctx.dev_bytenr = dev_bytenr;
2200
2201                 block = btrfsic_block_alloc();
2202                 if (NULL == block) {
2203                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2204                         btrfsic_release_block_ctx(&block_ctx);
2205                         goto continue_loop;
2206                 }
2207                 block->dev_state = dev_state;
2208                 block->dev_bytenr = dev_bytenr;
2209                 block->logical_bytenr = bytenr;
2210                 block->is_metadata = is_metadata;
2211                 block->never_written = 0;
2212                 block->iodone_w_error = 0;
2213                 block->mirror_num = 0;  /* unknown */
2214                 block->flush_gen = dev_state->last_flush_gen + 1;
2215                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2216                 if (NULL != bio) {
2217                         block->is_iodone = 0;
2218                         BUG_ON(NULL == bio_is_patched);
2219                         if (!*bio_is_patched) {
2220                                 block->orig_bio_bh_private = bio->bi_private;
2221                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2222                                 block->next_in_same_bio = NULL;
2223                                 bio->bi_private = block;
2224                                 bio->bi_end_io = btrfsic_bio_end_io;
2225                                 *bio_is_patched = 1;
2226                         } else {
2227                                 struct btrfsic_block *chained_block =
2228                                     (struct btrfsic_block *)
2229                                     bio->bi_private;
2230
2231                                 BUG_ON(NULL == chained_block);
2232                                 block->orig_bio_bh_private =
2233                                     chained_block->orig_bio_bh_private;
2234                                 block->orig_bio_bh_end_io.bio =
2235                                     chained_block->orig_bio_bh_end_io.bio;
2236                                 block->next_in_same_bio = chained_block;
2237                                 bio->bi_private = block;
2238                         }
2239                 } else if (NULL != bh) {
2240                         block->is_iodone = 0;
2241                         block->orig_bio_bh_private = bh->b_private;
2242                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2243                         block->next_in_same_bio = NULL;
2244                         bh->b_private = block;
2245                         bh->b_end_io = btrfsic_bh_end_io;
2246                 } else {
2247                         block->is_iodone = 1;
2248                         block->orig_bio_bh_private = NULL;
2249                         block->orig_bio_bh_end_io.bio = NULL;
2250                         block->next_in_same_bio = NULL;
2251                 }
2252                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2253                         printk(KERN_INFO
2254                                "New written %c-block @%llu (%s/%llu/%d)\n",
2255                                is_metadata ? 'M' : 'D',
2256                                (unsigned long long)block->logical_bytenr,
2257                                block->dev_state->name,
2258                                (unsigned long long)block->dev_bytenr,
2259                                block->mirror_num);
2260                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2261                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2262
2263                 if (is_metadata) {
2264                         ret = btrfsic_process_metablock(state, block,
2265                                                         &block_ctx, 0, 0);
2266                         if (ret)
2267                                 printk(KERN_INFO
2268                                        "btrfsic: process_metablock(root @%llu)"
2269                                        " failed!\n",
2270                                        (unsigned long long)dev_bytenr);
2271                 }
2272                 btrfsic_release_block_ctx(&block_ctx);
2273         }
2274
2275 continue_loop:
2276         BUG_ON(!processed_len);
2277         dev_bytenr += processed_len;
2278         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2279         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2280         goto again;
2281 }
2282
2283 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2284 {
2285         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2286         int iodone_w_error;
2287
2288         /* mutex is not held! This is not save if IO is not yet completed
2289          * on umount */
2290         iodone_w_error = 0;
2291         if (bio_error_status)
2292                 iodone_w_error = 1;
2293
2294         BUG_ON(NULL == block);
2295         bp->bi_private = block->orig_bio_bh_private;
2296         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2297
2298         do {
2299                 struct btrfsic_block *next_block;
2300                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2301
2302                 if ((dev_state->state->print_mask &
2303                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2304                         printk(KERN_INFO
2305                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2306                                bio_error_status,
2307                                btrfsic_get_block_type(dev_state->state, block),
2308                                (unsigned long long)block->logical_bytenr,
2309                                dev_state->name,
2310                                (unsigned long long)block->dev_bytenr,
2311                                block->mirror_num);
2312                 next_block = block->next_in_same_bio;
2313                 block->iodone_w_error = iodone_w_error;
2314                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2315                         dev_state->last_flush_gen++;
2316                         if ((dev_state->state->print_mask &
2317                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2318                                 printk(KERN_INFO
2319                                        "bio_end_io() new %s flush_gen=%llu\n",
2320                                        dev_state->name,
2321                                        (unsigned long long)
2322                                        dev_state->last_flush_gen);
2323                 }
2324                 if (block->submit_bio_bh_rw & REQ_FUA)
2325                         block->flush_gen = 0; /* FUA completed means block is
2326                                                * on disk */
2327                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2328                 block = next_block;
2329         } while (NULL != block);
2330
2331         bp->bi_end_io(bp, bio_error_status);
2332 }
2333
2334 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2335 {
2336         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2337         int iodone_w_error = !uptodate;
2338         struct btrfsic_dev_state *dev_state;
2339
2340         BUG_ON(NULL == block);
2341         dev_state = block->dev_state;
2342         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2343                 printk(KERN_INFO
2344                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2345                        iodone_w_error,
2346                        btrfsic_get_block_type(dev_state->state, block),
2347                        (unsigned long long)block->logical_bytenr,
2348                        block->dev_state->name,
2349                        (unsigned long long)block->dev_bytenr,
2350                        block->mirror_num);
2351
2352         block->iodone_w_error = iodone_w_error;
2353         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2354                 dev_state->last_flush_gen++;
2355                 if ((dev_state->state->print_mask &
2356                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2357                         printk(KERN_INFO
2358                                "bh_end_io() new %s flush_gen=%llu\n",
2359                                dev_state->name,
2360                                (unsigned long long)dev_state->last_flush_gen);
2361         }
2362         if (block->submit_bio_bh_rw & REQ_FUA)
2363                 block->flush_gen = 0; /* FUA completed means block is on disk */
2364
2365         bh->b_private = block->orig_bio_bh_private;
2366         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2367         block->is_iodone = 1; /* for FLUSH, this releases the block */
2368         bh->b_end_io(bh, uptodate);
2369 }
2370
2371 static int btrfsic_process_written_superblock(
2372                 struct btrfsic_state *state,
2373                 struct btrfsic_block *const superblock,
2374                 struct btrfs_super_block *const super_hdr)
2375 {
2376         int pass;
2377
2378         superblock->generation = btrfs_super_generation(super_hdr);
2379         if (!(superblock->generation > state->max_superblock_generation ||
2380               0 == state->max_superblock_generation)) {
2381                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2382                         printk(KERN_INFO
2383                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2384                                " with old gen %llu <= %llu\n",
2385                                (unsigned long long)superblock->logical_bytenr,
2386                                superblock->dev_state->name,
2387                                (unsigned long long)superblock->dev_bytenr,
2388                                superblock->mirror_num,
2389                                (unsigned long long)
2390                                btrfs_super_generation(super_hdr),
2391                                (unsigned long long)
2392                                state->max_superblock_generation);
2393         } else {
2394                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2395                         printk(KERN_INFO
2396                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2397                                " with new gen %llu > %llu\n",
2398                                (unsigned long long)superblock->logical_bytenr,
2399                                superblock->dev_state->name,
2400                                (unsigned long long)superblock->dev_bytenr,
2401                                superblock->mirror_num,
2402                                (unsigned long long)
2403                                btrfs_super_generation(super_hdr),
2404                                (unsigned long long)
2405                                state->max_superblock_generation);
2406
2407                 state->max_superblock_generation =
2408                     btrfs_super_generation(super_hdr);
2409                 state->latest_superblock = superblock;
2410         }
2411
2412         for (pass = 0; pass < 3; pass++) {
2413                 int ret;
2414                 u64 next_bytenr;
2415                 struct btrfsic_block *next_block;
2416                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2417                 struct btrfsic_block_link *l;
2418                 int num_copies;
2419                 int mirror_num;
2420                 const char *additional_string = NULL;
2421                 struct btrfs_disk_key tmp_disk_key;
2422
2423                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
2424                 tmp_disk_key.offset = 0;
2425
2426                 switch (pass) {
2427                 case 0:
2428                         tmp_disk_key.objectid =
2429                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
2430                         additional_string = "root ";
2431                         next_bytenr = btrfs_super_root(super_hdr);
2432                         if (state->print_mask &
2433                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2434                                 printk(KERN_INFO "root@%llu\n",
2435                                        (unsigned long long)next_bytenr);
2436                         break;
2437                 case 1:
2438                         tmp_disk_key.objectid =
2439                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
2440                         additional_string = "chunk ";
2441                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2442                         if (state->print_mask &
2443                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2444                                 printk(KERN_INFO "chunk@%llu\n",
2445                                        (unsigned long long)next_bytenr);
2446                         break;
2447                 case 2:
2448                         tmp_disk_key.objectid =
2449                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
2450                         additional_string = "log ";
2451                         next_bytenr = btrfs_super_log_root(super_hdr);
2452                         if (0 == next_bytenr)
2453                                 continue;
2454                         if (state->print_mask &
2455                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2456                                 printk(KERN_INFO "log@%llu\n",
2457                                        (unsigned long long)next_bytenr);
2458                         break;
2459                 }
2460
2461                 num_copies =
2462                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
2463                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2464                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2465                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2466                                (unsigned long long)next_bytenr, num_copies);
2467                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2468                         int was_created;
2469
2470                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2471                                 printk(KERN_INFO
2472                                        "btrfsic_process_written_superblock("
2473                                        "mirror_num=%d)\n", mirror_num);
2474                         ret = btrfsic_map_block(state, next_bytenr,
2475                                                 BTRFS_SUPER_INFO_SIZE,
2476                                                 &tmp_next_block_ctx,
2477                                                 mirror_num);
2478                         if (ret) {
2479                                 printk(KERN_INFO
2480                                        "btrfsic: btrfsic_map_block(@%llu,"
2481                                        " mirror=%d) failed!\n",
2482                                        (unsigned long long)next_bytenr,
2483                                        mirror_num);
2484                                 return -1;
2485                         }
2486
2487                         next_block = btrfsic_block_lookup_or_add(
2488                                         state,
2489                                         &tmp_next_block_ctx,
2490                                         additional_string,
2491                                         1, 0, 1,
2492                                         mirror_num,
2493                                         &was_created);
2494                         if (NULL == next_block) {
2495                                 printk(KERN_INFO
2496                                        "btrfsic: error, kmalloc failed!\n");
2497                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2498                                 return -1;
2499                         }
2500
2501                         next_block->disk_key = tmp_disk_key;
2502                         if (was_created)
2503                                 next_block->generation =
2504                                     BTRFSIC_GENERATION_UNKNOWN;
2505                         l = btrfsic_block_link_lookup_or_add(
2506                                         state,
2507                                         &tmp_next_block_ctx,
2508                                         next_block,
2509                                         superblock,
2510                                         BTRFSIC_GENERATION_UNKNOWN);
2511                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2512                         if (NULL == l)
2513                                 return -1;
2514                 }
2515         }
2516
2517         if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
2518                 WARN_ON(1);
2519                 btrfsic_dump_tree(state);
2520         }
2521
2522         return 0;
2523 }
2524
2525 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2526                                         struct btrfsic_block *const block,
2527                                         int recursion_level)
2528 {
2529         struct list_head *elem_ref_to;
2530         int ret = 0;
2531
2532         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2533                 /*
2534                  * Note that this situation can happen and does not
2535                  * indicate an error in regular cases. It happens
2536                  * when disk blocks are freed and later reused.
2537                  * The check-integrity module is not aware of any
2538                  * block free operations, it just recognizes block
2539                  * write operations. Therefore it keeps the linkage
2540                  * information for a block until a block is
2541                  * rewritten. This can temporarily cause incorrect
2542                  * and even circular linkage informations. This
2543                  * causes no harm unless such blocks are referenced
2544                  * by the most recent super block.
2545                  */
2546                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2547                         printk(KERN_INFO
2548                                "btrfsic: abort cyclic linkage (case 1).\n");
2549
2550                 return ret;
2551         }
2552
2553         /*
2554          * This algorithm is recursive because the amount of used stack
2555          * space is very small and the max recursion depth is limited.
2556          */
2557         list_for_each(elem_ref_to, &block->ref_to_list) {
2558                 const struct btrfsic_block_link *const l =
2559                     list_entry(elem_ref_to, struct btrfsic_block_link,
2560                                node_ref_to);
2561
2562                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2563                         printk(KERN_INFO
2564                                "rl=%d, %c @%llu (%s/%llu/%d)"
2565                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2566                                recursion_level,
2567                                btrfsic_get_block_type(state, block),
2568                                (unsigned long long)block->logical_bytenr,
2569                                block->dev_state->name,
2570                                (unsigned long long)block->dev_bytenr,
2571                                block->mirror_num,
2572                                l->ref_cnt,
2573                                btrfsic_get_block_type(state, l->block_ref_to),
2574                                (unsigned long long)
2575                                l->block_ref_to->logical_bytenr,
2576                                l->block_ref_to->dev_state->name,
2577                                (unsigned long long)l->block_ref_to->dev_bytenr,
2578                                l->block_ref_to->mirror_num);
2579                 if (l->block_ref_to->never_written) {
2580                         printk(KERN_INFO "btrfs: attempt to write superblock"
2581                                " which references block %c @%llu (%s/%llu/%d)"
2582                                " which is never written!\n",
2583                                btrfsic_get_block_type(state, l->block_ref_to),
2584                                (unsigned long long)
2585                                l->block_ref_to->logical_bytenr,
2586                                l->block_ref_to->dev_state->name,
2587                                (unsigned long long)l->block_ref_to->dev_bytenr,
2588                                l->block_ref_to->mirror_num);
2589                         ret = -1;
2590                 } else if (!l->block_ref_to->is_iodone) {
2591                         printk(KERN_INFO "btrfs: attempt to write superblock"
2592                                " which references block %c @%llu (%s/%llu/%d)"
2593                                " which is not yet iodone!\n",
2594                                btrfsic_get_block_type(state, l->block_ref_to),
2595                                (unsigned long long)
2596                                l->block_ref_to->logical_bytenr,
2597                                l->block_ref_to->dev_state->name,
2598                                (unsigned long long)l->block_ref_to->dev_bytenr,
2599                                l->block_ref_to->mirror_num);
2600                         ret = -1;
2601                 } else if (l->parent_generation !=
2602                            l->block_ref_to->generation &&
2603                            BTRFSIC_GENERATION_UNKNOWN !=
2604                            l->parent_generation &&
2605                            BTRFSIC_GENERATION_UNKNOWN !=
2606                            l->block_ref_to->generation) {
2607                         printk(KERN_INFO "btrfs: attempt to write superblock"
2608                                " which references block %c @%llu (%s/%llu/%d)"
2609                                " with generation %llu !="
2610                                " parent generation %llu!\n",
2611                                btrfsic_get_block_type(state, l->block_ref_to),
2612                                (unsigned long long)
2613                                l->block_ref_to->logical_bytenr,
2614                                l->block_ref_to->dev_state->name,
2615                                (unsigned long long)l->block_ref_to->dev_bytenr,
2616                                l->block_ref_to->mirror_num,
2617                                (unsigned long long)l->block_ref_to->generation,
2618                                (unsigned long long)l->parent_generation);
2619                         ret = -1;
2620                 } else if (l->block_ref_to->flush_gen >
2621                            l->block_ref_to->dev_state->last_flush_gen) {
2622                         printk(KERN_INFO "btrfs: attempt to write superblock"
2623                                " which references block %c @%llu (%s/%llu/%d)"
2624                                " which is not flushed out of disk's write cache"
2625                                " (block flush_gen=%llu,"
2626                                " dev->flush_gen=%llu)!\n",
2627                                btrfsic_get_block_type(state, l->block_ref_to),
2628                                (unsigned long long)
2629                                l->block_ref_to->logical_bytenr,
2630                                l->block_ref_to->dev_state->name,
2631                                (unsigned long long)l->block_ref_to->dev_bytenr,
2632                                l->block_ref_to->mirror_num,
2633                                (unsigned long long)block->flush_gen,
2634                                (unsigned long long)
2635                                l->block_ref_to->dev_state->last_flush_gen);
2636                         ret = -1;
2637                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2638                                                               l->block_ref_to,
2639                                                               recursion_level +
2640                                                               1)) {
2641                         ret = -1;
2642                 }
2643         }
2644
2645         return ret;
2646 }
2647
2648 static int btrfsic_is_block_ref_by_superblock(
2649                 const struct btrfsic_state *state,
2650                 const struct btrfsic_block *block,
2651                 int recursion_level)
2652 {
2653         struct list_head *elem_ref_from;
2654
2655         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2656                 /* refer to comment at "abort cyclic linkage (case 1)" */
2657                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2658                         printk(KERN_INFO
2659                                "btrfsic: abort cyclic linkage (case 2).\n");
2660
2661                 return 0;
2662         }
2663
2664         /*
2665          * This algorithm is recursive because the amount of used stack space
2666          * is very small and the max recursion depth is limited.
2667          */
2668         list_for_each(elem_ref_from, &block->ref_from_list) {
2669                 const struct btrfsic_block_link *const l =
2670                     list_entry(elem_ref_from, struct btrfsic_block_link,
2671                                node_ref_from);
2672
2673                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2674                         printk(KERN_INFO
2675                                "rl=%d, %c @%llu (%s/%llu/%d)"
2676                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2677                                recursion_level,
2678                                btrfsic_get_block_type(state, block),
2679                                (unsigned long long)block->logical_bytenr,
2680                                block->dev_state->name,
2681                                (unsigned long long)block->dev_bytenr,
2682                                block->mirror_num,
2683                                l->ref_cnt,
2684                                btrfsic_get_block_type(state, l->block_ref_from),
2685                                (unsigned long long)
2686                                l->block_ref_from->logical_bytenr,
2687                                l->block_ref_from->dev_state->name,
2688                                (unsigned long long)
2689                                l->block_ref_from->dev_bytenr,
2690                                l->block_ref_from->mirror_num);
2691                 if (l->block_ref_from->is_superblock &&
2692                     state->latest_superblock->dev_bytenr ==
2693                     l->block_ref_from->dev_bytenr &&
2694                     state->latest_superblock->dev_state->bdev ==
2695                     l->block_ref_from->dev_state->bdev)
2696                         return 1;
2697                 else if (btrfsic_is_block_ref_by_superblock(state,
2698                                                             l->block_ref_from,
2699                                                             recursion_level +
2700                                                             1))
2701                         return 1;
2702         }
2703
2704         return 0;
2705 }
2706
2707 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2708                                    const struct btrfsic_block_link *l)
2709 {
2710         printk(KERN_INFO
2711                "Add %u* link from %c @%llu (%s/%llu/%d)"
2712                " to %c @%llu (%s/%llu/%d).\n",
2713                l->ref_cnt,
2714                btrfsic_get_block_type(state, l->block_ref_from),
2715                (unsigned long long)l->block_ref_from->logical_bytenr,
2716                l->block_ref_from->dev_state->name,
2717                (unsigned long long)l->block_ref_from->dev_bytenr,
2718                l->block_ref_from->mirror_num,
2719                btrfsic_get_block_type(state, l->block_ref_to),
2720                (unsigned long long)l->block_ref_to->logical_bytenr,
2721                l->block_ref_to->dev_state->name,
2722                (unsigned long long)l->block_ref_to->dev_bytenr,
2723                l->block_ref_to->mirror_num);
2724 }
2725
2726 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2727                                    const struct btrfsic_block_link *l)
2728 {
2729         printk(KERN_INFO
2730                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2731                " to %c @%llu (%s/%llu/%d).\n",
2732                l->ref_cnt,
2733                btrfsic_get_block_type(state, l->block_ref_from),
2734                (unsigned long long)l->block_ref_from->logical_bytenr,
2735                l->block_ref_from->dev_state->name,
2736                (unsigned long long)l->block_ref_from->dev_bytenr,
2737                l->block_ref_from->mirror_num,
2738                btrfsic_get_block_type(state, l->block_ref_to),
2739                (unsigned long long)l->block_ref_to->logical_bytenr,
2740                l->block_ref_to->dev_state->name,
2741                (unsigned long long)l->block_ref_to->dev_bytenr,
2742                l->block_ref_to->mirror_num);
2743 }
2744
2745 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2746                                    const struct btrfsic_block *block)
2747 {
2748         if (block->is_superblock &&
2749             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2750             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2751                 return 'S';
2752         else if (block->is_superblock)
2753                 return 's';
2754         else if (block->is_metadata)
2755                 return 'M';
2756         else
2757                 return 'D';
2758 }
2759
2760 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2761 {
2762         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2763 }
2764
2765 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2766                                   const struct btrfsic_block *block,
2767                                   int indent_level)
2768 {
2769         struct list_head *elem_ref_to;
2770         int indent_add;
2771         static char buf[80];
2772         int cursor_position;
2773
2774         /*
2775          * Should better fill an on-stack buffer with a complete line and
2776          * dump it at once when it is time to print a newline character.
2777          */
2778
2779         /*
2780          * This algorithm is recursive because the amount of used stack space
2781          * is very small and the max recursion depth is limited.
2782          */
2783         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2784                              btrfsic_get_block_type(state, block),
2785                              (unsigned long long)block->logical_bytenr,
2786                              block->dev_state->name,
2787                              (unsigned long long)block->dev_bytenr,
2788                              block->mirror_num);
2789         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2790                 printk("[...]\n");
2791                 return;
2792         }
2793         printk(buf);
2794         indent_level += indent_add;
2795         if (list_empty(&block->ref_to_list)) {
2796                 printk("\n");
2797                 return;
2798         }
2799         if (block->mirror_num > 1 &&
2800             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2801                 printk(" [...]\n");
2802                 return;
2803         }
2804
2805         cursor_position = indent_level;
2806         list_for_each(elem_ref_to, &block->ref_to_list) {
2807                 const struct btrfsic_block_link *const l =
2808                     list_entry(elem_ref_to, struct btrfsic_block_link,
2809                                node_ref_to);
2810
2811                 while (cursor_position < indent_level) {
2812                         printk(" ");
2813                         cursor_position++;
2814                 }
2815                 if (l->ref_cnt > 1)
2816                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2817                 else
2818                         indent_add = sprintf(buf, " --> ");
2819                 if (indent_level + indent_add >
2820                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2821                         printk("[...]\n");
2822                         cursor_position = 0;
2823                         continue;
2824                 }
2825
2826                 printk(buf);
2827
2828                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2829                                       indent_level + indent_add);
2830                 cursor_position = 0;
2831         }
2832 }
2833
2834 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2835                 struct btrfsic_state *state,
2836                 struct btrfsic_block_data_ctx *next_block_ctx,
2837                 struct btrfsic_block *next_block,
2838                 struct btrfsic_block *from_block,
2839                 u64 parent_generation)
2840 {
2841         struct btrfsic_block_link *l;
2842
2843         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2844                                                 next_block_ctx->dev_bytenr,
2845                                                 from_block->dev_state->bdev,
2846                                                 from_block->dev_bytenr,
2847                                                 &state->block_link_hashtable);
2848         if (NULL == l) {
2849                 l = btrfsic_block_link_alloc();
2850                 if (NULL == l) {
2851                         printk(KERN_INFO
2852                                "btrfsic: error, kmalloc" " failed!\n");
2853                         return NULL;
2854                 }
2855
2856                 l->block_ref_to = next_block;
2857                 l->block_ref_from = from_block;
2858                 l->ref_cnt = 1;
2859                 l->parent_generation = parent_generation;
2860
2861                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2862                         btrfsic_print_add_link(state, l);
2863
2864                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2865                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2866
2867                 btrfsic_block_link_hashtable_add(l,
2868                                                  &state->block_link_hashtable);
2869         } else {
2870                 l->ref_cnt++;
2871                 l->parent_generation = parent_generation;
2872                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2873                         btrfsic_print_add_link(state, l);
2874         }
2875
2876         return l;
2877 }
2878
2879 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2880                 struct btrfsic_state *state,
2881                 struct btrfsic_block_data_ctx *block_ctx,
2882                 const char *additional_string,
2883                 int is_metadata,
2884                 int is_iodone,
2885                 int never_written,
2886                 int mirror_num,
2887                 int *was_created)
2888 {
2889         struct btrfsic_block *block;
2890
2891         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2892                                                block_ctx->dev_bytenr,
2893                                                &state->block_hashtable);
2894         if (NULL == block) {
2895                 struct btrfsic_dev_state *dev_state;
2896
2897                 block = btrfsic_block_alloc();
2898                 if (NULL == block) {
2899                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2900                         return NULL;
2901                 }
2902                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2903                 if (NULL == dev_state) {
2904                         printk(KERN_INFO
2905                                "btrfsic: error, lookup dev_state failed!\n");
2906                         btrfsic_block_free(block);
2907                         return NULL;
2908                 }
2909                 block->dev_state = dev_state;
2910                 block->dev_bytenr = block_ctx->dev_bytenr;
2911                 block->logical_bytenr = block_ctx->start;
2912                 block->is_metadata = is_metadata;
2913                 block->is_iodone = is_iodone;
2914                 block->never_written = never_written;
2915                 block->mirror_num = mirror_num;
2916                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2917                         printk(KERN_INFO
2918                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2919                                additional_string,
2920                                btrfsic_get_block_type(state, block),
2921                                (unsigned long long)block->logical_bytenr,
2922                                dev_state->name,
2923                                (unsigned long long)block->dev_bytenr,
2924                                mirror_num);
2925                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2926                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2927                 if (NULL != was_created)
2928                         *was_created = 1;
2929         } else {
2930                 if (NULL != was_created)
2931                         *was_created = 0;
2932         }
2933
2934         return block;
2935 }
2936
2937 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2938                                            u64 bytenr,
2939                                            struct btrfsic_dev_state *dev_state,
2940                                            u64 dev_bytenr)
2941 {
2942         int num_copies;
2943         int mirror_num;
2944         int ret;
2945         struct btrfsic_block_data_ctx block_ctx;
2946         int match = 0;
2947
2948         num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2949                                       bytenr, state->metablock_size);
2950
2951         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2952                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2953                                         &block_ctx, mirror_num);
2954                 if (ret) {
2955                         printk(KERN_INFO "btrfsic:"
2956                                " btrfsic_map_block(logical @%llu,"
2957                                " mirror %d) failed!\n",
2958                                (unsigned long long)bytenr, mirror_num);
2959                         continue;
2960                 }
2961
2962                 if (dev_state->bdev == block_ctx.dev->bdev &&
2963                     dev_bytenr == block_ctx.dev_bytenr) {
2964                         match++;
2965                         btrfsic_release_block_ctx(&block_ctx);
2966                         break;
2967                 }
2968                 btrfsic_release_block_ctx(&block_ctx);
2969         }
2970
2971         if (!match) {
2972                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2973                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2974                        " phys_bytenr=%llu)!\n",
2975                        (unsigned long long)bytenr, dev_state->name,
2976                        (unsigned long long)dev_bytenr);
2977                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2978                         ret = btrfsic_map_block(state, bytenr,
2979                                                 state->metablock_size,
2980                                                 &block_ctx, mirror_num);
2981                         if (ret)
2982                                 continue;
2983
2984                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
2985                                " (%s/%llu/%d)\n",
2986                                (unsigned long long)bytenr,
2987                                block_ctx.dev->name,
2988                                (unsigned long long)block_ctx.dev_bytenr,
2989                                mirror_num);
2990                 }
2991                 WARN_ON(1);
2992         }
2993 }
2994
2995 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2996                 struct block_device *bdev)
2997 {
2998         struct btrfsic_dev_state *ds;
2999
3000         ds = btrfsic_dev_state_hashtable_lookup(bdev,
3001                                                 &btrfsic_dev_state_hashtable);
3002         return ds;
3003 }
3004
3005 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3006 {
3007         struct btrfsic_dev_state *dev_state;
3008
3009         if (!btrfsic_is_initialized)
3010                 return submit_bh(rw, bh);
3011
3012         mutex_lock(&btrfsic_mutex);
3013         /* since btrfsic_submit_bh() might also be called before
3014          * btrfsic_mount(), this might return NULL */
3015         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
3016
3017         /* Only called to write the superblock (incl. FLUSH/FUA) */
3018         if (NULL != dev_state &&
3019             (rw & WRITE) && bh->b_size > 0) {
3020                 u64 dev_bytenr;
3021
3022                 dev_bytenr = 4096 * bh->b_blocknr;
3023                 if (dev_state->state->print_mask &
3024                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3025                         printk(KERN_INFO
3026                                "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
3027                                " size=%lu, data=%p, bdev=%p)\n",
3028                                rw, (unsigned long)bh->b_blocknr,
3029                                (unsigned long long)dev_bytenr,
3030                                (unsigned long)bh->b_size, bh->b_data,
3031                                bh->b_bdev);
3032                 btrfsic_process_written_block(dev_state, dev_bytenr,
3033                                               &bh->b_data, 1, NULL,
3034                                               NULL, bh, rw);
3035         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3036                 if (dev_state->state->print_mask &
3037                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3038                         printk(KERN_INFO
3039                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
3040                                rw, bh->b_bdev);
3041                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3042                         if ((dev_state->state->print_mask &
3043                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3044                               BTRFSIC_PRINT_MASK_VERBOSE)))
3045                                 printk(KERN_INFO
3046                                        "btrfsic_submit_bh(%s) with FLUSH"
3047                                        " but dummy block already in use"
3048                                        " (ignored)!\n",
3049                                        dev_state->name);
3050                 } else {
3051                         struct btrfsic_block *const block =
3052                                 &dev_state->dummy_block_for_bio_bh_flush;
3053
3054                         block->is_iodone = 0;
3055                         block->never_written = 0;
3056                         block->iodone_w_error = 0;
3057                         block->flush_gen = dev_state->last_flush_gen + 1;
3058                         block->submit_bio_bh_rw = rw;
3059                         block->orig_bio_bh_private = bh->b_private;
3060                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
3061                         block->next_in_same_bio = NULL;
3062                         bh->b_private = block;
3063                         bh->b_end_io = btrfsic_bh_end_io;
3064                 }
3065         }
3066         mutex_unlock(&btrfsic_mutex);
3067         return submit_bh(rw, bh);
3068 }
3069
3070 void btrfsic_submit_bio(int rw, struct bio *bio)
3071 {
3072         struct btrfsic_dev_state *dev_state;
3073
3074         if (!btrfsic_is_initialized) {
3075                 submit_bio(rw, bio);
3076                 return;
3077         }
3078
3079         mutex_lock(&btrfsic_mutex);
3080         /* since btrfsic_submit_bio() is also called before
3081          * btrfsic_mount(), this might return NULL */
3082         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3083         if (NULL != dev_state &&
3084             (rw & WRITE) && NULL != bio->bi_io_vec) {
3085                 unsigned int i;
3086                 u64 dev_bytenr;
3087                 int bio_is_patched;
3088                 char **mapped_datav;
3089
3090                 dev_bytenr = 512 * bio->bi_sector;
3091                 bio_is_patched = 0;
3092                 if (dev_state->state->print_mask &
3093                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3094                         printk(KERN_INFO
3095                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
3096                                " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
3097                                rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
3098                                (unsigned long long)dev_bytenr,
3099                                bio->bi_bdev);
3100
3101                 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3102                                        GFP_NOFS);
3103                 if (!mapped_datav)
3104                         goto leave;
3105                 for (i = 0; i < bio->bi_vcnt; i++) {
3106                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3107                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3108                         if (!mapped_datav[i]) {
3109                                 while (i > 0) {
3110                                         i--;
3111                                         kunmap(bio->bi_io_vec[i].bv_page);
3112                                 }
3113                                 kfree(mapped_datav);
3114                                 goto leave;
3115                         }
3116                         if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3117                              BTRFSIC_PRINT_MASK_VERBOSE) ==
3118                             (dev_state->state->print_mask &
3119                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3120                               BTRFSIC_PRINT_MASK_VERBOSE)))
3121                                 printk(KERN_INFO
3122                                        "#%u: page=%p, len=%u, offset=%u\n",
3123                                        i, bio->bi_io_vec[i].bv_page,
3124                                        bio->bi_io_vec[i].bv_len,
3125                                        bio->bi_io_vec[i].bv_offset);
3126                 }
3127                 btrfsic_process_written_block(dev_state, dev_bytenr,
3128                                               mapped_datav, bio->bi_vcnt,
3129                                               bio, &bio_is_patched,
3130                                               NULL, rw);
3131                 while (i > 0) {
3132                         i--;
3133                         kunmap(bio->bi_io_vec[i].bv_page);
3134                 }
3135                 kfree(mapped_datav);
3136         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3137                 if (dev_state->state->print_mask &
3138                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3139                         printk(KERN_INFO
3140                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3141                                rw, bio->bi_bdev);
3142                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3143                         if ((dev_state->state->print_mask &
3144                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3145                               BTRFSIC_PRINT_MASK_VERBOSE)))
3146                                 printk(KERN_INFO
3147                                        "btrfsic_submit_bio(%s) with FLUSH"
3148                                        " but dummy block already in use"
3149                                        " (ignored)!\n",
3150                                        dev_state->name);
3151                 } else {
3152                         struct btrfsic_block *const block =
3153                                 &dev_state->dummy_block_for_bio_bh_flush;
3154
3155                         block->is_iodone = 0;
3156                         block->never_written = 0;
3157                         block->iodone_w_error = 0;
3158                         block->flush_gen = dev_state->last_flush_gen + 1;
3159                         block->submit_bio_bh_rw = rw;
3160                         block->orig_bio_bh_private = bio->bi_private;
3161                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3162                         block->next_in_same_bio = NULL;
3163                         bio->bi_private = block;
3164                         bio->bi_end_io = btrfsic_bio_end_io;
3165                 }
3166         }
3167 leave:
3168         mutex_unlock(&btrfsic_mutex);
3169
3170         submit_bio(rw, bio);
3171 }
3172
3173 int btrfsic_mount(struct btrfs_root *root,
3174                   struct btrfs_fs_devices *fs_devices,
3175                   int including_extent_data, u32 print_mask)
3176 {
3177         int ret;
3178         struct btrfsic_state *state;
3179         struct list_head *dev_head = &fs_devices->devices;
3180         struct btrfs_device *device;
3181
3182         if (root->nodesize != root->leafsize) {
3183                 printk(KERN_INFO
3184                        "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3185                        root->nodesize, root->leafsize);
3186                 return -1;
3187         }
3188         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3189                 printk(KERN_INFO
3190                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3191                        root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3192                 return -1;
3193         }
3194         if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3195                 printk(KERN_INFO
3196                        "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3197                        root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3198                 return -1;
3199         }
3200         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3201                 printk(KERN_INFO
3202                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3203                        root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3204                 return -1;
3205         }
3206         state = kzalloc(sizeof(*state), GFP_NOFS);
3207         if (NULL == state) {
3208                 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3209                 return -1;
3210         }
3211
3212         if (!btrfsic_is_initialized) {
3213                 mutex_init(&btrfsic_mutex);
3214                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3215                 btrfsic_is_initialized = 1;
3216         }
3217         mutex_lock(&btrfsic_mutex);
3218         state->root = root;
3219         state->print_mask = print_mask;
3220         state->include_extent_data = including_extent_data;
3221         state->csum_size = 0;
3222         state->metablock_size = root->nodesize;
3223         state->datablock_size = root->sectorsize;
3224         INIT_LIST_HEAD(&state->all_blocks_list);
3225         btrfsic_block_hashtable_init(&state->block_hashtable);
3226         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3227         state->max_superblock_generation = 0;
3228         state->latest_superblock = NULL;
3229
3230         list_for_each_entry(device, dev_head, dev_list) {
3231                 struct btrfsic_dev_state *ds;
3232                 char *p;
3233
3234                 if (!device->bdev || !device->name)
3235                         continue;
3236
3237                 ds = btrfsic_dev_state_alloc();
3238                 if (NULL == ds) {
3239                         printk(KERN_INFO
3240                                "btrfs check-integrity: kmalloc() failed!\n");
3241                         mutex_unlock(&btrfsic_mutex);
3242                         return -1;
3243                 }
3244                 ds->bdev = device->bdev;
3245                 ds->state = state;
3246                 bdevname(ds->bdev, ds->name);
3247                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3248                 for (p = ds->name; *p != '\0'; p++);
3249                 while (p > ds->name && *p != '/')
3250                         p--;
3251                 if (*p == '/')
3252                         p++;
3253                 strlcpy(ds->name, p, sizeof(ds->name));
3254                 btrfsic_dev_state_hashtable_add(ds,
3255                                                 &btrfsic_dev_state_hashtable);
3256         }
3257
3258         ret = btrfsic_process_superblock(state, fs_devices);
3259         if (0 != ret) {
3260                 mutex_unlock(&btrfsic_mutex);
3261                 btrfsic_unmount(root, fs_devices);
3262                 return ret;
3263         }
3264
3265         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3266                 btrfsic_dump_database(state);
3267         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3268                 btrfsic_dump_tree(state);
3269
3270         mutex_unlock(&btrfsic_mutex);
3271         return 0;
3272 }
3273
3274 void btrfsic_unmount(struct btrfs_root *root,
3275                      struct btrfs_fs_devices *fs_devices)
3276 {
3277         struct list_head *elem_all;
3278         struct list_head *tmp_all;
3279         struct btrfsic_state *state;
3280         struct list_head *dev_head = &fs_devices->devices;
3281         struct btrfs_device *device;
3282
3283         if (!btrfsic_is_initialized)
3284                 return;
3285
3286         mutex_lock(&btrfsic_mutex);
3287
3288         state = NULL;
3289         list_for_each_entry(device, dev_head, dev_list) {
3290                 struct btrfsic_dev_state *ds;
3291
3292                 if (!device->bdev || !device->name)
3293                         continue;
3294
3295                 ds = btrfsic_dev_state_hashtable_lookup(
3296                                 device->bdev,
3297                                 &btrfsic_dev_state_hashtable);
3298                 if (NULL != ds) {
3299                         state = ds->state;
3300                         btrfsic_dev_state_hashtable_remove(ds);
3301                         btrfsic_dev_state_free(ds);
3302                 }
3303         }
3304
3305         if (NULL == state) {
3306                 printk(KERN_INFO
3307                        "btrfsic: error, cannot find state information"
3308                        " on umount!\n");
3309                 mutex_unlock(&btrfsic_mutex);
3310                 return;
3311         }
3312
3313         /*
3314          * Don't care about keeping the lists' state up to date,
3315          * just free all memory that was allocated dynamically.
3316          * Free the blocks and the block_links.
3317          */
3318         list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
3319                 struct btrfsic_block *const b_all =
3320                     list_entry(elem_all, struct btrfsic_block,
3321                                all_blocks_node);
3322                 struct list_head *elem_ref_to;
3323                 struct list_head *tmp_ref_to;
3324
3325                 list_for_each_safe(elem_ref_to, tmp_ref_to,
3326                                    &b_all->ref_to_list) {
3327                         struct btrfsic_block_link *const l =
3328                             list_entry(elem_ref_to,
3329                                        struct btrfsic_block_link,
3330                                        node_ref_to);
3331
3332                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3333                                 btrfsic_print_rem_link(state, l);
3334
3335                         l->ref_cnt--;
3336                         if (0 == l->ref_cnt)
3337                                 btrfsic_block_link_free(l);
3338                 }
3339
3340                 if (b_all->is_iodone || b_all->never_written)
3341                         btrfsic_block_free(b_all);
3342                 else
3343                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3344                                " @%llu (%s/%llu/%d) on umount which is"
3345                                " not yet iodone!\n",
3346                                btrfsic_get_block_type(state, b_all),
3347                                (unsigned long long)b_all->logical_bytenr,
3348                                b_all->dev_state->name,
3349                                (unsigned long long)b_all->dev_bytenr,
3350                                b_all->mirror_num);
3351         }
3352
3353         mutex_unlock(&btrfsic_mutex);
3354
3355         kfree(state);
3356 }