]> rtime.felk.cvut.cz Git - linux-imx.git/blob - fs/btrfs/check-integrity.c
btrfs: define BTRFS_MAGIC as a u64 value
[linux-imx.git] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that bring the file
22  * system in an inconsistent state. In such a state, a power-loss
23  * or kernel panic event would cause that the data on disk is
24  * lost or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and no write error was indicated and a
41  *        FLUSH request to the device where these blocks are
42  *        located was received and completed.
43  *    2b. All referenced blocks need to have a generation
44  *        number which is equal to the parent's number.
45  *
46  * One issue that was found using this module was that the log
47  * tree on disk became temporarily corrupted because disk blocks
48  * that had been in use for the log tree had been freed and
49  * reused too early, while being referenced by the written super
50  * block.
51  *
52  * The search term in the kernel log that can be used to filter
53  * on the existence of detected integrity issues is
54  * "btrfs: attempt".
55  *
56  * The integrity check is enabled via mount options. These
57  * mount options are only supported if the integrity check
58  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
59  *
60  * Example #1, apply integrity checks to all metadata:
61  * mount /dev/sdb1 /mnt -o check_int
62  *
63  * Example #2, apply integrity checks to all metadata and
64  * to data extents:
65  * mount /dev/sdb1 /mnt -o check_int_data
66  *
67  * Example #3, apply integrity checks to all metadata and dump
68  * the tree that the super block references to kernel messages
69  * each time after a super block was written:
70  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
71  *
72  * If the integrity check tool is included and activated in
73  * the mount options, plenty of kernel memory is used, and
74  * plenty of additional CPU cycles are spent. Enabling this
75  * functionality is not intended for normal use. In most
76  * cases, unless you are a btrfs developer who needs to verify
77  * the integrity of (super)-block write requests, do not
78  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
79  * include and compile the integrity check tool.
80  */
81
82 #include <linux/sched.h>
83 #include <linux/slab.h>
84 #include <linux/buffer_head.h>
85 #include <linux/mutex.h>
86 #include <linux/crc32c.h>
87 #include <linux/genhd.h>
88 #include <linux/blkdev.h>
89 #include "ctree.h"
90 #include "disk-io.h"
91 #include "transaction.h"
92 #include "extent_io.h"
93 #include "volumes.h"
94 #include "print-tree.h"
95 #include "locking.h"
96 #include "check-integrity.h"
97 #include "rcu-string.h"
98
/*
 * Number of buckets in each of the three hashtables. The hash functions
 * in this file mask the hash value with (size - 1), so every size has to
 * remain a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
/*
 * Magic numbers stored in the bookkeeping objects (debug aid only).
 * NOTE(review): the stack frame magic is written in decimal while the
 * other three are hexadecimal -- presumably unintentional, but harmless
 * as long as the value is only compared against itself.
 */
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)    /* in characters,
                                                         * excluding " [...]" */
/* generation value of a block whose generation is not (yet) known */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
109
/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask
 * (e.g. check_int_print_mask=263, as in the example in the header of
 * this file). Each bit enables one category of kernel log output.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE                     0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION         0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE                  0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE                 0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH                        0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH                        0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE                              0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE                         0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE                         0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES                    0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                     0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES                           0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS                0x00001000
127
/* defined further down; struct btrfsic_block only needs pointers to them */
struct btrfsic_dev_state;
struct btrfsic_state;

/*
 * Bookkeeping record for one tracked disk block. Blocks are found in the
 * block hashtable by the pair (dev_state->bdev, dev_bytenr).
 */
struct btrfsic_block {
        u32 magic_num;          /* only used for debug purposes */
        unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
        unsigned int is_superblock:1;   /* if it is one of the superblocks */
        unsigned int is_iodone:1;       /* if is done by lower subsystem */
        unsigned int iodone_w_error:1;  /* error was indicated to endio */
        unsigned int never_written:1;   /* block was added because it was
                                         * referenced, not because it was
                                         * written */
        unsigned int mirror_num;        /* large enough to hold
                                         * BTRFS_SUPER_MIRROR_MAX */
        struct btrfsic_dev_state *dev_state;
        u64 dev_bytenr;         /* key, physical byte num on disk */
        u64 logical_bytenr;     /* logical byte num on disk */
        u64 generation;         /* BTRFSIC_GENERATION_UNKNOWN after init */
        struct btrfs_disk_key disk_key; /* extra info to print in case of
                                         * issues, will not always be correct */
        struct list_head collision_resolving_node;      /* list node */
        struct list_head all_blocks_node;       /* list node */

        /* the following two lists contain block_link items */
        struct list_head ref_to_list;   /* list */
        struct list_head ref_from_list; /* list */
        struct btrfsic_block *next_in_same_bio;
        void *orig_bio_bh_private;
        /* one member is used depending on whether a bio or a bh is tracked */
        union {
                bio_end_io_t *bio;
                bh_end_io_t *bh;
        } orig_bio_bh_end_io;
        int submit_bio_bh_rw;
        u64 flush_gen; /* only valid if !never_written */
};
163
/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referred to from multiple
 * blocks.
 * The key to look them up in the hashtable is the dev_bytenr (and the
 * block device) of the block that is referred to plus the one of the
 * block that is referred from.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
        u32 magic_num;          /* only used for debug purposes */
        u32 ref_cnt;            /* set to 1 by btrfsic_block_link_init() */
        struct list_head node_ref_to;   /* list node */
        struct list_head node_ref_from; /* list node */
        struct list_head collision_resolving_node;      /* list node */
        struct btrfsic_block *block_ref_to;
        struct btrfsic_block *block_ref_from;
        u64 parent_generation;
};
185
/*
 * State kept per block device. Instances are found in the dev_state
 * hashtable by their bdev pointer.
 */
struct btrfsic_dev_state {
        u32 magic_num;          /* only used for debug purposes */
        struct block_device *bdev;      /* key in the dev_state hashtable */
        struct btrfsic_state *state;
        struct list_head collision_resolving_node;      /* list node */
        /* embedded block, initialized with is_iodone = 1 and dev_state
         * pointing back to this object; NOTE(review): judging by the name
         * it stands in for FLUSH requests -- confirm against its users */
        struct btrfsic_block dummy_block_for_bio_bh_flush;
        u64 last_flush_gen;
        char name[BDEVNAME_SIZE];
};
195
/* Hashtable of btrfsic_block objects, one list head per bucket. */
struct btrfsic_block_hashtable {
        struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};
199
/* Hashtable of btrfsic_block_link objects, one list head per bucket. */
struct btrfsic_block_link_hashtable {
        struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};
203
/* Hashtable of btrfsic_dev_state objects, one list head per bucket. */
struct btrfsic_dev_state_hashtable {
        struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};
207
/*
 * Describes one block that has been mapped (see btrfsic_map_block() and
 * btrfsic_map_superblock()) together with the buffers for its contents.
 */
struct btrfsic_block_data_ctx {
        u64 start;              /* virtual bytenr */
        u64 dev_bytenr;         /* physical bytenr on device */
        u32 len;
        struct btrfsic_dev_state *dev;
        /* NOTE(review): presumably per-page data pointers and the pages
         * backing them, both hanging off mem_to_free -- confirm against
         * btrfsic_read_block() / btrfsic_release_block_ctx() */
        char **datav;
        struct page **pagev;
        void *mem_to_free;
};
217
/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock(). Frames are
 * chained through the prev pointer. */
struct btrfsic_stack_frame {
        u32 magic;
        u32 nr;
        int error;
        int i;
        int limit_nesting;
        int num_copies;
        int mirror_num;
        struct btrfsic_block *block;
        struct btrfsic_block_data_ctx *block_ctx;
        struct btrfsic_block *next_block;
        struct btrfsic_block_data_ctx next_block_ctx;
        struct btrfs_header *hdr;
        struct btrfsic_stack_frame *prev;
};
235
/* Some state per mounted filesystem */
struct btrfsic_state {
        u32 print_mask;         /* BTRFSIC_PRINT_MASK_* bits */
        int include_extent_data;
        int csum_size;          /* taken from the selected super block */
        struct list_head all_blocks_list;
        struct btrfsic_block_hashtable block_hashtable;
        struct btrfsic_block_link_hashtable block_link_hashtable;
        struct btrfs_root *root;
        u64 max_superblock_generation;
        /* NULL means that no super block has been found */
        struct btrfsic_block *latest_superblock;
        u32 metablock_size;
        u32 datablock_size;
};
250
/*
 * Forward declarations of the file-local helpers, so that the
 * definitions below can appear in any order.
 */

/* init / alloc / free of the three kinds of bookkeeping objects */
static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
/* hashtable init / add / remove / lookup for each kind of object */
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
                                        struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_hashtable_lookup(
                struct block_device *bdev,
                u64 dev_bytenr,
                struct btrfsic_block_hashtable *h);
static void btrfsic_block_link_hashtable_init(
                struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_add(
                struct btrfsic_block_link *l,
                struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
                struct block_device *bdev_ref_to,
                u64 dev_bytenr_ref_to,
                struct block_device *bdev_ref_from,
                u64 dev_bytenr_ref_from,
                struct btrfsic_block_link_hashtable *h);
static void btrfsic_dev_state_hashtable_init(
                struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_add(
                struct btrfsic_dev_state *ds,
                struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
                struct block_device *bdev,
                struct btrfsic_dev_state_hashtable *h);
/* the remaining helpers are defined further down in the file */
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
static int btrfsic_process_superblock(struct btrfsic_state *state,
                                      struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
                                     struct btrfsic_block *block,
                                     struct btrfsic_block_data_ctx *block_ctx,
                                     int limit_nesting, int force_iodone_flag);
static void btrfsic_read_from_block_data(
        struct btrfsic_block_data_ctx *block_ctx,
        void *dst, u32 offset, size_t len);
static int btrfsic_create_link_to_next_block(
                struct btrfsic_state *state,
                struct btrfsic_block *block,
                struct btrfsic_block_data_ctx
                *block_ctx, u64 next_bytenr,
                int limit_nesting,
                struct btrfsic_block_data_ctx *next_block_ctx,
                struct btrfsic_block **next_blockp,
                int force_iodone_flag,
                int *num_copiesp, int *mirror_nump,
                struct btrfs_disk_key *disk_key,
                u64 parent_generation);
static int btrfsic_handle_extent_data(struct btrfsic_state *state,
                                      struct btrfsic_block *block,
                                      struct btrfsic_block_data_ctx *block_ctx,
                                      u32 item_offset, int force_iodone_flag);
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
                             struct btrfsic_block_data_ctx *block_ctx_out,
                             int mirror_num);
static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
                                  u32 len, struct block_device *bdev,
                                  struct btrfsic_block_data_ctx *block_ctx_out);
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
                              struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
                                     char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
                                          u64 dev_bytenr, char **mapped_datav,
                                          unsigned int num_pages,
                                          struct bio *bio, int *bio_is_patched,
                                          struct buffer_head *bh,
                                          int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
                struct btrfsic_state *state,
                struct btrfsic_block *const block,
                struct btrfs_super_block *const super_hdr);
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
                                              const struct btrfsic_block *block,
                                              int recursion_level);
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                                        struct btrfsic_block *const block,
                                        int recursion_level);
static void btrfsic_print_add_link(const struct btrfsic_state *state,
                                   const struct btrfsic_block_link *l);
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
                                   const struct btrfsic_block_link *l);
static char btrfsic_get_block_type(const struct btrfsic_state *state,
                                   const struct btrfsic_block *block);
static void btrfsic_dump_tree(const struct btrfsic_state *state);
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
                                  const struct btrfsic_block *block,
                                  int indent_level);
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
                struct btrfsic_state *state,
                struct btrfsic_block_data_ctx *next_block_ctx,
                struct btrfsic_block *next_block,
                struct btrfsic_block *from_block,
                u64 parent_generation);
static struct btrfsic_block *btrfsic_block_lookup_or_add(
                struct btrfsic_state *state,
                struct btrfsic_block_data_ctx *block_ctx,
                const char *additional_string,
                int is_metadata,
                int is_iodone,
                int never_written,
                int mirror_num,
                int *was_created);
static int btrfsic_process_superblock_dev_mirror(
                struct btrfsic_state *state,
                struct btrfsic_dev_state *dev_state,
                struct btrfs_device *device,
                int superblock_mirror_num,
                struct btrfsic_dev_state **selected_dev_state,
                struct btrfs_super_block *selected_super);
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
                struct block_device *bdev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
                                           u64 bytenr,
                                           struct btrfsic_dev_state *dev_state,
                                           u64 dev_bytenr);
385
/*
 * File-scope state shared by all mounted filesystems.
 * NOTE(review): btrfsic_mutex presumably serializes access to the data
 * structures of this file and btrfsic_is_initialized presumably guards
 * their one-time setup; the users are outside this excerpt -- confirm.
 */
static struct mutex btrfsic_mutex;
static int btrfsic_is_initialized;
/* maps a struct block_device pointer to its btrfsic_dev_state */
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
389
390
391 static void btrfsic_block_init(struct btrfsic_block *b)
392 {
393         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
394         b->dev_state = NULL;
395         b->dev_bytenr = 0;
396         b->logical_bytenr = 0;
397         b->generation = BTRFSIC_GENERATION_UNKNOWN;
398         b->disk_key.objectid = 0;
399         b->disk_key.type = 0;
400         b->disk_key.offset = 0;
401         b->is_metadata = 0;
402         b->is_superblock = 0;
403         b->is_iodone = 0;
404         b->iodone_w_error = 0;
405         b->never_written = 0;
406         b->mirror_num = 0;
407         b->next_in_same_bio = NULL;
408         b->orig_bio_bh_private = NULL;
409         b->orig_bio_bh_end_io.bio = NULL;
410         INIT_LIST_HEAD(&b->collision_resolving_node);
411         INIT_LIST_HEAD(&b->all_blocks_node);
412         INIT_LIST_HEAD(&b->ref_to_list);
413         INIT_LIST_HEAD(&b->ref_from_list);
414         b->submit_bio_bh_rw = 0;
415         b->flush_gen = 0;
416 }
417
418 static struct btrfsic_block *btrfsic_block_alloc(void)
419 {
420         struct btrfsic_block *b;
421
422         b = kzalloc(sizeof(*b), GFP_NOFS);
423         if (NULL != b)
424                 btrfsic_block_init(b);
425
426         return b;
427 }
428
429 static void btrfsic_block_free(struct btrfsic_block *b)
430 {
431         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
432         kfree(b);
433 }
434
435 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
436 {
437         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
438         l->ref_cnt = 1;
439         INIT_LIST_HEAD(&l->node_ref_to);
440         INIT_LIST_HEAD(&l->node_ref_from);
441         INIT_LIST_HEAD(&l->collision_resolving_node);
442         l->block_ref_to = NULL;
443         l->block_ref_from = NULL;
444 }
445
446 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
447 {
448         struct btrfsic_block_link *l;
449
450         l = kzalloc(sizeof(*l), GFP_NOFS);
451         if (NULL != l)
452                 btrfsic_block_link_init(l);
453
454         return l;
455 }
456
457 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
458 {
459         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
460         kfree(l);
461 }
462
463 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
464 {
465         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
466         ds->bdev = NULL;
467         ds->state = NULL;
468         ds->name[0] = '\0';
469         INIT_LIST_HEAD(&ds->collision_resolving_node);
470         ds->last_flush_gen = 0;
471         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
472         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
473         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
474 }
475
476 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
477 {
478         struct btrfsic_dev_state *ds;
479
480         ds = kzalloc(sizeof(*ds), GFP_NOFS);
481         if (NULL != ds)
482                 btrfsic_dev_state_init(ds);
483
484         return ds;
485 }
486
487 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
488 {
489         BUG_ON(!(NULL == ds ||
490                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
491         kfree(ds);
492 }
493
494 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
495 {
496         int i;
497
498         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
499                 INIT_LIST_HEAD(h->table + i);
500 }
501
502 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
503                                         struct btrfsic_block_hashtable *h)
504 {
505         const unsigned int hashval =
506             (((unsigned int)(b->dev_bytenr >> 16)) ^
507              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
508              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
509
510         list_add(&b->collision_resolving_node, h->table + hashval);
511 }
512
513 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
514 {
515         list_del(&b->collision_resolving_node);
516 }
517
518 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
519                 struct block_device *bdev,
520                 u64 dev_bytenr,
521                 struct btrfsic_block_hashtable *h)
522 {
523         const unsigned int hashval =
524             (((unsigned int)(dev_bytenr >> 16)) ^
525              ((unsigned int)((uintptr_t)bdev))) &
526              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
527         struct list_head *elem;
528
529         list_for_each(elem, h->table + hashval) {
530                 struct btrfsic_block *const b =
531                     list_entry(elem, struct btrfsic_block,
532                                collision_resolving_node);
533
534                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
535                         return b;
536         }
537
538         return NULL;
539 }
540
541 static void btrfsic_block_link_hashtable_init(
542                 struct btrfsic_block_link_hashtable *h)
543 {
544         int i;
545
546         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
547                 INIT_LIST_HEAD(h->table + i);
548 }
549
550 static void btrfsic_block_link_hashtable_add(
551                 struct btrfsic_block_link *l,
552                 struct btrfsic_block_link_hashtable *h)
553 {
554         const unsigned int hashval =
555             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
556              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
557              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
558              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
559              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
560
561         BUG_ON(NULL == l->block_ref_to);
562         BUG_ON(NULL == l->block_ref_from);
563         list_add(&l->collision_resolving_node, h->table + hashval);
564 }
565
566 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
567 {
568         list_del(&l->collision_resolving_node);
569 }
570
571 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
572                 struct block_device *bdev_ref_to,
573                 u64 dev_bytenr_ref_to,
574                 struct block_device *bdev_ref_from,
575                 u64 dev_bytenr_ref_from,
576                 struct btrfsic_block_link_hashtable *h)
577 {
578         const unsigned int hashval =
579             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
580              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
581              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
582              ((unsigned int)((uintptr_t)bdev_ref_from))) &
583              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
584         struct list_head *elem;
585
586         list_for_each(elem, h->table + hashval) {
587                 struct btrfsic_block_link *const l =
588                     list_entry(elem, struct btrfsic_block_link,
589                                collision_resolving_node);
590
591                 BUG_ON(NULL == l->block_ref_to);
592                 BUG_ON(NULL == l->block_ref_from);
593                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
594                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
595                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
596                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
597                         return l;
598         }
599
600         return NULL;
601 }
602
603 static void btrfsic_dev_state_hashtable_init(
604                 struct btrfsic_dev_state_hashtable *h)
605 {
606         int i;
607
608         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
609                 INIT_LIST_HEAD(h->table + i);
610 }
611
612 static void btrfsic_dev_state_hashtable_add(
613                 struct btrfsic_dev_state *ds,
614                 struct btrfsic_dev_state_hashtable *h)
615 {
616         const unsigned int hashval =
617             (((unsigned int)((uintptr_t)ds->bdev)) &
618              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
619
620         list_add(&ds->collision_resolving_node, h->table + hashval);
621 }
622
623 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
624 {
625         list_del(&ds->collision_resolving_node);
626 }
627
628 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
629                 struct block_device *bdev,
630                 struct btrfsic_dev_state_hashtable *h)
631 {
632         const unsigned int hashval =
633             (((unsigned int)((uintptr_t)bdev)) &
634              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
635         struct list_head *elem;
636
637         list_for_each(elem, h->table + hashval) {
638                 struct btrfsic_dev_state *const ds =
639                     list_entry(elem, struct btrfsic_dev_state,
640                                collision_resolving_node);
641
642                 if (ds->bdev == bdev)
643                         return ds;
644         }
645
646         return NULL;
647 }
648
649 static int btrfsic_process_superblock(struct btrfsic_state *state,
650                                       struct btrfs_fs_devices *fs_devices)
651 {
652         int ret = 0;
653         struct btrfs_super_block *selected_super;
654         struct list_head *dev_head = &fs_devices->devices;
655         struct btrfs_device *device;
656         struct btrfsic_dev_state *selected_dev_state = NULL;
657         int pass;
658
659         BUG_ON(NULL == state);
660         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
661         if (NULL == selected_super) {
662                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
663                 return -1;
664         }
665
666         list_for_each_entry(device, dev_head, dev_list) {
667                 int i;
668                 struct btrfsic_dev_state *dev_state;
669
670                 if (!device->bdev || !device->name)
671                         continue;
672
673                 dev_state = btrfsic_dev_state_lookup(device->bdev);
674                 BUG_ON(NULL == dev_state);
675                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
676                         ret = btrfsic_process_superblock_dev_mirror(
677                                         state, dev_state, device, i,
678                                         &selected_dev_state, selected_super);
679                         if (0 != ret && 0 == i) {
680                                 kfree(selected_super);
681                                 return ret;
682                         }
683                 }
684         }
685
686         if (NULL == state->latest_superblock) {
687                 printk(KERN_INFO "btrfsic: no superblock found!\n");
688                 kfree(selected_super);
689                 return -1;
690         }
691
692         state->csum_size = btrfs_super_csum_size(selected_super);
693
694         for (pass = 0; pass < 3; pass++) {
695                 int num_copies;
696                 int mirror_num;
697                 u64 next_bytenr;
698
699                 switch (pass) {
700                 case 0:
701                         next_bytenr = btrfs_super_root(selected_super);
702                         if (state->print_mask &
703                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
704                                 printk(KERN_INFO "root@%llu\n",
705                                        (unsigned long long)next_bytenr);
706                         break;
707                 case 1:
708                         next_bytenr = btrfs_super_chunk_root(selected_super);
709                         if (state->print_mask &
710                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
711                                 printk(KERN_INFO "chunk@%llu\n",
712                                        (unsigned long long)next_bytenr);
713                         break;
714                 case 2:
715                         next_bytenr = btrfs_super_log_root(selected_super);
716                         if (0 == next_bytenr)
717                                 continue;
718                         if (state->print_mask &
719                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
720                                 printk(KERN_INFO "log@%llu\n",
721                                        (unsigned long long)next_bytenr);
722                         break;
723                 }
724
725                 num_copies =
726                     btrfs_num_copies(state->root->fs_info,
727                                      next_bytenr, state->metablock_size);
728                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
729                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
730                                (unsigned long long)next_bytenr, num_copies);
731
732                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
733                         struct btrfsic_block *next_block;
734                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
735                         struct btrfsic_block_link *l;
736
737                         ret = btrfsic_map_block(state, next_bytenr,
738                                                 state->metablock_size,
739                                                 &tmp_next_block_ctx,
740                                                 mirror_num);
741                         if (ret) {
742                                 printk(KERN_INFO "btrfsic:"
743                                        " btrfsic_map_block(root @%llu,"
744                                        " mirror %d) failed!\n",
745                                        (unsigned long long)next_bytenr,
746                                        mirror_num);
747                                 kfree(selected_super);
748                                 return -1;
749                         }
750
751                         next_block = btrfsic_block_hashtable_lookup(
752                                         tmp_next_block_ctx.dev->bdev,
753                                         tmp_next_block_ctx.dev_bytenr,
754                                         &state->block_hashtable);
755                         BUG_ON(NULL == next_block);
756
757                         l = btrfsic_block_link_hashtable_lookup(
758                                         tmp_next_block_ctx.dev->bdev,
759                                         tmp_next_block_ctx.dev_bytenr,
760                                         state->latest_superblock->dev_state->
761                                         bdev,
762                                         state->latest_superblock->dev_bytenr,
763                                         &state->block_link_hashtable);
764                         BUG_ON(NULL == l);
765
766                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
767                         if (ret < (int)PAGE_CACHE_SIZE) {
768                                 printk(KERN_INFO
769                                        "btrfsic: read @logical %llu failed!\n",
770                                        (unsigned long long)
771                                        tmp_next_block_ctx.start);
772                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
773                                 kfree(selected_super);
774                                 return -1;
775                         }
776
777                         ret = btrfsic_process_metablock(state,
778                                                         next_block,
779                                                         &tmp_next_block_ctx,
780                                                         BTRFS_MAX_LEVEL + 3, 1);
781                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
782                 }
783         }
784
785         kfree(selected_super);
786         return ret;
787 }
788
789 static int btrfsic_process_superblock_dev_mirror(
790                 struct btrfsic_state *state,
791                 struct btrfsic_dev_state *dev_state,
792                 struct btrfs_device *device,
793                 int superblock_mirror_num,
794                 struct btrfsic_dev_state **selected_dev_state,
795                 struct btrfs_super_block *selected_super)
796 {
797         struct btrfs_super_block *super_tmp;
798         u64 dev_bytenr;
799         struct buffer_head *bh;
800         struct btrfsic_block *superblock_tmp;
801         int pass;
802         struct block_device *const superblock_bdev = device->bdev;
803
804         /* super block bytenr is always the unmapped device bytenr */
805         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
806         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
807                 return -1;
808         bh = __bread(superblock_bdev, dev_bytenr / 4096,
809                      BTRFS_SUPER_INFO_SIZE);
810         if (NULL == bh)
811                 return -1;
812         super_tmp = (struct btrfs_super_block *)
813             (bh->b_data + (dev_bytenr & 4095));
814
815         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
816             super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
817             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
818             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
819             btrfs_super_leafsize(super_tmp) != state->metablock_size ||
820             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
821                 brelse(bh);
822                 return 0;
823         }
824
825         superblock_tmp =
826             btrfsic_block_hashtable_lookup(superblock_bdev,
827                                            dev_bytenr,
828                                            &state->block_hashtable);
829         if (NULL == superblock_tmp) {
830                 superblock_tmp = btrfsic_block_alloc();
831                 if (NULL == superblock_tmp) {
832                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
833                         brelse(bh);
834                         return -1;
835                 }
836                 /* for superblock, only the dev_bytenr makes sense */
837                 superblock_tmp->dev_bytenr = dev_bytenr;
838                 superblock_tmp->dev_state = dev_state;
839                 superblock_tmp->logical_bytenr = dev_bytenr;
840                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
841                 superblock_tmp->is_metadata = 1;
842                 superblock_tmp->is_superblock = 1;
843                 superblock_tmp->is_iodone = 1;
844                 superblock_tmp->never_written = 0;
845                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
846                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
847                         printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
848                                      " @%llu (%s/%llu/%d)\n",
849                                      superblock_bdev,
850                                      rcu_str_deref(device->name),
851                                      (unsigned long long)dev_bytenr,
852                                      dev_state->name,
853                                      (unsigned long long)dev_bytenr,
854                                      superblock_mirror_num);
855                 list_add(&superblock_tmp->all_blocks_node,
856                          &state->all_blocks_list);
857                 btrfsic_block_hashtable_add(superblock_tmp,
858                                             &state->block_hashtable);
859         }
860
861         /* select the one with the highest generation field */
862         if (btrfs_super_generation(super_tmp) >
863             state->max_superblock_generation ||
864             0 == state->max_superblock_generation) {
865                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
866                 *selected_dev_state = dev_state;
867                 state->max_superblock_generation =
868                     btrfs_super_generation(super_tmp);
869                 state->latest_superblock = superblock_tmp;
870         }
871
872         for (pass = 0; pass < 3; pass++) {
873                 u64 next_bytenr;
874                 int num_copies;
875                 int mirror_num;
876                 const char *additional_string = NULL;
877                 struct btrfs_disk_key tmp_disk_key;
878
879                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
880                 tmp_disk_key.offset = 0;
881                 switch (pass) {
882                 case 0:
883                         tmp_disk_key.objectid =
884                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
885                         additional_string = "initial root ";
886                         next_bytenr = btrfs_super_root(super_tmp);
887                         break;
888                 case 1:
889                         tmp_disk_key.objectid =
890                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
891                         additional_string = "initial chunk ";
892                         next_bytenr = btrfs_super_chunk_root(super_tmp);
893                         break;
894                 case 2:
895                         tmp_disk_key.objectid =
896                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
897                         additional_string = "initial log ";
898                         next_bytenr = btrfs_super_log_root(super_tmp);
899                         if (0 == next_bytenr)
900                                 continue;
901                         break;
902                 }
903
904                 num_copies =
905                     btrfs_num_copies(state->root->fs_info,
906                                      next_bytenr, state->metablock_size);
907                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
908                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
909                                (unsigned long long)next_bytenr, num_copies);
910                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
911                         struct btrfsic_block *next_block;
912                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
913                         struct btrfsic_block_link *l;
914
915                         if (btrfsic_map_block(state, next_bytenr,
916                                               state->metablock_size,
917                                               &tmp_next_block_ctx,
918                                               mirror_num)) {
919                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
920                                        "bytenr @%llu, mirror %d) failed!\n",
921                                        (unsigned long long)next_bytenr,
922                                        mirror_num);
923                                 brelse(bh);
924                                 return -1;
925                         }
926
927                         next_block = btrfsic_block_lookup_or_add(
928                                         state, &tmp_next_block_ctx,
929                                         additional_string, 1, 1, 0,
930                                         mirror_num, NULL);
931                         if (NULL == next_block) {
932                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
933                                 brelse(bh);
934                                 return -1;
935                         }
936
937                         next_block->disk_key = tmp_disk_key;
938                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
939                         l = btrfsic_block_link_lookup_or_add(
940                                         state, &tmp_next_block_ctx,
941                                         next_block, superblock_tmp,
942                                         BTRFSIC_GENERATION_UNKNOWN);
943                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
944                         if (NULL == l) {
945                                 brelse(bh);
946                                 return -1;
947                         }
948                 }
949         }
950         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
951                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
952
953         brelse(bh);
954         return 0;
955 }
956
957 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
958 {
959         struct btrfsic_stack_frame *sf;
960
961         sf = kzalloc(sizeof(*sf), GFP_NOFS);
962         if (NULL == sf)
963                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
964         else
965                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
966         return sf;
967 }
968
969 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
970 {
971         BUG_ON(!(NULL == sf ||
972                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
973         kfree(sf);
974 }
975
976 static int btrfsic_process_metablock(
977                 struct btrfsic_state *state,
978                 struct btrfsic_block *const first_block,
979                 struct btrfsic_block_data_ctx *const first_block_ctx,
980                 int first_limit_nesting, int force_iodone_flag)
981 {
982         struct btrfsic_stack_frame initial_stack_frame = { 0 };
983         struct btrfsic_stack_frame *sf;
984         struct btrfsic_stack_frame *next_stack;
985         struct btrfs_header *const first_hdr =
986                 (struct btrfs_header *)first_block_ctx->datav[0];
987
988         BUG_ON(!first_hdr);
989         sf = &initial_stack_frame;
990         sf->error = 0;
991         sf->i = -1;
992         sf->limit_nesting = first_limit_nesting;
993         sf->block = first_block;
994         sf->block_ctx = first_block_ctx;
995         sf->next_block = NULL;
996         sf->hdr = first_hdr;
997         sf->prev = NULL;
998
999 continue_with_new_stack_frame:
1000         sf->block->generation = le64_to_cpu(sf->hdr->generation);
1001         if (0 == sf->hdr->level) {
1002                 struct btrfs_leaf *const leafhdr =
1003                     (struct btrfs_leaf *)sf->hdr;
1004
1005                 if (-1 == sf->i) {
1006                         sf->nr = le32_to_cpu(leafhdr->header.nritems);
1007
1008                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1009                                 printk(KERN_INFO
1010                                        "leaf %llu items %d generation %llu"
1011                                        " owner %llu\n",
1012                                        (unsigned long long)
1013                                        sf->block_ctx->start,
1014                                        sf->nr,
1015                                        (unsigned long long)
1016                                        le64_to_cpu(leafhdr->header.generation),
1017                                        (unsigned long long)
1018                                        le64_to_cpu(leafhdr->header.owner));
1019                 }
1020
1021 continue_with_current_leaf_stack_frame:
1022                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1023                         sf->i++;
1024                         sf->num_copies = 0;
1025                 }
1026
1027                 if (sf->i < sf->nr) {
1028                         struct btrfs_item disk_item;
1029                         u32 disk_item_offset =
1030                                 (uintptr_t)(leafhdr->items + sf->i) -
1031                                 (uintptr_t)leafhdr;
1032                         struct btrfs_disk_key *disk_key;
1033                         u8 type;
1034                         u32 item_offset;
1035                         u32 item_size;
1036
1037                         if (disk_item_offset + sizeof(struct btrfs_item) >
1038                             sf->block_ctx->len) {
1039 leaf_item_out_of_bounce_error:
1040                                 printk(KERN_INFO
1041                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1042                                        sf->block_ctx->start,
1043                                        sf->block_ctx->dev->name);
1044                                 goto one_stack_frame_backwards;
1045                         }
1046                         btrfsic_read_from_block_data(sf->block_ctx,
1047                                                      &disk_item,
1048                                                      disk_item_offset,
1049                                                      sizeof(struct btrfs_item));
1050                         item_offset = le32_to_cpu(disk_item.offset);
1051                         item_size = le32_to_cpu(disk_item.size);
1052                         disk_key = &disk_item.key;
1053                         type = disk_key->type;
1054
1055                         if (BTRFS_ROOT_ITEM_KEY == type) {
1056                                 struct btrfs_root_item root_item;
1057                                 u32 root_item_offset;
1058                                 u64 next_bytenr;
1059
1060                                 root_item_offset = item_offset +
1061                                         offsetof(struct btrfs_leaf, items);
1062                                 if (root_item_offset + item_size >
1063                                     sf->block_ctx->len)
1064                                         goto leaf_item_out_of_bounce_error;
1065                                 btrfsic_read_from_block_data(
1066                                         sf->block_ctx, &root_item,
1067                                         root_item_offset,
1068                                         item_size);
1069                                 next_bytenr = le64_to_cpu(root_item.bytenr);
1070
1071                                 sf->error =
1072                                     btrfsic_create_link_to_next_block(
1073                                                 state,
1074                                                 sf->block,
1075                                                 sf->block_ctx,
1076                                                 next_bytenr,
1077                                                 sf->limit_nesting,
1078                                                 &sf->next_block_ctx,
1079                                                 &sf->next_block,
1080                                                 force_iodone_flag,
1081                                                 &sf->num_copies,
1082                                                 &sf->mirror_num,
1083                                                 disk_key,
1084                                                 le64_to_cpu(root_item.
1085                                                 generation));
1086                                 if (sf->error)
1087                                         goto one_stack_frame_backwards;
1088
1089                                 if (NULL != sf->next_block) {
1090                                         struct btrfs_header *const next_hdr =
1091                                             (struct btrfs_header *)
1092                                             sf->next_block_ctx.datav[0];
1093
1094                                         next_stack =
1095                                             btrfsic_stack_frame_alloc();
1096                                         if (NULL == next_stack) {
1097                                                 btrfsic_release_block_ctx(
1098                                                                 &sf->
1099                                                                 next_block_ctx);
1100                                                 goto one_stack_frame_backwards;
1101                                         }
1102
1103                                         next_stack->i = -1;
1104                                         next_stack->block = sf->next_block;
1105                                         next_stack->block_ctx =
1106                                             &sf->next_block_ctx;
1107                                         next_stack->next_block = NULL;
1108                                         next_stack->hdr = next_hdr;
1109                                         next_stack->limit_nesting =
1110                                             sf->limit_nesting - 1;
1111                                         next_stack->prev = sf;
1112                                         sf = next_stack;
1113                                         goto continue_with_new_stack_frame;
1114                                 }
1115                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1116                                    state->include_extent_data) {
1117                                 sf->error = btrfsic_handle_extent_data(
1118                                                 state,
1119                                                 sf->block,
1120                                                 sf->block_ctx,
1121                                                 item_offset,
1122                                                 force_iodone_flag);
1123                                 if (sf->error)
1124                                         goto one_stack_frame_backwards;
1125                         }
1126
1127                         goto continue_with_current_leaf_stack_frame;
1128                 }
1129         } else {
1130                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1131
1132                 if (-1 == sf->i) {
1133                         sf->nr = le32_to_cpu(nodehdr->header.nritems);
1134
1135                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1136                                 printk(KERN_INFO "node %llu level %d items %d"
1137                                        " generation %llu owner %llu\n",
1138                                        (unsigned long long)
1139                                        sf->block_ctx->start,
1140                                        nodehdr->header.level, sf->nr,
1141                                        (unsigned long long)
1142                                        le64_to_cpu(nodehdr->header.generation),
1143                                        (unsigned long long)
1144                                        le64_to_cpu(nodehdr->header.owner));
1145                 }
1146
1147 continue_with_current_node_stack_frame:
1148                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1149                         sf->i++;
1150                         sf->num_copies = 0;
1151                 }
1152
1153                 if (sf->i < sf->nr) {
1154                         struct btrfs_key_ptr key_ptr;
1155                         u32 key_ptr_offset;
1156                         u64 next_bytenr;
1157
1158                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1159                                           (uintptr_t)nodehdr;
1160                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1161                             sf->block_ctx->len) {
1162                                 printk(KERN_INFO
1163                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1164                                        sf->block_ctx->start,
1165                                        sf->block_ctx->dev->name);
1166                                 goto one_stack_frame_backwards;
1167                         }
1168                         btrfsic_read_from_block_data(
1169                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1170                                 sizeof(struct btrfs_key_ptr));
1171                         next_bytenr = le64_to_cpu(key_ptr.blockptr);
1172
1173                         sf->error = btrfsic_create_link_to_next_block(
1174                                         state,
1175                                         sf->block,
1176                                         sf->block_ctx,
1177                                         next_bytenr,
1178                                         sf->limit_nesting,
1179                                         &sf->next_block_ctx,
1180                                         &sf->next_block,
1181                                         force_iodone_flag,
1182                                         &sf->num_copies,
1183                                         &sf->mirror_num,
1184                                         &key_ptr.key,
1185                                         le64_to_cpu(key_ptr.generation));
1186                         if (sf->error)
1187                                 goto one_stack_frame_backwards;
1188
1189                         if (NULL != sf->next_block) {
1190                                 struct btrfs_header *const next_hdr =
1191                                     (struct btrfs_header *)
1192                                     sf->next_block_ctx.datav[0];
1193
1194                                 next_stack = btrfsic_stack_frame_alloc();
1195                                 if (NULL == next_stack)
1196                                         goto one_stack_frame_backwards;
1197
1198                                 next_stack->i = -1;
1199                                 next_stack->block = sf->next_block;
1200                                 next_stack->block_ctx = &sf->next_block_ctx;
1201                                 next_stack->next_block = NULL;
1202                                 next_stack->hdr = next_hdr;
1203                                 next_stack->limit_nesting =
1204                                     sf->limit_nesting - 1;
1205                                 next_stack->prev = sf;
1206                                 sf = next_stack;
1207                                 goto continue_with_new_stack_frame;
1208                         }
1209
1210                         goto continue_with_current_node_stack_frame;
1211                 }
1212         }
1213
1214 one_stack_frame_backwards:
1215         if (NULL != sf->prev) {
1216                 struct btrfsic_stack_frame *const prev = sf->prev;
1217
1218                 /* the one for the initial block is freed in the caller */
1219                 btrfsic_release_block_ctx(sf->block_ctx);
1220
1221                 if (sf->error) {
1222                         prev->error = sf->error;
1223                         btrfsic_stack_frame_free(sf);
1224                         sf = prev;
1225                         goto one_stack_frame_backwards;
1226                 }
1227
1228                 btrfsic_stack_frame_free(sf);
1229                 sf = prev;
1230                 goto continue_with_new_stack_frame;
1231         } else {
1232                 BUG_ON(&initial_stack_frame != sf);
1233         }
1234
1235         return sf->error;
1236 }
1237
1238 static void btrfsic_read_from_block_data(
1239         struct btrfsic_block_data_ctx *block_ctx,
1240         void *dstv, u32 offset, size_t len)
1241 {
1242         size_t cur;
1243         size_t offset_in_page;
1244         char *kaddr;
1245         char *dst = (char *)dstv;
1246         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1247         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1248
1249         WARN_ON(offset + len > block_ctx->len);
1250         offset_in_page = (start_offset + offset) &
1251                          ((unsigned long)PAGE_CACHE_SIZE - 1);
1252
1253         while (len > 0) {
1254                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1255                 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1256                             PAGE_CACHE_SHIFT);
1257                 kaddr = block_ctx->datav[i];
1258                 memcpy(dst, kaddr + offset_in_page, cur);
1259
1260                 dst += cur;
1261                 len -= cur;
1262                 offset_in_page = 0;
1263                 i++;
1264         }
1265 }
1266
1267 static int btrfsic_create_link_to_next_block(
1268                 struct btrfsic_state *state,
1269                 struct btrfsic_block *block,
1270                 struct btrfsic_block_data_ctx *block_ctx,
1271                 u64 next_bytenr,
1272                 int limit_nesting,
1273                 struct btrfsic_block_data_ctx *next_block_ctx,
1274                 struct btrfsic_block **next_blockp,
1275                 int force_iodone_flag,
1276                 int *num_copiesp, int *mirror_nump,
1277                 struct btrfs_disk_key *disk_key,
1278                 u64 parent_generation)
1279 {
1280         struct btrfsic_block *next_block = NULL;
1281         int ret;
1282         struct btrfsic_block_link *l;
1283         int did_alloc_block_link;
1284         int block_was_created;
1285
1286         *next_blockp = NULL;
1287         if (0 == *num_copiesp) {
1288                 *num_copiesp =
1289                     btrfs_num_copies(state->root->fs_info,
1290                                      next_bytenr, state->metablock_size);
1291                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1292                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1293                                (unsigned long long)next_bytenr, *num_copiesp);
1294                 *mirror_nump = 1;
1295         }
1296
1297         if (*mirror_nump > *num_copiesp)
1298                 return 0;
1299
1300         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1301                 printk(KERN_INFO
1302                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1303                        *mirror_nump);
1304         ret = btrfsic_map_block(state, next_bytenr,
1305                                 state->metablock_size,
1306                                 next_block_ctx, *mirror_nump);
1307         if (ret) {
1308                 printk(KERN_INFO
1309                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1310                        (unsigned long long)next_bytenr, *mirror_nump);
1311                 btrfsic_release_block_ctx(next_block_ctx);
1312                 *next_blockp = NULL;
1313                 return -1;
1314         }
1315
1316         next_block = btrfsic_block_lookup_or_add(state,
1317                                                  next_block_ctx, "referenced ",
1318                                                  1, force_iodone_flag,
1319                                                  !force_iodone_flag,
1320                                                  *mirror_nump,
1321                                                  &block_was_created);
1322         if (NULL == next_block) {
1323                 btrfsic_release_block_ctx(next_block_ctx);
1324                 *next_blockp = NULL;
1325                 return -1;
1326         }
1327         if (block_was_created) {
1328                 l = NULL;
1329                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1330         } else {
1331                 if (next_block->logical_bytenr != next_bytenr &&
1332                     !(!next_block->is_metadata &&
1333                       0 == next_block->logical_bytenr)) {
1334                         printk(KERN_INFO
1335                                "Referenced block @%llu (%s/%llu/%d)"
1336                                " found in hash table, %c,"
1337                                " bytenr mismatch (!= stored %llu).\n",
1338                                (unsigned long long)next_bytenr,
1339                                next_block_ctx->dev->name,
1340                                (unsigned long long)next_block_ctx->dev_bytenr,
1341                                *mirror_nump,
1342                                btrfsic_get_block_type(state, next_block),
1343                                (unsigned long long)next_block->logical_bytenr);
1344                 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1345                         printk(KERN_INFO
1346                                "Referenced block @%llu (%s/%llu/%d)"
1347                                " found in hash table, %c.\n",
1348                                (unsigned long long)next_bytenr,
1349                                next_block_ctx->dev->name,
1350                                (unsigned long long)next_block_ctx->dev_bytenr,
1351                                *mirror_nump,
1352                                btrfsic_get_block_type(state, next_block));
1353                 next_block->logical_bytenr = next_bytenr;
1354
1355                 next_block->mirror_num = *mirror_nump;
1356                 l = btrfsic_block_link_hashtable_lookup(
1357                                 next_block_ctx->dev->bdev,
1358                                 next_block_ctx->dev_bytenr,
1359                                 block_ctx->dev->bdev,
1360                                 block_ctx->dev_bytenr,
1361                                 &state->block_link_hashtable);
1362         }
1363
1364         next_block->disk_key = *disk_key;
1365         if (NULL == l) {
1366                 l = btrfsic_block_link_alloc();
1367                 if (NULL == l) {
1368                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1369                         btrfsic_release_block_ctx(next_block_ctx);
1370                         *next_blockp = NULL;
1371                         return -1;
1372                 }
1373
1374                 did_alloc_block_link = 1;
1375                 l->block_ref_to = next_block;
1376                 l->block_ref_from = block;
1377                 l->ref_cnt = 1;
1378                 l->parent_generation = parent_generation;
1379
1380                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1381                         btrfsic_print_add_link(state, l);
1382
1383                 list_add(&l->node_ref_to, &block->ref_to_list);
1384                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1385
1386                 btrfsic_block_link_hashtable_add(l,
1387                                                  &state->block_link_hashtable);
1388         } else {
1389                 did_alloc_block_link = 0;
1390                 if (0 == limit_nesting) {
1391                         l->ref_cnt++;
1392                         l->parent_generation = parent_generation;
1393                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1394                                 btrfsic_print_add_link(state, l);
1395                 }
1396         }
1397
1398         if (limit_nesting > 0 && did_alloc_block_link) {
1399                 ret = btrfsic_read_block(state, next_block_ctx);
1400                 if (ret < (int)next_block_ctx->len) {
1401                         printk(KERN_INFO
1402                                "btrfsic: read block @logical %llu failed!\n",
1403                                (unsigned long long)next_bytenr);
1404                         btrfsic_release_block_ctx(next_block_ctx);
1405                         *next_blockp = NULL;
1406                         return -1;
1407                 }
1408
1409                 *next_blockp = next_block;
1410         } else {
1411                 *next_blockp = NULL;
1412         }
1413         (*mirror_nump)++;
1414
1415         return 0;
1416 }
1417
1418 static int btrfsic_handle_extent_data(
1419                 struct btrfsic_state *state,
1420                 struct btrfsic_block *block,
1421                 struct btrfsic_block_data_ctx *block_ctx,
1422                 u32 item_offset, int force_iodone_flag)
1423 {
1424         int ret;
1425         struct btrfs_file_extent_item file_extent_item;
1426         u64 file_extent_item_offset;
1427         u64 next_bytenr;
1428         u64 num_bytes;
1429         u64 generation;
1430         struct btrfsic_block_link *l;
1431
1432         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1433                                   item_offset;
1434         if (file_extent_item_offset +
1435             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1436             block_ctx->len) {
1437                 printk(KERN_INFO
1438                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1439                        block_ctx->start, block_ctx->dev->name);
1440                 return -1;
1441         }
1442
1443         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1444                 file_extent_item_offset,
1445                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1446         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1447             ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1448                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1449                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1450                                file_extent_item.type,
1451                                (unsigned long long)
1452                                le64_to_cpu(file_extent_item.disk_bytenr));
1453                 return 0;
1454         }
1455
1456         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1457             block_ctx->len) {
1458                 printk(KERN_INFO
1459                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1460                        block_ctx->start, block_ctx->dev->name);
1461                 return -1;
1462         }
1463         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1464                                      file_extent_item_offset,
1465                                      sizeof(struct btrfs_file_extent_item));
1466         next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1467                       le64_to_cpu(file_extent_item.offset);
1468         generation = le64_to_cpu(file_extent_item.generation);
1469         num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1470         generation = le64_to_cpu(file_extent_item.generation);
1471
1472         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1473                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1474                        " offset = %llu, num_bytes = %llu\n",
1475                        file_extent_item.type,
1476                        (unsigned long long)
1477                        le64_to_cpu(file_extent_item.disk_bytenr),
1478                        (unsigned long long)le64_to_cpu(file_extent_item.offset),
1479                        (unsigned long long)num_bytes);
1480         while (num_bytes > 0) {
1481                 u32 chunk_len;
1482                 int num_copies;
1483                 int mirror_num;
1484
1485                 if (num_bytes > state->datablock_size)
1486                         chunk_len = state->datablock_size;
1487                 else
1488                         chunk_len = num_bytes;
1489
1490                 num_copies =
1491                     btrfs_num_copies(state->root->fs_info,
1492                                      next_bytenr, state->datablock_size);
1493                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1494                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1495                                (unsigned long long)next_bytenr, num_copies);
1496                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1497                         struct btrfsic_block_data_ctx next_block_ctx;
1498                         struct btrfsic_block *next_block;
1499                         int block_was_created;
1500
1501                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1502                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1503                                        "mirror_num=%d)\n", mirror_num);
1504                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1505                                 printk(KERN_INFO
1506                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1507                                        (unsigned long long)next_bytenr,
1508                                        chunk_len);
1509                         ret = btrfsic_map_block(state, next_bytenr,
1510                                                 chunk_len, &next_block_ctx,
1511                                                 mirror_num);
1512                         if (ret) {
1513                                 printk(KERN_INFO
1514                                        "btrfsic: btrfsic_map_block(@%llu,"
1515                                        " mirror=%d) failed!\n",
1516                                        (unsigned long long)next_bytenr,
1517                                        mirror_num);
1518                                 return -1;
1519                         }
1520
1521                         next_block = btrfsic_block_lookup_or_add(
1522                                         state,
1523                                         &next_block_ctx,
1524                                         "referenced ",
1525                                         0,
1526                                         force_iodone_flag,
1527                                         !force_iodone_flag,
1528                                         mirror_num,
1529                                         &block_was_created);
1530                         if (NULL == next_block) {
1531                                 printk(KERN_INFO
1532                                        "btrfsic: error, kmalloc failed!\n");
1533                                 btrfsic_release_block_ctx(&next_block_ctx);
1534                                 return -1;
1535                         }
1536                         if (!block_was_created) {
1537                                 if (next_block->logical_bytenr != next_bytenr &&
1538                                     !(!next_block->is_metadata &&
1539                                       0 == next_block->logical_bytenr)) {
1540                                         printk(KERN_INFO
1541                                                "Referenced block"
1542                                                " @%llu (%s/%llu/%d)"
1543                                                " found in hash table, D,"
1544                                                " bytenr mismatch"
1545                                                " (!= stored %llu).\n",
1546                                                (unsigned long long)next_bytenr,
1547                                                next_block_ctx.dev->name,
1548                                                (unsigned long long)
1549                                                next_block_ctx.dev_bytenr,
1550                                                mirror_num,
1551                                                (unsigned long long)
1552                                                next_block->logical_bytenr);
1553                                 }
1554                                 next_block->logical_bytenr = next_bytenr;
1555                                 next_block->mirror_num = mirror_num;
1556                         }
1557
1558                         l = btrfsic_block_link_lookup_or_add(state,
1559                                                              &next_block_ctx,
1560                                                              next_block, block,
1561                                                              generation);
1562                         btrfsic_release_block_ctx(&next_block_ctx);
1563                         if (NULL == l)
1564                                 return -1;
1565                 }
1566
1567                 next_bytenr += chunk_len;
1568                 num_bytes -= chunk_len;
1569         }
1570
1571         return 0;
1572 }
1573
1574 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1575                              struct btrfsic_block_data_ctx *block_ctx_out,
1576                              int mirror_num)
1577 {
1578         int ret;
1579         u64 length;
1580         struct btrfs_bio *multi = NULL;
1581         struct btrfs_device *device;
1582
1583         length = len;
1584         ret = btrfs_map_block(state->root->fs_info, READ,
1585                               bytenr, &length, &multi, mirror_num);
1586
1587         if (ret) {
1588                 block_ctx_out->start = 0;
1589                 block_ctx_out->dev_bytenr = 0;
1590                 block_ctx_out->len = 0;
1591                 block_ctx_out->dev = NULL;
1592                 block_ctx_out->datav = NULL;
1593                 block_ctx_out->pagev = NULL;
1594                 block_ctx_out->mem_to_free = NULL;
1595
1596                 return ret;
1597         }
1598
1599         device = multi->stripes[0].dev;
1600         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1601         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1602         block_ctx_out->start = bytenr;
1603         block_ctx_out->len = len;
1604         block_ctx_out->datav = NULL;
1605         block_ctx_out->pagev = NULL;
1606         block_ctx_out->mem_to_free = NULL;
1607
1608         kfree(multi);
1609         if (NULL == block_ctx_out->dev) {
1610                 ret = -ENXIO;
1611                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1612         }
1613
1614         return ret;
1615 }
1616
1617 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1618                                   u32 len, struct block_device *bdev,
1619                                   struct btrfsic_block_data_ctx *block_ctx_out)
1620 {
1621         block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1622         block_ctx_out->dev_bytenr = bytenr;
1623         block_ctx_out->start = bytenr;
1624         block_ctx_out->len = len;
1625         block_ctx_out->datav = NULL;
1626         block_ctx_out->pagev = NULL;
1627         block_ctx_out->mem_to_free = NULL;
1628         if (NULL != block_ctx_out->dev) {
1629                 return 0;
1630         } else {
1631                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1632                 return -ENXIO;
1633         }
1634 }
1635
1636 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1637 {
1638         if (block_ctx->mem_to_free) {
1639                 unsigned int num_pages;
1640
1641                 BUG_ON(!block_ctx->datav);
1642                 BUG_ON(!block_ctx->pagev);
1643                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1644                             PAGE_CACHE_SHIFT;
1645                 while (num_pages > 0) {
1646                         num_pages--;
1647                         if (block_ctx->datav[num_pages]) {
1648                                 kunmap(block_ctx->pagev[num_pages]);
1649                                 block_ctx->datav[num_pages] = NULL;
1650                         }
1651                         if (block_ctx->pagev[num_pages]) {
1652                                 __free_page(block_ctx->pagev[num_pages]);
1653                                 block_ctx->pagev[num_pages] = NULL;
1654                         }
1655                 }
1656
1657                 kfree(block_ctx->mem_to_free);
1658                 block_ctx->mem_to_free = NULL;
1659                 block_ctx->pagev = NULL;
1660                 block_ctx->datav = NULL;
1661         }
1662 }
1663
1664 static int btrfsic_read_block(struct btrfsic_state *state,
1665                               struct btrfsic_block_data_ctx *block_ctx)
1666 {
1667         unsigned int num_pages;
1668         unsigned int i;
1669         u64 dev_bytenr;
1670         int ret;
1671
1672         BUG_ON(block_ctx->datav);
1673         BUG_ON(block_ctx->pagev);
1674         BUG_ON(block_ctx->mem_to_free);
1675         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1676                 printk(KERN_INFO
1677                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1678                        (unsigned long long)block_ctx->dev_bytenr);
1679                 return -1;
1680         }
1681
1682         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1683                     PAGE_CACHE_SHIFT;
1684         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1685                                           sizeof(*block_ctx->pagev)) *
1686                                          num_pages, GFP_NOFS);
1687         if (!block_ctx->mem_to_free)
1688                 return -1;
1689         block_ctx->datav = block_ctx->mem_to_free;
1690         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1691         for (i = 0; i < num_pages; i++) {
1692                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1693                 if (!block_ctx->pagev[i])
1694                         return -1;
1695         }
1696
1697         dev_bytenr = block_ctx->dev_bytenr;
1698         for (i = 0; i < num_pages;) {
1699                 struct bio *bio;
1700                 unsigned int j;
1701                 DECLARE_COMPLETION_ONSTACK(complete);
1702
1703                 bio = bio_alloc(GFP_NOFS, num_pages - i);
1704                 if (!bio) {
1705                         printk(KERN_INFO
1706                                "btrfsic: bio_alloc() for %u pages failed!\n",
1707                                num_pages - i);
1708                         return -1;
1709                 }
1710                 bio->bi_bdev = block_ctx->dev->bdev;
1711                 bio->bi_sector = dev_bytenr >> 9;
1712                 bio->bi_end_io = btrfsic_complete_bio_end_io;
1713                 bio->bi_private = &complete;
1714
1715                 for (j = i; j < num_pages; j++) {
1716                         ret = bio_add_page(bio, block_ctx->pagev[j],
1717                                            PAGE_CACHE_SIZE, 0);
1718                         if (PAGE_CACHE_SIZE != ret)
1719                                 break;
1720                 }
1721                 if (j == i) {
1722                         printk(KERN_INFO
1723                                "btrfsic: error, failed to add a single page!\n");
1724                         return -1;
1725                 }
1726                 submit_bio(READ, bio);
1727
1728                 /* this will also unplug the queue */
1729                 wait_for_completion(&complete);
1730
1731                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1732                         printk(KERN_INFO
1733                                "btrfsic: read error at logical %llu dev %s!\n",
1734                                block_ctx->start, block_ctx->dev->name);
1735                         bio_put(bio);
1736                         return -1;
1737                 }
1738                 bio_put(bio);
1739                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1740                 i = j;
1741         }
1742         for (i = 0; i < num_pages; i++) {
1743                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1744                 if (!block_ctx->datav[i]) {
1745                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1746                                block_ctx->dev->name);
1747                         return -1;
1748                 }
1749         }
1750
1751         return block_ctx->len;
1752 }
1753
1754 static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1755 {
1756         complete((struct completion *)bio->bi_private);
1757 }
1758
1759 static void btrfsic_dump_database(struct btrfsic_state *state)
1760 {
1761         struct list_head *elem_all;
1762
1763         BUG_ON(NULL == state);
1764
1765         printk(KERN_INFO "all_blocks_list:\n");
1766         list_for_each(elem_all, &state->all_blocks_list) {
1767                 const struct btrfsic_block *const b_all =
1768                     list_entry(elem_all, struct btrfsic_block,
1769                                all_blocks_node);
1770                 struct list_head *elem_ref_to;
1771                 struct list_head *elem_ref_from;
1772
1773                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1774                        btrfsic_get_block_type(state, b_all),
1775                        (unsigned long long)b_all->logical_bytenr,
1776                        b_all->dev_state->name,
1777                        (unsigned long long)b_all->dev_bytenr,
1778                        b_all->mirror_num);
1779
1780                 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1781                         const struct btrfsic_block_link *const l =
1782                             list_entry(elem_ref_to,
1783                                        struct btrfsic_block_link,
1784                                        node_ref_to);
1785
1786                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1787                                " refers %u* to"
1788                                " %c @%llu (%s/%llu/%d)\n",
1789                                btrfsic_get_block_type(state, b_all),
1790                                (unsigned long long)b_all->logical_bytenr,
1791                                b_all->dev_state->name,
1792                                (unsigned long long)b_all->dev_bytenr,
1793                                b_all->mirror_num,
1794                                l->ref_cnt,
1795                                btrfsic_get_block_type(state, l->block_ref_to),
1796                                (unsigned long long)
1797                                l->block_ref_to->logical_bytenr,
1798                                l->block_ref_to->dev_state->name,
1799                                (unsigned long long)l->block_ref_to->dev_bytenr,
1800                                l->block_ref_to->mirror_num);
1801                 }
1802
1803                 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1804                         const struct btrfsic_block_link *const l =
1805                             list_entry(elem_ref_from,
1806                                        struct btrfsic_block_link,
1807                                        node_ref_from);
1808
1809                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1810                                " is ref %u* from"
1811                                " %c @%llu (%s/%llu/%d)\n",
1812                                btrfsic_get_block_type(state, b_all),
1813                                (unsigned long long)b_all->logical_bytenr,
1814                                b_all->dev_state->name,
1815                                (unsigned long long)b_all->dev_bytenr,
1816                                b_all->mirror_num,
1817                                l->ref_cnt,
1818                                btrfsic_get_block_type(state, l->block_ref_from),
1819                                (unsigned long long)
1820                                l->block_ref_from->logical_bytenr,
1821                                l->block_ref_from->dev_state->name,
1822                                (unsigned long long)
1823                                l->block_ref_from->dev_bytenr,
1824                                l->block_ref_from->mirror_num);
1825                 }
1826
1827                 printk(KERN_INFO "\n");
1828         }
1829 }
1830
1831 /*
1832  * Test whether the disk block contains a tree block (leaf or node)
1833  * (note that this test fails for the super block)
1834  */
1835 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1836                                      char **datav, unsigned int num_pages)
1837 {
1838         struct btrfs_header *h;
1839         u8 csum[BTRFS_CSUM_SIZE];
1840         u32 crc = ~(u32)0;
1841         unsigned int i;
1842
1843         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1844                 return 1; /* not metadata */
1845         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1846         h = (struct btrfs_header *)datav[0];
1847
1848         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1849                 return 1;
1850
1851         for (i = 0; i < num_pages; i++) {
1852                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1853                 size_t sublen = i ? PAGE_CACHE_SIZE :
1854                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1855
1856                 crc = crc32c(crc, data, sublen);
1857         }
1858         btrfs_csum_final(crc, csum);
1859         if (memcmp(csum, h->csum, state->csum_size))
1860                 return 1;
1861
1862         return 0; /* is metadata */
1863 }
1864
1865 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1866                                           u64 dev_bytenr, char **mapped_datav,
1867                                           unsigned int num_pages,
1868                                           struct bio *bio, int *bio_is_patched,
1869                                           struct buffer_head *bh,
1870                                           int submit_bio_bh_rw)
1871 {
1872         int is_metadata;
1873         struct btrfsic_block *block;
1874         struct btrfsic_block_data_ctx block_ctx;
1875         int ret;
1876         struct btrfsic_state *state = dev_state->state;
1877         struct block_device *bdev = dev_state->bdev;
1878         unsigned int processed_len;
1879
1880         if (NULL != bio_is_patched)
1881                 *bio_is_patched = 0;
1882
1883 again:
1884         if (num_pages == 0)
1885                 return;
1886
1887         processed_len = 0;
1888         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1889                                                       num_pages));
1890
1891         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1892                                                &state->block_hashtable);
1893         if (NULL != block) {
1894                 u64 bytenr = 0;
1895                 struct list_head *elem_ref_to;
1896                 struct list_head *tmp_ref_to;
1897
1898                 if (block->is_superblock) {
1899                         bytenr = le64_to_cpu(((struct btrfs_super_block *)
1900                                               mapped_datav[0])->bytenr);
1901                         if (num_pages * PAGE_CACHE_SIZE <
1902                             BTRFS_SUPER_INFO_SIZE) {
1903                                 printk(KERN_INFO
1904                                        "btrfsic: cannot work with too short bios!\n");
1905                                 return;
1906                         }
1907                         is_metadata = 1;
1908                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1909                         processed_len = BTRFS_SUPER_INFO_SIZE;
1910                         if (state->print_mask &
1911                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1912                                 printk(KERN_INFO
1913                                        "[before new superblock is written]:\n");
1914                                 btrfsic_dump_tree_sub(state, block, 0);
1915                         }
1916                 }
1917                 if (is_metadata) {
1918                         if (!block->is_superblock) {
1919                                 if (num_pages * PAGE_CACHE_SIZE <
1920                                     state->metablock_size) {
1921                                         printk(KERN_INFO
1922                                                "btrfsic: cannot work with too short bios!\n");
1923                                         return;
1924                                 }
1925                                 processed_len = state->metablock_size;
1926                                 bytenr = le64_to_cpu(((struct btrfs_header *)
1927                                                       mapped_datav[0])->bytenr);
1928                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1929                                                                dev_state,
1930                                                                dev_bytenr);
1931                         }
1932                         if (block->logical_bytenr != bytenr) {
1933                                 printk(KERN_INFO
1934                                        "Written block @%llu (%s/%llu/%d)"
1935                                        " found in hash table, %c,"
1936                                        " bytenr mismatch"
1937                                        " (!= stored %llu).\n",
1938                                        (unsigned long long)bytenr,
1939                                        dev_state->name,
1940                                        (unsigned long long)dev_bytenr,
1941                                        block->mirror_num,
1942                                        btrfsic_get_block_type(state, block),
1943                                        (unsigned long long)
1944                                        block->logical_bytenr);
1945                                 block->logical_bytenr = bytenr;
1946                         } else if (state->print_mask &
1947                                    BTRFSIC_PRINT_MASK_VERBOSE)
1948                                 printk(KERN_INFO
1949                                        "Written block @%llu (%s/%llu/%d)"
1950                                        " found in hash table, %c.\n",
1951                                        (unsigned long long)bytenr,
1952                                        dev_state->name,
1953                                        (unsigned long long)dev_bytenr,
1954                                        block->mirror_num,
1955                                        btrfsic_get_block_type(state, block));
1956                 } else {
1957                         if (num_pages * PAGE_CACHE_SIZE <
1958                             state->datablock_size) {
1959                                 printk(KERN_INFO
1960                                        "btrfsic: cannot work with too short bios!\n");
1961                                 return;
1962                         }
1963                         processed_len = state->datablock_size;
1964                         bytenr = block->logical_bytenr;
1965                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1966                                 printk(KERN_INFO
1967                                        "Written block @%llu (%s/%llu/%d)"
1968                                        " found in hash table, %c.\n",
1969                                        (unsigned long long)bytenr,
1970                                        dev_state->name,
1971                                        (unsigned long long)dev_bytenr,
1972                                        block->mirror_num,
1973                                        btrfsic_get_block_type(state, block));
1974                 }
1975
1976                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1977                         printk(KERN_INFO
1978                                "ref_to_list: %cE, ref_from_list: %cE\n",
1979                                list_empty(&block->ref_to_list) ? ' ' : '!',
1980                                list_empty(&block->ref_from_list) ? ' ' : '!');
1981                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1982                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1983                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1984                                " objectid=%llu, type=%d, offset=%llu),"
1985                                " new(gen=%llu),"
1986                                " which is referenced by most recent superblock"
1987                                " (superblockgen=%llu)!\n",
1988                                btrfsic_get_block_type(state, block),
1989                                (unsigned long long)bytenr,
1990                                dev_state->name,
1991                                (unsigned long long)dev_bytenr,
1992                                block->mirror_num,
1993                                (unsigned long long)block->generation,
1994                                (unsigned long long)
1995                                le64_to_cpu(block->disk_key.objectid),
1996                                block->disk_key.type,
1997                                (unsigned long long)
1998                                le64_to_cpu(block->disk_key.offset),
1999                                (unsigned long long)
2000                                le64_to_cpu(((struct btrfs_header *)
2001                                             mapped_datav[0])->generation),
2002                                (unsigned long long)
2003                                state->max_superblock_generation);
2004                         btrfsic_dump_tree(state);
2005                 }
2006
2007                 if (!block->is_iodone && !block->never_written) {
2008                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
2009                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
2010                                " which is not yet iodone!\n",
2011                                btrfsic_get_block_type(state, block),
2012                                (unsigned long long)bytenr,
2013                                dev_state->name,
2014                                (unsigned long long)dev_bytenr,
2015                                block->mirror_num,
2016                                (unsigned long long)block->generation,
2017                                (unsigned long long)
2018                                le64_to_cpu(((struct btrfs_header *)
2019                                             mapped_datav[0])->generation));
2020                         /* it would not be safe to go on */
2021                         btrfsic_dump_tree(state);
2022                         goto continue_loop;
2023                 }
2024
2025                 /*
2026                  * Clear all references of this block. Do not free
2027                  * the block itself even if is not referenced anymore
2028                  * because it still carries valueable information
2029                  * like whether it was ever written and IO completed.
2030                  */
2031                 list_for_each_safe(elem_ref_to, tmp_ref_to,
2032                                    &block->ref_to_list) {
2033                         struct btrfsic_block_link *const l =
2034                             list_entry(elem_ref_to,
2035                                        struct btrfsic_block_link,
2036                                        node_ref_to);
2037
2038                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2039                                 btrfsic_print_rem_link(state, l);
2040                         l->ref_cnt--;
2041                         if (0 == l->ref_cnt) {
2042                                 list_del(&l->node_ref_to);
2043                                 list_del(&l->node_ref_from);
2044                                 btrfsic_block_link_hashtable_remove(l);
2045                                 btrfsic_block_link_free(l);
2046                         }
2047                 }
2048
2049                 if (block->is_superblock)
2050                         ret = btrfsic_map_superblock(state, bytenr,
2051                                                      processed_len,
2052                                                      bdev, &block_ctx);
2053                 else
2054                         ret = btrfsic_map_block(state, bytenr, processed_len,
2055                                                 &block_ctx, 0);
2056                 if (ret) {
2057                         printk(KERN_INFO
2058                                "btrfsic: btrfsic_map_block(root @%llu)"
2059                                " failed!\n", (unsigned long long)bytenr);
2060                         goto continue_loop;
2061                 }
2062                 block_ctx.datav = mapped_datav;
2063                 /* the following is required in case of writes to mirrors,
2064                  * use the same that was used for the lookup */
2065                 block_ctx.dev = dev_state;
2066                 block_ctx.dev_bytenr = dev_bytenr;
2067
2068                 if (is_metadata || state->include_extent_data) {
2069                         block->never_written = 0;
2070                         block->iodone_w_error = 0;
2071                         if (NULL != bio) {
2072                                 block->is_iodone = 0;
2073                                 BUG_ON(NULL == bio_is_patched);
2074                                 if (!*bio_is_patched) {
2075                                         block->orig_bio_bh_private =
2076                                             bio->bi_private;
2077                                         block->orig_bio_bh_end_io.bio =
2078                                             bio->bi_end_io;
2079                                         block->next_in_same_bio = NULL;
2080                                         bio->bi_private = block;
2081                                         bio->bi_end_io = btrfsic_bio_end_io;
2082                                         *bio_is_patched = 1;
2083                                 } else {
2084                                         struct btrfsic_block *chained_block =
2085                                             (struct btrfsic_block *)
2086                                             bio->bi_private;
2087
2088                                         BUG_ON(NULL == chained_block);
2089                                         block->orig_bio_bh_private =
2090                                             chained_block->orig_bio_bh_private;
2091                                         block->orig_bio_bh_end_io.bio =
2092                                             chained_block->orig_bio_bh_end_io.
2093                                             bio;
2094                                         block->next_in_same_bio = chained_block;
2095                                         bio->bi_private = block;
2096                                 }
2097                         } else if (NULL != bh) {
2098                                 block->is_iodone = 0;
2099                                 block->orig_bio_bh_private = bh->b_private;
2100                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2101                                 block->next_in_same_bio = NULL;
2102                                 bh->b_private = block;
2103                                 bh->b_end_io = btrfsic_bh_end_io;
2104                         } else {
2105                                 block->is_iodone = 1;
2106                                 block->orig_bio_bh_private = NULL;
2107                                 block->orig_bio_bh_end_io.bio = NULL;
2108                                 block->next_in_same_bio = NULL;
2109                         }
2110                 }
2111
2112                 block->flush_gen = dev_state->last_flush_gen + 1;
2113                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2114                 if (is_metadata) {
2115                         block->logical_bytenr = bytenr;
2116                         block->is_metadata = 1;
2117                         if (block->is_superblock) {
2118                                 BUG_ON(PAGE_CACHE_SIZE !=
2119                                        BTRFS_SUPER_INFO_SIZE);
2120                                 ret = btrfsic_process_written_superblock(
2121                                                 state,
2122                                                 block,
2123                                                 (struct btrfs_super_block *)
2124                                                 mapped_datav[0]);
2125                                 if (state->print_mask &
2126                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2127                                         printk(KERN_INFO
2128                                         "[after new superblock is written]:\n");
2129                                         btrfsic_dump_tree_sub(state, block, 0);
2130                                 }
2131                         } else {
2132                                 block->mirror_num = 0;  /* unknown */
2133                                 ret = btrfsic_process_metablock(
2134                                                 state,
2135                                                 block,
2136                                                 &block_ctx,
2137                                                 0, 0);
2138                         }
2139                         if (ret)
2140                                 printk(KERN_INFO
2141                                        "btrfsic: btrfsic_process_metablock"
2142                                        "(root @%llu) failed!\n",
2143                                        (unsigned long long)dev_bytenr);
2144                 } else {
2145                         block->is_metadata = 0;
2146                         block->mirror_num = 0;  /* unknown */
2147                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2148                         if (!state->include_extent_data
2149                             && list_empty(&block->ref_from_list)) {
2150                                 /*
2151                                  * disk block is overwritten with extent
2152                                  * data (not meta data) and we are configured
2153                                  * to not include extent data: take the
2154                                  * chance and free the block's memory
2155                                  */
2156                                 btrfsic_block_hashtable_remove(block);
2157                                 list_del(&block->all_blocks_node);
2158                                 btrfsic_block_free(block);
2159                         }
2160                 }
2161                 btrfsic_release_block_ctx(&block_ctx);
2162         } else {
2163                 /* block has not been found in hash table */
2164                 u64 bytenr;
2165
2166                 if (!is_metadata) {
2167                         processed_len = state->datablock_size;
2168                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2169                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2170                                        " !found in hash table, D.\n",
2171                                        dev_state->name,
2172                                        (unsigned long long)dev_bytenr);
2173                         if (!state->include_extent_data) {
2174                                 /* ignore that written D block */
2175                                 goto continue_loop;
2176                         }
2177
2178                         /* this is getting ugly for the
2179                          * include_extent_data case... */
2180                         bytenr = 0;     /* unknown */
2181                         block_ctx.start = bytenr;
2182                         block_ctx.len = processed_len;
2183                         block_ctx.mem_to_free = NULL;
2184                         block_ctx.pagev = NULL;
2185                 } else {
2186                         processed_len = state->metablock_size;
2187                         bytenr = le64_to_cpu(((struct btrfs_header *)
2188                                               mapped_datav[0])->bytenr);
2189                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2190                                                        dev_bytenr);
2191                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2192                                 printk(KERN_INFO
2193                                        "Written block @%llu (%s/%llu/?)"
2194                                        " !found in hash table, M.\n",
2195                                        (unsigned long long)bytenr,
2196                                        dev_state->name,
2197                                        (unsigned long long)dev_bytenr);
2198
2199                         ret = btrfsic_map_block(state, bytenr, processed_len,
2200                                                 &block_ctx, 0);
2201                         if (ret) {
2202                                 printk(KERN_INFO
2203                                        "btrfsic: btrfsic_map_block(root @%llu)"
2204                                        " failed!\n",
2205                                        (unsigned long long)dev_bytenr);
2206                                 goto continue_loop;
2207                         }
2208                 }
2209                 block_ctx.datav = mapped_datav;
2210                 /* the following is required in case of writes to mirrors,
2211                  * use the same that was used for the lookup */
2212                 block_ctx.dev = dev_state;
2213                 block_ctx.dev_bytenr = dev_bytenr;
2214
2215                 block = btrfsic_block_alloc();
2216                 if (NULL == block) {
2217                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2218                         btrfsic_release_block_ctx(&block_ctx);
2219                         goto continue_loop;
2220                 }
2221                 block->dev_state = dev_state;
2222                 block->dev_bytenr = dev_bytenr;
2223                 block->logical_bytenr = bytenr;
2224                 block->is_metadata = is_metadata;
2225                 block->never_written = 0;
2226                 block->iodone_w_error = 0;
2227                 block->mirror_num = 0;  /* unknown */
2228                 block->flush_gen = dev_state->last_flush_gen + 1;
2229                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2230                 if (NULL != bio) {
2231                         block->is_iodone = 0;
2232                         BUG_ON(NULL == bio_is_patched);
2233                         if (!*bio_is_patched) {
2234                                 block->orig_bio_bh_private = bio->bi_private;
2235                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2236                                 block->next_in_same_bio = NULL;
2237                                 bio->bi_private = block;
2238                                 bio->bi_end_io = btrfsic_bio_end_io;
2239                                 *bio_is_patched = 1;
2240                         } else {
2241                                 struct btrfsic_block *chained_block =
2242                                     (struct btrfsic_block *)
2243                                     bio->bi_private;
2244
2245                                 BUG_ON(NULL == chained_block);
2246                                 block->orig_bio_bh_private =
2247                                     chained_block->orig_bio_bh_private;
2248                                 block->orig_bio_bh_end_io.bio =
2249                                     chained_block->orig_bio_bh_end_io.bio;
2250                                 block->next_in_same_bio = chained_block;
2251                                 bio->bi_private = block;
2252                         }
2253                 } else if (NULL != bh) {
2254                         block->is_iodone = 0;
2255                         block->orig_bio_bh_private = bh->b_private;
2256                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2257                         block->next_in_same_bio = NULL;
2258                         bh->b_private = block;
2259                         bh->b_end_io = btrfsic_bh_end_io;
2260                 } else {
2261                         block->is_iodone = 1;
2262                         block->orig_bio_bh_private = NULL;
2263                         block->orig_bio_bh_end_io.bio = NULL;
2264                         block->next_in_same_bio = NULL;
2265                 }
2266                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2267                         printk(KERN_INFO
2268                                "New written %c-block @%llu (%s/%llu/%d)\n",
2269                                is_metadata ? 'M' : 'D',
2270                                (unsigned long long)block->logical_bytenr,
2271                                block->dev_state->name,
2272                                (unsigned long long)block->dev_bytenr,
2273                                block->mirror_num);
2274                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2275                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2276
2277                 if (is_metadata) {
2278                         ret = btrfsic_process_metablock(state, block,
2279                                                         &block_ctx, 0, 0);
2280                         if (ret)
2281                                 printk(KERN_INFO
2282                                        "btrfsic: process_metablock(root @%llu)"
2283                                        " failed!\n",
2284                                        (unsigned long long)dev_bytenr);
2285                 }
2286                 btrfsic_release_block_ctx(&block_ctx);
2287         }
2288
2289 continue_loop:
2290         BUG_ON(!processed_len);
2291         dev_bytenr += processed_len;
2292         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2293         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2294         goto again;
2295 }
2296
2297 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2298 {
2299         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2300         int iodone_w_error;
2301
2302         /* mutex is not held! This is not save if IO is not yet completed
2303          * on umount */
2304         iodone_w_error = 0;
2305         if (bio_error_status)
2306                 iodone_w_error = 1;
2307
2308         BUG_ON(NULL == block);
2309         bp->bi_private = block->orig_bio_bh_private;
2310         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2311
2312         do {
2313                 struct btrfsic_block *next_block;
2314                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2315
2316                 if ((dev_state->state->print_mask &
2317                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2318                         printk(KERN_INFO
2319                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2320                                bio_error_status,
2321                                btrfsic_get_block_type(dev_state->state, block),
2322                                (unsigned long long)block->logical_bytenr,
2323                                dev_state->name,
2324                                (unsigned long long)block->dev_bytenr,
2325                                block->mirror_num);
2326                 next_block = block->next_in_same_bio;
2327                 block->iodone_w_error = iodone_w_error;
2328                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2329                         dev_state->last_flush_gen++;
2330                         if ((dev_state->state->print_mask &
2331                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2332                                 printk(KERN_INFO
2333                                        "bio_end_io() new %s flush_gen=%llu\n",
2334                                        dev_state->name,
2335                                        (unsigned long long)
2336                                        dev_state->last_flush_gen);
2337                 }
2338                 if (block->submit_bio_bh_rw & REQ_FUA)
2339                         block->flush_gen = 0; /* FUA completed means block is
2340                                                * on disk */
2341                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2342                 block = next_block;
2343         } while (NULL != block);
2344
2345         bp->bi_end_io(bp, bio_error_status);
2346 }
2347
2348 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2349 {
2350         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2351         int iodone_w_error = !uptodate;
2352         struct btrfsic_dev_state *dev_state;
2353
2354         BUG_ON(NULL == block);
2355         dev_state = block->dev_state;
2356         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2357                 printk(KERN_INFO
2358                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2359                        iodone_w_error,
2360                        btrfsic_get_block_type(dev_state->state, block),
2361                        (unsigned long long)block->logical_bytenr,
2362                        block->dev_state->name,
2363                        (unsigned long long)block->dev_bytenr,
2364                        block->mirror_num);
2365
2366         block->iodone_w_error = iodone_w_error;
2367         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2368                 dev_state->last_flush_gen++;
2369                 if ((dev_state->state->print_mask &
2370                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2371                         printk(KERN_INFO
2372                                "bh_end_io() new %s flush_gen=%llu\n",
2373                                dev_state->name,
2374                                (unsigned long long)dev_state->last_flush_gen);
2375         }
2376         if (block->submit_bio_bh_rw & REQ_FUA)
2377                 block->flush_gen = 0; /* FUA completed means block is on disk */
2378
2379         bh->b_private = block->orig_bio_bh_private;
2380         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2381         block->is_iodone = 1; /* for FLUSH, this releases the block */
2382         bh->b_end_io(bh, uptodate);
2383 }
2384
2385 static int btrfsic_process_written_superblock(
2386                 struct btrfsic_state *state,
2387                 struct btrfsic_block *const superblock,
2388                 struct btrfs_super_block *const super_hdr)
2389 {
2390         int pass;
2391
2392         superblock->generation = btrfs_super_generation(super_hdr);
2393         if (!(superblock->generation > state->max_superblock_generation ||
2394               0 == state->max_superblock_generation)) {
2395                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2396                         printk(KERN_INFO
2397                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2398                                " with old gen %llu <= %llu\n",
2399                                (unsigned long long)superblock->logical_bytenr,
2400                                superblock->dev_state->name,
2401                                (unsigned long long)superblock->dev_bytenr,
2402                                superblock->mirror_num,
2403                                (unsigned long long)
2404                                btrfs_super_generation(super_hdr),
2405                                (unsigned long long)
2406                                state->max_superblock_generation);
2407         } else {
2408                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2409                         printk(KERN_INFO
2410                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2411                                " with new gen %llu > %llu\n",
2412                                (unsigned long long)superblock->logical_bytenr,
2413                                superblock->dev_state->name,
2414                                (unsigned long long)superblock->dev_bytenr,
2415                                superblock->mirror_num,
2416                                (unsigned long long)
2417                                btrfs_super_generation(super_hdr),
2418                                (unsigned long long)
2419                                state->max_superblock_generation);
2420
2421                 state->max_superblock_generation =
2422                     btrfs_super_generation(super_hdr);
2423                 state->latest_superblock = superblock;
2424         }
2425
2426         for (pass = 0; pass < 3; pass++) {
2427                 int ret;
2428                 u64 next_bytenr;
2429                 struct btrfsic_block *next_block;
2430                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2431                 struct btrfsic_block_link *l;
2432                 int num_copies;
2433                 int mirror_num;
2434                 const char *additional_string = NULL;
2435                 struct btrfs_disk_key tmp_disk_key;
2436
2437                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
2438                 tmp_disk_key.offset = 0;
2439
2440                 switch (pass) {
2441                 case 0:
2442                         tmp_disk_key.objectid =
2443                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
2444                         additional_string = "root ";
2445                         next_bytenr = btrfs_super_root(super_hdr);
2446                         if (state->print_mask &
2447                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2448                                 printk(KERN_INFO "root@%llu\n",
2449                                        (unsigned long long)next_bytenr);
2450                         break;
2451                 case 1:
2452                         tmp_disk_key.objectid =
2453                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
2454                         additional_string = "chunk ";
2455                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2456                         if (state->print_mask &
2457                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2458                                 printk(KERN_INFO "chunk@%llu\n",
2459                                        (unsigned long long)next_bytenr);
2460                         break;
2461                 case 2:
2462                         tmp_disk_key.objectid =
2463                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
2464                         additional_string = "log ";
2465                         next_bytenr = btrfs_super_log_root(super_hdr);
2466                         if (0 == next_bytenr)
2467                                 continue;
2468                         if (state->print_mask &
2469                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2470                                 printk(KERN_INFO "log@%llu\n",
2471                                        (unsigned long long)next_bytenr);
2472                         break;
2473                 }
2474
2475                 num_copies =
2476                     btrfs_num_copies(state->root->fs_info,
2477                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2478                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2479                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2480                                (unsigned long long)next_bytenr, num_copies);
2481                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2482                         int was_created;
2483
2484                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2485                                 printk(KERN_INFO
2486                                        "btrfsic_process_written_superblock("
2487                                        "mirror_num=%d)\n", mirror_num);
2488                         ret = btrfsic_map_block(state, next_bytenr,
2489                                                 BTRFS_SUPER_INFO_SIZE,
2490                                                 &tmp_next_block_ctx,
2491                                                 mirror_num);
2492                         if (ret) {
2493                                 printk(KERN_INFO
2494                                        "btrfsic: btrfsic_map_block(@%llu,"
2495                                        " mirror=%d) failed!\n",
2496                                        (unsigned long long)next_bytenr,
2497                                        mirror_num);
2498                                 return -1;
2499                         }
2500
2501                         next_block = btrfsic_block_lookup_or_add(
2502                                         state,
2503                                         &tmp_next_block_ctx,
2504                                         additional_string,
2505                                         1, 0, 1,
2506                                         mirror_num,
2507                                         &was_created);
2508                         if (NULL == next_block) {
2509                                 printk(KERN_INFO
2510                                        "btrfsic: error, kmalloc failed!\n");
2511                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2512                                 return -1;
2513                         }
2514
2515                         next_block->disk_key = tmp_disk_key;
2516                         if (was_created)
2517                                 next_block->generation =
2518                                     BTRFSIC_GENERATION_UNKNOWN;
2519                         l = btrfsic_block_link_lookup_or_add(
2520                                         state,
2521                                         &tmp_next_block_ctx,
2522                                         next_block,
2523                                         superblock,
2524                                         BTRFSIC_GENERATION_UNKNOWN);
2525                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2526                         if (NULL == l)
2527                                 return -1;
2528                 }
2529         }
2530
2531         if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
2532                 WARN_ON(1);
2533                 btrfsic_dump_tree(state);
2534         }
2535
2536         return 0;
2537 }
2538
2539 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2540                                         struct btrfsic_block *const block,
2541                                         int recursion_level)
2542 {
2543         struct list_head *elem_ref_to;
2544         int ret = 0;
2545
2546         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2547                 /*
2548                  * Note that this situation can happen and does not
2549                  * indicate an error in regular cases. It happens
2550                  * when disk blocks are freed and later reused.
2551                  * The check-integrity module is not aware of any
2552                  * block free operations, it just recognizes block
2553                  * write operations. Therefore it keeps the linkage
2554                  * information for a block until a block is
2555                  * rewritten. This can temporarily cause incorrect
2556                  * and even circular linkage informations. This
2557                  * causes no harm unless such blocks are referenced
2558                  * by the most recent super block.
2559                  */
2560                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2561                         printk(KERN_INFO
2562                                "btrfsic: abort cyclic linkage (case 1).\n");
2563
2564                 return ret;
2565         }
2566
2567         /*
2568          * This algorithm is recursive because the amount of used stack
2569          * space is very small and the max recursion depth is limited.
2570          */
2571         list_for_each(elem_ref_to, &block->ref_to_list) {
2572                 const struct btrfsic_block_link *const l =
2573                     list_entry(elem_ref_to, struct btrfsic_block_link,
2574                                node_ref_to);
2575
2576                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2577                         printk(KERN_INFO
2578                                "rl=%d, %c @%llu (%s/%llu/%d)"
2579                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2580                                recursion_level,
2581                                btrfsic_get_block_type(state, block),
2582                                (unsigned long long)block->logical_bytenr,
2583                                block->dev_state->name,
2584                                (unsigned long long)block->dev_bytenr,
2585                                block->mirror_num,
2586                                l->ref_cnt,
2587                                btrfsic_get_block_type(state, l->block_ref_to),
2588                                (unsigned long long)
2589                                l->block_ref_to->logical_bytenr,
2590                                l->block_ref_to->dev_state->name,
2591                                (unsigned long long)l->block_ref_to->dev_bytenr,
2592                                l->block_ref_to->mirror_num);
2593                 if (l->block_ref_to->never_written) {
2594                         printk(KERN_INFO "btrfs: attempt to write superblock"
2595                                " which references block %c @%llu (%s/%llu/%d)"
2596                                " which is never written!\n",
2597                                btrfsic_get_block_type(state, l->block_ref_to),
2598                                (unsigned long long)
2599                                l->block_ref_to->logical_bytenr,
2600                                l->block_ref_to->dev_state->name,
2601                                (unsigned long long)l->block_ref_to->dev_bytenr,
2602                                l->block_ref_to->mirror_num);
2603                         ret = -1;
2604                 } else if (!l->block_ref_to->is_iodone) {
2605                         printk(KERN_INFO "btrfs: attempt to write superblock"
2606                                " which references block %c @%llu (%s/%llu/%d)"
2607                                " which is not yet iodone!\n",
2608                                btrfsic_get_block_type(state, l->block_ref_to),
2609                                (unsigned long long)
2610                                l->block_ref_to->logical_bytenr,
2611                                l->block_ref_to->dev_state->name,
2612                                (unsigned long long)l->block_ref_to->dev_bytenr,
2613                                l->block_ref_to->mirror_num);
2614                         ret = -1;
2615                 } else if (l->block_ref_to->iodone_w_error) {
2616                         printk(KERN_INFO "btrfs: attempt to write superblock"
2617                                " which references block %c @%llu (%s/%llu/%d)"
2618                                " which has write error!\n",
2619                                btrfsic_get_block_type(state, l->block_ref_to),
2620                                (unsigned long long)
2621                                l->block_ref_to->logical_bytenr,
2622                                l->block_ref_to->dev_state->name,
2623                                (unsigned long long)l->block_ref_to->dev_bytenr,
2624                                l->block_ref_to->mirror_num);
2625                         ret = -1;
2626                 } else if (l->parent_generation !=
2627                            l->block_ref_to->generation &&
2628                            BTRFSIC_GENERATION_UNKNOWN !=
2629                            l->parent_generation &&
2630                            BTRFSIC_GENERATION_UNKNOWN !=
2631                            l->block_ref_to->generation) {
2632                         printk(KERN_INFO "btrfs: attempt to write superblock"
2633                                " which references block %c @%llu (%s/%llu/%d)"
2634                                " with generation %llu !="
2635                                " parent generation %llu!\n",
2636                                btrfsic_get_block_type(state, l->block_ref_to),
2637                                (unsigned long long)
2638                                l->block_ref_to->logical_bytenr,
2639                                l->block_ref_to->dev_state->name,
2640                                (unsigned long long)l->block_ref_to->dev_bytenr,
2641                                l->block_ref_to->mirror_num,
2642                                (unsigned long long)l->block_ref_to->generation,
2643                                (unsigned long long)l->parent_generation);
2644                         ret = -1;
2645                 } else if (l->block_ref_to->flush_gen >
2646                            l->block_ref_to->dev_state->last_flush_gen) {
2647                         printk(KERN_INFO "btrfs: attempt to write superblock"
2648                                " which references block %c @%llu (%s/%llu/%d)"
2649                                " which is not flushed out of disk's write cache"
2650                                " (block flush_gen=%llu,"
2651                                " dev->flush_gen=%llu)!\n",
2652                                btrfsic_get_block_type(state, l->block_ref_to),
2653                                (unsigned long long)
2654                                l->block_ref_to->logical_bytenr,
2655                                l->block_ref_to->dev_state->name,
2656                                (unsigned long long)l->block_ref_to->dev_bytenr,
2657                                l->block_ref_to->mirror_num,
2658                                (unsigned long long)block->flush_gen,
2659                                (unsigned long long)
2660                                l->block_ref_to->dev_state->last_flush_gen);
2661                         ret = -1;
2662                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2663                                                               l->block_ref_to,
2664                                                               recursion_level +
2665                                                               1)) {
2666                         ret = -1;
2667                 }
2668         }
2669
2670         return ret;
2671 }
2672
2673 static int btrfsic_is_block_ref_by_superblock(
2674                 const struct btrfsic_state *state,
2675                 const struct btrfsic_block *block,
2676                 int recursion_level)
2677 {
2678         struct list_head *elem_ref_from;
2679
2680         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2681                 /* refer to comment at "abort cyclic linkage (case 1)" */
2682                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2683                         printk(KERN_INFO
2684                                "btrfsic: abort cyclic linkage (case 2).\n");
2685
2686                 return 0;
2687         }
2688
2689         /*
2690          * This algorithm is recursive because the amount of used stack space
2691          * is very small and the max recursion depth is limited.
2692          */
2693         list_for_each(elem_ref_from, &block->ref_from_list) {
2694                 const struct btrfsic_block_link *const l =
2695                     list_entry(elem_ref_from, struct btrfsic_block_link,
2696                                node_ref_from);
2697
2698                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2699                         printk(KERN_INFO
2700                                "rl=%d, %c @%llu (%s/%llu/%d)"
2701                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2702                                recursion_level,
2703                                btrfsic_get_block_type(state, block),
2704                                (unsigned long long)block->logical_bytenr,
2705                                block->dev_state->name,
2706                                (unsigned long long)block->dev_bytenr,
2707                                block->mirror_num,
2708                                l->ref_cnt,
2709                                btrfsic_get_block_type(state, l->block_ref_from),
2710                                (unsigned long long)
2711                                l->block_ref_from->logical_bytenr,
2712                                l->block_ref_from->dev_state->name,
2713                                (unsigned long long)
2714                                l->block_ref_from->dev_bytenr,
2715                                l->block_ref_from->mirror_num);
2716                 if (l->block_ref_from->is_superblock &&
2717                     state->latest_superblock->dev_bytenr ==
2718                     l->block_ref_from->dev_bytenr &&
2719                     state->latest_superblock->dev_state->bdev ==
2720                     l->block_ref_from->dev_state->bdev)
2721                         return 1;
2722                 else if (btrfsic_is_block_ref_by_superblock(state,
2723                                                             l->block_ref_from,
2724                                                             recursion_level +
2725                                                             1))
2726                         return 1;
2727         }
2728
2729         return 0;
2730 }
2731
2732 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2733                                    const struct btrfsic_block_link *l)
2734 {
2735         printk(KERN_INFO
2736                "Add %u* link from %c @%llu (%s/%llu/%d)"
2737                " to %c @%llu (%s/%llu/%d).\n",
2738                l->ref_cnt,
2739                btrfsic_get_block_type(state, l->block_ref_from),
2740                (unsigned long long)l->block_ref_from->logical_bytenr,
2741                l->block_ref_from->dev_state->name,
2742                (unsigned long long)l->block_ref_from->dev_bytenr,
2743                l->block_ref_from->mirror_num,
2744                btrfsic_get_block_type(state, l->block_ref_to),
2745                (unsigned long long)l->block_ref_to->logical_bytenr,
2746                l->block_ref_to->dev_state->name,
2747                (unsigned long long)l->block_ref_to->dev_bytenr,
2748                l->block_ref_to->mirror_num);
2749 }
2750
2751 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2752                                    const struct btrfsic_block_link *l)
2753 {
2754         printk(KERN_INFO
2755                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2756                " to %c @%llu (%s/%llu/%d).\n",
2757                l->ref_cnt,
2758                btrfsic_get_block_type(state, l->block_ref_from),
2759                (unsigned long long)l->block_ref_from->logical_bytenr,
2760                l->block_ref_from->dev_state->name,
2761                (unsigned long long)l->block_ref_from->dev_bytenr,
2762                l->block_ref_from->mirror_num,
2763                btrfsic_get_block_type(state, l->block_ref_to),
2764                (unsigned long long)l->block_ref_to->logical_bytenr,
2765                l->block_ref_to->dev_state->name,
2766                (unsigned long long)l->block_ref_to->dev_bytenr,
2767                l->block_ref_to->mirror_num);
2768 }
2769
2770 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2771                                    const struct btrfsic_block *block)
2772 {
2773         if (block->is_superblock &&
2774             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2775             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2776                 return 'S';
2777         else if (block->is_superblock)
2778                 return 's';
2779         else if (block->is_metadata)
2780                 return 'M';
2781         else
2782                 return 'D';
2783 }
2784
2785 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2786 {
2787         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2788 }
2789
2790 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2791                                   const struct btrfsic_block *block,
2792                                   int indent_level)
2793 {
2794         struct list_head *elem_ref_to;
2795         int indent_add;
2796         static char buf[80];
2797         int cursor_position;
2798
2799         /*
2800          * Should better fill an on-stack buffer with a complete line and
2801          * dump it at once when it is time to print a newline character.
2802          */
2803
2804         /*
2805          * This algorithm is recursive because the amount of used stack space
2806          * is very small and the max recursion depth is limited.
2807          */
2808         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2809                              btrfsic_get_block_type(state, block),
2810                              (unsigned long long)block->logical_bytenr,
2811                              block->dev_state->name,
2812                              (unsigned long long)block->dev_bytenr,
2813                              block->mirror_num);
2814         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2815                 printk("[...]\n");
2816                 return;
2817         }
2818         printk(buf);
2819         indent_level += indent_add;
2820         if (list_empty(&block->ref_to_list)) {
2821                 printk("\n");
2822                 return;
2823         }
2824         if (block->mirror_num > 1 &&
2825             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2826                 printk(" [...]\n");
2827                 return;
2828         }
2829
2830         cursor_position = indent_level;
2831         list_for_each(elem_ref_to, &block->ref_to_list) {
2832                 const struct btrfsic_block_link *const l =
2833                     list_entry(elem_ref_to, struct btrfsic_block_link,
2834                                node_ref_to);
2835
2836                 while (cursor_position < indent_level) {
2837                         printk(" ");
2838                         cursor_position++;
2839                 }
2840                 if (l->ref_cnt > 1)
2841                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2842                 else
2843                         indent_add = sprintf(buf, " --> ");
2844                 if (indent_level + indent_add >
2845                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2846                         printk("[...]\n");
2847                         cursor_position = 0;
2848                         continue;
2849                 }
2850
2851                 printk(buf);
2852
2853                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2854                                       indent_level + indent_add);
2855                 cursor_position = 0;
2856         }
2857 }
2858
2859 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2860                 struct btrfsic_state *state,
2861                 struct btrfsic_block_data_ctx *next_block_ctx,
2862                 struct btrfsic_block *next_block,
2863                 struct btrfsic_block *from_block,
2864                 u64 parent_generation)
2865 {
2866         struct btrfsic_block_link *l;
2867
2868         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2869                                                 next_block_ctx->dev_bytenr,
2870                                                 from_block->dev_state->bdev,
2871                                                 from_block->dev_bytenr,
2872                                                 &state->block_link_hashtable);
2873         if (NULL == l) {
2874                 l = btrfsic_block_link_alloc();
2875                 if (NULL == l) {
2876                         printk(KERN_INFO
2877                                "btrfsic: error, kmalloc" " failed!\n");
2878                         return NULL;
2879                 }
2880
2881                 l->block_ref_to = next_block;
2882                 l->block_ref_from = from_block;
2883                 l->ref_cnt = 1;
2884                 l->parent_generation = parent_generation;
2885
2886                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2887                         btrfsic_print_add_link(state, l);
2888
2889                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2890                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2891
2892                 btrfsic_block_link_hashtable_add(l,
2893                                                  &state->block_link_hashtable);
2894         } else {
2895                 l->ref_cnt++;
2896                 l->parent_generation = parent_generation;
2897                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2898                         btrfsic_print_add_link(state, l);
2899         }
2900
2901         return l;
2902 }
2903
2904 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2905                 struct btrfsic_state *state,
2906                 struct btrfsic_block_data_ctx *block_ctx,
2907                 const char *additional_string,
2908                 int is_metadata,
2909                 int is_iodone,
2910                 int never_written,
2911                 int mirror_num,
2912                 int *was_created)
2913 {
2914         struct btrfsic_block *block;
2915
2916         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2917                                                block_ctx->dev_bytenr,
2918                                                &state->block_hashtable);
2919         if (NULL == block) {
2920                 struct btrfsic_dev_state *dev_state;
2921
2922                 block = btrfsic_block_alloc();
2923                 if (NULL == block) {
2924                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2925                         return NULL;
2926                 }
2927                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2928                 if (NULL == dev_state) {
2929                         printk(KERN_INFO
2930                                "btrfsic: error, lookup dev_state failed!\n");
2931                         btrfsic_block_free(block);
2932                         return NULL;
2933                 }
2934                 block->dev_state = dev_state;
2935                 block->dev_bytenr = block_ctx->dev_bytenr;
2936                 block->logical_bytenr = block_ctx->start;
2937                 block->is_metadata = is_metadata;
2938                 block->is_iodone = is_iodone;
2939                 block->never_written = never_written;
2940                 block->mirror_num = mirror_num;
2941                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2942                         printk(KERN_INFO
2943                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2944                                additional_string,
2945                                btrfsic_get_block_type(state, block),
2946                                (unsigned long long)block->logical_bytenr,
2947                                dev_state->name,
2948                                (unsigned long long)block->dev_bytenr,
2949                                mirror_num);
2950                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2951                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2952                 if (NULL != was_created)
2953                         *was_created = 1;
2954         } else {
2955                 if (NULL != was_created)
2956                         *was_created = 0;
2957         }
2958
2959         return block;
2960 }
2961
2962 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2963                                            u64 bytenr,
2964                                            struct btrfsic_dev_state *dev_state,
2965                                            u64 dev_bytenr)
2966 {
2967         int num_copies;
2968         int mirror_num;
2969         int ret;
2970         struct btrfsic_block_data_ctx block_ctx;
2971         int match = 0;
2972
2973         num_copies = btrfs_num_copies(state->root->fs_info,
2974                                       bytenr, state->metablock_size);
2975
2976         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2977                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2978                                         &block_ctx, mirror_num);
2979                 if (ret) {
2980                         printk(KERN_INFO "btrfsic:"
2981                                " btrfsic_map_block(logical @%llu,"
2982                                " mirror %d) failed!\n",
2983                                (unsigned long long)bytenr, mirror_num);
2984                         continue;
2985                 }
2986
2987                 if (dev_state->bdev == block_ctx.dev->bdev &&
2988                     dev_bytenr == block_ctx.dev_bytenr) {
2989                         match++;
2990                         btrfsic_release_block_ctx(&block_ctx);
2991                         break;
2992                 }
2993                 btrfsic_release_block_ctx(&block_ctx);
2994         }
2995
2996         if (!match) {
2997                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2998                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2999                        " phys_bytenr=%llu)!\n",
3000                        (unsigned long long)bytenr, dev_state->name,
3001                        (unsigned long long)dev_bytenr);
3002                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
3003                         ret = btrfsic_map_block(state, bytenr,
3004                                                 state->metablock_size,
3005                                                 &block_ctx, mirror_num);
3006                         if (ret)
3007                                 continue;
3008
3009                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
3010                                " (%s/%llu/%d)\n",
3011                                (unsigned long long)bytenr,
3012                                block_ctx.dev->name,
3013                                (unsigned long long)block_ctx.dev_bytenr,
3014                                mirror_num);
3015                 }
3016                 WARN_ON(1);
3017         }
3018 }
3019
3020 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
3021                 struct block_device *bdev)
3022 {
3023         struct btrfsic_dev_state *ds;
3024
3025         ds = btrfsic_dev_state_hashtable_lookup(bdev,
3026                                                 &btrfsic_dev_state_hashtable);
3027         return ds;
3028 }
3029
3030 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3031 {
3032         struct btrfsic_dev_state *dev_state;
3033
3034         if (!btrfsic_is_initialized)
3035                 return submit_bh(rw, bh);
3036
3037         mutex_lock(&btrfsic_mutex);
3038         /* since btrfsic_submit_bh() might also be called before
3039          * btrfsic_mount(), this might return NULL */
3040         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
3041
3042         /* Only called to write the superblock (incl. FLUSH/FUA) */
3043         if (NULL != dev_state &&
3044             (rw & WRITE) && bh->b_size > 0) {
3045                 u64 dev_bytenr;
3046
3047                 dev_bytenr = 4096 * bh->b_blocknr;
3048                 if (dev_state->state->print_mask &
3049                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3050                         printk(KERN_INFO
3051                                "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
3052                                " size=%lu, data=%p, bdev=%p)\n",
3053                                rw, (unsigned long)bh->b_blocknr,
3054                                (unsigned long long)dev_bytenr,
3055                                (unsigned long)bh->b_size, bh->b_data,
3056                                bh->b_bdev);
3057                 btrfsic_process_written_block(dev_state, dev_bytenr,
3058                                               &bh->b_data, 1, NULL,
3059                                               NULL, bh, rw);
3060         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3061                 if (dev_state->state->print_mask &
3062                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3063                         printk(KERN_INFO
3064                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
3065                                rw, bh->b_bdev);
3066                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3067                         if ((dev_state->state->print_mask &
3068                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3069                               BTRFSIC_PRINT_MASK_VERBOSE)))
3070                                 printk(KERN_INFO
3071                                        "btrfsic_submit_bh(%s) with FLUSH"
3072                                        " but dummy block already in use"
3073                                        " (ignored)!\n",
3074                                        dev_state->name);
3075                 } else {
3076                         struct btrfsic_block *const block =
3077                                 &dev_state->dummy_block_for_bio_bh_flush;
3078
3079                         block->is_iodone = 0;
3080                         block->never_written = 0;
3081                         block->iodone_w_error = 0;
3082                         block->flush_gen = dev_state->last_flush_gen + 1;
3083                         block->submit_bio_bh_rw = rw;
3084                         block->orig_bio_bh_private = bh->b_private;
3085                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
3086                         block->next_in_same_bio = NULL;
3087                         bh->b_private = block;
3088                         bh->b_end_io = btrfsic_bh_end_io;
3089                 }
3090         }
3091         mutex_unlock(&btrfsic_mutex);
3092         return submit_bh(rw, bh);
3093 }
3094
3095 void btrfsic_submit_bio(int rw, struct bio *bio)
3096 {
3097         struct btrfsic_dev_state *dev_state;
3098
3099         if (!btrfsic_is_initialized) {
3100                 submit_bio(rw, bio);
3101                 return;
3102         }
3103
3104         mutex_lock(&btrfsic_mutex);
3105         /* since btrfsic_submit_bio() is also called before
3106          * btrfsic_mount(), this might return NULL */
3107         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3108         if (NULL != dev_state &&
3109             (rw & WRITE) && NULL != bio->bi_io_vec) {
3110                 unsigned int i;
3111                 u64 dev_bytenr;
3112                 int bio_is_patched;
3113                 char **mapped_datav;
3114
3115                 dev_bytenr = 512 * bio->bi_sector;
3116                 bio_is_patched = 0;
3117                 if (dev_state->state->print_mask &
3118                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3119                         printk(KERN_INFO
3120                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
3121                                " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
3122                                rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
3123                                (unsigned long long)dev_bytenr,
3124                                bio->bi_bdev);
3125
3126                 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3127                                        GFP_NOFS);
3128                 if (!mapped_datav)
3129                         goto leave;
3130                 for (i = 0; i < bio->bi_vcnt; i++) {
3131                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3132                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3133                         if (!mapped_datav[i]) {
3134                                 while (i > 0) {
3135                                         i--;
3136                                         kunmap(bio->bi_io_vec[i].bv_page);
3137                                 }
3138                                 kfree(mapped_datav);
3139                                 goto leave;
3140                         }
3141                         if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3142                              BTRFSIC_PRINT_MASK_VERBOSE) ==
3143                             (dev_state->state->print_mask &
3144                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3145                               BTRFSIC_PRINT_MASK_VERBOSE)))
3146                                 printk(KERN_INFO
3147                                        "#%u: page=%p, len=%u, offset=%u\n",
3148                                        i, bio->bi_io_vec[i].bv_page,
3149                                        bio->bi_io_vec[i].bv_len,
3150                                        bio->bi_io_vec[i].bv_offset);
3151                 }
3152                 btrfsic_process_written_block(dev_state, dev_bytenr,
3153                                               mapped_datav, bio->bi_vcnt,
3154                                               bio, &bio_is_patched,
3155                                               NULL, rw);
3156                 while (i > 0) {
3157                         i--;
3158                         kunmap(bio->bi_io_vec[i].bv_page);
3159                 }
3160                 kfree(mapped_datav);
3161         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3162                 if (dev_state->state->print_mask &
3163                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3164                         printk(KERN_INFO
3165                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3166                                rw, bio->bi_bdev);
3167                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3168                         if ((dev_state->state->print_mask &
3169                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3170                               BTRFSIC_PRINT_MASK_VERBOSE)))
3171                                 printk(KERN_INFO
3172                                        "btrfsic_submit_bio(%s) with FLUSH"
3173                                        " but dummy block already in use"
3174                                        " (ignored)!\n",
3175                                        dev_state->name);
3176                 } else {
3177                         struct btrfsic_block *const block =
3178                                 &dev_state->dummy_block_for_bio_bh_flush;
3179
3180                         block->is_iodone = 0;
3181                         block->never_written = 0;
3182                         block->iodone_w_error = 0;
3183                         block->flush_gen = dev_state->last_flush_gen + 1;
3184                         block->submit_bio_bh_rw = rw;
3185                         block->orig_bio_bh_private = bio->bi_private;
3186                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3187                         block->next_in_same_bio = NULL;
3188                         bio->bi_private = block;
3189                         bio->bi_end_io = btrfsic_bio_end_io;
3190                 }
3191         }
3192 leave:
3193         mutex_unlock(&btrfsic_mutex);
3194
3195         submit_bio(rw, bio);
3196 }
3197
3198 int btrfsic_mount(struct btrfs_root *root,
3199                   struct btrfs_fs_devices *fs_devices,
3200                   int including_extent_data, u32 print_mask)
3201 {
3202         int ret;
3203         struct btrfsic_state *state;
3204         struct list_head *dev_head = &fs_devices->devices;
3205         struct btrfs_device *device;
3206
3207         if (root->nodesize != root->leafsize) {
3208                 printk(KERN_INFO
3209                        "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3210                        root->nodesize, root->leafsize);
3211                 return -1;
3212         }
3213         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3214                 printk(KERN_INFO
3215                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3216                        root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3217                 return -1;
3218         }
3219         if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3220                 printk(KERN_INFO
3221                        "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3222                        root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3223                 return -1;
3224         }
3225         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3226                 printk(KERN_INFO
3227                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3228                        root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3229                 return -1;
3230         }
3231         state = kzalloc(sizeof(*state), GFP_NOFS);
3232         if (NULL == state) {
3233                 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3234                 return -1;
3235         }
3236
3237         if (!btrfsic_is_initialized) {
3238                 mutex_init(&btrfsic_mutex);
3239                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3240                 btrfsic_is_initialized = 1;
3241         }
3242         mutex_lock(&btrfsic_mutex);
3243         state->root = root;
3244         state->print_mask = print_mask;
3245         state->include_extent_data = including_extent_data;
3246         state->csum_size = 0;
3247         state->metablock_size = root->nodesize;
3248         state->datablock_size = root->sectorsize;
3249         INIT_LIST_HEAD(&state->all_blocks_list);
3250         btrfsic_block_hashtable_init(&state->block_hashtable);
3251         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3252         state->max_superblock_generation = 0;
3253         state->latest_superblock = NULL;
3254
3255         list_for_each_entry(device, dev_head, dev_list) {
3256                 struct btrfsic_dev_state *ds;
3257                 char *p;
3258
3259                 if (!device->bdev || !device->name)
3260                         continue;
3261
3262                 ds = btrfsic_dev_state_alloc();
3263                 if (NULL == ds) {
3264                         printk(KERN_INFO
3265                                "btrfs check-integrity: kmalloc() failed!\n");
3266                         mutex_unlock(&btrfsic_mutex);
3267                         return -1;
3268                 }
3269                 ds->bdev = device->bdev;
3270                 ds->state = state;
3271                 bdevname(ds->bdev, ds->name);
3272                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3273                 for (p = ds->name; *p != '\0'; p++);
3274                 while (p > ds->name && *p != '/')
3275                         p--;
3276                 if (*p == '/')
3277                         p++;
3278                 strlcpy(ds->name, p, sizeof(ds->name));
3279                 btrfsic_dev_state_hashtable_add(ds,
3280                                                 &btrfsic_dev_state_hashtable);
3281         }
3282
3283         ret = btrfsic_process_superblock(state, fs_devices);
3284         if (0 != ret) {
3285                 mutex_unlock(&btrfsic_mutex);
3286                 btrfsic_unmount(root, fs_devices);
3287                 return ret;
3288         }
3289
3290         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3291                 btrfsic_dump_database(state);
3292         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3293                 btrfsic_dump_tree(state);
3294
3295         mutex_unlock(&btrfsic_mutex);
3296         return 0;
3297 }
3298
3299 void btrfsic_unmount(struct btrfs_root *root,
3300                      struct btrfs_fs_devices *fs_devices)
3301 {
3302         struct list_head *elem_all;
3303         struct list_head *tmp_all;
3304         struct btrfsic_state *state;
3305         struct list_head *dev_head = &fs_devices->devices;
3306         struct btrfs_device *device;
3307
3308         if (!btrfsic_is_initialized)
3309                 return;
3310
3311         mutex_lock(&btrfsic_mutex);
3312
3313         state = NULL;
3314         list_for_each_entry(device, dev_head, dev_list) {
3315                 struct btrfsic_dev_state *ds;
3316
3317                 if (!device->bdev || !device->name)
3318                         continue;
3319
3320                 ds = btrfsic_dev_state_hashtable_lookup(
3321                                 device->bdev,
3322                                 &btrfsic_dev_state_hashtable);
3323                 if (NULL != ds) {
3324                         state = ds->state;
3325                         btrfsic_dev_state_hashtable_remove(ds);
3326                         btrfsic_dev_state_free(ds);
3327                 }
3328         }
3329
3330         if (NULL == state) {
3331                 printk(KERN_INFO
3332                        "btrfsic: error, cannot find state information"
3333                        " on umount!\n");
3334                 mutex_unlock(&btrfsic_mutex);
3335                 return;
3336         }
3337
3338         /*
3339          * Don't care about keeping the lists' state up to date,
3340          * just free all memory that was allocated dynamically.
3341          * Free the blocks and the block_links.
3342          */
3343         list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
3344                 struct btrfsic_block *const b_all =
3345                     list_entry(elem_all, struct btrfsic_block,
3346                                all_blocks_node);
3347                 struct list_head *elem_ref_to;
3348                 struct list_head *tmp_ref_to;
3349
3350                 list_for_each_safe(elem_ref_to, tmp_ref_to,
3351                                    &b_all->ref_to_list) {
3352                         struct btrfsic_block_link *const l =
3353                             list_entry(elem_ref_to,
3354                                        struct btrfsic_block_link,
3355                                        node_ref_to);
3356
3357                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3358                                 btrfsic_print_rem_link(state, l);
3359
3360                         l->ref_cnt--;
3361                         if (0 == l->ref_cnt)
3362                                 btrfsic_block_link_free(l);
3363                 }
3364
3365                 if (b_all->is_iodone || b_all->never_written)
3366                         btrfsic_block_free(b_all);
3367                 else
3368                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3369                                " @%llu (%s/%llu/%d) on umount which is"
3370                                " not yet iodone!\n",
3371                                btrfsic_get_block_type(state, b_all),
3372                                (unsigned long long)b_all->logical_bytenr,
3373                                b_all->dev_state->name,
3374                                (unsigned long long)b_all->dev_bytenr,
3375                                b_all->mirror_num);
3376         }
3377
3378         mutex_unlock(&btrfsic_mutex);
3379
3380         kfree(state);
3381 }