From 57cdc8db21bf9cfa6b2e45310d56e74e263e8609 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 5 Feb 2014 02:37:48 +0100 Subject: btrfs: cleanup ino cache members of btrfs_root The naming is confusing, generic yet used for a specific cache. Add a prefix 'ino_' or rename appropriately. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a1d36e62179c..354cc3f232bb 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1573,8 +1573,8 @@ int btrfs_init_fs_root(struct btrfs_root *root) root->subv_writers = writers; btrfs_init_free_ino_ctl(root); - spin_lock_init(&root->cache_lock); - init_waitqueue_head(&root->cache_wait); + spin_lock_init(&root->ino_cache_lock); + init_waitqueue_head(&root->ino_cache_wait); ret = get_anon_bdev(&root->anon_dev); if (ret) @@ -3532,7 +3532,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, static void free_fs_root(struct btrfs_root *root) { - iput(root->cache_inode); + iput(root->ino_cache_inode); WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); btrfs_free_block_rsv(root, root->orphan_block_rsv); root->orphan_block_rsv = NULL; -- cgit v1.2.1 From 3abdbd780e9d75f0648b8a502c3789857b1e92ce Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 4 Jun 2014 18:10:45 +0200 Subject: btrfs: make close_ctree return void There's no user of the return value and we can get rid of the comment in put_super. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 354cc3f232bb..ec32bead96a1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3623,7 +3623,7 @@ int btrfs_commit_super(struct btrfs_root *root) return btrfs_commit_transaction(trans, root); } -int close_ctree(struct btrfs_root *root) +void close_ctree(struct btrfs_root *root) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; @@ -3711,8 +3711,6 @@ int close_ctree(struct btrfs_root *root) btrfs_free_block_rsv(root, root->orphan_block_rsv); root->orphan_block_rsv = NULL; - - return 0; } int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, -- cgit v1.2.1 From 707e8a071528385a87b63a72a37c2322e463c7b8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 4 Jun 2014 19:22:26 +0200 Subject: btrfs: use nodesize everywhere, kill leafsize The nodesize and leafsize were never of different values. Unify the usage and make nodesize the one. Cleanup the redundant checks and helpers. Shaves a few bytes from .text: text data bss dec hex filename 852418 24560 23112 900090 dbbfa btrfs.ko.before 851074 24584 23112 898770 db6d2 btrfs.ko.after Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 74 ++++++++++++++++++++++++------------------------------ 1 file changed, 33 insertions(+), 41 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ec32bead96a1..508bbee320f6 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1200,16 +1200,14 @@ btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) kfree(writers); } -static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, - u32 stripesize, struct btrfs_root *root, - struct btrfs_fs_info *fs_info, +static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize, + struct btrfs_root *root, struct btrfs_fs_info *fs_info, u64 objectid) { root->node = NULL; root->commit_root = NULL; root->sectorsize = sectorsize; root->nodesize = nodesize; - root->leafsize = leafsize; root->stripesize = stripesize; root->state = 0; root->orphan_cleanup_state = 0; @@ -1295,7 +1293,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void) root = btrfs_alloc_root(NULL); if (!root) return ERR_PTR(-ENOMEM); - __setup_root(4096, 4096, 4096, 4096, root, NULL, 1); + __setup_root(4096, 4096, 4096, root, NULL, 1); set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state); root->alloc_bytenr = 0; @@ -1318,14 +1316,13 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, objectid); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, objectid); root->root_key.objectid = objectid; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); @@ -1396,9 +1393,9 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, if (!root) return ERR_PTR(-ENOMEM); - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, BTRFS_TREE_LOG_OBJECTID); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, + BTRFS_TREE_LOG_OBJECTID); root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; @@ -1413,7 +1410,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, * updated (along with back refs to the log tree). */ - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, + leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, BTRFS_TREE_LOG_OBJECTID, NULL, 0, 0, 0); if (IS_ERR(leaf)) { @@ -1465,7 +1462,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, btrfs_set_stack_inode_generation(inode_item, 1); btrfs_set_stack_inode_size(inode_item, 3); btrfs_set_stack_inode_nlink(inode_item, 1); - btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); + btrfs_set_stack_inode_nbytes(inode_item, root->nodesize); btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); btrfs_set_root_node(&log_root->root_item, log_root->node); @@ -1498,9 +1495,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, goto alloc_fail; } - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, key->objectid); + __setup_root(tree_root->nodesize, tree_root->sectorsize, + tree_root->stripesize, root, fs_info, key->objectid); ret = btrfs_find_root(tree_root, key, path, &root->root_item, &root->root_key); @@ -1511,7 +1507,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); + blocksize = root->nodesize; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); if (!root->node) { @@ -2143,7 +2139,6 @@ int open_ctree(struct super_block *sb, { u32 sectorsize; u32 nodesize; - u32 leafsize; u32 blocksize; u32 stripesize; u64 generation; @@ -2389,7 +2384,7 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - __setup_root(4096, 4096, 4096, 4096, tree_root, + __setup_root(4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); invalidate_bdev(fs_devices->latest_bdev); @@ -2469,19 +2464,22 @@ int open_ctree(struct super_block *sb, goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) != + /* + * Leafsize and nodesize were always equal, this is only a sanity check. + */ + if (le32_to_cpu(disk_super->__unused_leafsize) != btrfs_super_nodesize(disk_super)) { printk(KERN_ERR "BTRFS: couldn't mount because metadata " "blocksizes don't match. node %d leaf %d\n", btrfs_super_nodesize(disk_super), - btrfs_super_leafsize(disk_super)); + le32_to_cpu(disk_super->__unused_leafsize)); err = -EINVAL; goto fail_alloc; } - if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { + if (btrfs_super_nodesize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { printk(KERN_ERR "BTRFS: couldn't mount because metadata " "blocksize (%d) was too large\n", - btrfs_super_leafsize(disk_super)); + btrfs_super_nodesize(disk_super)); err = -EINVAL; goto fail_alloc; } @@ -2498,17 +2496,16 @@ int open_ctree(struct super_block *sb, * flag our filesystem as having big metadata blocks if * they are bigger than the page size */ - if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { + if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) { if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n"); features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; } nodesize = btrfs_super_nodesize(disk_super); - leafsize = btrfs_super_leafsize(disk_super); sectorsize = btrfs_super_sectorsize(disk_super); stripesize = btrfs_super_stripesize(disk_super); - fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); + fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids)); fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); /* @@ -2516,7 +2513,7 @@ int open_ctree(struct super_block *sb, * extent buffers for the same range. It leads to corruptions */ if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && - (sectorsize != leafsize)) { + (sectorsize != nodesize)) { printk(KERN_WARNING "BTRFS: unequal leaf/node/sector sizes " "are not allowed for mixed block groups on %s\n", sb->s_id); @@ -2615,7 +2612,6 @@ int open_ctree(struct super_block *sb, 4 * 1024 * 1024 / PAGE_CACHE_SIZE); tree_root->nodesize = nodesize; - tree_root->leafsize = leafsize; tree_root->sectorsize = sectorsize; tree_root->stripesize = stripesize; @@ -2642,12 +2638,11 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - blocksize = btrfs_level_size(tree_root, - btrfs_super_chunk_root_level(disk_super)); + blocksize = tree_root->nodesize; generation = btrfs_super_chunk_root_generation(disk_super); - __setup_root(nodesize, leafsize, sectorsize, stripesize, - chunk_root, fs_info, BTRFS_CHUNK_TREE_OBJECTID); + __setup_root(nodesize, sectorsize, stripesize, chunk_root, + fs_info, BTRFS_CHUNK_TREE_OBJECTID); chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), @@ -2684,8 +2679,7 @@ int open_ctree(struct super_block *sb, } retry_root_backup: - blocksize = btrfs_level_size(tree_root, - btrfs_super_root_level(disk_super)); + blocksize = tree_root->nodesize; generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, @@ -2859,9 +2853,7 @@ retry_root_backup: err = -EIO; goto fail_qgroup; } - blocksize = - btrfs_level_size(tree_root, - btrfs_super_log_root_level(disk_super)); + blocksize = tree_root->nodesize; log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { @@ -2869,7 +2861,7 @@ retry_root_backup: goto fail_qgroup; } - __setup_root(nodesize, leafsize, sectorsize, stripesize, + __setup_root(nodesize, sectorsize, stripesize, log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, @@ -4008,8 +4000,8 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { eb = btrfs_find_tree_block(root, start, - root->leafsize); - start += root->leafsize; + root->nodesize); + start += root->nodesize; if (!eb) continue; wait_on_extent_buffer_writeback(eb); -- cgit v1.2.1 From 29549aec76bd6f1fc8e1723ed5396d65073d6521 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Fri, 4 Jul 2014 17:59:06 +0800 Subject: Btrfs: print btrfs specific info for some fatal error cases Marc argued that if there are several btrfs filesystems mounted, while users even don't know which filesystem hit the corrupted errors something like generation verification failure. Since @extent_buffer structure has a member @fs_info, let's output btrfs device info. Reported-by: Marc MERLIN Signed-off-by: Wang Shilong Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 508bbee320f6..d14847d05f31 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -348,9 +348,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, ret = 0; goto out; } - printk_ratelimited("parent transid verify failed on %llu wanted %llu " - "found %llu\n", - eb->start, parent_transid, btrfs_header_generation(eb)); + printk_ratelimited(KERN_INFO "BTRFS (device %s): parent transid verify failed on %llu wanted %llu found %llu\n", + eb->fs_info->sb->s_id, eb->start, + parent_transid, btrfs_header_generation(eb)); ret = 1; /* @@ -614,15 +614,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { - printk_ratelimited(KERN_INFO "BTRFS: bad tree block start " + printk_ratelimited(KERN_INFO "BTRFS (device %s): bad tree block start " "%llu %llu\n", - found_start, eb->start); + eb->fs_info->sb->s_id, found_start, eb->start); ret = -EIO; goto err; } if (check_tree_block_fsid(root, eb)) { - printk_ratelimited(KERN_INFO "BTRFS: bad fsid on block %llu\n", - eb->start); + printk_ratelimited(KERN_INFO "BTRFS (device %s): bad fsid on block %llu\n", + eb->fs_info->sb->s_id, eb->start); ret = -EIO; goto err; } -- cgit v1.2.1 From 56094eecd32cbb80d098eee5a7cbd60f39f4b764 Mon Sep 17 00:00:00 2001 From: Andrey Utkin Date: Sat, 9 Aug 2014 14:51:15 +0300 Subject: btrfs: Drop stray check of fixup_workers creation The issue was introduced in a79b7d4b3e8118f265dcb4bdf9a572c392f02708, adding allocation of extent_workers, so this stray check is surely not meant to be a check of something else. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=82021 Reported-by: Maks Naumov Signed-off-by: Andrey Utkin Reviewed-by: Eric Sandeen Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d14847d05f31..38b295553544 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2601,7 +2601,7 @@ int open_ctree(struct super_block *sb, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && - fs_info->fixup_workers && fs_info->extent_workers && + fs_info->extent_workers && fs_info->qgroup_rescan_workers)) { err = -ENOMEM; goto fail_sb_buffer; -- cgit v1.2.1 From 82f70d62f7923cc43128e75ae85366f137055b76 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 8 Sep 2014 20:41:09 +0800 Subject: btrfs: remove the wrong comments This comments became wrong after c3c532[bdi: add helper function for doing init and register of a bdi for a file system], so remove them. Signed-off-by: Li RongQing Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 38b295553544..dbd792754b27 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1704,10 +1704,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) return ret; } -/* - * If this fails, caller must call bdi_destroy() to get rid of the - * bdi again. - */ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) { int err; -- cgit v1.2.1 From 935e5cc935bcbf9b3d0dd59fed7dbc0f2ebca6bc Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:33 +0800 Subject: Btrfs: fix wrong disk size when writing super blocks total_size will be changed when resizing a device, and disk_total_size will be changed if resizing is successful. Meanwhile, the on-disk super blocks of the previous transaction might not be updated. Considering the consistency of the metadata in the previous transaction, We should use the size in the previous transaction to check if the super block is beyond the boundary of the device. Fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dbd792754b27..0cd18b725554 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3127,7 +3127,8 @@ static int write_dev_supers(struct btrfs_device *device, for (i = 0; i < max_mirrors; i++) { bytenr = btrfs_sb_offset(i); - if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + device->commit_total_bytes) break; if (wait) { @@ -3444,7 +3445,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_type(dev_item, dev->type); btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, - dev->disk_total_bytes); + dev->commit_total_bytes); btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); -- cgit v1.2.1 From ce7213c70c37e3a66bc0b50c45edcbfea505f62f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 3 Sep 2014 21:35:34 +0800 Subject: Btrfs: fix wrong device bytes_used in the super block device->bytes_used will be changed when allocating a new chunk, and disk_total_size will be changed if resizing is successful. Meanwhile, the on-disk super blocks of the previous transaction might not be updated. Considering the consistency of the metadata in the previous transaction, We should use the size in the previous transaction to check if the super block is beyond the boundary of the device. Though it is not big problem because we don't use it now, but anyway it is better that we make it be consistent with the common metadata, maybe we will use it in the future. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0cd18b725554..a224fb9b34a3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3446,7 +3446,8 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) btrfs_set_stack_device_id(dev_item, dev->devid); btrfs_set_stack_device_total_bytes(dev_item, dev->commit_total_bytes); - btrfs_set_stack_device_bytes_used(dev_item, dev->bytes_used); + btrfs_set_stack_device_bytes_used(dev_item, + dev->commit_bytes_used); btrfs_set_stack_device_io_align(dev_item, dev->io_align); btrfs_set_stack_device_io_width(dev_item, dev->io_width); btrfs_set_stack_device_sector_size(dev_item, dev->sector_size); -- cgit v1.2.1 From 8b110e393c5a6e72d50fcdf9fa7ed8b647cfdfc9 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 12 Sep 2014 18:44:03 +0800 Subject: Btrfs: implement repair function when direct read fails This patch implement data repair function when direct read fails. The detail of the implementation is: - When we find the data is not right, we try to read the data from the other mirror. - When the io on the mirror ends, we will insert the endio work into the dedicated btrfs workqueue, not common read endio workqueue, because the original endio work is still blocked in the btrfs endio workqueue, if we insert the endio work of the io on the mirror into that workqueue, deadlock would happen. - After we get right data, we write it back to the corrupted mirror. - And if the data on the new mirror is still corrupted, we will try next mirror until we read right data or all the mirrors are traversed. - After the above work, we set the uptodate flag according to the result. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a224fb9b34a3..48794f951427 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -713,7 +713,11 @@ static void end_workqueue_bio(struct bio *bio, int err) func = btrfs_endio_write_helper; } } else { - if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { + if (unlikely(end_io_wq->metadata == + BTRFS_WQ_ENDIO_DIO_REPAIR)) { + wq = fs_info->endio_repair_workers; + func = btrfs_endio_repair_helper; + } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) { wq = fs_info->endio_raid56_workers; func = btrfs_endio_raid56_helper; } else if (end_io_wq->metadata) { @@ -741,6 +745,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, int metadata) { struct end_io_wq *end_io_wq; + end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); if (!end_io_wq) return -ENOMEM; @@ -2055,6 +2060,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->endio_workers); btrfs_destroy_workqueue(fs_info->endio_meta_workers); btrfs_destroy_workqueue(fs_info->endio_raid56_workers); + btrfs_destroy_workqueue(fs_info->endio_repair_workers); btrfs_destroy_workqueue(fs_info->rmw_workers); btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); btrfs_destroy_workqueue(fs_info->endio_write_workers); @@ -2572,6 +2578,8 @@ int open_ctree(struct super_block *sb, btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); fs_info->endio_raid56_workers = btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); + fs_info->endio_repair_workers = + btrfs_alloc_workqueue("endio-repair", flags, 1, 0); fs_info->rmw_workers = btrfs_alloc_workqueue("rmw", flags, max_active, 2); fs_info->endio_write_workers = @@ -2593,6 +2601,7 @@ int open_ctree(struct super_block *sb, fs_info->submit_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->endio_meta_write_workers && + fs_info->endio_repair_workers && fs_info->endio_write_workers && fs_info->endio_raid56_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && -- cgit v1.2.1 From 47ab2a6c689913db23ccae38349714edf8365e0a Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Sep 2014 11:20:02 -0400 Subject: Btrfs: remove empty block groups automatically One problem that has plagued us is that a user will use up all of his space with data, remove a bunch of that data, and then try to create a bunch of small files and run out of space. This happens because all the chunks were allocated for data since the metadata requirements were so low. But now there's a bunch of empty data block groups and not enough metadata space to do anything. This patch solves this problem by automatically deleting empty block groups. If we notice the used count go down to 0 when deleting or on mount notice that a block group has a used count of 0 then we will queue it to be deleted. When the cleaner thread runs we will double check to make sure the block group is still empty and then we will delete it. This patch has the side effect of no longer having a bunch of BUG_ON()'s in the chunk delete code, which will be helpful for both this and relocate. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 48794f951427..4780e6623c7b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1769,6 +1769,7 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); + btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -2230,6 +2231,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); + spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); @@ -2239,6 +2241,7 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); INIT_LIST_HEAD(&fs_info->space_info); INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); + INIT_LIST_HEAD(&fs_info->unused_bgs); btrfs_mapping_init(&fs_info->mapping_tree); btrfs_init_block_rsv(&fs_info->global_block_rsv, BTRFS_BLOCK_RSV_GLOBAL); @@ -2977,6 +2980,8 @@ retry_root_backup: fs_info->update_uuid_tree_gen = 1; } + fs_info->open = 1; + return 0; fail_qgroup: @@ -3688,6 +3693,7 @@ void close_ctree(struct btrfs_root *root) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); + fs_info->open = 0; free_root_pointers(fs_info, 1); iput(fs_info->btree_inode); -- cgit v1.2.1 From 58dc4ce4325108b35425ffd30e6acfad9644d49d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 00:29:04 +0200 Subject: btrfs: remove unused parameter from readahead_tree_block The parent_transid parameter has been unused since its introduction in ca7a79ad8dbe2466 ("Pass down the expected generation number when reading tree blocks"). In reada_tree_block, it was even wrongly set to leafsize. Transid check is done in the proper read and readahead ignores errors. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7b..ff83748d39da 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1062,8 +1062,7 @@ static const struct address_space_operations btree_aops = { .set_page_dirty = btree_set_page_dirty, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, - u64 parent_transid) +int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; -- cgit v1.2.1 From 6197d86eabb844c1a9c99956d4e6b0f8eb548ad3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 00:49:36 +0200 Subject: btrfs: return void from readahead_tree_block Errors in readahead are not fatal and ignored elsewhere in the code. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff83748d39da..332f63518156 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1062,19 +1062,17 @@ static const struct address_space_operations btree_aops = { .set_page_dirty = btree_set_page_dirty, }; -int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) +void readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { struct extent_buffer *buf = NULL; struct inode *btree_inode = root->fs_info->btree_inode; - int ret = 0; buf = btrfs_find_create_tree_block(root, bytenr, blocksize); if (!buf) - return 0; + return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0, WAIT_NONE, btree_get_extent, 0); free_extent_buffer(buf); - return ret; } int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, -- cgit v1.2.1 From ce86cd59179279a6fe673d2a105d24fb7e70aef3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 01:07:32 +0200 Subject: btrfs: remove parameter blocksize from read_tree_block We know the tree block size, no need to pass it around. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 332f63518156..03c0973568ef 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1138,12 +1138,12 @@ int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) } struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, - u32 blocksize, u64 parent_transid) + u64 parent_transid) { struct extent_buffer *buf = NULL; int ret; - buf = btrfs_find_create_tree_block(root, bytenr, blocksize); + buf = btrfs_find_create_tree_block(root, bytenr, root->nodesize); if (!buf) return NULL; @@ -1484,7 +1484,6 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; u64 generation; - u32 blocksize; int ret; path = btrfs_alloc_path(); @@ -1509,9 +1508,8 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, } generation = btrfs_root_generation(&root->root_item); - blocksize = root->nodesize; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, generation); + generation); if (!root->node) { ret = -ENOMEM; goto find_fail; @@ -2139,7 +2137,6 @@ int open_ctree(struct super_block *sb, { u32 sectorsize; u32 nodesize; - u32 blocksize; u32 stripesize; u64 generation; u64 features; @@ -2643,7 +2640,6 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - blocksize = tree_root->nodesize; generation = btrfs_super_chunk_root_generation(disk_super); __setup_root(nodesize, sectorsize, stripesize, chunk_root, @@ -2651,7 +2647,7 @@ int open_ctree(struct super_block *sb, chunk_root->node = read_tree_block(chunk_root, btrfs_super_chunk_root(disk_super), - blocksize, generation); + generation); if (!chunk_root->node || !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { printk(KERN_WARNING "BTRFS: failed to read chunk root on %s\n", @@ -2684,12 +2680,11 @@ int open_ctree(struct super_block *sb, } retry_root_backup: - blocksize = tree_root->nodesize; generation = btrfs_super_generation(disk_super); tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), - blocksize, generation); + generation); if (!tree_root->node || !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { printk(KERN_WARNING "BTRFS: failed to read tree root on %s\n", @@ -2858,7 +2853,6 @@ retry_root_backup: err = -EIO; goto fail_qgroup; } - blocksize = tree_root->nodesize; log_tree_root = btrfs_alloc_root(fs_info); if (!log_tree_root) { @@ -2870,7 +2864,6 @@ retry_root_backup: log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); log_tree_root->node = read_tree_block(tree_root, bytenr, - blocksize, generation + 1); if (!log_tree_root->node || !extent_buffer_uptodate(log_tree_root->node)) { -- cgit v1.2.1 From 0308af4465897c889e32754ef37bb465a1b2b872 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 01:43:40 +0200 Subject: btrfs: remove unused parameter blocksize from btrfs_find_tree_block Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 03c0973568ef..e0293d2fbb3a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1108,7 +1108,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, } struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, - u64 bytenr, u32 blocksize) + u64 bytenr) { return find_extent_buffer(root->fs_info, bytenr); } @@ -4002,8 +4002,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root, clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS); while (start <= end) { - eb = btrfs_find_tree_block(root, start, - root->nodesize); + eb = btrfs_find_tree_block(root, start); start += root->nodesize; if (!eb) continue; -- cgit v1.2.1 From 4d75f8a9c87b843c8ded15b82b8d137b9724cccc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 15 Jun 2014 01:54:12 +0200 Subject: btrfs: remove blocksize from btrfs_alloc_free_block and rename Rename to btrfs_alloc_tree_block as it fits to the alloc/find/free + _tree_block family. The parameter blocksize was set to the metadata block size, directly or indirectly. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0293d2fbb3a..2e5d460d4e0c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1324,8 +1324,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 0; - leaf = btrfs_alloc_free_block(trans, root, root->nodesize, - 0, objectid, NULL, 0, 0, 0); + leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); leaf = NULL; @@ -1412,9 +1411,8 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, * updated (along with back refs to the log tree). */ - leaf = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - BTRFS_TREE_LOG_OBJECTID, NULL, - 0, 0, 0); + leaf = btrfs_alloc_tree_block(trans, root, 0, BTRFS_TREE_LOG_OBJECTID, + NULL, 0, 0, 0); if (IS_ERR(leaf)) { kfree(root); return ERR_CAST(leaf); -- cgit v1.2.1 From 95ac567af212db3293af3897ccb521efdf1dd7ff Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Thu, 8 Aug 2013 22:45:48 +0100 Subject: Btrfs: set default max_inline to 8KiB instead of 8MiB 8MiB is way too large and likely set by mistake. This is not a significant issue as in practice the max amount of data added to an inline extent is also limited by the page cache and btree leaf sizes. Signed-off-by: Filipe David Borba Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7b..9b2a741370b7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2260,7 +2260,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->qgroup_op_seq, 0); atomic64_set(&fs_info->tree_mod_seq, 0); fs_info->sb = sb; - fs_info->max_inline = 8192 * 1024; + fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; fs_info->free_chunk_space = 0; -- cgit v1.2.1 From bfebd8b5441755f228ad02273682d675d3335123 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 30 Jul 2014 00:25:45 +0200 Subject: btrfs: use enum for wq endio metadata type The enum exists but is not consistently used. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b2a741370b7..d7cb58ed2946 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -82,7 +82,7 @@ struct end_io_wq { void *private; struct btrfs_fs_info *info; int error; - int metadata; + enum btrfs_wq_endio_type metadata; struct list_head list; struct btrfs_work work; }; @@ -733,16 +733,8 @@ static void end_workqueue_bio(struct bio *bio, int err) btrfs_queue_work(wq, &end_io_wq->work); } -/* - * For the metadata arg you want - * - * 0 - if data - * 1 - if normal metadta - * 2 - if writing to the free space cache area - * 3 - raid parity work - */ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, - int metadata) + enum btrfs_wq_endio_type metadata) { struct end_io_wq *end_io_wq; @@ -930,7 +922,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, * can happen in the async kernel threads */ ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, - bio, 1); + bio, BTRFS_WQ_ENDIO_METADATA); if (ret) goto out_w_error; ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, -- cgit v1.2.1 From 97eb6b69d1e856cb5e1cf2c3d94afab643e93128 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 30 Jul 2014 00:55:42 +0200 Subject: btrfs: use slab for end_io_wq structures The structure is frequently reused. Rename it according to the slab name. Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d7cb58ed2946..2f075ef20050 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -72,11 +72,11 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root); static void btrfs_error_commit_super(struct btrfs_root *root); /* - * end_io_wq structs are used to do processing in task context when an IO is - * complete. This is used during reads to verify checksums, and it is used + * btrfs_end_io_wq structs are used to do processing in task context when an IO + * is complete. This is used during reads to verify checksums, and it is used * by writes to insert metadata for new file extents after IO is complete. */ -struct end_io_wq { +struct btrfs_end_io_wq { struct bio *bio; bio_end_io_t *end_io; void *private; @@ -87,6 +87,26 @@ struct end_io_wq { struct btrfs_work work; }; +static struct kmem_cache *btrfs_end_io_wq_cache; + +int __init btrfs_end_io_wq_init(void) +{ + btrfs_end_io_wq_cache = kmem_cache_create("btrfs_end_io_wq", + sizeof(struct btrfs_end_io_wq), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_end_io_wq_cache) + return -ENOMEM; + return 0; +} + +void btrfs_end_io_wq_exit(void) +{ + if (btrfs_end_io_wq_cache) + kmem_cache_destroy(btrfs_end_io_wq_cache); +} + /* * async submit bios are used to offload expensive checksumming * onto the worker threads. They checksum file and metadata bios @@ -690,7 +710,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) static void end_workqueue_bio(struct bio *bio, int err) { - struct end_io_wq *end_io_wq = bio->bi_private; + struct btrfs_end_io_wq *end_io_wq = bio->bi_private; struct btrfs_fs_info *fs_info; struct btrfs_workqueue *wq; btrfs_work_func_t func; @@ -736,9 +756,9 @@ static void end_workqueue_bio(struct bio *bio, int err) int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, enum btrfs_wq_endio_type metadata) { - struct end_io_wq *end_io_wq; + struct btrfs_end_io_wq *end_io_wq; - end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS); + end_io_wq = kmem_cache_alloc(btrfs_end_io_wq_cache, GFP_NOFS); if (!end_io_wq) return -ENOMEM; @@ -1723,16 +1743,16 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) static void end_workqueue_fn(struct btrfs_work *work) { struct bio *bio; - struct end_io_wq *end_io_wq; + struct btrfs_end_io_wq *end_io_wq; int error; - end_io_wq = container_of(work, struct end_io_wq, work); + end_io_wq = container_of(work, struct btrfs_end_io_wq, work); bio = end_io_wq->bio; error = end_io_wq->error; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; - kfree(end_io_wq); + kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); bio_endio_nodec(bio, error); } -- cgit v1.2.1 From 2755a0de64693501741fb3603cd8ca928b0b7e81 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 31 Jul 2014 00:43:18 +0200 Subject: btrfs: hide typecast to definition of BTRFS_SEND_TRANS_STUB Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2f075ef20050..0abf4b0a9010 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -347,8 +347,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, { struct extent_state *cached_state = NULL; int ret; - bool need_lock = (current->journal_info == - (void *)BTRFS_SEND_TRANS_STUB); + bool need_lock = (current->journal_info == BTRFS_SEND_TRANS_STUB); if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 0; -- cgit v1.2.1 From fccb84c94a9755f48668e43d0a44d6ecc750900f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 29 Sep 2014 23:53:21 +0200 Subject: btrfs: move checks for DUMMY_ROOT into a helper Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0abf4b0a9010..14117f85b545 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1130,11 +1130,9 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize) { -#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS - if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state))) + if (btrfs_test_is_dummy_root(root)) return alloc_test_extent_buffer(root->fs_info, bytenr, blocksize); -#endif return alloc_extent_buffer(root->fs_info, bytenr, blocksize); } -- cgit v1.2.1 From 656f30dba7ab8179c9a2e04293b0c7b383fa9ce9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 26 Sep 2014 12:25:56 +0100 Subject: Btrfs: be aware of btree inode write errors to avoid fs corruption While we have a transaction ongoing, the VM might decide at any time to call btree_inode->i_mapping->a_ops->writepages(), which will start writeback of dirty pages belonging to btree nodes/leafs. This call might return an error or the writeback might finish with an error before we attempt to commit the running transaction. If this happens, we might have no way of knowing that such error happened when we are committing the transaction - because the pages might no longer be marked dirty nor tagged for writeback (if a subsequent modification to the extent buffer didn't happen before the transaction commit) which makes filemap_fdata[write|wait]_range unable to find such pages (even if they're marked with SetPageError). So if this happens we must abort the transaction, otherwise we commit a super block with btree roots that point to btree nodes/leafs whose content on disk is invalid - either garbage or the content of some node/leaf from a past generation that got cowed or deleted and is no longer valid (for this later case we end up getting error messages like "parent transid verify failed on 10826481664 wanted 25748 found 29562" when reading btree nodes/leafs from disk). Note that setting and checking AS_EIO/AS_ENOSPC in the btree inode's i_mapping would not be enough because we need to distinguish between log tree extents (not fatal) vs non-log tree extents (fatal) and because the next call to filemap_fdatawait_range() will catch and clear such errors in the mapping - and that call might be from a log sync and not from a transaction commit, which means we would not know about the error at transaction commit time. Also, checking for the eb flag EXTENT_BUFFER_IOERR at transaction commit time isn't done and would not be completely reliable, as the eb might be removed from memory and read back when trying to get it, which clears that flag right before reading the eb's pages from disk, making us not know about the previous write error. Using the new 3 flags for the btree inode also makes us achieve the goal of AS_EIO/AS_ENOSPC when writepages() returns success, started writeback for all dirty pages and before filemap_fdatawait_range() is called, the writeback for all dirty pages had already finished with errors - because we were not using AS_EIO/AS_ENOSPC, filemap_fdatawait_range() would return success, as it could not know that writeback errors happened (the pages were no longer tagged for writeback). Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4780e6623c7b..09b3c8a0c790 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -607,7 +607,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, goto err; eb->read_mirror = mirror; - if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { + if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { ret = -EIO; goto err; } @@ -680,7 +680,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; eb = (struct extent_buffer *)page->private; - set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); + set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = failed_mirror; atomic_dec(&eb->io_pages); if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) -- cgit v1.2.1 From c926093ec516f5d316ecdf8c1be11f577ac71b85 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 30 Sep 2014 19:16:47 +0200 Subject: btrfs: add more superblock checks Populate btrfs_check_super_valid() with checks that try to verify consistency of superblock by additional conditions that may arise from corrupted devices or bitflips. Some of tests are only hints and issue warnings instead of failing the mount, basically when the checks are derived from the data found in the superblock. Tested on a broken image provided by Qu. Reported-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 09b3c8a0c790..fc8dfaa27967 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3817,10 +3817,73 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, int read_only) { + struct btrfs_super_block *sb = fs_info->super_copy; + int ret = 0; + + if (sb->root_level > BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: tree_root level too big: %d > %d\n", + sb->root_level, BTRFS_MAX_LEVEL); + ret = -EINVAL; + } + if (sb->chunk_root_level > BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: chunk_root level too big: %d > %d\n", + sb->chunk_root_level, BTRFS_MAX_LEVEL); + ret = -EINVAL; + } + if (sb->log_root_level > BTRFS_MAX_LEVEL) { + printk(KERN_ERR "BTRFS: log_root level too big: %d > %d\n", + sb->log_root_level, BTRFS_MAX_LEVEL); + ret = -EINVAL; + } + /* - * Placeholder for checks + * The common minimum, we don't know if we can trust the nodesize/sectorsize + * items yet, they'll be verified later. Issue just a warning. */ - return 0; + if (!IS_ALIGNED(sb->root, 4096)) + printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", + sb->root); + if (!IS_ALIGNED(sb->chunk_root, 4096)) + printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", + sb->chunk_root); + if (!IS_ALIGNED(sb->log_root, 4096)) + printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n", + sb->log_root); + + if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) { + printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n", + fs_info->fsid, sb->dev_item.fsid); + ret = -EINVAL; + } + + /* + * Hint to catch really bogus numbers, bitflips or so, more exact checks are + * done later + */ + if (sb->num_devices > (1UL << 31)) + printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n", + sb->num_devices); + + if (sb->bytenr != BTRFS_SUPER_INFO_OFFSET) { + printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n", + sb->bytenr, BTRFS_SUPER_INFO_OFFSET); + ret = -EINVAL; + } + + /* + * The generation is a global counter, we'll trust it more than the others + * but it's still possible that it's the one that's wrong. + */ + if (sb->generation < sb->chunk_root_generation) + printk(KERN_WARNING + "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n", + sb->generation, sb->chunk_root_generation); + if (sb->generation < sb->cache_generation && sb->cache_generation != (u64)-1) + printk(KERN_WARNING + "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n", + sb->generation, sb->cache_generation); + + return ret; } static void btrfs_error_commit_super(struct btrfs_root *root) -- cgit v1.2.1