diff options
author | Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp> | 2015-02-05 12:25:20 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-05 13:35:29 -0800 |
commit | 7ef3ff2fea8bf5e4a21cef47ad87710a3d0fdb52 (patch) | |
tree | fae90c5009ed6bf5009cedf9646c0a4edd71304c /fs/nilfs2/segment.h | |
parent | 81cca6fb1271b91f06b008976ea132bec2f14d86 (diff) | |
download | linux-7ef3ff2fea8bf5e4a21cef47ad87710a3d0fdb52.tar.gz linux-7ef3ff2fea8bf5e4a21cef47ad87710a3d0fdb52.tar.xz |
nilfs2: fix deadlock of segment constructor over I_SYNC flag
Nilfs2 eventually hangs in a stress test with fsstress program. This
issue was caused by the following deadlock over I_SYNC flag between
nilfs_segctor_thread() and writeback_sb_inodes():
nilfs_segctor_thread()
nilfs_segctor_thread_construct()
nilfs_segctor_unlock()
nilfs_dispose_list()
iput()
iput_final()
evict()
inode_wait_for_writeback() * wait for I_SYNC flag
writeback_sb_inodes()
* set I_SYNC flag on inode->i_state
__writeback_single_inode()
do_writepages()
nilfs_writepages()
nilfs_construct_dsync_segment()
nilfs_segctor_sync()
* wait for completion of segment constructor
inode_sync_complete()
* clear I_SYNC flag after __writeback_single_inode() completed
writeback_sb_inodes() calls do_writepages() for dirty inodes after
setting I_SYNC flag on inode->i_state. do_writepages() in turn calls
nilfs_writepages(), which can run segment constructor and wait for its
completion. On the other hand, segment constructor calls iput(), which
can call evict() and wait for the I_SYNC flag on
inode_wait_for_writeback().
Since segment constructor doesn't know when I_SYNC will be set, it
cannot know whether iput() will block or not unless inode->i_nlink has a
non-zero count. We can prevent evict() from being called in iput() by
implementing sop->drop_inode(), but it's not preferable to leave inodes
with i_nlink == 0 for long periods because it even defers file
truncation and inode deallocation. So, this instead resolves the
deadlock by calling iput() asynchronously with a workqueue for inodes
with i_nlink == 0.
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Tested-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/nilfs2/segment.h')
-rw-r--r-- | fs/nilfs2/segment.h | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 38a1d0013314..a48d6de1e02c 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/fs.h> #include <linux/buffer_head.h> +#include <linux/workqueue.h> #include <linux/nilfs2_fs.h> #include "nilfs.h" @@ -92,6 +93,8 @@ struct nilfs_segsum_pointer { * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written + * @sc_iput_queue: list of inodes for which iput should be done + * @sc_iput_work: work struct to defer iput call * @sc_freesegs: array of segment numbers to be freed * @sc_nfreesegs: number of segments on @sc_freesegs * @sc_dsync_inode: inode whose data pages are written for a sync operation @@ -135,6 +138,8 @@ struct nilfs_sc_info { struct list_head sc_dirty_files; struct list_head sc_gc_inodes; + struct list_head sc_iput_queue; + struct work_struct sc_iput_work; __u64 *sc_freesegs; size_t sc_nfreesegs; |