From 899f0429c7d3eed886406cd72182bee3b96aa1f9 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 9 Oct 2017 11:13:18 +0200 Subject: direct-io: Prevent NULL pointer access in submit_page_section In the code added to function submit_page_section by commit b1058b981, sdio->bio can currently be NULL when calling dio_bio_submit. This then leads to a NULL pointer access in dio_bio_submit, so check for a NULL bio in submit_page_section before trying to submit it instead. Fixes xfstest generic/250 on gfs2. Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Andreas Gruenbacher Reviewed-by: Jan Kara Signed-off-by: Al Viro --- fs/direct-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/direct-io.c') diff --git a/fs/direct-io.c b/fs/direct-io.c index 62cf812ed0e5..96415c65bbdc 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -866,7 +866,8 @@ out: */ if (sdio->boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); - dio_bio_submit(dio, sdio); + if (sdio->bio) + dio_bio_submit(dio, sdio); put_page(sdio->cur_page); sdio->cur_page = NULL; } -- cgit v1.2.1 From 5e25c269e17de4c5a23ce886cda612b01365a944 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 13 Oct 2017 09:47:46 -0700 Subject: fs: invalidate page cache after end_io() in dio completion Commit 332391a9935d ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO") moved page cache invalidation from iomap_dio_rw() to iomap_dio_complete() for iomap based direct write path, but before the dio->end_io() call, and it re-introdued the bug fixed by commit c771c14baa33 ("iomap: invalidate page caches should be after iomap_dio_complete() in direct write"). I found this because fstests generic/418 started failing on XFS with v4.14-rc3 kernel, which is the regression test for this specific bug. So similarly, fix it by moving dio->end_io() (which does the unwritten extent conversion) before page cache invalidation, to make sure next buffer read reads the final real allocations not unwritten extents. I also add some comments about why should end_io() go first in case we get it wrong again in the future. Note that, there's no such problem in the non-iomap based direct write path, because we didn't remove the page cache invalidation after the ->direct_IO() in generic_file_direct_write() call, but I decided to fix dio_complete() too so we don't leave a landmine there, also be consistent with iomap_dio_complete(). Fixes: 332391a9935d ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO") Signed-off-by: Eryu Guan Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Lukas Czerner --- fs/direct-io.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'fs/direct-io.c') diff --git a/fs/direct-io.c b/fs/direct-io.c index 96415c65bbdc..19ac3fe57deb 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -259,12 +259,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (ret == 0) ret = transferred; + if (dio->end_io) { + // XXX: ki_pos?? + err = dio->end_io(dio->iocb, offset, ret, dio->private); + if (err) + ret = err; + } + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source * of the write was an mmap'ed region of the file we're writing. Either * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... + * + * And this page cache invalidation has to be after dio->end_io(), as + * some filesystems convert unwritten extents to real allocations in + * end_io() when necessary, otherwise a racing buffer read would cache + * zeros from unwritten extents. */ if (ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { @@ -274,14 +286,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) WARN_ON_ONCE(err); } - if (dio->end_io) { - - // XXX: ki_pos?? - err = dio->end_io(dio->iocb, offset, ret, dio->private); - if (err) - ret = err; - } - if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); -- cgit v1.2.1 From ffe51f0142a291a957eebb9687cafb15f2b3fc14 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 17 Oct 2017 08:43:09 -0600 Subject: fs: Avoid invalidation in interrupt context in dio_complete() Currently we try to defer completion of async DIO to the process context in case there are any mapped pages associated with the inode so that we can invalidate the pages when the IO completes. However the check is racy and the pages can be mapped afterwards. If this happens we might end up calling invalidate_inode_pages2_range() in dio_complete() in interrupt context which could sleep. This can be reproduced by generic/451. Fix this by passing the information whether we can or can't invalidate to the dio_complete(). Thanks Eryu Guan for reporting this and Jan Kara for suggesting a fix. Fixes: 332391a9935d ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO") Reported-by: Eryu Guan Reviewed-by: Jan Kara Tested-by: Eryu Guan Signed-off-by: Lukas Czerner Signed-off-by: Jens Axboe --- fs/direct-io.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'fs/direct-io.c') diff --git a/fs/direct-io.c b/fs/direct-io.c index 62cf812ed0e5..8106a8dddfab 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -44,6 +44,12 @@ */ #define DIO_PAGES 64 +/* + * Flags for dio_complete() + */ +#define DIO_COMPLETE_ASYNC 0x01 /* This is async IO */ +#define DIO_COMPLETE_INVALIDATE 0x02 /* Can invalidate pages */ + /* * This code generally works in units of "dio_blocks". A dio_block is * somewhere between the hard sector size and the filesystem block size. it @@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio, * filesystems can use it to hold additional state between get_block calls and * dio_complete. */ -static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) +static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) { loff_t offset = dio->iocb->ki_pos; ssize_t transferred = 0; @@ -266,7 +272,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) * one is a pretty crazy thing to do, so we don't support it 100%. If * this invalidation fails, tough, the write still worked... */ - if (ret > 0 && dio->op == REQ_OP_WRITE && + if (flags & DIO_COMPLETE_INVALIDATE && + ret > 0 && dio->op == REQ_OP_WRITE && dio->inode->i_mapping->nrpages) { err = invalidate_inode_pages2_range(dio->inode->i_mapping, offset >> PAGE_SHIFT, @@ -285,7 +292,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) if (!(dio->flags & DIO_SKIP_DIO_COUNT)) inode_dio_end(dio->inode); - if (is_async) { + if (flags & DIO_COMPLETE_ASYNC) { /* * generic_write_sync expects ki_pos to have been updated * already, but the submission path only does this for @@ -306,7 +313,7 @@ static void dio_aio_complete_work(struct work_struct *work) { struct dio *dio = container_of(work, struct dio, complete_work); - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE); } static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio); @@ -348,7 +355,7 @@ static void dio_bio_end_aio(struct bio *bio) queue_work(dio->inode->i_sb->s_dio_done_wq, &dio->complete_work); } else { - dio_complete(dio, 0, true); + dio_complete(dio, 0, DIO_COMPLETE_ASYNC); } } } @@ -1359,7 +1366,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, dio_await_completion(dio); if (drop_refcount(dio) == 0) { - retval = dio_complete(dio, retval, false); + retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE); } else BUG_ON(retval != -EIOCBQUEUED); -- cgit v1.2.1