From 292378edcb408c652e841fdc867fc14f8b4995fa Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 26 Sep 2016 08:21:28 +1000 Subject: xfs: remote attribute blocks aren't really userdata When adding a new remote attribute, we write the attribute to the new extent before the allocation transaction is committed. This means we cannot reuse busy extents as that violates crash consistency semantics. Hence we currently treat remote attribute extent allocation like userdata because it has the same overwrite ordering constraints as userdata. Unfortunately, this also allows the allocator to incorrectly apply extent size hints to the remote attribute extent allocation. This results in interesting failures, such as transaction block reservation overruns and in-memory inode attribute fork corruption. To fix this, we need to separate the busy extent reuse configuration from the userdata configuration. This changes the definition of XFS_BMAPI_METADATA slightly - it now means that allocation is metadata and reuse of busy extents is acceptable due to the metadata ordering semantics of the journal. If this flag is not set, it means the allocation is one that has unordered data writeback, and hence busy extent reuse is not allowed. It no longer implies the allocation is for user data, just that the data write will not be strictly ordered. This matches the semantics for both user data and remote attribute block allocation. As such, this patch changes the "userdata" field to a "datatype" field, and adds a "no busy reuse" flag to the field. When we detect an unordered data extent allocation, we immediately set the no reuse flag. We then set the "user data" flags based on the inode fork we are allocating the extent to. Hence we only set userdata flags on data fork allocations now and consider attribute fork remote extents to be an unordered metadata extent. 
The result is that remote attribute extents now have the expected allocation semantics, and the data fork allocation behaviour is completely unchanged. It should be noted that there may be other ways to fix this (e.g. use ordered metadata buffers for the remote attribute extent data write) but they are more invasive and difficult to validate both from a design and implementation POV. Hence this patch takes the simple, obvious route to fixing the problem... Reported-and-tested-by: Ross Zwisler Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 23 ++++++++++++----------- fs/xfs/libxfs/xfs_alloc.h | 17 +++++++++++++++-- fs/xfs/libxfs/xfs_bmap.c | 41 ++++++++++++++++++++++++++--------------- fs/xfs/libxfs/xfs_bmap.h | 2 +- fs/xfs/xfs_bmap_util.c | 2 +- fs/xfs/xfs_extent_busy.c | 2 +- fs/xfs/xfs_filestream.c | 9 ++++++--- fs/xfs/xfs_trace.h | 8 ++++---- 8 files changed, 66 insertions(+), 38 deletions(-) (limited to 'fs') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 05b5243d89f6..1d530c253c0e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -265,7 +265,7 @@ xfs_alloc_compute_diff( xfs_agblock_t wantbno, /* target starting block */ xfs_extlen_t wantlen, /* target length */ xfs_extlen_t alignment, /* target alignment */ - char userdata, /* are we allocating data? */ + int datatype, /* are we allocating data? 
*/ xfs_agblock_t freebno, /* freespace's starting block */ xfs_extlen_t freelen, /* freespace's length */ xfs_agblock_t *newbnop) /* result: best start block from free */ @@ -276,6 +276,7 @@ xfs_alloc_compute_diff( xfs_extlen_t newlen1=0; /* length with newbno1 */ xfs_extlen_t newlen2=0; /* length with newbno2 */ xfs_agblock_t wantend; /* end of target extent */ + bool userdata = xfs_alloc_is_userdata(datatype); ASSERT(freelen >= wantlen); freeend = freebno + freelen; @@ -917,7 +918,7 @@ xfs_alloc_find_best_extent( sdiff = xfs_alloc_compute_diff(args->agbno, args->len, args->alignment, - args->userdata, *sbnoa, + args->datatype, *sbnoa, *slena, &new); /* @@ -1101,7 +1102,7 @@ restart: if (args->len < blen) continue; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->userdata, ltbnoa, + args->alignment, args->datatype, ltbnoa, ltlena, &ltnew); if (ltnew != NULLAGBLOCK && (args->len > blen || ltdiff < bdiff)) { @@ -1254,7 +1255,7 @@ restart: args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->userdata, ltbnoa, + args->alignment, args->datatype, ltbnoa, ltlena, &ltnew); error = xfs_alloc_find_best_extent(args, @@ -1271,7 +1272,7 @@ restart: args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, args->userdata, gtbnoa, + args->alignment, args->datatype, gtbnoa, gtlena, &gtnew); error = xfs_alloc_find_best_extent(args, @@ -1331,7 +1332,7 @@ restart: } rlen = args->len; (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, - args->userdata, ltbnoa, ltlena, &ltnew); + args->datatype, ltbnoa, ltlena, &ltnew); ASSERT(ltnew >= ltbno); ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); @@ -1608,9 +1609,9 @@ xfs_alloc_ag_vextent_small( goto error0; if (fbno != NULLAGBLOCK) { 
xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, - args->userdata); + xfs_alloc_allow_busy_reuse(args->datatype)); - if (args->userdata) { + if (xfs_alloc_is_userdata(args->datatype)) { xfs_buf_t *bp; bp = xfs_btree_get_bufs(args->mp, args->tp, @@ -2058,7 +2059,7 @@ xfs_alloc_fix_freelist( * somewhere else if we are not being asked to try harder at this * point */ - if (pag->pagf_metadata && args->userdata && + if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) && (flags & XFS_ALLOC_FLAG_TRYLOCK)) { ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); goto out_agbp_relse; @@ -2633,7 +2634,7 @@ xfs_alloc_vextent( * Try near allocation first, then anywhere-in-ag after * the first a.g. fails. */ - if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) && + if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && (mp->m_flags & XFS_MOUNT_32BITINODES)) { args->fsbno = XFS_AGB_TO_FSB(mp, ((mp->m_agfrotor / rotorstep) % @@ -2766,7 +2767,7 @@ xfs_alloc_vextent( #endif /* Zero the extent if we were asked to do so */ - if (args->userdata & XFS_ALLOC_USERDATA_ZERO) { + if (args->datatype & XFS_ALLOC_USERDATA_ZERO) { error = xfs_zero_extent(args->ip, args->fsbno, args->len); if (error) goto error0; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 6fe2d6b7cfe9..7fd8eafd9abe 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -85,20 +85,33 @@ typedef struct xfs_alloc_arg { xfs_extlen_t len; /* output: actual size of extent */ xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... 
*/ xfs_alloctype_t otype; /* original allocation type */ + int datatype; /* mask defining data type treatment */ char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ char isfl; /* set if is freelist blocks - !acctg */ - char userdata; /* mask defining userdata treatment */ xfs_fsblock_t firstblock; /* io first block allocated */ struct xfs_owner_info oinfo; /* owner of blocks being allocated */ } xfs_alloc_arg_t; /* - * Defines for userdata + * Defines for datatype */ #define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/ #define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ #define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ +#define XFS_ALLOC_NOBUSY (1 << 3)/* Busy extents not allowed */ + +static inline bool +xfs_alloc_is_userdata(int datatype) +{ + return (datatype & ~XFS_ALLOC_NOBUSY) != 0; +} + +static inline bool +xfs_alloc_allow_busy_reuse(int datatype) +{ + return (datatype & XFS_ALLOC_NOBUSY) == 0; +} /* freespace limit calculations */ #define XFS_ALLOC_AGFL_RESERVE 4 diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index b060bca93402..06d1201b4718 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3347,7 +3347,8 @@ xfs_bmap_adjacent( mp = ap->ip->i_mount; nullfb = *ap->firstblock == NULLFSBLOCK; - rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata; + rt = XFS_IS_REALTIME_INODE(ap->ip) && + xfs_alloc_is_userdata(ap->datatype); fb_agno = nullfb ? 
NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); /* * If allocating at eof, and there's a previous real block, @@ -3622,7 +3623,7 @@ xfs_bmap_btalloc( { xfs_mount_t *mp; /* mount point structure */ xfs_alloctype_t atype = 0; /* type for allocation routines */ - xfs_extlen_t align; /* minimum allocation alignment */ + xfs_extlen_t align = 0; /* minimum allocation alignment */ xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_agnumber_t ag; xfs_alloc_arg_t args; @@ -3645,7 +3646,8 @@ xfs_bmap_btalloc( else if (mp->m_dalign) stripe_align = mp->m_dalign; - align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; + if (xfs_alloc_is_userdata(ap->datatype)) + align = xfs_get_extsz_hint(ap->ip); if (unlikely(align)) { error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, align, 0, ap->eof, 0, ap->conv, @@ -3658,7 +3660,8 @@ xfs_bmap_btalloc( nullfb = *ap->firstblock == NULLFSBLOCK; fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock); if (nullfb) { - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) { + if (xfs_alloc_is_userdata(ap->datatype) && + xfs_inode_is_filestream(ap->ip)) { ag = xfs_filestream_lookup_ag(ap->ip); ag = (ag != NULLAGNUMBER) ? ag : 0; ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); @@ -3698,7 +3701,8 @@ xfs_bmap_btalloc( * enough for the request. If one isn't found, then adjust * the minimum allocation size to the largest space found. 
*/ - if (ap->userdata && xfs_inode_is_filestream(ap->ip)) + if (xfs_alloc_is_userdata(ap->datatype) && + xfs_inode_is_filestream(ap->ip)) error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); else error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); @@ -3782,8 +3786,8 @@ xfs_bmap_btalloc( args.minleft = ap->minleft; args.wasdel = ap->wasdel; args.isfl = 0; - args.userdata = ap->userdata; - if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) + args.datatype = ap->datatype; + if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) args.ip = ap->ip; error = xfs_alloc_vextent(&args); @@ -3877,7 +3881,8 @@ STATIC int xfs_bmap_alloc( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { - if (XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata) + if (XFS_IS_REALTIME_INODE(ap->ip) && + xfs_alloc_is_userdata(ap->datatype)) return xfs_bmap_rtalloc(ap); return xfs_bmap_btalloc(ap); } @@ -4287,15 +4292,21 @@ xfs_bmapi_allocate( } /* - * Indicate if this is the first user data in the file, or just any - * user data. And if it is userdata, indicate whether it needs to - * be initialised to zero during allocation. + * Set the data type being allocated. For the data fork, the first data + * in the file is treated differently to all other allocations. For the + * attribute fork, we only need to ensure the allocated range is not on + * the busy list. */ if (!(bma->flags & XFS_BMAPI_METADATA)) { - bma->userdata = (bma->offset == 0) ? - XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; + bma->datatype = XFS_ALLOC_NOBUSY; + if (whichfork == XFS_DATA_FORK) { + if (bma->offset == 0) + bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; + else + bma->datatype |= XFS_ALLOC_USERDATA; + } if (bma->flags & XFS_BMAPI_ZERO) - bma->userdata |= XFS_ALLOC_USERDATA_ZERO; + bma->datatype |= XFS_ALLOC_USERDATA_ZERO; } bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? 
bma->length : 1; @@ -4565,7 +4576,7 @@ xfs_bmapi_write( bma.tp = tp; bma.ip = ip; bma.total = total; - bma.userdata = 0; + bma.datatype = 0; bma.dfops = dfops; bma.firstblock = firstblock; diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 254034f96941..05576b7263f6 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -54,7 +54,7 @@ struct xfs_bmalloca { bool wasdel; /* replacing a delayed allocation */ bool aeof; /* allocated space at eof */ bool conv; /* overwriting unwritten extents */ - char userdata;/* userdata mask */ + int datatype;/* data type being allocated */ int flags; }; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 4ece4f2ffc72..e827d657c314 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -182,7 +182,7 @@ xfs_bmap_rtalloc( XFS_TRANS_DQ_RTBCOUNT, (long) ralen); /* Zero the extent if we were asked to do so */ - if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) { + if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) { error = xfs_zero_extent(ap->ip, ap->blkno, ap->length); if (error) return error; diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index c263e079273e..162dc186cf04 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -384,7 +384,7 @@ restart: * If this is a metadata allocation, try to reuse the busy * extent instead of trimming the allocation. 
*/ - if (!args->userdata && + if (!xfs_alloc_is_userdata(args->datatype) && !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) { if (!xfs_extent_busy_update_extent(args->mp, args->pag, busyp, fbno, flen, diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 4a33a3304369..a75f7ab0581c 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -369,7 +369,8 @@ xfs_filestream_new_ag( struct xfs_mount *mp = ip->i_mount; xfs_extlen_t minlen = ap->length; xfs_agnumber_t startag = 0; - int flags, err = 0; + int flags = 0; + int err = 0; struct xfs_mru_cache_elem *mru; *agp = NULLAGNUMBER; @@ -385,8 +386,10 @@ xfs_filestream_new_ag( startag = (item->ag + 1) % mp->m_sb.sb_agcount; } - flags = (ap->userdata ? XFS_PICK_USERDATA : 0) | - (ap->dfops->dop_low ? XFS_PICK_LOWSPACE : 0); + if (xfs_alloc_is_userdata(ap->datatype)) + flags |= XFS_PICK_USERDATA; + if (ap->dfops->dop_low) + flags |= XFS_PICK_LOWSPACE; err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index d303a665dba9..1144522799bb 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1623,7 +1623,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __field(char, wasdel) __field(char, wasfromfl) __field(char, isfl) - __field(char, userdata) + __field(int, datatype) __field(xfs_fsblock_t, firstblock) ), TP_fast_assign( @@ -1644,13 +1644,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->wasdel = args->wasdel; __entry->wasfromfl = args->wasfromfl; __entry->isfl = args->isfl; - __entry->userdata = args->userdata; + __entry->datatype = args->datatype; __entry->firstblock = args->firstblock; ), TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " "prod %u minleft %u total %u alignment %u minalignslop %u " "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " - "userdata %d firstblock 0x%llx", + "datatype 0x%x firstblock 0x%llx", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -1668,7 +1668,7 @@ 
DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->wasdel, __entry->wasfromfl, __entry->isfl, - __entry->userdata, + __entry->datatype, (unsigned long long)__entry->firstblock) ) -- cgit v1.2.1 From ddeb14f4fb2fa1242829a76edc821f087e50bcdf Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 26 Sep 2016 08:21:44 +1000 Subject: xfs: quiesce the filesystem after recovery on readonly mount Recently we've had a number of reports where log recovery on a v5 filesystem has reported corruptions that looked to be caused by recovery being re-run over the top of already-recovered metadata. This has uncovered a bug in recovery (fixed elsewhere) but the vector that caused this was largely unknown. A kdump test started tripping over this problem - the system would be crashed, the kdump kernel and environment would boot and dump the kernel core image, and then the system would reboot. After reboot, the root filesystem was triggering log recovery and corruptions were being detected. The metadumps indicated the above log recovery issue. What is happening is that the kdump kernel and environment is mounting the root device read-only to find the binaries needed to do its work. The result of this is that it is running log recovery. However, because there were unlinked files and EFIs to be processed by recovery, the completion of phase 1 of log recovery could not mark the log clean. And because it's a read-only mount, the unmount process does not write records to the log to mark it clean, either. Hence on the next mount of the filesystem, log recovery was run again across all the metadata that had already been recovered and this is what triggered corruption warnings. To avoid this problem, we need to ensure that a read-only mount always updates the log when it completes the second phase of recovery. We already handle this sort of issue with rw->ro remount transitions, so the solution is as simple as quiescing the filesystem at the appropriate time during the mount process. 
This results in the log being marked clean so the mount behaviour recorded in the logs on repeated RO mounts will change (i.e. log recovery will no longer be run on every mount until a RW mount is done). This is a user visible change in behaviour, but it is harmless. Signed-off-by: Dave Chinner Reviewed-by: Eric Sandeen Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.c | 14 ++++++++++++++ fs/xfs/xfs_super.c | 2 +- fs/xfs/xfs_super.h | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index faeead671f9f..56e85a6c85c7 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -933,6 +933,20 @@ xfs_mountfs( goto out_rtunmount; } + /* + * Now the log is fully replayed, we can transition to full read-only + * mode for read-only mounts. This will sync all the metadata and clean + * the log so that the recovery we just performed does not have to be + * replayed again on the next mount. + * + * We use the same quiesce mechanism as the rw->ro remount, as they are + * semantically identical operations. + */ + if ((mp->m_flags & (XFS_MOUNT_RDONLY|XFS_MOUNT_NORECOVERY)) == + XFS_MOUNT_RDONLY) { + xfs_quiesce_attr(mp); + } + /* * Complete the quota initialisation, post-log-replay component. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index fd6be45b3a1e..c57c31996322 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1137,7 +1137,7 @@ xfs_restore_resvblks(struct xfs_mount *mp) * Note: xfs_log_quiesce() stops background log work - the callers must ensure * it is started again when appropriate. 
*/ -static void +void xfs_quiesce_attr( struct xfs_mount *mp) { diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 529bce9fc37e..b6418abd85ad 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -61,6 +61,7 @@ struct xfs_mount; struct xfs_buftarg; struct block_device; +extern void xfs_quiesce_attr(struct xfs_mount *mp); extern void xfs_flush_inodes(struct xfs_mount *mp); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *, -- cgit v1.2.1 From 12818d24db8ab01836bf423a7c1be639c3135572 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 26 Sep 2016 08:22:16 +1000 Subject: xfs: rework log recovery to submit buffers on LSN boundaries The fix to log recovery to update the metadata LSN in recovered buffers introduces the requirement that a buffer is submitted only once per current LSN. Log recovery currently submits buffers on transaction boundaries. This is not sufficient as the abstraction between log records and transactions allows for various scenarios where multiple transactions can share the same current LSN. If independent transactions share an LSN and both modify the same buffer, log recovery can incorrectly skip updates and leave the filesystem in an inconsistent state. In preparation for proper metadata LSN updates during log recovery, update log recovery to submit buffers for write on LSN change boundaries rather than transaction boundaries. Explicitly track the current LSN in a new struct xlog field to handle the various corner cases of when the current LSN may or may not change. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_priv.h | 3 +- fs/xfs/xfs_log_recover.c | 82 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 66 insertions(+), 19 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 765f084759b5..2b6eec52178e 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -413,7 +413,8 @@ struct xlog { /* log record crc error injection factor */ uint32_t l_badcrc_factor; #endif - + /* log recovery lsn tracking (for buffer submission */ + xfs_lsn_t l_recovery_lsn; }; #define XLOG_BUF_CANCEL_BUCKET(log, blkno) \ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index e8638fd2c0c3..e24fb7b6f3fb 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3846,14 +3846,13 @@ STATIC int xlog_recover_commit_trans( struct xlog *log, struct xlog_recover *trans, - int pass) + int pass, + struct list_head *buffer_list) { int error = 0; - int error2; int items_queued = 0; struct xlog_recover_item *item; struct xlog_recover_item *next; - LIST_HEAD (buffer_list); LIST_HEAD (ra_list); LIST_HEAD (done_list); @@ -3876,7 +3875,7 @@ xlog_recover_commit_trans( items_queued++; if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) { error = xlog_recover_items_pass2(log, trans, - &buffer_list, &ra_list); + buffer_list, &ra_list); list_splice_tail_init(&ra_list, &done_list); items_queued = 0; } @@ -3894,15 +3893,14 @@ out: if (!list_empty(&ra_list)) { if (!error) error = xlog_recover_items_pass2(log, trans, - &buffer_list, &ra_list); + buffer_list, &ra_list); list_splice_tail_init(&ra_list, &done_list); } if (!list_empty(&done_list)) list_splice_init(&done_list, &trans->r_itemq); - error2 = xfs_buf_delwri_submit(&buffer_list); - return error ? 
error : error2; + return error; } STATIC void @@ -4085,7 +4083,8 @@ xlog_recovery_process_trans( char *dp, unsigned int len, unsigned int flags, - int pass) + int pass, + struct list_head *buffer_list) { int error = 0; bool freeit = false; @@ -4109,7 +4108,8 @@ xlog_recovery_process_trans( error = xlog_recover_add_to_cont_trans(log, trans, dp, len); break; case XLOG_COMMIT_TRANS: - error = xlog_recover_commit_trans(log, trans, pass); + error = xlog_recover_commit_trans(log, trans, pass, + buffer_list); /* success or fail, we are now done with this transaction. */ freeit = true; break; @@ -4191,10 +4191,12 @@ xlog_recover_process_ophdr( struct xlog_op_header *ohead, char *dp, char *end, - int pass) + int pass, + struct list_head *buffer_list) { struct xlog_recover *trans; unsigned int len; + int error; /* Do we understand who wrote this op? */ if (ohead->oh_clientid != XFS_TRANSACTION && @@ -4221,8 +4223,39 @@ xlog_recover_process_ophdr( return 0; } + /* + * The recovered buffer queue is drained only once we know that all + * recovery items for the current LSN have been processed. This is + * required because: + * + * - Buffer write submission updates the metadata LSN of the buffer. + * - Log recovery skips items with a metadata LSN >= the current LSN of + * the recovery item. + * - Separate recovery items against the same metadata buffer can share + * a current LSN. I.e., consider that the LSN of a recovery item is + * defined as the starting LSN of the first record in which its + * transaction appears, that a record can hold multiple transactions, + * and/or that a transaction can span multiple records. + * + * In other words, we are allowed to submit a buffer from log recovery + * once per current LSN. Otherwise, we may incorrectly skip recovery + * items and cause corruption. + * + * We don't know up front whether buffers are updated multiple times per + * LSN. 
Therefore, track the current LSN of each commit log record as it + * is processed and drain the queue when it changes. Use commit records + * because they are ordered correctly by the logging code. + */ + if (log->l_recovery_lsn != trans->r_lsn && + ohead->oh_flags & XLOG_COMMIT_TRANS) { + error = xfs_buf_delwri_submit(buffer_list); + if (error) + return error; + log->l_recovery_lsn = trans->r_lsn; + } + return xlog_recovery_process_trans(log, trans, dp, len, - ohead->oh_flags, pass); + ohead->oh_flags, pass, buffer_list); } /* @@ -4240,7 +4273,8 @@ xlog_recover_process_data( struct hlist_head rhash[], struct xlog_rec_header *rhead, char *dp, - int pass) + int pass, + struct list_head *buffer_list) { struct xlog_op_header *ohead; char *end; @@ -4262,7 +4296,7 @@ xlog_recover_process_data( /* errors will abort recovery */ error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, - dp, end, pass); + dp, end, pass, buffer_list); if (error) return error; @@ -4685,7 +4719,8 @@ xlog_recover_process( struct hlist_head rhash[], struct xlog_rec_header *rhead, char *dp, - int pass) + int pass, + struct list_head *buffer_list) { int error; __le32 crc; @@ -4732,7 +4767,8 @@ xlog_recover_process( if (error) return error; - return xlog_recover_process_data(log, rhash, rhead, dp, pass); + return xlog_recover_process_data(log, rhash, rhead, dp, pass, + buffer_list); } STATIC int @@ -4793,9 +4829,11 @@ xlog_do_recovery_pass( char *offset; xfs_buf_t *hbp, *dbp; int error = 0, h_size, h_len; + int error2 = 0; int bblks, split_bblks; int hblks, split_hblks, wrapped_hblks; struct hlist_head rhash[XLOG_RHASH_SIZE]; + LIST_HEAD (buffer_list); ASSERT(head_blk != tail_blk); rhead_blk = 0; @@ -4981,7 +5019,7 @@ xlog_do_recovery_pass( } error = xlog_recover_process(log, rhash, rhead, offset, - pass); + pass, &buffer_list); if (error) goto bread_err2; @@ -5012,7 +5050,8 @@ xlog_do_recovery_pass( if (error) goto bread_err2; - error = xlog_recover_process(log, rhash, rhead, offset, pass); + 
error = xlog_recover_process(log, rhash, rhead, offset, pass, + &buffer_list); if (error) goto bread_err2; @@ -5025,10 +5064,17 @@ xlog_do_recovery_pass( bread_err1: xlog_put_bp(hbp); + /* + * Submit buffers that have been added from the last record processed, + * regardless of error status. + */ + if (!list_empty(&buffer_list)) + error2 = xfs_buf_delwri_submit(&buffer_list); + if (error && first_bad) *first_bad = rhead_blk; - return error; + return error ? error : error2; } /* -- cgit v1.2.1 From 22db9af2488655f7f841e9588d25384a5e694fa6 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 26 Sep 2016 08:32:07 +1000 Subject: xfs: pass current lsn to log recovery buffer validation The current LSN must be available to the buffer validation function to provide the ability to update the metadata LSN of the buffer. Pass the current_lsn value down to xlog_recover_validate_buf_type() in preparation. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_recover.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index e24fb7b6f3fb..bf325f25d4ec 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2360,7 +2360,8 @@ static void xlog_recover_validate_buf_type( struct xfs_mount *mp, struct xfs_buf *bp, - xfs_buf_log_format_t *buf_f) + xfs_buf_log_format_t *buf_f, + xfs_lsn_t current_lsn) { struct xfs_da_blkinfo *info = bp->b_addr; __uint32_t magic32; @@ -2569,7 +2570,8 @@ xlog_recover_do_reg_buffer( struct xfs_mount *mp, xlog_recover_item_t *item, struct xfs_buf *bp, - xfs_buf_log_format_t *buf_f) + xfs_buf_log_format_t *buf_f, + xfs_lsn_t current_lsn) { int i; int bit; @@ -2642,7 +2644,7 @@ xlog_recover_do_reg_buffer( /* Shouldn't be any more regions */ ASSERT(i == item->ri_total); - xlog_recover_validate_buf_type(mp, bp, buf_f); + xlog_recover_validate_buf_type(mp, bp, buf_f, current_lsn); } /* @@ -2685,7 
+2687,7 @@ xlog_recover_do_dquot_buffer( if (log->l_quotaoffs_flag & type) return false; - xlog_recover_do_reg_buffer(mp, item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, NULLCOMMITLSN); return true; } @@ -2773,7 +2775,7 @@ xlog_recover_buffer_pass2( */ lsn = xlog_recover_get_buf_lsn(mp, bp); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { - xlog_recover_validate_buf_type(mp, bp, buf_f); + xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); goto out_release; } @@ -2789,7 +2791,7 @@ xlog_recover_buffer_pass2( if (!dirty) goto out_release; } else { - xlog_recover_do_reg_buffer(mp, item, bp, buf_f); + xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn); } /* -- cgit v1.2.1 From 040c52c0aa7c1736522676078ece0483c8596daf Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 26 Sep 2016 08:32:50 +1000 Subject: xfs: don't warn on buffers not being recovered due to LSN The log recovery buffer validation function is invoked in cases where a buffer update may be skipped due to LSN ordering. If the validation function happens to come across directory conversion situations (e.g., a dir3 block to data conversion), it may warn about seeing a buffer log format of one type and a buffer with a magic number of another. This warning is not valid as the buffer update is ultimately skipped. This is indicated by a current_lsn of NULLCOMMITLSN provided by the caller. As such, update xlog_recover_validate_buf_type() to only warn in such cases when a buffer update is expected. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_recover.c | 58 ++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index bf325f25d4ec..9be763043f28 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2367,6 +2367,7 @@ xlog_recover_validate_buf_type( __uint32_t magic32; __uint16_t magic16; __uint16_t magicda; + char *warnmsg = NULL; /* * We can only do post recovery validation on items on CRC enabled @@ -2405,31 +2406,27 @@ xlog_recover_validate_buf_type( bp->b_ops = &xfs_rmapbt_buf_ops; break; default: - xfs_warn(mp, "Bad btree block magic!"); - ASSERT(0); + warnmsg = "Bad btree block magic!"; break; } break; case XFS_BLFT_AGF_BUF: if (magic32 != XFS_AGF_MAGIC) { - xfs_warn(mp, "Bad AGF block magic!"); - ASSERT(0); + warnmsg = "Bad AGF block magic!"; break; } bp->b_ops = &xfs_agf_buf_ops; break; case XFS_BLFT_AGFL_BUF: if (magic32 != XFS_AGFL_MAGIC) { - xfs_warn(mp, "Bad AGFL block magic!"); - ASSERT(0); + warnmsg = "Bad AGFL block magic!"; break; } bp->b_ops = &xfs_agfl_buf_ops; break; case XFS_BLFT_AGI_BUF: if (magic32 != XFS_AGI_MAGIC) { - xfs_warn(mp, "Bad AGI block magic!"); - ASSERT(0); + warnmsg = "Bad AGI block magic!"; break; } bp->b_ops = &xfs_agi_buf_ops; @@ -2439,8 +2436,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_GDQUOT_BUF: #ifdef CONFIG_XFS_QUOTA if (magic16 != XFS_DQUOT_MAGIC) { - xfs_warn(mp, "Bad DQUOT block magic!"); - ASSERT(0); + warnmsg = "Bad DQUOT block magic!"; break; } bp->b_ops = &xfs_dquot_buf_ops; @@ -2452,16 +2448,14 @@ xlog_recover_validate_buf_type( break; case XFS_BLFT_DINO_BUF: if (magic16 != XFS_DINODE_MAGIC) { - xfs_warn(mp, "Bad INODE block magic!"); - ASSERT(0); + warnmsg = "Bad INODE block magic!"; break; } bp->b_ops = &xfs_inode_buf_ops; break; case XFS_BLFT_SYMLINK_BUF: if (magic32 != XFS_SYMLINK_MAGIC) { - 
xfs_warn(mp, "Bad symlink block magic!"); - ASSERT(0); + warnmsg = "Bad symlink block magic!"; break; } bp->b_ops = &xfs_symlink_buf_ops; @@ -2469,8 +2463,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_DIR_BLOCK_BUF: if (magic32 != XFS_DIR2_BLOCK_MAGIC && magic32 != XFS_DIR3_BLOCK_MAGIC) { - xfs_warn(mp, "Bad dir block magic!"); - ASSERT(0); + warnmsg = "Bad dir block magic!"; break; } bp->b_ops = &xfs_dir3_block_buf_ops; @@ -2478,8 +2471,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_DIR_DATA_BUF: if (magic32 != XFS_DIR2_DATA_MAGIC && magic32 != XFS_DIR3_DATA_MAGIC) { - xfs_warn(mp, "Bad dir data magic!"); - ASSERT(0); + warnmsg = "Bad dir data magic!"; break; } bp->b_ops = &xfs_dir3_data_buf_ops; @@ -2487,8 +2479,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_DIR_FREE_BUF: if (magic32 != XFS_DIR2_FREE_MAGIC && magic32 != XFS_DIR3_FREE_MAGIC) { - xfs_warn(mp, "Bad dir3 free magic!"); - ASSERT(0); + warnmsg = "Bad dir3 free magic!"; break; } bp->b_ops = &xfs_dir3_free_buf_ops; @@ -2496,8 +2487,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_DIR_LEAF1_BUF: if (magicda != XFS_DIR2_LEAF1_MAGIC && magicda != XFS_DIR3_LEAF1_MAGIC) { - xfs_warn(mp, "Bad dir leaf1 magic!"); - ASSERT(0); + warnmsg = "Bad dir leaf1 magic!"; break; } bp->b_ops = &xfs_dir3_leaf1_buf_ops; @@ -2505,8 +2495,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_DIR_LEAFN_BUF: if (magicda != XFS_DIR2_LEAFN_MAGIC && magicda != XFS_DIR3_LEAFN_MAGIC) { - xfs_warn(mp, "Bad dir leafn magic!"); - ASSERT(0); + warnmsg = "Bad dir leafn magic!"; break; } bp->b_ops = &xfs_dir3_leafn_buf_ops; @@ -2514,8 +2503,7 @@ xlog_recover_validate_buf_type( case XFS_BLFT_DA_NODE_BUF: if (magicda != XFS_DA_NODE_MAGIC && magicda != XFS_DA3_NODE_MAGIC) { - xfs_warn(mp, "Bad da node magic!"); - ASSERT(0); + warnmsg = "Bad da node magic!"; break; } bp->b_ops = &xfs_da3_node_buf_ops; @@ -2523,24 +2511,21 @@ xlog_recover_validate_buf_type( case XFS_BLFT_ATTR_LEAF_BUF: if (magicda != XFS_ATTR_LEAF_MAGIC && magicda 
!= XFS_ATTR3_LEAF_MAGIC) { - xfs_warn(mp, "Bad attr leaf magic!"); - ASSERT(0); + warnmsg = "Bad attr leaf magic!"; break; } bp->b_ops = &xfs_attr3_leaf_buf_ops; break; case XFS_BLFT_ATTR_RMT_BUF: if (magic32 != XFS_ATTR3_RMT_MAGIC) { - xfs_warn(mp, "Bad attr remote magic!"); - ASSERT(0); + warnmsg = "Bad attr remote magic!"; break; } bp->b_ops = &xfs_attr3_rmt_buf_ops; break; case XFS_BLFT_SB_BUF: if (magic32 != XFS_SB_MAGIC) { - xfs_warn(mp, "Bad SB block magic!"); - ASSERT(0); + warnmsg = "Bad SB block magic!"; break; } bp->b_ops = &xfs_sb_buf_ops; @@ -2557,6 +2542,15 @@ xlog_recover_validate_buf_type( xfs_blft_from_flags(buf_f)); break; } + + /* + * Don't warn in the case of a NULL current LSN as this means the buffer + * is more recent than the change in the log and will be skipped. + */ + if (warnmsg && current_lsn != NULLCOMMITLSN) { + xfs_warn(mp, warnmsg); + ASSERT(0); + } } /* -- cgit v1.2.1 From 60a4a22251568063c9289eb65099d7c61b4a54b0 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 26 Sep 2016 08:34:27 +1000 Subject: xfs: update metadata LSN in buffers during log recovery Log recovery is currently broken for v5 superblocks in that it never updates the metadata LSN of buffers written out during recovery. The metadata LSN is recorded in various bits of metadata to provide recovery ordering criteria that prevents transient corruption states reported by buffer write verifiers. Without such ordering logic, buffer updates can be replayed out of order and lead to false positive transient corruption states. This is generally not a corruption vector on its own, but corruption detection shuts down the filesystem and ultimately prevents a mount if it occurs during log recovery. This requires an xfs_repair run that clears the log and potentially loses filesystem updates. This problem is avoided in most cases as metadata writes during normal filesystem operation update the metadata LSN appropriately. 
The problem with log recovery not updating metadata LSNs manifests if the system happens to crash shortly after log recovery itself. In this scenario, it is possible for log recovery to complete all metadata I/O such that the filesystem is consistent. If a crash occurs after that point but before the log tail is pushed forward by subsequent operations, however, the next mount performs the same log recovery over again. If a buffer is updated multiple times in the dirty range of the log, an earlier update in the log might not be valid based on the current state of the associated buffer after all of the updates in the log had been replayed (before the previous crash). If a verifier happens to detect such a problem, the filesystem claims corruption and immediately shuts down. This commonly manifests in practice as directory block verifier failures such as the following, likely due to directory verifiers being particularly detailed in their checks as compared to most others: ... Mounting V5 Filesystem XFS (dm-0): Starting recovery (logdev: internal) XFS (dm-0): Internal error XFS_WANT_CORRUPTED_RETURN at line ... of \ file fs/xfs/libxfs/xfs_dir2_data.c. Caller xfs_dir3_data_verify ... ... Update log recovery to update the metadata LSN of recovered buffers. Since metadata LSNs are already updated by write verifier functions via attached log items, attach a dummy log item to the buffer during validation and explicitly set the LSN of the current transaction. This ensures that the metadata LSN of a buffer is updated based on whether the recovery I/O actually completes, and if so, that subsequent recovery attempts identify that the buffer is already up to date with respect to the current transaction. 
Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_recover.c | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9be763043f28..9667d7d0fb79 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -44,6 +44,7 @@ #include "xfs_error.h" #include "xfs_dir2.h" #include "xfs_rmap_item.h" +#include "xfs_buf_item.h" #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) @@ -381,6 +382,15 @@ xlog_recover_iodone( SHUTDOWN_META_IO_ERROR); } } + + /* + * On v5 supers, a bli could be attached to update the metadata LSN. + * Clean it up. + */ + if (bp->b_fspriv) + xfs_buf_item_relse(bp); + ASSERT(bp->b_fspriv == NULL); + bp->b_iodone = NULL; xfs_buf_ioend(bp); } @@ -2544,13 +2554,38 @@ xlog_recover_validate_buf_type( } /* - * Don't warn in the case of a NULL current LSN as this means the buffer - * is more recent than the change in the log and will be skipped. + * Nothing else to do in the case of a NULL current LSN as this means + * the buffer is more recent than the change in the log and will be + * skipped. */ - if (warnmsg && current_lsn != NULLCOMMITLSN) { + if (current_lsn == NULLCOMMITLSN) + return; + + if (warnmsg) { xfs_warn(mp, warnmsg); ASSERT(0); } + + /* + * We must update the metadata LSN of the buffer as it is written out to + * ensure that older transactions never replay over this one and corrupt + * the buffer. This can occur if log recovery is interrupted at some + * point after the current transaction completes, at which point a + * subsequent mount starts recovery from the beginning. + * + * Write verifiers update the metadata LSN from log items attached to + * the buffer. Therefore, initialize a bli purely to carry the LSN to + * the verifier. We'll clean it up in our ->iodone() callback. 
+ */ + if (bp->b_ops) { + struct xfs_buf_log_item *bip; + + ASSERT(!bp->b_iodone || bp->b_iodone == xlog_recover_iodone); + bp->b_iodone = xlog_recover_iodone; + xfs_buf_item_init(bp, mp); + bip = bp->b_fspriv; + bip->bli_item.li_lsn = current_lsn; + } } /* -- cgit v1.2.1 From 5cd9cee98b256d94443d93a31efc36212a2bd634 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 26 Sep 2016 08:34:52 +1000 Subject: xfs: log recovery tracepoints to track current lsn and buffer submission Log recovery has particular rules around buffer submission along with tricky corner cases where independent transactions can share an LSN. As such, it can be difficult to follow when/why buffers are submitted during recovery. Add a couple tracepoints to post the current LSN of a record when a new record is being processed and when a buffer is being skipped due to LSN ordering. Also, update the recover item class to include the LSN of the current transaction for the item being processed. Signed-off-by: Brian Foster Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/xfs_log_recover.c | 2 ++ fs/xfs/xfs_trace.h | 31 +++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9667d7d0fb79..846483d56949 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2804,6 +2804,7 @@ xlog_recover_buffer_pass2( */ lsn = xlog_recover_get_buf_lsn(mp, bp); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); goto out_release; } @@ -4319,6 +4320,7 @@ xlog_recover_process_data( if (xlog_header_check_recover(log->l_mp, rhead)) return -EIO; + trace_xfs_log_recover_record(log, rhead, pass); while ((dp < end) && num_logops) { ohead = (struct xlog_op_header *)dp; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 1144522799bb..74606cba7dba 100644 --- 
a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1984,6 +1984,29 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \ DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); +TRACE_EVENT(xfs_log_recover_record, + TP_PROTO(struct xlog *log, struct xlog_rec_header *rhead, int pass), + TP_ARGS(log, rhead, pass), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_lsn_t, lsn) + __field(int, len) + __field(int, num_logops) + __field(int, pass) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->lsn = be64_to_cpu(rhead->h_lsn); + __entry->len = be32_to_cpu(rhead->h_len); + __entry->num_logops = be32_to_cpu(rhead->h_num_logops); + __entry->pass = pass; + ), + TP_printk("dev %d:%d lsn 0x%llx len 0x%x num_logops 0x%x pass %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->lsn, __entry->len, __entry->num_logops, + __entry->pass) +) + DECLARE_EVENT_CLASS(xfs_log_recover_item_class, TP_PROTO(struct xlog *log, struct xlog_recover *trans, struct xlog_recover_item *item, int pass), @@ -1992,6 +2015,7 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class, __field(dev_t, dev) __field(unsigned long, item) __field(xlog_tid_t, tid) + __field(xfs_lsn_t, lsn) __field(int, type) __field(int, pass) __field(int, count) @@ -2001,15 +2025,17 @@ DECLARE_EVENT_CLASS(xfs_log_recover_item_class, __entry->dev = log->l_mp->m_super->s_dev; __entry->item = (unsigned long)item; __entry->tid = trans->r_log_tid; + __entry->lsn = trans->r_lsn; __entry->type = ITEM_TYPE(item); __entry->pass = pass; __entry->count = item->ri_cnt; __entry->total = item->ri_total; ), - TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " - "item region count/total %d/%d", + TP_printk("dev %d:%d tid 0x%x lsn 0x%llx, pass %d, item 0x%p, " + "item type %s item region count/total %d/%d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->tid, + __entry->lsn, __entry->pass, (void *)__entry->item, __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), @@ 
-2068,6 +2094,7 @@ DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_skip); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); -- cgit v1.2.1