From 80cca6ecc9dcc374ae75b5f52ac3881756d3a3cf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:21 -0700 Subject: [PATCH 01/30] xfs: pass refcount intent directly through the log intent code [ Upstream commit 0b11553ec54a6d88907e60d0595dbcef98539747 ] Pass the incore refcount intent through the CUI logging code instead of repeatedly boxing and unboxing parameters. Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_refcount.c | 96 ++++++++++++++++-------------------- fs/xfs/libxfs/xfs_refcount.h | 4 +- fs/xfs/xfs_refcount_item.c | 62 ++++++++++------------- fs/xfs/xfs_trace.h | 15 ++---- 4 files changed, 74 insertions(+), 103 deletions(-) diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 6f7ed9288fe4..bcf46aa0d08b 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1213,37 +1213,33 @@ out_error: STATIC int xfs_refcount_adjust( struct xfs_btree_cur *cur, - xfs_agblock_t agbno, - xfs_extlen_t aglen, - xfs_agblock_t *new_agbno, - xfs_extlen_t *new_aglen, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen, enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; int error; - *new_agbno = agbno; - *new_aglen = aglen; if (adj == XFS_REFCOUNT_ADJUST_INCREASE) - trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno, - agbno, aglen); + trace_xfs_refcount_increase(cur->bc_mp, + cur->bc_ag.pag->pag_agno, *agbno, *aglen); else - trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno, - agbno, aglen); + trace_xfs_refcount_decrease(cur->bc_mp, + cur->bc_ag.pag->pag_agno, *agbno, *aglen); /* * Ensure that no rcextents cross the boundary of the adjustment range. */ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, - agbno, &shape_changed); + *agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, - agbno + aglen, &shape_changed); + *agbno + *aglen, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1253,7 +1249,7 @@ xfs_refcount_adjust( * Try to merge with the left or right extents of the range. */ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, - new_agbno, new_aglen, adj, &shape_changed); + agbno, aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1262,7 +1258,7 @@ xfs_refcount_adjust( cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); + error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj); if (error) goto out_error; @@ -1298,21 +1294,20 @@ xfs_refcount_finish_one_cleanup( static inline int xfs_refcount_continue_op( struct xfs_btree_cur *cur, - xfs_fsblock_t startblock, - xfs_agblock_t new_agbno, - xfs_extlen_t new_len, - xfs_fsblock_t *new_fsbno) + struct xfs_refcount_intent *ri, + xfs_agblock_t new_agbno) { struct xfs_mount *mp = cur->bc_mp; struct xfs_perag *pag = cur->bc_ag.pag; - if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) + if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, + ri->ri_blockcount))) return -EFSCORRUPTED; - *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); - ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); - ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); + ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); + ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); return 0; } @@ -1327,11 +1322,7 @@ xfs_refcount_continue_op( int xfs_refcount_finish_one( struct xfs_trans *tp, - enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, - xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, + struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; @@ -1339,17 +1330,16 @@ xfs_refcount_finish_one( struct xfs_buf *agbp = NULL; int error = 0; xfs_agblock_t bno; - xfs_agblock_t new_agbno; unsigned long nr_ops = 0; int shape_changes = 0; struct xfs_perag *pag; - pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); - bno = XFS_FSB_TO_AGBNO(mp, startblock); + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); + bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); - trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), - type, XFS_FSB_TO_AGBNO(mp, startblock), - blockcount); + trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock), + ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock), + ri->ri_blockcount); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { error = -EIO; @@ -1380,42 +1370,42 @@ xfs_refcount_finish_one( } *pcur = rcur; - switch (type) { + switch (ri->ri_type) { case XFS_REFCOUNT_INCREASE: - error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_INCREASE); + error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, + XFS_REFCOUNT_ADJUST_INCREASE); if (error) goto out_drop; - if (*new_len > 0) - error = xfs_refcount_continue_op(rcur, startblock, - new_agbno, *new_len, new_fsb); + if (ri->ri_blockcount > 0) + error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_DECREASE: - error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE); + error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, + XFS_REFCOUNT_ADJUST_DECREASE); if (error) goto out_drop; - if (*new_len > 0) - error = xfs_refcount_continue_op(rcur, startblock, - new_agbno, *new_len, new_fsb); + if (ri->ri_blockcount > 0) + error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_ALLOC_COW: - *new_fsb = startblock + blockcount; - *new_len = 0; - error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); + error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); + if (error) + goto out_drop; + ri->ri_blockcount = 0; break; case XFS_REFCOUNT_FREE_COW: - *new_fsb = startblock + blockcount; - *new_len = 0; - error = __xfs_refcount_cow_free(rcur, bno, blockcount); + error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); + if (error) + goto out_drop; + ri->ri_blockcount = 0; break; default: ASSERT(0); error = -EFSCORRUPTED; } - if (!error && *new_len > 0) - trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type, - bno, blockcount, new_agbno, *new_len); + if (!error && ri->ri_blockcount > 0) + trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, + ri->ri_type, bno, ri->ri_blockcount); out_drop: xfs_perag_put(pag); return error; diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 452f30556f5a..c633477ce3ce 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -75,9 +75,7 @@ void xfs_refcount_decrease_extent(struct xfs_trans *tp, extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); extern int xfs_refcount_finish_one(struct xfs_trans *tp, - enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, - xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); + struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur); extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 858e3e9eb4a8..ff4d5087ba00 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -252,17 +252,12 @@ static int xfs_trans_log_finish_refcount_update( struct xfs_trans *tp, struct xfs_cud_log_item *cudp, - enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, - xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, + struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur) { int error; - error = xfs_refcount_finish_one(tp, type, startblock, - blockcount, new_fsb, new_len, pcur); + error = xfs_refcount_finish_one(tp, ri, pcur); /* * Mark the transaction dirty, even on error. This ensures the @@ -378,25 +373,20 @@ xfs_refcount_update_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_refcount_intent *refc; - xfs_fsblock_t new_fsb; - xfs_extlen_t new_aglen; + struct xfs_refcount_intent *ri; int error; - refc = container_of(item, struct xfs_refcount_intent, ri_list); - error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), - refc->ri_type, refc->ri_startblock, refc->ri_blockcount, - &new_fsb, &new_aglen, state); + ri = container_of(item, struct xfs_refcount_intent, ri_list); + error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri, + state); /* Did we run out of reservation? Requeue what we didn't finish. */ - if (!error && new_aglen > 0) { - ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || - refc->ri_type == XFS_REFCOUNT_DECREASE); - refc->ri_startblock = new_fsb; - refc->ri_blockcount = new_aglen; + if (!error && ri->ri_blockcount > 0) { + ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || + ri->ri_type == XFS_REFCOUNT_DECREASE); return -EAGAIN; } - kmem_cache_free(xfs_refcount_intent_cache, refc); + kmem_cache_free(xfs_refcount_intent_cache, ri); return error; } @@ -463,18 +453,13 @@ xfs_cui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { - struct xfs_bmbt_irec irec; struct xfs_cui_log_item *cuip = CUI_ITEM(lip); - struct xfs_phys_extent *refc; struct xfs_cud_log_item *cudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_log->l_mp; - xfs_fsblock_t new_fsb; - xfs_extlen_t new_len; unsigned int refc_type; bool requeue_only = false; - enum xfs_refcount_intent_type type; int i; int error = 0; @@ -513,6 +498,9 @@ xfs_cui_item_recover( cudp = xfs_trans_get_cud(tp, cuip); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { + struct xfs_refcount_intent fake = { }; + struct xfs_phys_extent *refc; + refc = &cuip->cui_format.cui_extents[i]; refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; switch (refc_type) { @@ -520,7 +508,7 @@ xfs_cui_item_recover( case XFS_REFCOUNT_DECREASE: case XFS_REFCOUNT_ALLOC_COW: case XFS_REFCOUNT_FREE_COW: - type = refc_type; + fake.ri_type = refc_type; break; default: XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, @@ -529,13 +517,12 @@ xfs_cui_item_recover( error = -EFSCORRUPTED; goto abort_error; } - if (requeue_only) { - new_fsb = refc->pe_startblock; - new_len = refc->pe_len; - } else + + fake.ri_startblock = refc->pe_startblock; + fake.ri_blockcount = refc->pe_len; + if (!requeue_only) error = xfs_trans_log_finish_refcount_update(tp, cudp, - type, refc->pe_startblock, refc->pe_len, - &new_fsb, &new_len, &rcur); + &fake, &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, &cuip->cui_format, @@ -544,10 +531,13 @@ xfs_cui_item_recover( goto abort_error; /* Requeue what we didn't finish. */ - if (new_len > 0) { - irec.br_startblock = new_fsb; - irec.br_blockcount = new_len; - switch (type) { + if (fake.ri_blockcount > 0) { + struct xfs_bmbt_irec irec = { + .br_startblock = fake.ri_startblock, + .br_blockcount = fake.ri_blockcount, + }; + + switch (fake.ri_type) { case XFS_REFCOUNT_INCREASE: xfs_refcount_increase_extent(tp, &irec); break; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 0cd62031e53f..20e2ec8b73aa 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3208,17 +3208,14 @@ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred); TRACE_EVENT(xfs_refcount_finish_one_leftover, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - int type, xfs_agblock_t agbno, xfs_extlen_t len, - xfs_agblock_t new_agbno, xfs_extlen_t new_len), - TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len), + int type, xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, type, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, type) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) - __field(xfs_agblock_t, new_agbno) - __field(xfs_extlen_t, new_len) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; @@ -3226,17 +3223,13 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover, __entry->type = type; __entry->agbno = agbno; __entry->len = len; - __entry->new_agbno = new_agbno; - __entry->new_len = new_len; ), - TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x", + TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, __entry->agbno, - __entry->len, - __entry->new_agbno, - __entry->new_len) + __entry->len) ); /* simple inode-based error/%ip tracepoint class */ From 5b99dcc1470bbb8eaf08d896779861c6ac2cd310 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:22 -0700 Subject: [PATCH 02/30] xfs: pass xfs_extent_free_item directly through the log intent code [ Upstream commit 72ba455599ad13d08c29dafa22a32360e07b1961 ] Pass the incore xfs_extent_free_item through the EFI logging code instead of repeatedly boxing and unboxing parameters. Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_extfree_item.c | 55 +++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index d5130d1fcfae..618d2f9ff535 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -345,23 +345,30 @@ static int xfs_trans_free_extent( struct xfs_trans *tp, struct xfs_efd_log_item *efdp, - xfs_fsblock_t start_block, - xfs_extlen_t ext_len, - const struct xfs_owner_info *oinfo, - bool skip_discard) + struct xfs_extent_free_item *free) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_extent *extp; uint next_extent; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, + free->xefi_startblock); xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, - start_block); + free->xefi_startblock); int error; - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); + oinfo.oi_owner = free->xefi_owner; + if (free->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; + if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; - error = __xfs_free_extent(tp, start_block, ext_len, - oinfo, XFS_AG_RESV_NONE, skip_discard); + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, + free->xefi_blockcount); + + error = __xfs_free_extent(tp, free->xefi_startblock, + free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, + free->xefi_flags & XFS_EFI_SKIP_DISCARD); /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: @@ -375,8 +382,8 @@ xfs_trans_free_extent( next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = start_block; - extp->ext_len = ext_len; + extp->ext_start = free->xefi_startblock; + extp->ext_len = free->xefi_blockcount; efdp->efd_next_extent++; return error; @@ -463,20 +470,12 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_owner_info oinfo = { }; struct xfs_extent_free_item *free; int error; free = container_of(item, struct xfs_extent_free_item, xefi_list); - oinfo.oi_owner = free->xefi_owner; - if (free->xefi_flags & XFS_EFI_ATTR_FORK) - oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; - if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) - oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; - error = xfs_trans_free_extent(tp, EFD_ITEM(done), - free->xefi_startblock, - free->xefi_blockcount, - &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD); + + error = xfs_trans_free_extent(tp, EFD_ITEM(done), free); kmem_cache_free(xfs_extfree_item_cache, free); return error; } @@ -599,7 +598,6 @@ xfs_efi_item_recover( struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_efd_log_item *efdp; struct xfs_trans *tp; - struct xfs_extent *extp; int i; int error = 0; @@ -624,10 +622,17 @@ xfs_efi_item_recover( efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); for (i = 0; i < efip->efi_format.efi_nextents; i++) { + struct xfs_extent_free_item fake = { + .xefi_owner = XFS_RMAP_OWN_UNKNOWN, + }; + struct xfs_extent *extp; + extp = &efip->efi_format.efi_extents[i]; - error = xfs_trans_free_extent(tp, efdp, extp->ext_start, - extp->ext_len, - &XFS_RMAP_OINFO_ANY_OWNER, false); + + fake.xefi_startblock = extp->ext_start; + fake.xefi_blockcount = extp->ext_len; + + error = xfs_trans_free_extent(tp, efdp, &fake); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, extp, sizeof(*extp)); From e0e440bfea4565d138611a896d7d429a8e9a0168 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:23 -0700 Subject: [PATCH 03/30] xfs: fix confusing xfs_extent_item variable names [ Upstream commit 578c714b215d474c52949e65a914dae67924f0fe ] Change the name of all pointers to xfs_extent_item structures to "xefi" to make the name consistent and because the current selections ("new" and "free") mean other things in C. Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 32 +++++++++--------- fs/xfs/xfs_extfree_item.c | 70 +++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 74d039bdc9f7..cc5c954cda88 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2493,20 +2493,20 @@ xfs_defer_agfl_block( struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_extent_free_item *new; /* new element */ + struct xfs_extent_free_item *xefi; ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); - new = kmem_cache_zalloc(xfs_extfree_item_cache, + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); - new->xefi_blockcount = 1; - new->xefi_owner = oinfo->oi_owner; + xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); + xefi->xefi_blockcount = 1; + xefi->xefi_owner = oinfo->oi_owner; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); } /* @@ -2521,7 +2521,7 @@ __xfs_free_extent_later( const struct xfs_owner_info *oinfo, bool skip_discard) { - struct xfs_extent_free_item *new; /* new element */ + struct xfs_extent_free_item *xefi; #ifdef DEBUG struct xfs_mount *mp = tp->t_mountp; xfs_agnumber_t agno; @@ -2540,27 +2540,27 @@ __xfs_free_extent_later( #endif ASSERT(xfs_extfree_item_cache != NULL); - new = kmem_cache_zalloc(xfs_extfree_item_cache, + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = bno; - new->xefi_blockcount = (xfs_extlen_t)len; + xefi->xefi_startblock = bno; + xefi->xefi_blockcount = (xfs_extlen_t)len; if (skip_discard) - new->xefi_flags |= XFS_EFI_SKIP_DISCARD; + xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { ASSERT(oinfo->oi_offset == 0); if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) - new->xefi_flags |= XFS_EFI_ATTR_FORK; + xefi->xefi_flags |= XFS_EFI_ATTR_FORK; if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) - new->xefi_flags |= XFS_EFI_BMBT_BLOCK; - new->xefi_owner = oinfo->oi_owner; + xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK; + xefi->xefi_owner = oinfo->oi_owner; } else { - new->xefi_owner = XFS_RMAP_OWN_NULL; + xefi->xefi_owner = XFS_RMAP_OWN_NULL; } trace_xfs_bmap_free_defer(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); } #ifdef DEBUG diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 618d2f9ff535..011b50469301 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -345,30 +345,30 @@ static int xfs_trans_free_extent( struct xfs_trans *tp, struct xfs_efd_log_item *efdp, - struct xfs_extent_free_item *free) + struct xfs_extent_free_item *xefi) { struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_extent *extp; uint next_extent; xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, - free->xefi_startblock); + xefi->xefi_startblock); xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, - free->xefi_startblock); + xefi->xefi_startblock); int error; - oinfo.oi_owner = free->xefi_owner; - if (free->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_owner = xefi->xefi_owner; + if (xefi->xefi_flags & XFS_EFI_ATTR_FORK) oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; - if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) + if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK) oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, - free->xefi_blockcount); + xefi->xefi_blockcount); - error = __xfs_free_extent(tp, free->xefi_startblock, - free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, - free->xefi_flags & XFS_EFI_SKIP_DISCARD); + error = __xfs_free_extent(tp, xefi->xefi_startblock, + xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, + xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: @@ -382,8 +382,8 @@ xfs_trans_free_extent( next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; + extp->ext_start = xefi->xefi_startblock; + extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; return error; @@ -411,7 +411,7 @@ STATIC void xfs_extent_free_log_item( struct xfs_trans *tp, struct xfs_efi_log_item *efip, - struct xfs_extent_free_item *free) + struct xfs_extent_free_item *xefi) { uint next_extent; struct xfs_extent *extp; @@ -427,8 +427,8 @@ xfs_extent_free_log_item( next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; ASSERT(next_extent < efip->efi_format.efi_nextents); extp = &efip->efi_format.efi_extents[next_extent]; - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; + extp->ext_start = xefi->xefi_startblock; + extp->ext_len = xefi->xefi_blockcount; } static struct xfs_log_item * @@ -440,15 +440,15 @@ xfs_extent_free_create_intent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_efi_log_item *efip = xfs_efi_init(mp, count); - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; ASSERT(count > 0); xfs_trans_add_item(tp, &efip->efi_item); if (sort) list_sort(mp, items, xfs_extent_free_diff_items); - list_for_each_entry(free, items, xefi_list) - xfs_extent_free_log_item(tp, efip, free); + list_for_each_entry(xefi, items, xefi_list) + xfs_extent_free_log_item(tp, efip, xefi); return &efip->efi_item; } @@ -470,13 +470,13 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; int error; - free = container_of(item, struct xfs_extent_free_item, xefi_list); + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); - error = xfs_trans_free_extent(tp, EFD_ITEM(done), free); - kmem_cache_free(xfs_extfree_item_cache, free); + error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); + kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } @@ -493,10 +493,10 @@ STATIC void xfs_extent_free_cancel_item( struct list_head *item) { - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; - free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_cache_free(xfs_extfree_item_cache, free); + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + kmem_cache_free(xfs_extfree_item_cache, xefi); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -522,7 +522,7 @@ xfs_agfl_free_finish_item( struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; struct xfs_extent *extp; struct xfs_buf *agbp; int error; @@ -531,13 +531,13 @@ xfs_agfl_free_finish_item( uint next_extent; struct xfs_perag *pag; - free = container_of(item, struct xfs_extent_free_item, xefi_list); - ASSERT(free->xefi_blockcount == 1); - agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); - oinfo.oi_owner = free->xefi_owner; + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + ASSERT(xefi->xefi_blockcount == 1); + agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); + oinfo.oi_owner = xefi->xefi_owner; - trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); + trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, xefi->xefi_blockcount); pag = xfs_perag_get(mp, agno); error = xfs_alloc_read_agf(pag, tp, 0, &agbp); @@ -558,11 +558,11 @@ xfs_agfl_free_finish_item( next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; + extp->ext_start = xefi->xefi_startblock; + extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; - kmem_cache_free(xfs_extfree_item_cache, free); + kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } From ab3b2a70c48d26337c3d92e6a499c6cf2795b17e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:24 -0700 Subject: [PATCH 04/30] xfs: pass the xfs_bmbt_irec directly through the log intent code [ Upstream commit ddccb81b26ec021ae1f3366aa996cc4c68dd75ce ] Instead of repeatedly boxing and unboxing the incore extent mapping structure as it passes through the BUI code, pass the pointer directly through. Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 32 ++++++++-------- fs/xfs/libxfs/xfs_bmap.h | 5 +-- fs/xfs/xfs_bmap_item.c | 81 +++++++++++++++------------------------- 3 files changed, 46 insertions(+), 72 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 27d3121e6da9..d523ac51c662 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -6119,39 +6119,37 @@ xfs_bmap_unmap_extent( int xfs_bmap_finish_one( struct xfs_trans *tp, - struct xfs_inode *ip, - enum xfs_bmap_intent_type type, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, - xfs_exntst_t state) + struct xfs_bmap_intent *bi) { + struct xfs_bmbt_irec *bmap = &bi->bi_bmap; int error = 0; ASSERT(tp->t_firstblock == NULLFSBLOCK); trace_xfs_bmap_deferred(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, - XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), - ip->i_ino, whichfork, startoff, *blockcount, state); + XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), + bi->bi_type, + XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), + bi->bi_owner->i_ino, bi->bi_whichfork, + bmap->br_startoff, bmap->br_blockcount, + bmap->br_state); - if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK)) + if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK)) return -EFSCORRUPTED; if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE)) return -EIO; - switch (type) { + switch (bi->bi_type) { case XFS_BMAP_MAP: - error = xfs_bmapi_remap(tp, ip, startoff, *blockcount, - startblock, 0); - *blockcount = 0; + error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff, + bmap->br_blockcount, bmap->br_startblock, 0); + bmap->br_blockcount = 0; break; case XFS_BMAP_UNMAP: - error = __xfs_bunmapi(tp, ip, startoff, blockcount, - XFS_BMAPI_REMAP, 1); + error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff, + &bmap->br_blockcount, XFS_BMAPI_REMAP, 1); break; default: ASSERT(0); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 08c16e4edc0f..524912f276f8 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -236,10 +236,7 @@ struct xfs_bmap_intent { struct xfs_bmbt_irec bi_bmap; }; -int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip, - enum xfs_bmap_intent_type type, int whichfork, - xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, xfs_exntst_t state); +int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi); void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 41323da523d1..13aa5359c02f 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -246,18 +246,11 @@ static int xfs_trans_log_finish_bmap_update( struct xfs_trans *tp, struct xfs_bud_log_item *budp, - enum xfs_bmap_intent_type type, - struct xfs_inode *ip, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, - xfs_exntst_t state) + struct xfs_bmap_intent *bi) { int error; - error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, - startblock, blockcount, state); + error = xfs_bmap_finish_one(tp, bi); /* * Mark the transaction dirty, even on error. This ensures the @@ -378,25 +371,17 @@ xfs_bmap_update_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_bmap_intent *bmap; - xfs_filblks_t count; + struct xfs_bmap_intent *bi; int error; - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - count = bmap->bi_bmap.br_blockcount; - error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), - bmap->bi_type, - bmap->bi_owner, bmap->bi_whichfork, - bmap->bi_bmap.br_startoff, - bmap->bi_bmap.br_startblock, - &count, - bmap->bi_bmap.br_state); - if (!error && count > 0) { - ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); - bmap->bi_bmap.br_blockcount = count; + bi = container_of(item, struct xfs_bmap_intent, bi_list); + + error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi); + if (!error && bi->bi_bmap.br_blockcount > 0) { + ASSERT(bi->bi_type == XFS_BMAP_UNMAP); return -EAGAIN; } - kmem_cache_free(xfs_bmap_intent_cache, bmap); + kmem_cache_free(xfs_bmap_intent_cache, bi); return error; } @@ -471,17 +456,13 @@ xfs_bui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { - struct xfs_bmbt_irec irec; + struct xfs_bmap_intent fake = { }; struct xfs_bui_log_item *buip = BUI_ITEM(lip); struct xfs_trans *tp; struct xfs_inode *ip = NULL; struct xfs_mount *mp = lip->li_log->l_mp; - struct xfs_map_extent *bmap; + struct xfs_map_extent *map; struct xfs_bud_log_item *budp; - xfs_filblks_t count; - xfs_exntst_t state; - unsigned int bui_type; - int whichfork; int iext_delta; int error = 0; @@ -491,14 +472,12 @@ xfs_bui_item_recover( return -EFSCORRUPTED; } - bmap = &buip->bui_format.bui_extents[0]; - state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? - XFS_EXT_UNWRITTEN : XFS_EXT_NORM; - whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? + map = &buip->bui_format.bui_extents[0]; + fake.bi_whichfork = (map->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? XFS_ATTR_FORK : XFS_DATA_FORK; - bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; + fake.bi_type = map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - error = xlog_recover_iget(mp, bmap->me_owner, &ip); + error = xlog_recover_iget(mp, map->me_owner, &ip); if (error) return error; @@ -512,34 +491,34 @@ xfs_bui_item_recover( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - if (bui_type == XFS_BMAP_MAP) + if (fake.bi_type == XFS_BMAP_MAP) iext_delta = XFS_IEXT_ADD_NOSPLIT_CNT; else iext_delta = XFS_IEXT_PUNCH_HOLE_CNT; - error = xfs_iext_count_may_overflow(ip, whichfork, iext_delta); + error = xfs_iext_count_may_overflow(ip, fake.bi_whichfork, iext_delta); if (error == -EFBIG) error = xfs_iext_count_upgrade(tp, ip, iext_delta); if (error) goto err_cancel; - count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip, - whichfork, bmap->me_startoff, bmap->me_startblock, - &count, state); + fake.bi_owner = ip; + fake.bi_bmap.br_startblock = map->me_startblock; + fake.bi_bmap.br_startoff = map->me_startoff; + fake.bi_bmap.br_blockcount = map->me_len; + fake.bi_bmap.br_state = (map->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? + XFS_EXT_UNWRITTEN : XFS_EXT_NORM; + + error = xfs_trans_log_finish_bmap_update(tp, budp, &fake); if (error == -EFSCORRUPTED) - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap, - sizeof(*bmap)); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, map, + sizeof(*map)); if (error) goto err_cancel; - if (count > 0) { - ASSERT(bui_type == XFS_BMAP_UNMAP); - irec.br_startblock = bmap->me_startblock; - irec.br_blockcount = count; - irec.br_startoff = bmap->me_startoff; - irec.br_state = state; - xfs_bmap_unmap_extent(tp, ip, &irec); + if (fake.bi_bmap.br_blockcount > 0) { + ASSERT(fake.bi_type == XFS_BMAP_UNMAP); + xfs_bmap_unmap_extent(tp, ip, &fake.bi_bmap); } /* From ec81c519e78bcebc9036c8383a52277222a181f8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:25 -0700 Subject: [PATCH 05/30] xfs: pass per-ag references to xfs_free_extent [ Upstream commit b2ccab3199aa7cea9154d80ea2585312c5f6eba0 ] Pass a reference to the per-AG structure to xfs_free_extent. Most callers already have one, so we can eliminate unnecessary lookups. The one exception to this is the EFI code, which the next patch will fix. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.c | 6 ++---- fs/xfs/libxfs/xfs_alloc.c | 15 +++++---------- fs/xfs/libxfs/xfs_alloc.h | 8 +++++--- fs/xfs/libxfs/xfs_ialloc_btree.c | 7 +++++-- fs/xfs/libxfs/xfs_refcount_btree.c | 5 +++-- fs/xfs/scrub/repair.c | 3 ++- fs/xfs/xfs_extfree_item.c | 8 ++++++-- 7 files changed, 28 insertions(+), 24 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index bf47efe08a58..f29767e6eb7f 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -981,10 +981,8 @@ xfs_ag_extend_space( if (error) return error; - error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno, - be32_to_cpu(agf->agf_length) - len), - len, &XFS_RMAP_OINFO_SKIP_UPDATE, - XFS_AG_RESV_NONE); + error = xfs_free_extent(tp, pag, be32_to_cpu(agf->agf_length) - len, + len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index cc5c954cda88..da6d158d666f 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3447,7 +3447,8 @@ xfs_free_extent_fix_freelist( int __xfs_free_extent( struct xfs_trans *tp, - xfs_fsblock_t bno, + struct xfs_perag *pag, + xfs_agblock_t agbno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, @@ -3455,12 +3456,9 @@ __xfs_free_extent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *agbp; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); - xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); struct xfs_agf *agf; int error; unsigned int busy_flags = 0; - struct xfs_perag *pag; ASSERT(len != 0); ASSERT(type != XFS_AG_RESV_AGFL); @@ -3469,10 +3467,9 @@ __xfs_free_extent( XFS_ERRTAG_FREE_EXTENT)) return -EIO; - pag = xfs_perag_get(mp, agno); error = xfs_free_extent_fix_freelist(tp, pag, &agbp); if (error) - goto err; + return error; agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { @@ -3486,20 +3483,18 @@ __xfs_free_extent( goto err_release; } - error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); + error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo, + type); if (error) goto err_release; if (skip_discard) busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags); - xfs_perag_put(pag); return 0; err_release: xfs_trans_brelse(tp, agbp); -err: - xfs_perag_put(pag); return error; } diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 2c3f762dfb58..5074aed6dfad 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -130,7 +130,8 @@ xfs_alloc_vextent( int /* error */ __xfs_free_extent( struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t bno, /* starting block number of extent */ + struct xfs_perag *pag, + xfs_agblock_t agbno, xfs_extlen_t len, /* length of extent */ const struct xfs_owner_info *oinfo, /* extent owner */ enum xfs_ag_resv_type type, /* block reservation type */ @@ -139,12 +140,13 @@ __xfs_free_extent( static inline int xfs_free_extent( struct xfs_trans *tp, - xfs_fsblock_t bno, + struct xfs_perag *pag, + xfs_agblock_t agbno, xfs_extlen_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type) { - return __xfs_free_extent(tp, bno, len, oinfo, type, false); + return __xfs_free_extent(tp, pag, agbno, len, oinfo, type, false); } int /* error */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 8c83e265770c..2dbe553d87fb 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -156,9 +156,12 @@ __xfs_inobt_free_block( struct xfs_buf *bp, enum xfs_ag_resv_type resv) { + xfs_fsblock_t fsbno; + xfs_inobt_mod_blockcount(cur, -1); - return xfs_free_extent(cur->bc_tp, - XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1, + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); + return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, &XFS_RMAP_OINFO_INOBT, resv); } diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index e1f789866683..3d8e62da2ccc 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -112,8 +112,9 @@ xfs_refcountbt_free_block( XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, - XFS_AG_RESV_METADATA); + error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, + XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); if (error) return error; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index c18bd039fce9..e0ed0ebfdaea 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -582,7 +582,8 @@ xrep_reap_block( else if (resv == XFS_AG_RESV_AGFL) error = xrep_put_freelist(sc, agbno); else - error = xfs_free_extent(sc->tp, fsbno, 1, oinfo, resv); + error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, oinfo, + resv); if (agf_bp != sc->sa.agf_bp) xfs_trans_brelse(sc->tp, agf_bp); if (error) diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 011b50469301..c1aae07467c9 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -350,6 +350,7 @@ xfs_trans_free_extent( struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_extent *extp; + struct xfs_perag *pag; uint next_extent; xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock); @@ -366,9 +367,12 @@ xfs_trans_free_extent( trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, xefi->xefi_blockcount); - error = __xfs_free_extent(tp, xefi->xefi_startblock, - xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, + pag = xfs_perag_get(mp, agno); + error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount, + &oinfo, XFS_AG_RESV_NONE, xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); + xfs_perag_put(pag); + /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: From fa91c6969db4843e0126f6ab38877c9559d64c37 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 13 Mar 2025 13:25:26 -0700 Subject: [PATCH 06/30] xfs: validate block number being freed before adding to xefi [ Upstream commit 7dfee17b13e5024c5c0ab1911859ded4182de3e5 ] Bad things happen in defered extent freeing operations if it is passed a bad block number in the xefi. This can come from a bogus agno/agbno pair from deferred agfl freeing, or just a bad fsbno being passed to __xfs_free_extent_later(). Either way, it's very difficult to diagnose where a null perag oops in EFI creation is coming from when the operation that queued the xefi has already been completed and there's no longer any trace of it around.... Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.c | 5 ++++- fs/xfs/libxfs/xfs_alloc.c | 16 +++++++++++++--- fs/xfs/libxfs/xfs_alloc.h | 6 +++--- fs/xfs/libxfs/xfs_bmap.c | 10 ++++++++-- fs/xfs/libxfs/xfs_bmap_btree.c | 7 +++++-- fs/xfs/libxfs/xfs_ialloc.c | 24 ++++++++++++++++-------- fs/xfs/libxfs/xfs_refcount.c | 13 ++++++++++--- fs/xfs/xfs_reflink.c | 4 +++- 8 files changed, 62 insertions(+), 23 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index f29767e6eb7f..ee0d56854b83 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -906,7 +906,10 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true); + err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, + true); + if (err2) + goto resv_err; /* * Roll the transaction before trying to re-init the per-ag diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index da6d158d666f..ec03040237db 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2485,7 +2485,7 @@ xfs_agfl_reset( * the real allocation can proceed. Deferring the free disconnects freeing up * the AGFL slot from freeing the block. */ -STATIC void +static int xfs_defer_agfl_block( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -2504,16 +2504,20 @@ xfs_defer_agfl_block( xefi->xefi_blockcount = 1; xefi->xefi_owner = oinfo->oi_owner; + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) + return -EFSCORRUPTED; + trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); + return 0; } /* * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ -void +int __xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, @@ -2540,6 +2544,9 @@ __xfs_free_extent_later( #endif ASSERT(xfs_extfree_item_cache != NULL); + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) + return -EFSCORRUPTED; + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = bno; @@ -2561,6 +2568,7 @@ __xfs_free_extent_later( XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); + return 0; } #ifdef DEBUG @@ -2720,7 +2728,9 @@ xfs_alloc_fix_freelist( goto out_agbp_relse; /* defer agfl frees */ - xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); + if (error) + goto out_agbp_relse; } targs.tp = tp; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 5074aed6dfad..cfc9dcdc1753 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -213,7 +213,7 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } -void __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, +int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, bool skip_discard); @@ -233,14 +233,14 @@ struct xfs_extent_free_item { #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ -static inline void +static inline int xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo) { - __xfs_free_extent_later(tp, bno, len, oinfo, false); + return __xfs_free_extent_later(tp, bno, len, oinfo, false); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d523ac51c662..29781a124323 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -572,8 +572,12 @@ xfs_bmap_btree_to_extents( cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + if (error) + return error; + ip->i_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); @@ -5202,10 +5206,12 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - __xfs_free_extent_later(tp, del->br_startblock, + error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, (bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN); + if (error) + goto done; } } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 18de4fbfef4e..bac2a6496a8e 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -285,11 +285,14 @@ xfs_bmbt_free_block( struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); struct xfs_owner_info oinfo; + int error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); - ip->i_nblocks--; + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); + if (error) + return error; + ip->i_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 120dbec16f5c..448ea76d50f8 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1827,7 +1827,7 @@ xfs_dialloc( * might be sparse and only free the regions that are allocated as part of the * chunk. */ -STATIC void +static int xfs_difree_inode_chunk( struct xfs_trans *tp, xfs_agnumber_t agno, @@ -1844,10 +1844,10 @@ xfs_difree_inode_chunk( if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ - xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); - return; + return xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, sagbno), + M_IGEO(mp)->ialloc_blks, + &XFS_RMAP_OINFO_INODES); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1864,6 +1864,8 @@ xfs_difree_inode_chunk( XFS_INOBT_HOLEMASK_BITS); nextbit = startidx + 1; while (startidx < XFS_INOBT_HOLEMASK_BITS) { + int error; + nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS, nextbit); /* @@ -1889,8 +1891,11 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); - xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + error = xfs_free_extent_later(tp, + XFS_AGB_TO_FSB(mp, agno, agbno), + contigblk, &XFS_RMAP_OINFO_INODES); + if (error) + return error; /* reset range to current bit and carry on... */ startidx = endidx = nextbit; @@ -1898,6 +1903,7 @@ xfs_difree_inode_chunk( next: nextbit++; } + return 0; } STATIC int @@ -1998,7 +2004,9 @@ xfs_difree_inobt( goto error0; } - xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec); + if (error) + goto error0; } else { xic->deleted = false; diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index bcf46aa0d08b..fec1ad95988c 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1129,8 +1129,10 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, tmp.rc_startblock); - xfs_free_extent_later(cur->bc_tp, fsbno, + error = xfs_free_extent_later(cur->bc_tp, fsbno, tmp.rc_blockcount, NULL); + if (error) + goto out_error; } (*agbno) += tmp.rc_blockcount; @@ -1188,8 +1190,10 @@ xfs_refcount_adjust_extents( fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, ext.rc_startblock); - xfs_free_extent_later(cur->bc_tp, fsbno, + error = xfs_free_extent_later(cur->bc_tp, fsbno, ext.rc_blockcount, NULL); + if (error) + goto out_error; } skip: @@ -1958,7 +1962,10 @@ xfs_refcount_recover_cow_leftovers( rr->rr_rrec.rc_blockcount); /* Free the block. */ - xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL); + error = xfs_free_extent_later(tp, fsb, + rr->rr_rrec.rc_blockcount, NULL); + if (error) + goto out_trans; error = xfs_trans_commit(tp); if (error) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cbdc23217a42..ee187e90d943 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -618,8 +618,10 @@ xfs_reflink_cancel_cow_blocks( xfs_refcount_free_cow_extent(*tpp, del.br_startblock, del.br_blockcount); - xfs_free_extent_later(*tpp, del.br_startblock, + error = xfs_free_extent_later(*tpp, del.br_startblock, del.br_blockcount, NULL); + if (error) + break; /* Roll the transaction */ error = xfs_defer_finish(tpp); From ec35f7567b970506ac45f1c6def1f1b908c84bc3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 13 Mar 2025 13:25:27 -0700 Subject: [PATCH 07/30] xfs: fix bounds check in xfs_defer_agfl_block() [ Upstream commit 2bed0d82c2f78b91a0a9a5a73da57ee883a0c070 ] Need to happen before we allocate and then leak the xefi. Found by coverity via an xfsprogs libxfs scan. [djwong: This also fixes the type of the @agbno argument.] Fixes: 7dfee17b13e5 ("xfs: validate block number being freed before adding to xefi") Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_alloc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ec03040237db..af447605051b 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2489,24 +2489,25 @@ static int xfs_defer_agfl_block( struct xfs_trans *tp, xfs_agnumber_t agno, - xfs_fsblock_t agbno, + xfs_agblock_t agbno, struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; struct xfs_extent_free_item *xefi; + xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno); ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); + if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno))) + return -EFSCORRUPTED; + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); - xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); + xefi->xefi_startblock = fsbno; xefi->xefi_blockcount = 1; xefi->xefi_owner = oinfo->oi_owner; - if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, xefi->xefi_startblock))) - return -EFSCORRUPTED; - trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); From 5d6f3d30a4659e25a53c3ef4f3c333d5c0fa1ef9 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 13 Mar 2025 13:25:28 -0700 Subject: [PATCH 08/30] xfs: use deferred frees for btree block freeing [ Upstream commit b742d7b4f0e03df25c2a772adcded35044b625ca ] [ 6.1: resolved conflict in xfs_extfree_item.c ] Btrees that aren't freespace management trees use the normal extent allocation and freeing routines for their blocks. Hence when a btree block is freed, a direct call to xfs_free_extent() is made and the extent is immediately freed. This puts the entire free space management btrees under this path, so we are stacking btrees on btrees in the call stack. The inobt, finobt and refcount btrees all do this. However, the bmap btree does not do this - it calls xfs_free_extent_later() to defer the extent free operation via an XEFI and hence it gets processed in deferred operation processing during the commit of the primary transaction (i.e. via intent chaining). We need to change xfs_free_extent() to behave in a non-blocking manner so that we can avoid deadlocks with busy extents near ENOSPC in transactions that free multiple extents. Inserting or removing a record from a btree can cause a multi-level tree merge operation and that will free multiple blocks from the btree in a single transaction. i.e. we can call xfs_free_extent() multiple times, and hence the btree manipulation transaction is vulnerable to this busy extent deadlock vector. To fix this, convert all the remaining callers of xfs_free_extent() to use xfs_free_extent_later() to queue XEFIs and hence defer processing of the extent frees to a context that can be safely restarted if a deadlock condition is detected. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Reviewed-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.c | 2 +- fs/xfs/libxfs/xfs_alloc.c | 4 ++++ fs/xfs/libxfs/xfs_alloc.h | 8 +++++--- fs/xfs/libxfs/xfs_bmap.c | 8 +++++--- fs/xfs/libxfs/xfs_bmap_btree.c | 3 ++- fs/xfs/libxfs/xfs_ialloc.c | 8 ++++---- fs/xfs/libxfs/xfs_ialloc_btree.c | 3 +-- fs/xfs/libxfs/xfs_refcount.c | 9 ++++++--- fs/xfs/libxfs/xfs_refcount_btree.c | 8 +------- fs/xfs/xfs_extfree_item.c | 3 ++- fs/xfs/xfs_reflink.c | 3 ++- 11 files changed, 33 insertions(+), 26 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index ee0d56854b83..e03bfeacbed4 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -907,7 +907,7 @@ xfs_ag_shrink_space( goto resv_err; err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, - true); + XFS_AG_RESV_NONE, true); if (err2) goto resv_err; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index af447605051b..e44f3f5c6d27 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2507,6 +2507,7 @@ xfs_defer_agfl_block( xefi->xefi_startblock = fsbno; xefi->xefi_blockcount = 1; xefi->xefi_owner = oinfo->oi_owner; + xefi->xefi_agresv = XFS_AG_RESV_AGFL; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); @@ -2524,6 +2525,7 @@ __xfs_free_extent_later( xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, bool skip_discard) { struct xfs_extent_free_item *xefi; @@ -2544,6 +2546,7 @@ __xfs_free_extent_later( ASSERT(agbno + len <= mp->m_sb.sb_agblocks); #endif ASSERT(xfs_extfree_item_cache != NULL); + ASSERT(type != XFS_AG_RESV_AGFL); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) return -EFSCORRUPTED; @@ -2552,6 +2555,7 @@ __xfs_free_extent_later( GFP_KERNEL | __GFP_NOFAIL); xefi->xefi_startblock = bno; xefi->xefi_blockcount = (xfs_extlen_t)len; + xefi->xefi_agresv = type; if (skip_discard) xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index cfc9dcdc1753..bbedb18651de 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -215,7 +215,7 @@ xfs_buf_to_agfl_bno( int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); + enum xfs_ag_resv_type type, bool skip_discard); /* * List of extents to be free "later". @@ -227,6 +227,7 @@ struct xfs_extent_free_item { xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ unsigned int xefi_flags; + enum xfs_ag_resv_type xefi_agresv; }; #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ @@ -238,9 +239,10 @@ xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, - const struct xfs_owner_info *oinfo) + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) { - return __xfs_free_extent_later(tp, bno, len, oinfo, false); + return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 29781a124323..aac071e4d000 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents( return error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, + XFS_AG_RESV_NONE); if (error) return error; @@ -5208,8 +5209,9 @@ xfs_bmap_del_extent_real( } else { error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, - (bflags & XFS_BMAPI_NODISCARD) || - del->br_state == XFS_EXT_UNWRITTEN); + XFS_AG_RESV_NONE, + ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN)); if (error) goto done; } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index bac2a6496a8e..57f401f2492d 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -288,7 +288,8 @@ xfs_bmbt_free_block( int error; xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, + XFS_AG_RESV_NONE); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 448ea76d50f8..d1472cbd48ff 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1846,8 +1846,8 @@ xfs_difree_inode_chunk( /* not sparse, calculate extent info directly */ return xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); + M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, + XFS_AG_RESV_NONE); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1892,8 +1892,8 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); error = xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 2dbe553d87fb..7125447cde1a 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -160,8 +160,7 @@ __xfs_inobt_free_block( xfs_inobt_mod_blockcount(cur, -1); fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); - return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_INOBT, resv); } diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index fec1ad95988c..4ec7a81dd3ef 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1130,7 +1130,8 @@ xfs_refcount_adjust_extents( cur->bc_ag.pag->pag_agno, tmp.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, - tmp.rc_blockcount, NULL); + tmp.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_error; } @@ -1191,7 +1192,8 @@ xfs_refcount_adjust_extents( cur->bc_ag.pag->pag_agno, ext.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, - ext.rc_blockcount, NULL); + ext.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_error; } @@ -1963,7 +1965,8 @@ xfs_refcount_recover_cow_leftovers( /* Free the block. */ error = xfs_free_extent_later(tp, fsb, - rr->rr_rrec.rc_blockcount, NULL); + rr->rr_rrec.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_trans; diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 3d8e62da2ccc..fbd53b6951a9 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -106,19 +106,13 @@ xfs_refcountbt_free_block( struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); - int error; trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1, + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); - if (error) - return error; - - return error; } STATIC int diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index c1aae07467c9..b670863d8467 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -369,7 +369,7 @@ xfs_trans_free_extent( pag = xfs_perag_get(mp, agno); error = __xfs_free_extent(tp, pag, agbno, xefi->xefi_blockcount, - &oinfo, XFS_AG_RESV_NONE, + &oinfo, xefi->xefi_agresv, xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); xfs_perag_put(pag); @@ -628,6 +628,7 @@ xfs_efi_item_recover( for (i = 0; i < efip->efi_format.efi_nextents; i++) { struct xfs_extent_free_item fake = { .xefi_owner = XFS_RMAP_OWN_UNKNOWN, + .xefi_agresv = XFS_AG_RESV_NONE, }; struct xfs_extent *extp; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index ee187e90d943..1bac6a8af970 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -619,7 +619,8 @@ xfs_reflink_cancel_cow_blocks( del.br_blockcount); error = xfs_free_extent_later(*tpp, del.br_startblock, - del.br_blockcount, NULL); + del.br_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) break; From a64e7b6cd104554da06411de854f063be58b2b02 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:29 -0700 Subject: [PATCH 09/30] xfs: reserve less log space when recovering log intent items [ Upstream commit 3c919b0910906cc69d76dea214776f0eac73358b ] Wengang Wang reports that a customer's system was running a number of truncate operations on a filesystem with a very small log. Contention on the reserve heads lead to other threads stalling on smaller updates (e.g. mtime updates) long enough to result in the node being rebooted on account of the lack of responsivenes. The node failed to recover because log recovery of an EFI became stuck waiting for a grant of reserve space. From Wengang's report: "For the file deletion, log bytes are reserved basing on xfs_mount->tr_itruncate which is: tr_logres = 175488, tr_logcount = 2, tr_logflags = XFS_TRANS_PERM_LOG_RES, "You see it's a permanent log reservation with two log operations (two transactions in rolling mode). After calculation (xlog_calc_unit_res() adds space for various log headers), the final log space needed per transaction changes from 175488 to 180208 bytes. So the total log space needed is 360416 bytes (180208 * 2). [That quantity] of log space (360416 bytes) needs to be reserved for both run time inode removing (xfs_inactive_truncate()) and EFI recover (xfs_efi_item_recover())." In other words, runtime pre-reserves 360K of space in anticipation of running a chain of two transactions in which each transaction gets a 180K reservation. Now that we've allocated the transaction, we delete the bmap mapping, log an EFI to free the space, and roll the transaction as part of finishing the deferops chain. Rolling creates a new xfs_trans which shares its ticket with the old transaction. Next, xfs_trans_roll calls __xfs_trans_commit with regrant == true, which calls xlog_cil_commit with the same regrant parameter. xlog_cil_commit calls xfs_log_ticket_regrant, which decrements t_cnt and subtracts t_curr_res from the reservation and write heads. If the filesystem is fresh and the first transaction only used (say) 20K, then t_curr_res will be 160K, and we give that much reservation back to the reservation head. Or if the file is really fragmented and the first transaction actually uses 170K, then t_curr_res will be 10K, and that's what we give back to the reservation. Having done that, we're now headed into the second transaction with an EFI and 180K of reservation. Other threads apparently consumed all the reservation for smaller transactions, such as timestamp updates. Now let's say the first transaction gets written to disk and we crash without ever completing the second transaction. Now we remount the fs, log recovery finds the unfinished EFI, and calls xfs_efi_recover to finish the EFI. However, xfs_efi_recover starts a new tr_itruncate tranasction, which asks for 360K log reservation. This is a lot more than the 180K that we had reserved at the time of the crash. If the first EFI to be recovered is also pinning the tail of the log, we will be unable to free any space in the log, and recovery livelocks. Wengang confirmed this: "Now we have the second transaction which has 180208 log bytes reserved too. The second transaction is supposed to process intents including extent freeing. With my hacking patch, I blocked the extent freeing 5 hours. So in that 5 hours, 180208 (NOT 360416) log bytes are reserved. "With my test case, other transactions (update timestamps) then happen. As my hacking patch pins the journal tail, those timestamp-updating transactions finally use up (almost) all the left available log space (in memory in on disk). And finally the on disk (and in memory) available log space goes down near to 180208 bytes. Those 180208 bytes are reserved by [the] second (extent-free) transaction [in the chain]." Wengang and I noticed that EFI recovery starts a transaction, completes one step of the chain, and commits the transaction without completing any other steps of the chain. Those subsequent steps are completed by xlog_finish_defer_ops, which allocates yet another transaction to finish the rest of the chain. That transaction gets the same tr_logres as the head transaction, but with tr_logcount = 1 to force regranting with every roll to avoid livelocks. In other words, we already figured this out in commit 929b92f64048d ("xfs: xfs_defer_capture should absorb remaining transaction reservation"), but should have applied that logic to each intent item's recovery function. For Wengang's case, the xfs_trans_alloc call in the EFI recovery function should only be asking for a single transaction's worth of log reservation -- 180K, not 360K. Quoting Wengang again: "With log recovery, during EFI recovery, we use tr_itruncate again to reserve two transactions that needs 360416 log bytes. Reserving 360416 bytes fails [stalls] because we now only have about 180208 available. "Actually during the EFI recover, we only need one transaction to free the extents just like the 2nd transaction at RUNTIME. So it only needs to reserve 180208 rather than 360416 bytes. We have (a bit) more than 180208 available log bytes on disk, so [if we decrease the reservation to 180K] the reservation goes and the recovery [finishes]. That is to say: we can fix the log recover part to fix the issue. We can introduce a new xfs_trans_res xfs_mount->tr_ext_free { tr_logres = 175488, tr_logcount = 0, tr_logflags = 0, } "and use tr_ext_free instead of tr_itruncate in EFI recover." However, I don't think it quite makes sense to create an entirely new transaction reservation type to handle single-stepping during log recovery. Instead, we should copy the transaction reservation information in the xfs_mount, change tr_logcount to 1, and pass that into xfs_trans_alloc. We know this won't risk changing the min log size computation since we always ask for a fraction of the reservation for all known transaction types. This looks like it's been lurking in the codebase since commit 3d3c8b5222b92, which changed the xfs_trans_reserve call in xlog_recover_process_efi to use the tr_logcount in tr_itruncate. That changed the EFI recovery transaction from making a non-XFS_TRANS_PERM_LOG_RES request for one transaction's worth of log space to a XFS_TRANS_PERM_LOG_RES request for two transactions worth. Fixes: 3d3c8b5222b92 ("xfs: refactor xfs_trans_reserve() interface") Complements: 929b92f64048d ("xfs: xfs_defer_capture should absorb remaining transaction reservation") Suggested-by: Wengang Wang Cc: Srikanth C S [djwong: apply the same transformation to all log intent recovery] Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_log_recover.h | 22 ++++++++++++++++++++++ fs/xfs/xfs_attr_item.c | 7 ++++--- fs/xfs/xfs_bmap_item.c | 4 +++- fs/xfs/xfs_extfree_item.c | 4 +++- fs/xfs/xfs_refcount_item.c | 6 ++++-- fs/xfs/xfs_rmap_item.c | 6 ++++-- 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index 2420865f3007..a5100a11faf9 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -131,4 +131,26 @@ void xlog_check_buf_cancel_table(struct xlog *log); #define xlog_check_buf_cancel_table(log) do { } while (0) #endif +/* + * Transform a regular reservation into one suitable for recovery of a log + * intent item. + * + * Intent recovery only runs a single step of the transaction chain and defers + * the rest to a separate transaction. Therefore, we reduce logcount to 1 here + * to avoid livelocks if the log grant space is nearly exhausted due to the + * recovered intent pinning the tail. Keep the same logflags to avoid tripping + * asserts elsewhere. Struct copies abound below. + */ +static inline struct xfs_trans_res +xlog_recover_resv(const struct xfs_trans_res *r) +{ + struct xfs_trans_res ret = { + .tr_logres = r->tr_logres, + .tr_logcount = 1, + .tr_logflags = r->tr_logflags, + }; + + return ret; +} + #endif /* __XFS_LOG_RECOVER_H__ */ diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 2788a6f2edcd..36fe2abb16e6 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -547,7 +547,7 @@ xfs_attri_item_recover( struct xfs_inode *ip; struct xfs_da_args *args; struct xfs_trans *tp; - struct xfs_trans_res tres; + struct xfs_trans_res resv; struct xfs_attri_log_format *attrp; struct xfs_attri_log_nameval *nv = attrip->attri_nameval; int error; @@ -618,8 +618,9 @@ xfs_attri_item_recover( goto out; } - xfs_init_attr_trans(args, &tres, &total); - error = xfs_trans_alloc(mp, &tres, total, 0, XFS_TRANS_RESERVE, &tp); + xfs_init_attr_trans(args, &resv, &total); + resv = xlog_recover_resv(&resv); + error = xfs_trans_alloc(mp, &resv, total, 0, XFS_TRANS_RESERVE, &tp); if (error) goto out; diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 13aa5359c02f..1058603db3ac 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -457,6 +457,7 @@ xfs_bui_item_recover( struct list_head *capture_list) { struct xfs_bmap_intent fake = { }; + struct xfs_trans_res resv; struct xfs_bui_log_item *buip = BUI_ITEM(lip); struct xfs_trans *tp; struct xfs_inode *ip = NULL; @@ -482,7 +483,8 @@ xfs_bui_item_recover( return error; /* Allocate transaction and do the work. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), 0, 0, &tp); if (error) goto err_rele; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index b670863d8467..3ed25c352269 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -598,6 +598,7 @@ xfs_efi_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { + struct xfs_trans_res resv; struct xfs_efi_log_item *efip = EFI_ITEM(lip); struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_efd_log_item *efdp; @@ -620,7 +621,8 @@ xfs_efi_item_recover( } } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, 0, 0, 0, &tp); if (error) return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index ff4d5087ba00..dfd7b824e32b 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -453,6 +453,7 @@ xfs_cui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { + struct xfs_trans_res resv; struct xfs_cui_log_item *cuip = CUI_ITEM(lip); struct xfs_cud_log_item *cudp; struct xfs_trans *tp; @@ -490,8 +491,9 @@ xfs_cui_item_recover( * doesn't fit. We need to reserve enough blocks to handle a * full btree split on either end of the refcount range. */ - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, - mp->m_refc_maxlevels * 2, 0, XFS_TRANS_RESERVE, &tp); + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, mp->m_refc_maxlevels * 2, 0, + XFS_TRANS_RESERVE, &tp); if (error) return error; diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 534504ede1a3..2043cea261c0 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -492,6 +492,7 @@ xfs_rui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { + struct xfs_trans_res resv; struct xfs_rui_log_item *ruip = RUI_ITEM(lip); struct xfs_map_extent *rmap; struct xfs_rud_log_item *rudp; @@ -519,8 +520,9 @@ xfs_rui_item_recover( } } - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, - mp->m_rmap_maxlevels, 0, XFS_TRANS_RESERVE, &tp); + resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate); + error = xfs_trans_alloc(mp, &resv, mp->m_rmap_maxlevels, 0, + XFS_TRANS_RESERVE, &tp); if (error) return error; rudp = xfs_trans_get_rud(tp, ruip); From 6744e7b06c496c13816c447c99b6b8e5f3381b78 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:30 -0700 Subject: [PATCH 10/30] xfs: move the xfs_rtbitmap.c declarations to xfs_rtbitmap.h [ Upstream commit 13928113fc5b5e79c91796290a99ed991ac0efe2 ] [6.1: resolved conflicts with fscounters.c and rtsummary.c ] Move all the declarations for functionality in xfs_rtbitmap.c into a separate xfs_rtbitmap.h header file. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Catherine Hoang Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 2 +- fs/xfs/libxfs/xfs_rtbitmap.c | 1 + fs/xfs/libxfs/xfs_rtbitmap.h | 82 ++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/rtbitmap.c | 2 +- fs/xfs/xfs_fsmap.c | 2 +- fs/xfs/xfs_rtalloc.c | 1 + fs/xfs/xfs_rtalloc.h | 73 -------------------------------- 7 files changed, 87 insertions(+), 76 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_rtbitmap.h diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index aac071e4d000..c99fd7fe242e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -21,7 +21,7 @@ #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_bmap_btree.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_quota.h" diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 655108a4cd05..9eb1b5aa7e35 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -16,6 +16,7 @@ #include "xfs_trans.h" #include "xfs_rtalloc.h" #include "xfs_error.h" +#include "xfs_rtbitmap.h" /* * Realtime allocator bitmap functions shared with userspace. diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h new file mode 100644 index 000000000000..546dea34bb37 --- /dev/null +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_RTBITMAP_H__ +#define __XFS_RTBITMAP_H__ + +/* + * XXX: Most of the realtime allocation functions deal in units of realtime + * extents, not realtime blocks. This looks funny when paired with the type + * name and screams for a larger cleanup. + */ +struct xfs_rtalloc_rec { + xfs_rtblock_t ar_startext; + xfs_rtblock_t ar_extcount; +}; + +typedef int (*xfs_rtalloc_query_range_fn)( + struct xfs_mount *mp, + struct xfs_trans *tp, + const struct xfs_rtalloc_rec *rec, + void *priv); + +#ifdef CONFIG_XFS_RT +int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t block, int issum, struct xfs_buf **bpp); +int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, int val, + xfs_rtblock_t *new, int *stat); +int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_rtblock_t limit, + xfs_rtblock_t *rtblock); +int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_rtblock_t limit, + xfs_rtblock_t *rtblock); +int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, int val); +int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, + int log, xfs_rtblock_t bbno, int delta, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb, + xfs_suminfo_t *sum); +int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, + xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, + xfs_fsblock_t *rsb); +int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb); +int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, + const struct xfs_rtalloc_rec *low_rec, + const struct xfs_rtalloc_rec *high_rec, + xfs_rtalloc_query_range_fn fn, void *priv); +int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtalloc_query_range_fn fn, + void *priv); +bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, + xfs_rtblock_t start, xfs_extlen_t len, + bool *is_free); +/* + * Free an extent in the realtime subvolume. Length is expressed in + * realtime extents, as is the block number. + */ +int /* error */ +xfs_rtfree_extent( + struct xfs_trans *tp, /* transaction pointer */ + xfs_rtblock_t bno, /* starting block number to free */ + xfs_extlen_t len); /* length of extent freed */ + +/* Same as above, but in units of rt blocks. */ +int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, + xfs_filblks_t rtlen); +#else /* CONFIG_XFS_RT */ +# define xfs_rtfree_extent(t,b,l) (-ENOSYS) +# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) +# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) +# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) +# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) +# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) +#endif /* CONFIG_XFS_RT */ + +#endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 0a3bde64c675..faf6e0890d44 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -11,7 +11,7 @@ #include "xfs_mount.h" #include "xfs_log_format.h" #include "xfs_trans.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_inode.h" #include "xfs_bmap.h" #include "scrub/scrub.h" diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 062e5dc5db9f..a5b9754c62d1 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -23,7 +23,7 @@ #include "xfs_refcount.h" #include "xfs_refcount_btree.h" #include "xfs_alloc_btree.h" -#include "xfs_rtalloc.h" +#include "xfs_rtbitmap.h" #include "xfs_ag.h" /* Convert an xfs_fsmap to an fsmap. */ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 0bfbbc1dd0da..7ce122da43fe 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -19,6 +19,7 @@ #include "xfs_icache.h" #include "xfs_rtalloc.h" #include "xfs_sb.h" +#include "xfs_rtbitmap.h" /* * Read and return the summary information for a given extent size, diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 65c284e9d33e..11859c259a1c 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -11,22 +11,6 @@ struct xfs_mount; struct xfs_trans; -/* - * XXX: Most of the realtime allocation functions deal in units of realtime - * extents, not realtime blocks. This looks funny when paired with the type - * name and screams for a larger cleanup. - */ -struct xfs_rtalloc_rec { - xfs_rtblock_t ar_startext; - xfs_rtblock_t ar_extcount; -}; - -typedef int (*xfs_rtalloc_query_range_fn)( - struct xfs_mount *mp, - struct xfs_trans *tp, - const struct xfs_rtalloc_rec *rec, - void *priv); - #ifdef CONFIG_XFS_RT /* * Function prototypes for exported functions. @@ -48,19 +32,6 @@ xfs_rtallocate_extent( xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock); /* out: start block allocated */ -/* - * Free an extent in the realtime subvolume. Length is expressed in - * realtime extents, as is the block number. - */ -int /* error */ -xfs_rtfree_extent( - struct xfs_trans *tp, /* transaction pointer */ - xfs_rtblock_t bno, /* starting block number to free */ - xfs_extlen_t len); /* length of extent freed */ - -/* Same as above, but in units of rt blocks. */ -int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, - xfs_filblks_t rtlen); /* * Initialize realtime fields in the mount structure. @@ -102,55 +73,11 @@ xfs_growfs_rt( struct xfs_mount *mp, /* file system mount structure */ xfs_growfs_rt_t *in); /* user supplied growfs struct */ -/* - * From xfs_rtbitmap.c - */ -int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t block, int issum, struct xfs_buf **bpp); -int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val, - xfs_rtblock_t *new, int *stat); -int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_rtblock_t limit, - xfs_rtblock_t *rtblock); -int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, int val); -int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, - int log, xfs_rtblock_t bbno, int delta, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb, - xfs_suminfo_t *sum); -int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, - xfs_fsblock_t *rsb); -int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - struct xfs_buf **rbpp, xfs_fsblock_t *rsb); -int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp, - const struct xfs_rtalloc_rec *low_rec, - const struct xfs_rtalloc_rec *high_rec, - xfs_rtalloc_query_range_fn fn, void *priv); -int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtalloc_query_range_fn fn, - void *priv); -bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); -int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_rtblock_t start, xfs_extlen_t len, - bool *is_free); int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp); #else # define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS) -# define xfs_rtfree_extent(t,b,l) (-ENOSYS) -# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) # define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS) # define xfs_growfs_rt(mp,in) (-ENOSYS) -# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS) -# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) -# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) -# define xfs_verify_rtbno(m, r) (false) -# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) # define xfs_rtalloc_reinit_frextents(m) (0) static inline int /* error */ xfs_rtmount_init( From e377031115332a23d7110a577ca96c4b1efad1f9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:31 -0700 Subject: [PATCH 11/30] xfs: convert rt bitmap extent lengths to xfs_rtbxlen_t [ Upstream commit f29c3e745dc253bf9d9d06ddc36af1a534ba1dd0 ] [ 6.1: excluded changes to trace.h as xchk_rtsum_record_free does not exist yet ] XFS uses xfs_rtblock_t for many different uses, which makes it much more difficult to perform a unit analysis on the codebase. One of these (ab)uses is when we need to store the length of a free space extent as stored in the realtime bitmap. Because there can be up to 2^64 realtime extents in a filesystem, we need a new type that is larger than xfs_rtxlen_t for callers that are querying the bitmap directly. This means scrub and growfs. Create this type as "xfs_rtbxlen_t" and use it to store 64-bit rtx lengths. 'b' stands for 'bitmap' or 'big'; reader's choice. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Catherine Hoang Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_format.h | 2 +- fs/xfs/libxfs/xfs_rtbitmap.h | 2 +- fs/xfs/libxfs/xfs_types.h | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 371dc07233e0..20acb8573d7a 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -98,7 +98,7 @@ typedef struct xfs_sb { uint32_t sb_blocksize; /* logical block size, bytes */ xfs_rfsblock_t sb_dblocks; /* number of data blocks */ xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */ - xfs_rtblock_t sb_rextents; /* number of realtime extents */ + xfs_rtbxlen_t sb_rextents; /* number of realtime extents */ uuid_t sb_uuid; /* user-visible file system unique id */ xfs_fsblock_t sb_logstart; /* starting block of log if internal */ xfs_ino_t sb_rootino; /* root inode number */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 546dea34bb37..c3ef22e67aa3 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -13,7 +13,7 @@ */ struct xfs_rtalloc_rec { xfs_rtblock_t ar_startext; - xfs_rtblock_t ar_extcount; + xfs_rtbxlen_t ar_extcount; }; typedef int (*xfs_rtalloc_query_range_fn)( diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 5ebdda7e1078..7023e4a79f87 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -31,6 +31,7 @@ typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */ typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */ typedef uint64_t xfs_fileoff_t; /* block number in a file */ typedef uint64_t xfs_filblks_t; /* number of blocks in a file */ +typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */ typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */ From c3c049984c0df772aa6b4825b27a20eb6d958ac6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Mar 2025 13:25:32 -0700 Subject: [PATCH 12/30] xfs: consider minlen sized extents in xfs_rtallocate_extent_block [ Upstream commit 944df75958807d56f2db9fdc769eb15dd9f0366a ] minlen is the lower bound on the extent length that the caller can accept, and maxlen is at this point the maximal available length. This means a minlen extent is perfectly fine to use, so do it. This matches the equivalent logic in xfs_rtallocate_extent_exact that also accepts a minlen sized extent. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_rtalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 7ce122da43fe..2f2280f4e7fa 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -318,7 +318,7 @@ xfs_rtallocate_extent_block( /* * Searched the whole thing & didn't find a maxlen free extent. */ - if (minlen < maxlen && besti != -1) { + if (minlen <= maxlen && besti != -1) { xfs_extlen_t p; /* amount to trim length by */ /* From f7a1233bb028066f31085250e683a30264538dcf Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:33 -0700 Subject: [PATCH 13/30] xfs: don't leak recovered attri intent items [ Upstream commit 07bcbdf020c9fd3c14bec51c50225a2a02707b94 ] If recovery finds an xattr log intent item calling for the removal of an attribute and the file doesn't even have an attr fork, we know that the removal is trivially complete. However, we can't just exit the recovery function without doing something about the recovered log intent item -- it's still on the AIL, and not logging an attrd item means it stays there forever. This has likely not been seen in practice because few people use LARP and the runtime code won't log the attri for a no-attrfork removexattr operation. But let's fix this anyway. Also we shouldn't really be testing the attr fork presence until we've taken the ILOCK, though this doesn't matter much in recovery, which is single threaded. Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_attr_item.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 36fe2abb16e6..11e88a76a33c 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -329,6 +329,13 @@ xfs_xattri_finish_update( goto out; } + /* If an attr removal is trivially complete, we're done. */ + if (attr->xattri_op_flags == XFS_ATTRI_OP_FLAGS_REMOVE && + !xfs_inode_hasattr(args->dp)) { + error = 0; + goto out; + } + error = xfs_attr_set_iter(attr); if (!error && attr->xattri_dela_state != XFS_DAS_DONE) error = -EAGAIN; @@ -608,8 +615,6 @@ xfs_attri_item_recover( attr->xattri_dela_state = xfs_attr_init_add_state(args); break; case XFS_ATTRI_OP_FLAGS_REMOVE: - if (!xfs_inode_hasattr(args->dp)) - goto out; attr->xattri_dela_state = xfs_attr_init_remove_state(args); break; default: From 6a258245c563b03ad246d39e713abd1778163630 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:37 -0700 Subject: [PATCH 14/30] xfs: make rextslog computation consistent with mkfs [ Upstream commit a6a38f309afc4a7ede01242b603f36c433997780 ] There's a weird discrepancy in xfsprogs dating back to the creation of the Linux port -- if there are zero rt extents, mkfs will set sb_rextents and sb_rextslog both to zero: sbp->sb_rextslog = (uint8_t)(rtextents ? libxfs_highbit32((unsigned int)rtextents) : 0); However, that's not the check that xfs_repair uses for nonzero rtblocks: if (sb->sb_rextslog != libxfs_highbit32((unsigned int)sb->sb_rextents)) The difference here is that xfs_highbit32 returns -1 if its argument is zero. Unfortunately, this means that in the weird corner case of a realtime volume shorter than 1 rt extent, xfs_repair will immediately flag a freshly formatted filesystem as corrupt. Because mkfs has been writing ondisk artifacts like this for decades, we have to accept that as "correct". TBH, zero rextslog for zero rtextents makes more sense to me anyway. Regrettably, the superblock verifier checks created in commit copied xfs_repair even though mkfs has been writing out such filesystems for ages. Fix the superblock verifier to accept what mkfs spits out; the userspace version of this patch will have to fix xfs_repair as well. Note that the new helper leaves the zeroday bug where the upper 32 bits of sb_rextents is ripped off and fed to highbit32. This leads to a seriously undersized rt summary file, which immediately breaks mkfs: $ hugedisk.sh foo /dev/sdc $(( 0x100000080 * 4096))B $ /sbin/mkfs.xfs -f /dev/sda -m rmapbt=0,reflink=0 -r rtdev=/dev/mapper/foo meta-data=/dev/sda isize=512 agcount=4, agsize=1298176 blks = sectsz=512 attr=2, projid32bit=1 = crc=1 finobt=1, sparse=1, rmapbt=0 = reflink=0 bigtime=1 inobtcount=1 nrext64=1 data = bsize=4096 blocks=5192704, imaxpct=25 = sunit=0 swidth=0 blks naming =version 2 bsize=4096 ascii-ci=0, ftype=1 log =internal log bsize=4096 blocks=16384, version=2 = sectsz=512 sunit=0 blks, lazy-count=1 realtime =/dev/mapper/foo extsz=4096 blocks=4294967424, rtextents=4294967424 Discarding blocks...Done. mkfs.xfs: Error initializing the realtime space [117 - Structure needs cleaning] The next patch will drop support for rt volumes with fewer than 1 or more than 2^32-1 rt extents, since they've clearly been broken forever. Fixes: f8e566c0f5e1f ("xfs: validate the realtime geometry in xfs_validate_sb_common") Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_rtbitmap.c | 13 +++++++++++++ fs/xfs/libxfs/xfs_rtbitmap.h | 4 ++++ fs/xfs/libxfs/xfs_sb.c | 3 ++- fs/xfs/xfs_rtalloc.c | 4 ++-- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 9eb1b5aa7e35..37b425ea3fed 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1130,3 +1130,16 @@ xfs_rtalloc_extent_is_free( *is_free = matches; return 0; } + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The use of highbit32 on a 64-bit quantity is a historic artifact that + * prohibits correct use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + return rtextents ? xfs_highbit32(rtextents) : 0; +} + diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index c3ef22e67aa3..6becdc7a48ed 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -70,6 +70,9 @@ xfs_rtfree_extent( /* Same as above, but in units of rt blocks. */ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t rtlen); + +uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + #else /* CONFIG_XFS_RT */ # define xfs_rtfree_extent(t,b,l) (-ENOSYS) # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) @@ -77,6 +80,7 @@ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) +# define xfs_compute_rextslog(rtx) (0) #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index d214233ef532..6b87b04d0c6c 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -25,6 +25,7 @@ #include "xfs_da_format.h" #include "xfs_health.h" #include "xfs_ag.h" +#include "xfs_rtbitmap.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -502,7 +503,7 @@ xfs_validate_sb_common( NBBY * sbp->sb_blocksize); if (sbp->sb_rextents != rexts || - sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) || + sbp->sb_rextslog != xfs_compute_rextslog(rexts) || sbp->sb_rbmblocks != rbmblocks) { xfs_notice(mp, "realtime geometry sanity check failed"); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 2f2280f4e7fa..eca800e2b879 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -999,7 +999,7 @@ xfs_growfs_rt( nrextents = nrblocks; do_div(nrextents, in->extsize); nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); - nrextslog = xfs_highbit32(nrextents); + nrextslog = xfs_compute_rextslog(nrextents); nrsumlevels = nrextslog + 1; nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks; nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); @@ -1061,7 +1061,7 @@ xfs_growfs_rt( nsbp->sb_rextents = nsbp->sb_rblocks; do_div(nsbp->sb_rextents, nsbp->sb_rextsize); ASSERT(nsbp->sb_rextents != 0); - nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); + nsbp->sb_rextslog = xfs_compute_rextslog(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * From 7d568f9d0f61ef48808cfca16289b213d6f1ea9d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:38 -0700 Subject: [PATCH 15/30] xfs: fix 32-bit truncation in xfs_compute_rextslog [ Upstream commit cf8f0e6c1429be7652869059ea44696b72d5b726 ] It's quite reasonable that some customer somewhere will want to configure a realtime volume with more than 2^32 extents. If they try to do this, the highbit32() call will truncate the upper bits of the xfs_rtbxlen_t and produce the wrong value for rextslog. This in turn causes the rsumlevels to be wrong, which results in a realtime summary file that is the wrong length. Fix that. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_rtbitmap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 37b425ea3fed..8db1243beacc 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1133,13 +1133,15 @@ xfs_rtalloc_extent_is_free( /* * Compute the maximum level number of the realtime summary file, as defined by - * mkfs. The use of highbit32 on a 64-bit quantity is a historic artifact that - * prohibits correct use of rt volumes with more than 2^32 extents. + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. */ uint8_t xfs_compute_rextslog( xfs_rtbxlen_t rtextents) { - return rtextents ? xfs_highbit32(rtextents) : 0; + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); } From 5c29b065246d4c13bd6974706fb88f7a1769ea5e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:39 -0700 Subject: [PATCH 16/30] xfs: don't allow overly small or large realtime volumes [ Upstream commit e14293803f4e84eb23a417b462b56251033b5a66 ] Don't allow realtime volumes that are less than one rt extent long. This has been broken across 4 LTS kernels with nobody noticing, so let's just disable it. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_rtbitmap.h | 13 +++++++++++++ fs/xfs/libxfs/xfs_sb.c | 3 ++- fs/xfs/xfs_rtalloc.c | 2 ++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 6becdc7a48ed..4e49aadf0955 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -73,6 +73,18 @@ int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); +/* Do we support an rt volume having this number of rtextents? */ +static inline bool +xfs_validate_rtextents( + xfs_rtbxlen_t rtextents) +{ + /* No runt rt volumes */ + if (rtextents == 0) + return false; + + return true; +} + #else /* CONFIG_XFS_RT */ # define xfs_rtfree_extent(t,b,l) (-ENOSYS) # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) @@ -81,6 +93,7 @@ uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) # define xfs_compute_rextslog(rtx) (0) +# define xfs_validate_rtextents(rtx) (false) #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 6b87b04d0c6c..04247d1c7523 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -502,7 +502,8 @@ xfs_validate_sb_common( rbmblocks = howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize); - if (sbp->sb_rextents != rexts || + if (!xfs_validate_rtextents(rexts) || + sbp->sb_rextents != rexts || sbp->sb_rextslog != xfs_compute_rextslog(rexts) || sbp->sb_rbmblocks != rbmblocks) { xfs_notice(mp, diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index eca800e2b879..2dcd5cca4ec2 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -998,6 +998,8 @@ xfs_growfs_rt( */ nrextents = nrblocks; do_div(nrextents, in->extsize); + if (!xfs_validate_rtextents(nrextents)) + return -EINVAL; nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); nrextslog = xfs_compute_rextslog(nrextents); nrsumlevels = nrextslog + 1; From 072a9c45d23e6aeed2d6369f49f78d32bc6b8cd0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:40 -0700 Subject: [PATCH 17/30] xfs: remove unused fields from struct xbtree_ifakeroot [ Upstream commit 4c8ecd1cfdd01fb727121035014d9f654a30bdf2 ] Remove these unused fields since nobody uses them. They should have been removed years ago in a different cleanup series from Christoph Hellwig. Fixes: daf83964a3681 ("xfs: move the per-fork nextents fields into struct xfs_ifork") Fixes: f7e67b20ecbbc ("xfs: move the fork format fields into struct xfs_ifork") Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree_staging.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h index f0d2976050ae..5f638f711246 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.h +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -37,12 +37,6 @@ struct xbtree_ifakeroot { /* Number of bytes available for this fork in the inode. */ unsigned int if_fork_size; - - /* Fork format. */ - unsigned int if_format; - - /* Number of records. */ - unsigned int if_extents; }; /* Cursor interactions with fake roots for inode-rooted btrees. */ From ec1d3a6899cd9f54b1f01ea67b0f3b70572c28d5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:41 -0700 Subject: [PATCH 18/30] xfs: recompute growfsrtfree transaction reservation while growing rt volume [ Upstream commit 578bd4ce7100ae34f98c6b0147fe75cfa0dadbac ] While playing with growfs to create a 20TB realtime section on a filesystem that didn't previously have an rt section, I noticed that growfs would occasionally shut down the log due to a transaction reservation overflow. xfs_calc_growrtfree_reservation uses the current size of the realtime summary file (m_rsumsize) to compute the transaction reservation for a growrtfree transaction. The reservations are computed at mount time, which means that m_rsumsize is zero when growfs starts "freeing" the new realtime extents into the rt volume. As a result, the transaction is undersized and fails. Fix this by recomputing the transaction reservations every time we change m_rsumsize. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_rtalloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 2dcd5cca4ec2..7c5134899634 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1070,6 +1070,9 @@ xfs_growfs_rt( nsbp->sb_rbmblocks; nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize); nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks); + /* recompute growfsrt reservation from new rsumsize */ + xfs_trans_resv_calc(nmp, &nmp->m_resv); + /* * Start a transaction, get the log reservation. */ @@ -1153,6 +1156,8 @@ error_cancel: */ mp->m_rsumlevels = nrsumlevels; mp->m_rsumsize = nrsumsize; + /* recompute growfsrt reservation from new rsumsize */ + xfs_trans_resv_calc(mp, &mp->m_resv); error = xfs_trans_commit(tp); if (error) From 87988e80b6b3bdbfd2c102ab99124db6ac543112 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:42 -0700 Subject: [PATCH 19/30] xfs: force all buffers to be written during btree bulk load [ Upstream commit 13ae04d8d45227c2ba51e188daf9fc13d08a1b12 ] While stress-testing online repair of btrees, I noticed periodic assertion failures from the buffer cache about buffers with incorrect DELWRI_Q state. Looking further, I observed this race between the AIL trying to write out a btree block and repair zapping a btree block after the fact: AIL: Repair0: pin buffer X delwri_queue: set DELWRI_Q add to delwri list stale buf X: clear DELWRI_Q does not clear b_list free space X commit delwri_submit # oops Worse yet, I discovered that running the same repair over and over in a tight loop can result in a second race that cause data integrity problems with the repair: AIL: Repair0: Repair1: pin buffer X delwri_queue: set DELWRI_Q add to delwri list stale buf X: clear DELWRI_Q does not clear b_list free space X commit find free space X get buffer rewrite buffer delwri_queue: set DELWRI_Q already on a list, do not add commit BAD: committed tree root before all blocks written delwri_submit # too late now I traced this to my own misunderstanding of how the delwri lists work, particularly with regards to the AIL's buffer list. If a buffer is logged and committed, the buffer can end up on that AIL buffer list. If btree repairs are run twice in rapid succession, it's possible that the first repair will invalidate the buffer and free it before the next time the AIL wakes up. Marking the buffer stale clears DELWRI_Q from the buffer state without removing the buffer from its delwri list. The buffer doesn't know which list it's on, so it cannot know which lock to take to protect the list for a removal. If the second repair allocates the same block, it will then recycle the buffer to start writing the new btree block. Meanwhile, if the AIL wakes up and walks the buffer list, it will ignore the buffer because it can't lock it, and go back to sleep. When the second repair calls delwri_queue to put the buffer on the list of buffers to write before committing the new btree, it will set DELWRI_Q again, but since the buffer hasn't been removed from the AIL's buffer list, it won't add it to the bulkload buffer's list. This is incorrect, because the bulkload caller relies on delwri_submit to ensure that all the buffers have been sent to disk /before/ committing the new btree root pointer. This ordering requirement is required for data consistency. Worse, the AIL won't clear DELWRI_Q from the buffer when it does finally drop it, so the next thread to walk through the btree will trip over a debug assertion on that flag. To fix this, create a new function that waits for the buffer to be removed from any other delwri lists before adding the buffer to the caller's delwri list. By waiting for the buffer to clear both the delwri list and any potential delwri wait list, we can be sure that repair will initiate writes of all buffers and report all write errors back to userspace instead of committing the new structure. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_btree_staging.c | 4 +-- fs/xfs/xfs_buf.c | 44 ++++++++++++++++++++++++++++--- fs/xfs/xfs_buf.h | 1 + 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index dd75e208b543..29e3f8ccb185 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -342,9 +342,7 @@ xfs_btree_bload_drop_buf( if (*bpp == NULL) return; - if (!xfs_buf_delwri_queue(*bpp, buffers_list)) - ASSERT(0); - + xfs_buf_delwri_queue_here(*bpp, buffers_list); xfs_buf_relse(*bpp); *bpp = NULL; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 54c774af6e1c..257945cdf63b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2040,6 +2040,14 @@ error_free: return NULL; } +static inline void +xfs_buf_list_del( + struct xfs_buf *bp) +{ + list_del_init(&bp->b_list); + wake_up_var(&bp->b_list); +} + /* * Cancel a delayed write list. * @@ -2057,7 +2065,7 @@ xfs_buf_delwri_cancel( xfs_buf_lock(bp); bp->b_flags &= ~_XBF_DELWRI_Q; - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); xfs_buf_relse(bp); } } @@ -2110,6 +2118,34 @@ xfs_buf_delwri_queue( return true; } +/* + * Queue a buffer to this delwri list as part of a data integrity operation. + * If the buffer is on any other delwri list, we'll wait for that to clear + * so that the caller can submit the buffer for IO and wait for the result. + * Callers must ensure the buffer is not already on the list. + */ +void +xfs_buf_delwri_queue_here( + struct xfs_buf *bp, + struct list_head *buffer_list) +{ + /* + * We need this buffer to end up on the /caller's/ delwri list, not any + * old list. This can happen if the buffer is marked stale (which + * clears DELWRI_Q) after the AIL queues the buffer to its list but + * before the AIL has a chance to submit the list. + */ + while (!list_empty(&bp->b_list)) { + xfs_buf_unlock(bp); + wait_var_event(&bp->b_list, list_empty(&bp->b_list)); + xfs_buf_lock(bp); + } + + ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); + + xfs_buf_delwri_queue(bp, buffer_list); +} + /* * Compare function is more complex than it needs to be because * the return value is only 32 bits and we are doing comparisons @@ -2172,7 +2208,7 @@ xfs_buf_delwri_submit_buffers( * reference and remove it from the list here. */ if (!(bp->b_flags & _XBF_DELWRI_Q)) { - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); xfs_buf_relse(bp); continue; } @@ -2192,7 +2228,7 @@ xfs_buf_delwri_submit_buffers( list_move_tail(&bp->b_list, wait_list); } else { bp->b_flags |= XBF_ASYNC; - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); } __xfs_buf_submit(bp, false); } @@ -2246,7 +2282,7 @@ xfs_buf_delwri_submit( while (!list_empty(&wait_list)) { bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - list_del_init(&bp->b_list); + xfs_buf_list_del(bp); /* * Wait on the locked buffer, check for errors and unlock and diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 549c60942208..6cf0332ba62c 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -305,6 +305,7 @@ extern void xfs_buf_stale(struct xfs_buf *bp); /* Delayed Write Buffer Routines */ extern void xfs_buf_delwri_cancel(struct list_head *); extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *); +void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl); extern int xfs_buf_delwri_submit(struct list_head *); extern int xfs_buf_delwri_submit_nowait(struct list_head *); extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *); From 6587549b086a4399d3bb5e741ab81f899792671c Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 13 Mar 2025 13:25:43 -0700 Subject: [PATCH 20/30] xfs: initialise di_crc in xfs_log_dinode [ Upstream commit 0573676fdde7ce3829ee6a42a8e5a56355234712 ] Alexander Potapenko report that KMSAN was issuing these warnings: kmalloc-ed xlog buffer of size 512 : ffff88802fc26200 kmalloc-ed xlog buffer of size 368 : ffff88802fc24a00 kmalloc-ed xlog buffer of size 648 : ffff88802b631000 kmalloc-ed xlog buffer of size 648 : ffff88802b632800 kmalloc-ed xlog buffer of size 648 : ffff88802b631c00 xlog_write_iovec: copying 12 bytes from ffff888017ddbbd8 to ffff88802c300400 xlog_write_iovec: copying 28 bytes from ffff888017ddbbe4 to ffff88802c30040c xlog_write_iovec: copying 68 bytes from ffff88802fc26274 to ffff88802c300428 xlog_write_iovec: copying 188 bytes from ffff88802fc262bc to ffff88802c30046c ===================================================== BUG: KMSAN: uninit-value in xlog_write_iovec fs/xfs/xfs_log.c:2227 BUG: KMSAN: uninit-value in xlog_write_full fs/xfs/xfs_log.c:2263 BUG: KMSAN: uninit-value in xlog_write+0x1fac/0x2600 fs/xfs/xfs_log.c:2532 xlog_write_iovec fs/xfs/xfs_log.c:2227 xlog_write_full fs/xfs/xfs_log.c:2263 xlog_write+0x1fac/0x2600 fs/xfs/xfs_log.c:2532 xlog_cil_write_chain fs/xfs/xfs_log_cil.c:918 xlog_cil_push_work+0x30f2/0x44e0 fs/xfs/xfs_log_cil.c:1263 process_one_work kernel/workqueue.c:2630 process_scheduled_works+0x1188/0x1e30 kernel/workqueue.c:2703 worker_thread+0xee5/0x14f0 kernel/workqueue.c:2784 kthread+0x391/0x500 kernel/kthread.c:388 ret_from_fork+0x66/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 Uninit was created at: slab_post_alloc_hook+0x101/0xac0 mm/slab.h:768 slab_alloc_node mm/slub.c:3482 __kmem_cache_alloc_node+0x612/0xae0 mm/slub.c:3521 __do_kmalloc_node mm/slab_common.c:1006 __kmalloc+0x11a/0x410 mm/slab_common.c:1020 kmalloc ./include/linux/slab.h:604 xlog_kvmalloc fs/xfs/xfs_log_priv.h:704 xlog_cil_alloc_shadow_bufs fs/xfs/xfs_log_cil.c:343 xlog_cil_commit+0x487/0x4dc0 fs/xfs/xfs_log_cil.c:1574 __xfs_trans_commit+0x8df/0x1930 fs/xfs/xfs_trans.c:1017 xfs_trans_commit+0x30/0x40 fs/xfs/xfs_trans.c:1061 xfs_create+0x15af/0x2150 fs/xfs/xfs_inode.c:1076 xfs_generic_create+0x4cd/0x1550 fs/xfs/xfs_iops.c:199 xfs_vn_create+0x4a/0x60 fs/xfs/xfs_iops.c:275 lookup_open fs/namei.c:3477 open_last_lookups fs/namei.c:3546 path_openat+0x29ac/0x6180 fs/namei.c:3776 do_filp_open+0x24d/0x680 fs/namei.c:3809 do_sys_openat2+0x1bc/0x330 fs/open.c:1440 do_sys_open fs/open.c:1455 __do_sys_openat fs/open.c:1471 __se_sys_openat fs/open.c:1466 __x64_sys_openat+0x253/0x330 fs/open.c:1466 do_syscall_x64 arch/x86/entry/common.c:51 do_syscall_64+0x4f/0x140 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0x6b arch/x86/entry/entry_64.S:120 Bytes 112-115 of 188 are uninitialized Memory access of size 188 starts at ffff88802fc262bc This is caused by the struct xfs_log_dinode not having the di_crc field initialised. Log recovery never uses this field (it is only present these days for on-disk format compatibility reasons) and so it's value is never checked so nothing in XFS has caught this. Further, none of the uninitialised memory access warning tools have caught this (despite catching other uninit memory accesses in the struct xfs_log_dinode back in 2017!) until recently. Alexander annotated the XFS code to get the dump of the actual bytes that were detected as uninitialised, and from that report it took me about 30s to realise what the issue was. The issue was introduced back in 2016 and every inode that is logged fails to initialise this field. This is no actual bad behaviour caused by this issue - I find it hard to even classify it as a bug... Reported-and-tested-by: Alexander Potapenko Fixes: f8d55aa0523a ("xfs: introduce inode log format object") Signed-off-by: Dave Chinner Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/xfs_inode_item.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 91c847a84e10..2ec23c9af760 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -556,6 +556,9 @@ xfs_inode_to_log_dinode( memset(to->di_pad2, 0, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid); to->di_v3_pad = 0; + + /* dummy value for initialisation */ + to->di_crc = 0; } else { to->di_version = 2; to->di_flushiter = ip->i_flushiter; From 4f4e046caa79b2e401aaa7f65869b8f2feaf129a Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 13 Mar 2025 13:25:44 -0700 Subject: [PATCH 21/30] xfs: add lock protection when remove perag from radix tree [ Upstream commit 07afd3173d0c6d24a47441839a835955ec6cf0d4 ] [ 6.1: resolved conflict in xfs_ag.c ] Take mp->m_perag_lock for deletions from the perag radix tree in xfs_initialize_perag to prevent racing with tagging operations. Lookups are fine - they are RCU protected so already deal with the tree changing shape underneath the lookup - but tagging operations require the tree to be stable while the tags are propagated back up to the root. Right now there's nothing stopping radix tree tagging from operating while a growfs operation is progress and adding/removing new entries into the radix tree. Hence we can have traversals that require a stable tree occurring at the same time we are removing unused entries from the radix tree which causes the shape of the tree to change. Likely this hasn't caused a problem in the past because we are only doing append addition and removal so the active AG part of the tree is not changing shape, but that doesn't mean it is safe. Just making the radix tree modifications serialise against each other is obviously correct. Signed-off-by: Long Li Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Catherine Hoang Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index e03bfeacbed4..e7b011c42b7a 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -345,13 +345,17 @@ xfs_initialize_perag( return 0; out_remove_pag: + spin_lock(&mp->m_perag_lock); radix_tree_delete(&mp->m_perag_tree, index); + spin_unlock(&mp->m_perag_lock); out_free_pag: kmem_free(pag); out_unwind_new_pags: /* unwind any prior newly initialized pags */ for (index = first_initialised; index < agcount; index++) { + spin_lock(&mp->m_perag_lock); pag = radix_tree_delete(&mp->m_perag_tree, index); + spin_unlock(&mp->m_perag_lock); if (!pag) break; xfs_buf_hash_destroy(pag); From 6c20890ebf2d7472a7c4b4baa401c892a4988442 Mon Sep 17 00:00:00 2001 From: Long Li Date: Thu, 13 Mar 2025 13:25:45 -0700 Subject: [PATCH 22/30] xfs: fix perag leak when growfs fails [ Upstream commit 7823921887750b39d02e6b44faafdd1cc617c651 ] [ 6.1: resolved conflicts in xfs_ag.c and xfs_ag.h ] During growfs, if new ag in memory has been initialized, however sb_agcount has not been updated, if an error occurs at this time it will cause perag leaks as follows, these new AGs will not been freed during umount , because of these new AGs are not visible(that is included in mp->m_sb.sb_agcount). unreferenced object 0xffff88810be40200 (size 512): comm "xfs_growfs", pid 857, jiffies 4294909093 hex dump (first 32 bytes): 00 c0 c1 05 81 88 ff ff 04 00 00 00 00 00 00 00 ................ 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace (crc 381741e2): [] __kmalloc+0x386/0x4f0 [] kmem_alloc+0xb5/0x2f0 [] xfs_initialize_perag+0xc5/0x810 [] xfs_growfs_data+0x9bc/0xbc0 [] xfs_file_ioctl+0x5fe/0x14d0 [] __x64_sys_ioctl+0x144/0x1c0 [] do_syscall_64+0x3f/0xe0 [] entry_SYSCALL_64_after_hwframe+0x62/0x6a unreferenced object 0xffff88810be40800 (size 512): comm "xfs_growfs", pid 857, jiffies 4294909093 hex dump (first 32 bytes): 20 00 00 00 00 00 00 00 57 ef be dc 00 00 00 00 .......W....... 10 08 e4 0b 81 88 ff ff 10 08 e4 0b 81 88 ff ff ................ backtrace (crc bde50e2d): [] __kmalloc_node+0x3da/0x540 [] kvmalloc_node+0x99/0x160 [] bucket_table_alloc.isra.0+0x5f/0x400 [] rhashtable_init+0x405/0x760 [] xfs_initialize_perag+0x3a3/0x810 [] xfs_growfs_data+0x9bc/0xbc0 [] xfs_file_ioctl+0x5fe/0x14d0 [] __x64_sys_ioctl+0x144/0x1c0 [] do_syscall_64+0x3f/0xe0 [] entry_SYSCALL_64_after_hwframe+0x62/0x6a Factor out xfs_free_unused_perag_range() from xfs_initialize_perag(), used for freeing unused perag within a specified range in error handling, included in the error path of the growfs failure. Fixes: 1c1c6ebcf528 ("xfs: Replace per-ag array with a radix tree") Signed-off-by: Long Li Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Catherine Hoang Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_ag.c | 34 +++++++++++++++++++++++++--------- fs/xfs/libxfs/xfs_ag.h | 3 +++ fs/xfs/xfs_fsops.c | 5 ++++- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index e7b011c42b7a..9743fa5b5388 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -259,6 +259,30 @@ xfs_agino_range( return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); } +/* + * Free perag within the specified AG range, it is only used to free unused + * perags under the error handling path. + */ +void +xfs_free_unused_perag_range( + struct xfs_mount *mp, + xfs_agnumber_t agstart, + xfs_agnumber_t agend) +{ + struct xfs_perag *pag; + xfs_agnumber_t index; + + for (index = agstart; index < agend; index++) { + spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, index); + spin_unlock(&mp->m_perag_lock); + if (!pag) + break; + xfs_buf_hash_destroy(pag); + kmem_free(pag); + } +} + int xfs_initialize_perag( struct xfs_mount *mp, @@ -352,15 +376,7 @@ out_free_pag: kmem_free(pag); out_unwind_new_pags: /* unwind any prior newly initialized pags */ - for (index = first_initialised; index < agcount; index++) { - spin_lock(&mp->m_perag_lock); - pag = radix_tree_delete(&mp->m_perag_tree, index); - spin_unlock(&mp->m_perag_lock); - if (!pag) - break; - xfs_buf_hash_destroy(pag); - kmem_free(pag); - } + xfs_free_unused_perag_range(mp, first_initialised, agcount); return error; } diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 191b22b9a35b..eb84af1c8628 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -106,6 +106,9 @@ struct xfs_perag { #endif /* __KERNEL__ */ }; + +void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart, + xfs_agnumber_t agend); int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 77b14f788214..96e9d64fbe62 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -153,7 +153,7 @@ xfs_growfs_data_private( (delta > 0 ? XFS_GROWFS_SPACE_RES(mp) : -delta), 0, XFS_TRANS_RESERVE, &tp); if (error) - return error; + goto out_free_unused_perag; last_pag = xfs_perag_get(mp, oagcount - 1); if (delta > 0) { @@ -227,6 +227,9 @@ xfs_growfs_data_private( out_trans_cancel: xfs_trans_cancel(tp); +out_free_unused_perag: + if (nagcount > oagcount) + xfs_free_unused_perag_range(mp, oagcount, nagcount); return error; } From a904118d7b7b13ed9e7df3ec346e975782a97f11 Mon Sep 17 00:00:00 2001 From: Jiachen Zhang Date: Thu, 13 Mar 2025 13:25:46 -0700 Subject: [PATCH 23/30] xfs: ensure logflagsp is initialized in xfs_bmap_del_extent_real [ Upstream commit e6af9c98cbf0164a619d95572136bfb54d482dd6 ] In the case of returning -ENOSPC, ensure logflagsp is initialized by 0. Otherwise the caller __xfs_bunmapi will set uninitialized illegal tmp_logflags value into xfs log, which might cause unpredictable error in the log recovery procedure. Also, remove the flags variable and set the *logflagsp directly, so that the code should be more robust in the long run. Fixes: 1b24b633aafe ("xfs: move some more code into xfs_bmap_del_extent_real") Signed-off-by: Jiachen Zhang Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_bmap.c | 73 +++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 42 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c99fd7fe242e..2a3b78032cb0 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4999,7 +4999,6 @@ xfs_bmap_del_extent_real( xfs_fileoff_t del_endoff; /* first offset past del */ int do_fx; /* free extent at end of routine */ int error; /* error return value */ - int flags = 0;/* inode logging flags */ struct xfs_bmbt_irec got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ @@ -5012,6 +5011,8 @@ xfs_bmap_del_extent_real( uint32_t state = xfs_bmap_fork_to_state(whichfork); struct xfs_bmbt_irec old; + *logflagsp = 0; + mp = ip->i_mount; XFS_STATS_INC(mp, xs_del_exlist); @@ -5024,7 +5025,6 @@ xfs_bmap_del_extent_real( ASSERT(got_endoff >= del_endoff); ASSERT(!isnullstartblock(got.br_startblock)); qfield = 0; - error = 0; /* * If it's the case where the directory code is running with no block @@ -5040,13 +5040,13 @@ xfs_bmap_del_extent_real( del->br_startoff > got.br_startoff && del_endoff < got_endoff) return -ENOSPC; - flags = XFS_ILOG_CORE; + *logflagsp = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { if (!(bflags & XFS_BMAPI_REMAP)) { error = xfs_rtfree_blocks(tp, del->br_startblock, del->br_blockcount); if (error) - goto done; + return error; } do_fx = 0; @@ -5061,11 +5061,9 @@ xfs_bmap_del_extent_real( if (cur) { error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) - goto done; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; - } + return error; + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } if (got.br_startoff == del->br_startoff) @@ -5082,17 +5080,15 @@ xfs_bmap_del_extent_real( xfs_iext_prev(ifp, icur); ifp->if_nextents--; - flags |= XFS_ILOG_CORE; + *logflagsp |= XFS_ILOG_CORE; if (!cur) { - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); break; } if ((error = xfs_btree_delete(cur, &i))) - goto done; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; - } + return error; + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; break; case BMAP_LEFT_FILLING: /* @@ -5103,12 +5099,12 @@ xfs_bmap_del_extent_real( got.br_blockcount -= del->br_blockcount; xfs_iext_update_extent(ip, state, icur, &got); if (!cur) { - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); break; } error = xfs_bmbt_update(cur, &got); if (error) - goto done; + return error; break; case BMAP_RIGHT_FILLING: /* @@ -5117,12 +5113,12 @@ xfs_bmap_del_extent_real( got.br_blockcount -= del->br_blockcount; xfs_iext_update_extent(ip, state, icur, &got); if (!cur) { - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); break; } error = xfs_bmbt_update(cur, &got); if (error) - goto done; + return error; break; case 0: /* @@ -5139,18 +5135,18 @@ xfs_bmap_del_extent_real( new.br_state = got.br_state; new.br_startblock = del_endblock; - flags |= XFS_ILOG_CORE; + *logflagsp |= XFS_ILOG_CORE; if (cur) { error = xfs_bmbt_update(cur, &got); if (error) - goto done; + return error; error = xfs_btree_increment(cur, 0, &i); if (error) - goto done; + return error; cur->bc_rec.b = new; error = xfs_btree_insert(cur, &i); if (error && error != -ENOSPC) - goto done; + return error; /* * If get no-space back from btree insert, it tried a * split, and we have a zero block reservation. Fix up @@ -5163,33 +5159,28 @@ xfs_bmap_del_extent_real( */ error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) - goto done; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; - } + return error; + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; /* * Update the btree record back * to the original value. */ error = xfs_bmbt_update(cur, &old); if (error) - goto done; + return error; /* * Reset the extent record back * to the original value. */ xfs_iext_update_extent(ip, state, icur, &old); - flags = 0; - error = -ENOSPC; - goto done; - } - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; + *logflagsp = 0; + return -ENOSPC; } + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } else - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); ifp->if_nextents++; xfs_iext_next(ifp, icur); @@ -5213,7 +5204,7 @@ xfs_bmap_del_extent_real( ((bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN)); if (error) - goto done; + return error; } } @@ -5228,9 +5219,7 @@ xfs_bmap_del_extent_real( if (qfield && !(bflags & XFS_BMAPI_REMAP)) xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); -done: - *logflagsp = flags; - return error; + return 0; } /* From 858c9d5278a2f427d6b5b13901fc0a991374558c Mon Sep 17 00:00:00 2001 From: Zhang Tianci Date: Thu, 13 Mar 2025 13:25:47 -0700 Subject: [PATCH 24/30] xfs: update dir3 leaf block metadata after swap [ Upstream commit 5759aa4f956034b289b0ae2c99daddfc775442e1 ] xfs_da3_swap_lastblock() copy the last block content to the dead block, but do not update the metadata in it. We need update some metadata for some kinds of type block, such as dir3 leafn block records its blkno, we shall update it to the dead block blkno. Otherwise, before write the xfs_buf to disk, the verify_write() will fail in blk_hdr->blkno != xfs_buf->b_bn, then xfs will be shutdown. We will get this warning: XFS (dm-0): Metadata corruption detected at xfs_dir3_leaf_verify+0xa8/0xe0 [xfs], xfs_dir3_leafn block 0x178 XFS (dm-0): Unmount and run xfs_repair XFS (dm-0): First 128 bytes of corrupted metadata buffer: 00000000e80f1917: 00 80 00 0b 00 80 00 07 3d ff 00 00 00 00 00 00 ........=....... 000000009604c005: 00 00 00 00 00 00 01 a0 00 00 00 00 00 00 00 00 ................ 000000006b6fb2bf: e4 44 e3 97 b5 64 44 41 8b 84 60 0e 50 43 d9 bf .D...dDA..`.PC.. 00000000678978a2: 00 00 00 00 00 00 00 83 01 73 00 93 00 00 00 00 .........s...... 00000000b28b247c: 99 29 1d 38 00 00 00 00 99 29 1d 40 00 00 00 00 .).8.....).@.... 000000002b2a662c: 99 29 1d 48 00 00 00 00 99 49 11 00 00 00 00 00 .).H.....I...... 00000000ea2ffbb8: 99 49 11 08 00 00 45 25 99 49 11 10 00 00 48 fe .I....E%.I....H. 0000000069e86440: 99 49 11 18 00 00 4c 6b 99 49 11 20 00 00 4d 97 .I....Lk.I. ..M. XFS (dm-0): xfs_do_force_shutdown(0x8) called from line 1423 of file fs/xfs/xfs_buf.c. Return address = 00000000c0ff63c1 XFS (dm-0): Corruption of in-memory data detected. Shutting down filesystem XFS (dm-0): Please umount the filesystem and rectify the problem(s) >>From the log above, we know xfs_buf->b_no is 0x178, but the block's hdr record its blkno is 0x1a0. Fixes: 24df33b45ecf ("xfs: add CRC checking to dir2 leaf blocks") Signed-off-by: Zhang Tianci Suggested-by: Dave Chinner Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_da_btree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index e576560b46e9..282c7cf032f4 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -2316,10 +2316,17 @@ xfs_da3_swap_lastblock( return error; /* * Copy the last block into the dead buffer and log it. + * On CRC-enabled file systems, also update the stamped in blkno. */ memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); + if (xfs_has_crc(mp)) { + struct xfs_da3_blkinfo *da3 = dead_buf->b_addr; + + da3->blkno = cpu_to_be64(xfs_buf_daddr(dead_buf)); + } xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); dead_info = dead_buf->b_addr; + /* * Get values from the moved block. */ From 23b8ab0c8ef9a934e700b72179d163ce9005058c Mon Sep 17 00:00:00 2001 From: Andrey Albershteyn Date: Thu, 13 Mar 2025 13:25:48 -0700 Subject: [PATCH 25/30] xfs: reset XFS_ATTR_INCOMPLETE filter on node removal [ Upstream commit 82ef1a5356572219f41f9123ca047259a77bd67b ] In XFS_DAS_NODE_REMOVE_ATTR case, xfs_attr_mode_remove_attr() sets filter to XFS_ATTR_INCOMPLETE. The filter is then reset in xfs_attr_complete_op() if XFS_DA_OP_REPLACE operation is performed. The filter is not reset though if XFS just removes the attribute (args->value == NULL) with xfs_attr_defer_remove(). attr code goes to XFS_DAS_DONE state. Fix this by always resetting XFS_ATTR_INCOMPLETE filter. The replace operation already resets this filter in anyway and others are completed at this step hence don't need it. Fixes: fdaf1bb3cafc ("xfs: ATTR_REPLACE algorithm with LARP enabled needs rework") Signed-off-by: Andrey Albershteyn Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_attr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index e28d93d232de..32d350e97e0f 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -421,10 +421,10 @@ xfs_attr_complete_op( bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; args->op_flags &= ~XFS_DA_OP_REPLACE; - if (do_replace) { - args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + if (do_replace) return replace_state; - } + return XFS_DAS_DONE; } From dd889e6a4ed6306c1e13d56c57c6ca0cbe4a5bed Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 13 Mar 2025 13:25:49 -0700 Subject: [PATCH 26/30] xfs: remove conditional building of rt geometry validator functions [ Upstream commit 881f78f472556ed05588172d5b5676b48dc48240 ] [ 6.1: used 6.6 backport to minimize conflicts ] [backport: resolve merge conflicts due to refactoring rtbitmap/summary macros and accessors] I mistakenly turned off CONFIG_XFS_RT in the Kconfig file for arm64 variant of the djwong-wtf git branch. Unfortunately, it took me a good hour to figure out that RT wasn't built because this is what got printed to dmesg: XFS (sda2): realtime geometry sanity check failed XFS (sda2): Metadata corruption detected at xfs_sb_read_verify+0x170/0x190 [xfs], xfs_sb block 0x0 Whereas I would have expected: XFS (sda2): Not built with CONFIG_XFS_RT XFS (sda2): RT mount failed The root cause of these problems is the conditional compilation of the new functions xfs_validate_rtextents and xfs_compute_rextslog that I introduced in the two commits listed below. The !RT versions of these functions return false and 0, respectively, which causes primary superblock validation to fail, which explains the first message. Move the two functions to other parts of libxfs that are not conditionally defined by CONFIG_XFS_RT and remove the broken stubs so that validation works again. Fixes: e14293803f4e ("xfs: don't allow overly small or large realtime volumes") Fixes: a6a38f309afc ("xfs: make rextslog computation consistent with mkfs") Signed-off-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R Signed-off-by: Catherine Hoang Acked-by: Darrick J. Wong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Leah Rumancik Acked-by: "Darrick J. Wong" Signed-off-by: Greg Kroah-Hartman --- fs/xfs/libxfs/xfs_rtbitmap.c | 14 -------------- fs/xfs/libxfs/xfs_rtbitmap.h | 16 ---------------- fs/xfs/libxfs/xfs_sb.c | 14 ++++++++++++++ fs/xfs/libxfs/xfs_sb.h | 2 ++ fs/xfs/libxfs/xfs_types.h | 12 ++++++++++++ fs/xfs/scrub/rtbitmap.c | 1 + 6 files changed, 29 insertions(+), 30 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 8db1243beacc..760172a65aff 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1131,17 +1131,3 @@ xfs_rtalloc_extent_is_free( return 0; } -/* - * Compute the maximum level number of the realtime summary file, as defined by - * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct - * use of rt volumes with more than 2^32 extents. - */ -uint8_t -xfs_compute_rextslog( - xfs_rtbxlen_t rtextents) -{ - if (!rtextents) - return 0; - return xfs_highbit64(rtextents); -} - diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index 4e49aadf0955..b89712983347 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -71,20 +71,6 @@ xfs_rtfree_extent( int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno, xfs_filblks_t rtlen); -uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); - -/* Do we support an rt volume having this number of rtextents? */ -static inline bool -xfs_validate_rtextents( - xfs_rtbxlen_t rtextents) -{ - /* No runt rt volumes */ - if (rtextents == 0) - return false; - - return true; -} - #else /* CONFIG_XFS_RT */ # define xfs_rtfree_extent(t,b,l) (-ENOSYS) # define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS) @@ -92,8 +78,6 @@ xfs_validate_rtextents( # define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS) # define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS) # define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS) -# define xfs_compute_rextslog(rtx) (0) -# define xfs_validate_rtextents(rtx) (false) #endif /* CONFIG_XFS_RT */ #endif /* __XFS_RTBITMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 04247d1c7523..90ed55cd3d10 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -1367,3 +1367,17 @@ xfs_validate_stripe_geometry( } return true; } + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 19134b23c10b..2e8e8d63d4eb 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool silent); +uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 7023e4a79f87..42fed04f038d 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -228,4 +228,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); +/* Do we support an rt volume having this number of rtextents? */ +static inline bool +xfs_validate_rtextents( + xfs_rtbxlen_t rtextents) +{ + /* No runt rt volumes */ + if (rtextents == 0) + return false; + + return true; +} + #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index faf6e0890d44..fad7c353ada6 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -14,6 +14,7 @@ #include "xfs_rtbitmap.h" #include "xfs_inode.h" #include "xfs_bmap.h" +#include "xfs_sb.h" #include "scrub/scrub.h" #include "scrub/common.h" From 24af158fe2fab10caa54098654e7a57ead3d86b5 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:22 +0100 Subject: [PATCH 27/30] Input: i8042 - swap old quirk combination with new quirk for NHxxRZQ commit 729d163232971672d0f41b93c02092fb91f0e758 upstream. Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. With the old i8042 quirks this devices keyboard is sometimes laggy after resume. With the new quirk this issue doesn't happen. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-1-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 34d1f07ea4c3..ba589735566d 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1209,18 +1209,10 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* - * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes - * the keyboard very laggy for ~5 seconds after boot and - * sometimes also after resume. - * However both are required for the keyboard to not fail - * completely sometimes after boot or resume. - */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NHxxRZQ"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { From c08785b0bd818c7be951ed259503dbae22d8807d Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:23 +0100 Subject: [PATCH 28/30] Input: i8042 - add required quirks for missing old boardnames commit 9ed468e17d5b80e7116fd35842df3648e808ae47 upstream. Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. The PB71RD keyboard is sometimes laggy after resume and the PC70DR, PB51RF, P640RE, and PCX0DX_GN20 keyboard is sometimes unresponsive after resume. This quirk fixes that. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-2-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index ba589735566d..2e2dbf4cff24 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1260,6 +1260,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "P640RE"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, { /* * This is only a partial board_name and might be followed by @@ -1335,6 +1341,24 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PB51RF"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PB71RD"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PC70DR"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), @@ -1342,6 +1366,12 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "PCX0DX_GN20"), + }, + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) + }, /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */ { .matches = { From e2ff9a5f7ac7cb299f20fa13f11bd74052bd786e Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:24 +0100 Subject: [PATCH 29/30] Input: i8042 - swap old quirk combination with new quirk for several devices commit 75ee4ebebbbe8dc4b55ba37f388924fa96bf1564 upstream. Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. While the old quirk combination did not show negative effects on these devices specifically, the new quirk works just as well and seems more stable in general. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-3-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 40 ++++++++++----------------- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 2e2dbf4cff24..a64e34dec02d 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1080,16 +1080,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), DMI_MATCH(DMI_BOARD_NAME, "AURA1501"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "TUXEDO"), DMI_MATCH(DMI_BOARD_NAME, "EDUBOOK1502"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* Mivvy M310 */ @@ -1171,8 +1169,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "LAPQC71A"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1205,8 +1202,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1218,8 +1214,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, /* * At least one modern Clevo barebone has the touchpad connected both @@ -1235,17 +1230,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NS50MU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | - SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | - SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_NOAUX | + SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NS50_70MU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOAUX | SERIO_QUIRK_NOMUX | - SERIO_QUIRK_RESET_ALWAYS | SERIO_QUIRK_NOLOOP | - SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_NOAUX | + SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1319,8 +1312,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RS"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1338,8 +1330,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PB50_70DFx,DDx"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1363,8 +1354,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "PCX0DX"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1383,15 +1373,13 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170SM"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "X170KM-G"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* From 870e3066fedb4199038ddce3a3d80ba913d55b22 Mon Sep 17 00:00:00 2001 From: Werner Sembach Date: Sat, 22 Feb 2025 00:01:25 +0100 Subject: [PATCH 30/30] Input: i8042 - swap old quirk combination with new quirk for more devices commit d85862ccca452eeb19329e9f4f9a6ce1d1e53561 upstream. Some older Clevo barebones have problems like no or laggy keyboard after resume or boot which can be fixed with the SERIO_QUIRK_FORCENORESTORE quirk. We could not activly retest these devices because we no longer have them in our archive, but based on the other old Clevo barebones we tested where the new quirk had the same or a better behaviour I think it would be good to apply it on these too. Cc: stable@vger.kernel.org Signed-off-by: Werner Sembach Link: https://lore.kernel.org/r/20250221230137.70292-4-wse@tuxedocomputers.com Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/serio/i8042-acpipnpio.h | 31 +++++++++------------------ 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index a64e34dec02d..8813db7eec39 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -1157,9 +1157,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, /* * A lot of modern Clevo barebones have touchpad and/or keyboard issues - * after suspend fixable with nomux + reset + noloop + nopnp. Luckily, - * none of them have an external PS/2 port so this can safely be set for - * all of them. + * after suspend fixable with the forcenorestore quirk. * Clevo barebones come with board_vendor and/or system_vendor set to * either the very generic string "Notebook" and/or a different value * for each individual reseller. The only somewhat universal way to @@ -1175,22 +1173,19 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "LAPQC71B"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "N140CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { DMI_MATCH(DMI_BOARD_NAME, "N141CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1250,8 +1245,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_BOARD_NAME, "NJ50_70CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1268,16 +1262,14 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65xH"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* Clevo P650RS, 650RP6, Sager NP8152-S, and others */ .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65xRP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1288,8 +1280,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_P67H"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1300,8 +1291,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P65_67RP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { /* @@ -1323,8 +1313,7 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { .matches = { DMI_MATCH(DMI_PRODUCT_NAME, "P67xRP"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = {