From b44b1126279b60597f96bbe77507b1650f88a969 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Dec 2009 18:12:29 +0000 Subject: [PATCH 1/4] xfs: check for not fully initialized inodes in xfs_ireclaim Add an assert for inodes not added to the inode cache in xfs_ireclaim, to make sure we're not going to introduce something like the famous nfsd inode cache bug again. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/xfs_iget.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index f5c904a10c1..0de36c2a46f 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -478,17 +478,21 @@ xfs_ireclaim( { struct xfs_mount *mp = ip->i_mount; struct xfs_perag *pag; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); XFS_STATS_INC(xs_ig_reclaims); /* - * Remove the inode from the per-AG radix tree. It doesn't matter - * if it was never added to it because radix_tree_delete can deal - * with that case just fine. + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. */ pag = xfs_get_perag(mp, ip->i_ino); write_lock(&pag->pag_ici_lock); - radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino)); + if (!radix_tree_delete(&pag->pag_ici_root, agino)) + ASSERT(0); write_unlock(&pag->pag_ici_lock); xfs_put_perag(mp, pag); From 2ee1abad73a12df5521cd3f017f081f1f684a361 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 24 Nov 2009 18:03:15 +0000 Subject: [PATCH 2/4] xfs: improve metadata I/O merging in the elevator Change all async metadata buffers to use [READ|WRITE]_META I/O types so that the I/O doesn't get issued immediately. This allows merging of adjacent metadata requests but still prioritises them over bulk data. This shows a 10-15% improvement in sequential create speed of small files. Don't include the log buffers in this classification - leave them as sync types so they are issued immediately. Signed-off-by: Dave Chinner Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 6 +++++- fs/xfs/linux-2.6/xfs_buf.h | 1 + fs/xfs/xfs_log.c | 2 ++ include/linux/fs.h | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b4c7d4248aa..162359b664c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1149,10 +1149,14 @@ _xfs_buf_ioapply( if (bp->b_flags & XBF_ORDERED) { ASSERT(!(bp->b_flags & XBF_READ)); rw = WRITE_BARRIER; - } else if (bp->b_flags & _XBF_RUN_QUEUES) { + } else if (bp->b_flags & XBF_LOG_BUFFER) { ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); bp->b_flags &= ~_XBF_RUN_QUEUES; rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; + } else if (bp->b_flags & _XBF_RUN_QUEUES) { + ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); + bp->b_flags &= ~_XBF_RUN_QUEUES; + rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; } else { rw = (bp->b_flags & XBF_WRITE) ? WRITE : (bp->b_flags & XBF_READ_AHEAD) ? READA : READ; diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a509f4addc2..a34c7b54822 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -55,6 +55,7 @@ typedef enum { XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ XBF_ORDERED = (1 << 11), /* use ordered writes */ XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ + XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */ /* flags used only as arguments to access routines */ XBF_LOCK = (1 << 14), /* lock requested */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 4cb1792040e..600b5b06aae 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1441,6 +1441,7 @@ xlog_sync(xlog_t *log, XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); + bp->b_flags |= XBF_LOG_BUFFER; /* * Do an ordered write for the log block. * Its unnecessary to flush the first split block in the log wrap case. @@ -1478,6 +1479,7 @@ xlog_sync(xlog_t *log, XFS_BUF_ZEROFLAGS(bp); XFS_BUF_BUSY(bp); XFS_BUF_ASYNC(bp); + bp->b_flags |= XBF_LOG_BUFFER; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) XFS_BUF_ORDERED(bp); dptr = XFS_BUF_PTR(bp); diff --git a/include/linux/fs.h b/include/linux/fs.h index b23a7018eb9..cf7fc8a7fe6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -152,6 +152,7 @@ struct inodes_stat_t { #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) +#define WRITE_META (WRITE | (1 << BIO_RW_META)) #define SWRITE_SYNC_PLUG \ (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) From a5f9be58c2b87106100a6053d09b1f9f8d551c6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Dec 2009 10:19:07 +0000 Subject: [PATCH 3/4] xfs: kill xfs_bmbt_rec_32/64 types For a long time we've always stored bmap btree records in the 64bit format, so kill off the dead 32bit type, and make sure the 64bit type is named just xfs_bmbt_rec everywhere, without any size postfix. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Alex Elder --- fs/xfs/xfs_bmap_btree.h | 14 +++----------- fs/xfs/xfs_inode_item.h | 6 +++--- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index 5549d495947..cf07ca7c22e 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block { #define BMBT_STARTBLOCK_BITLEN 52 #define BMBT_BLOCKCOUNT_BITLEN 21 - -#define BMBT_USE_64 1 - -typedef struct xfs_bmbt_rec_32 -{ - __uint32_t l0, l1, l2, l3; -} xfs_bmbt_rec_32_t; -typedef struct xfs_bmbt_rec_64 -{ +typedef struct xfs_bmbt_rec { __be64 l0, l1; -} xfs_bmbt_rec_64_t; +} xfs_bmbt_rec_t; typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */ -typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t; +typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; typedef struct xfs_bmbt_rec_host { __uint64_t l0, l1; diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index 65bae4c9b8b..cc8df1ac778 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w) #ifdef __KERNEL__ struct xfs_buf; -struct xfs_bmbt_rec_64; +struct xfs_bmbt_rec; struct xfs_inode; struct xfs_mount; @@ -140,9 +140,9 @@ typedef struct xfs_inode_log_item { unsigned short ili_flags; /* misc flags */ unsigned short ili_logged; /* flushed logged data */ unsigned int ili_last_fields; /* fields when flushed */ - struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged + struct xfs_bmbt_rec *ili_extents_buf; /* array of logged data exts */ - struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged + struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged attr exts */ unsigned int ili_pushbuf_flag; /* one bit used in push_ail */ From 3fc98b1ac036675b95f6e3fafd5ef147b97d4d30 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 14 Dec 2009 23:11:57 +0000 Subject: [PATCH 4/4] XFS: Free buffer pages array unconditionally The code in xfs_free_buf() only attempts to free the b_pages array if the buffer is a page cache backed or page allocated buffer. The extra log buffer that is used when the log wraps uses pages that are allocated to a different log buffer, but it still has a b_pages array allocated when those pages are associated to with the extra buffer in xfs_buf_associate_memory. Hence we need to always attempt to free the b_pages array when tearing down a buffer, not just on buffers that are explicitly marked as page bearing buffers. This fixes a leak detected by the kernel memory leak code. Signed-off-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/linux-2.6/xfs_buf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 162359b664c..77b8be81c76 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -292,6 +292,7 @@ _xfs_buf_free_pages( { if (bp->b_pages != bp->b_page_array) { kmem_free(bp->b_pages); + bp->b_pages = NULL; } } @@ -323,9 +324,8 @@ xfs_buf_free( ASSERT(!PagePrivate(page)); page_cache_release(page); } - _xfs_buf_free_pages(bp); } - + _xfs_buf_free_pages(bp); xfs_buf_deallocate(bp); }