mirror of
https://github.com/adulau/aha.git
synced 2024-12-29 04:06:22 +00:00
Btrfs: Data ordered fixes
* In btrfs_delete_inode, wait for ordered extents after calling truncate_inode_pages. This is much faster, and more correct * Properly clear out the PageChecked bit everywhere we redirty the page. * Change the writepage fixup handler to lock the page range and check to see if an ordered extent had been inserted since the improperly dirtied page was discovered * Wait for ordered extents outside the transaction. This isn't required for locking rules but does improve transaction latencies * Reduce contention on the alloc_mutex by dropping it while incrementing refs on a node/leaf and while dropping refs on a leaf. Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
e5a2217ef6
commit
4a09675279
4 changed files with 43 additions and 10 deletions
|
@ -934,7 +934,6 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||||
if (!root->ref_cows)
|
if (!root->ref_cows)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
mutex_lock(&root->fs_info->alloc_mutex);
|
|
||||||
level = btrfs_header_level(buf);
|
level = btrfs_header_level(buf);
|
||||||
nritems = btrfs_header_nritems(buf);
|
nritems = btrfs_header_nritems(buf);
|
||||||
for (i = 0; i < nritems; i++) {
|
for (i = 0; i < nritems; i++) {
|
||||||
|
@ -951,29 +950,36 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||||
disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
|
disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi);
|
||||||
if (disk_bytenr == 0)
|
if (disk_bytenr == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
mutex_lock(&root->fs_info->alloc_mutex);
|
||||||
ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
|
ret = __btrfs_inc_extent_ref(trans, root, disk_bytenr,
|
||||||
btrfs_file_extent_disk_num_bytes(buf, fi),
|
btrfs_file_extent_disk_num_bytes(buf, fi),
|
||||||
root->root_key.objectid, trans->transid,
|
root->root_key.objectid, trans->transid,
|
||||||
key.objectid, key.offset);
|
key.objectid, key.offset);
|
||||||
|
mutex_unlock(&root->fs_info->alloc_mutex);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
faili = i;
|
faili = i;
|
||||||
|
WARN_ON(1);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
bytenr = btrfs_node_blockptr(buf, i);
|
bytenr = btrfs_node_blockptr(buf, i);
|
||||||
btrfs_node_key_to_cpu(buf, &key, i);
|
btrfs_node_key_to_cpu(buf, &key, i);
|
||||||
|
|
||||||
|
mutex_lock(&root->fs_info->alloc_mutex);
|
||||||
ret = __btrfs_inc_extent_ref(trans, root, bytenr,
|
ret = __btrfs_inc_extent_ref(trans, root, bytenr,
|
||||||
btrfs_level_size(root, level - 1),
|
btrfs_level_size(root, level - 1),
|
||||||
root->root_key.objectid,
|
root->root_key.objectid,
|
||||||
trans->transid,
|
trans->transid,
|
||||||
level - 1, key.objectid);
|
level - 1, key.objectid);
|
||||||
|
mutex_unlock(&root->fs_info->alloc_mutex);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
faili = i;
|
faili = i;
|
||||||
|
WARN_ON(1);
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mutex_unlock(&root->fs_info->alloc_mutex);
|
|
||||||
return 0;
|
return 0;
|
||||||
fail:
|
fail:
|
||||||
WARN_ON(1);
|
WARN_ON(1);
|
||||||
|
@ -1004,7 +1010,6 @@ fail:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
mutex_unlock(&root->fs_info->alloc_mutex);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2180,6 +2185,8 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
|
||||||
leaf_owner = btrfs_header_owner(leaf);
|
leaf_owner = btrfs_header_owner(leaf);
|
||||||
leaf_generation = btrfs_header_generation(leaf);
|
leaf_generation = btrfs_header_generation(leaf);
|
||||||
|
|
||||||
|
mutex_unlock(&root->fs_info->alloc_mutex);
|
||||||
|
|
||||||
for (i = 0; i < nritems; i++) {
|
for (i = 0; i < nritems; i++) {
|
||||||
u64 disk_bytenr;
|
u64 disk_bytenr;
|
||||||
|
|
||||||
|
@ -2197,12 +2204,17 @@ static int noinline drop_leaf_ref(struct btrfs_trans_handle *trans,
|
||||||
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
|
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
|
||||||
if (disk_bytenr == 0)
|
if (disk_bytenr == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
mutex_lock(&root->fs_info->alloc_mutex);
|
||||||
ret = __btrfs_free_extent(trans, root, disk_bytenr,
|
ret = __btrfs_free_extent(trans, root, disk_bytenr,
|
||||||
btrfs_file_extent_disk_num_bytes(leaf, fi),
|
btrfs_file_extent_disk_num_bytes(leaf, fi),
|
||||||
leaf_owner, leaf_generation,
|
leaf_owner, leaf_generation,
|
||||||
key.objectid, key.offset, 0);
|
key.objectid, key.offset, 0);
|
||||||
|
mutex_unlock(&root->fs_info->alloc_mutex);
|
||||||
BUG_ON(ret);
|
BUG_ON(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mutex_lock(&root->fs_info->alloc_mutex);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
|
||||||
for (i = 0; i < num_pages; i++) {
|
for (i = 0; i < num_pages; i++) {
|
||||||
if (!pages[i])
|
if (!pages[i])
|
||||||
break;
|
break;
|
||||||
|
ClearPageChecked(pages[i]);
|
||||||
unlock_page(pages[i]);
|
unlock_page(pages[i]);
|
||||||
mark_page_accessed(pages[i]);
|
mark_page_accessed(pages[i]);
|
||||||
page_cache_release(pages[i]);
|
page_cache_release(pages[i]);
|
||||||
|
|
|
@ -418,7 +418,7 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
|
||||||
|
|
||||||
fixup = container_of(work, struct btrfs_writepage_fixup, work);
|
fixup = container_of(work, struct btrfs_writepage_fixup, work);
|
||||||
page = fixup->page;
|
page = fixup->page;
|
||||||
|
again:
|
||||||
lock_page(page);
|
lock_page(page);
|
||||||
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
|
if (!page->mapping || !PageDirty(page) || !PageChecked(page)) {
|
||||||
ClearPageChecked(page);
|
ClearPageChecked(page);
|
||||||
|
@ -430,9 +430,21 @@ void btrfs_writepage_fixup_worker(struct btrfs_work *work)
|
||||||
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
|
page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
|
||||||
|
|
||||||
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
|
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);
|
||||||
ordered = btrfs_lookup_ordered_extent(inode, page_start);
|
|
||||||
if (ordered)
|
/* already ordered? We're done */
|
||||||
|
if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
|
||||||
|
EXTENT_ORDERED, 0)) {
|
||||||
goto out;
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
ordered = btrfs_lookup_ordered_extent(inode, page_start);
|
||||||
|
if (ordered) {
|
||||||
|
unlock_extent(&BTRFS_I(inode)->io_tree, page_start,
|
||||||
|
page_end, GFP_NOFS);
|
||||||
|
unlock_page(page);
|
||||||
|
btrfs_start_ordered_extent(inode, ordered, 1);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
|
||||||
set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
|
set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start, page_end,
|
||||||
GFP_NOFS);
|
GFP_NOFS);
|
||||||
|
@ -1465,11 +1477,11 @@ void btrfs_delete_inode(struct inode *inode)
|
||||||
unsigned long nr;
|
unsigned long nr;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
|
||||||
truncate_inode_pages(&inode->i_data, 0);
|
truncate_inode_pages(&inode->i_data, 0);
|
||||||
if (is_bad_inode(inode)) {
|
if (is_bad_inode(inode)) {
|
||||||
goto no_delete;
|
goto no_delete;
|
||||||
}
|
}
|
||||||
|
btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
||||||
|
|
||||||
btrfs_i_size_write(inode, 0);
|
btrfs_i_size_write(inode, 0);
|
||||||
trans = btrfs_start_transaction(root, 1);
|
trans = btrfs_start_transaction(root, 1);
|
||||||
|
@ -2707,6 +2719,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
|
||||||
1, 1, GFP_NOFS);
|
1, 1, GFP_NOFS);
|
||||||
__btrfs_releasepage(page, GFP_NOFS);
|
__btrfs_releasepage(page, GFP_NOFS);
|
||||||
|
|
||||||
|
ClearPageChecked(page);
|
||||||
if (PagePrivate(page)) {
|
if (PagePrivate(page)) {
|
||||||
invalidate_extent_lru(tree, page_offset(page),
|
invalidate_extent_lru(tree, page_offset(page),
|
||||||
PAGE_CACHE_SIZE);
|
PAGE_CACHE_SIZE);
|
||||||
|
@ -2818,10 +2831,10 @@ static void btrfs_truncate(struct inode *inode)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
btrfs_truncate_page(inode->i_mapping, inode->i_size);
|
btrfs_truncate_page(inode->i_mapping, inode->i_size);
|
||||||
|
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
|
||||||
|
|
||||||
trans = btrfs_start_transaction(root, 1);
|
trans = btrfs_start_transaction(root, 1);
|
||||||
btrfs_set_trans_block_group(trans, inode);
|
btrfs_set_trans_block_group(trans, inode);
|
||||||
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
|
|
||||||
btrfs_i_size_write(inode, inode->i_size);
|
btrfs_i_size_write(inode, inode->i_size);
|
||||||
|
|
||||||
/* FIXME, add redo link to tree so we don't leak on crash */
|
/* FIXME, add redo link to tree so we don't leak on crash */
|
||||||
|
|
|
@ -336,7 +336,7 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||||
orig_end = start + len - 1;
|
orig_end = start + len - 1;
|
||||||
wait_end = orig_end;
|
wait_end = orig_end;
|
||||||
}
|
}
|
||||||
|
again:
|
||||||
/* start IO across the range first to instantiate any delalloc
|
/* start IO across the range first to instantiate any delalloc
|
||||||
* extents
|
* extents
|
||||||
*/
|
*/
|
||||||
|
@ -369,6 +369,14 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||||
break;
|
break;
|
||||||
end--;
|
end--;
|
||||||
}
|
}
|
||||||
|
if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
|
||||||
|
EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
|
||||||
|
printk("inode %lu still ordered or delalloc after wait "
|
||||||
|
"%llu %llu\n", inode->i_ino,
|
||||||
|
(unsigned long long)start,
|
||||||
|
(unsigned long long)orig_end);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -545,7 +553,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
|
||||||
sector_sums = &ordered_sum->sums;
|
sector_sums = &ordered_sum->sums;
|
||||||
for (i = 0; i < num_sectors; i++) {
|
for (i = 0; i < num_sectors; i++) {
|
||||||
if (sector_sums[i].offset == offset) {
|
if (sector_sums[i].offset == offset) {
|
||||||
printk("find ordered sum inode %lu offset %Lu\n", inode->i_ino, offset);
|
|
||||||
*sum = sector_sums[i].sum;
|
*sum = sector_sums[i].sum;
|
||||||
ret = 0;
|
ret = 0;
|
||||||
goto out;
|
goto out;
|
||||||
|
|
Loading…
Reference in a new issue