Mirror of https://github.com/adulau/aha.git (synced 2024-12-29 04:06:22 +00:00)
fs: restore nobh
Implement nobh in new aops. This is a bit tricky. FWIW, nobh_truncate is now
implemented in a way that does not create blocks in sparse regions, which is a
silly thing for it to have been doing (isn't it?)

ext2 survives fsx and fsstress. jfs is converted as well... ext3 should be easy
to do (but not done yet).

[akpm@linux-foundation.org: coding-style fixes]
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
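For context, the wiring pattern this patch establishes for a nobh filesystem (shown concretely for ext2 and jfs in the diff below) is roughly the following minimal sketch. The myfs_* names and the trivial myfs_get_block stub are placeholders, not part of the patch:

#include <linux/fs.h>
#include <linux/buffer_head.h>

/* Placeholder block mapper; a real filesystem maps/allocates blocks here. */
static int myfs_get_block(struct inode *inode, sector_t iblock,
                          struct buffer_head *bh_result, int create)
{
        map_bh(bh_result, inode->i_sb, iblock);   /* fake 1:1 mapping */
        return 0;
}

/* ->write_begin just forwards to the new nobh helper with the fs get_block. */
static int myfs_nobh_write_begin(struct file *file, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata)
{
        return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                                myfs_get_block);
}

static const struct address_space_operations myfs_nobh_aops = {
        .write_begin    = myfs_nobh_write_begin,
        .write_end      = nobh_write_end,   /* library write_end added by this patch */
        /* ... readpage, writepage, bmap, etc. as before ... */
};

/* Truncate now passes get_block, so nobh_truncate_page() can skip holes
 * instead of allocating blocks in sparse regions (the behaviour change
 * noted in the changelog). */
static void myfs_truncate(struct inode *inode)
{
        nobh_truncate_page(inode->i_mapping, inode->i_size, myfs_get_block);
        /* ... filesystem-specific block freeing ... */
}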
Commit: 03158cd7eb
Parent: b6af1bcd87

4 changed files with 178 additions and 88 deletions
fs/buffer.c (227 lines changed)

@@ -2369,7 +2369,7 @@ out_unlock:
 }
 
 /*
- * nobh_prepare_write()'s prereads are special: the buffer_heads are freed
+ * nobh_write_begin()'s prereads are special: the buffer_heads are freed
  * immediately, while under the page lock. So it needs a special end_io
  * handler which does not touch the bh after unlocking it.
  */
@@ -2378,17 +2378,46 @@ static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate)
         __end_buffer_read_notouch(bh, uptodate);
 }
 
+/*
+ * Attach the singly-linked list of buffers created by nobh_write_begin, to
+ * the page (converting it to circular linked list and taking care of page
+ * dirty races).
+ */
+static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
+{
+        struct buffer_head *bh;
+
+        BUG_ON(!PageLocked(page));
+
+        spin_lock(&page->mapping->private_lock);
+        bh = head;
+        do {
+                if (PageDirty(page))
+                        set_buffer_dirty(bh);
+                if (!bh->b_this_page)
+                        bh->b_this_page = head;
+                bh = bh->b_this_page;
+        } while (bh != head);
+        attach_page_buffers(page, head);
+        spin_unlock(&page->mapping->private_lock);
+}
+
 /*
  * On entry, the page is fully not uptodate.
  * On exit the page is fully uptodate in the areas outside (from,to)
  */
-int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
+int nobh_write_begin(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned flags,
+                        struct page **pagep, void **fsdata,
                         get_block_t *get_block)
 {
-        struct inode *inode = page->mapping->host;
+        struct inode *inode = mapping->host;
         const unsigned blkbits = inode->i_blkbits;
         const unsigned blocksize = 1 << blkbits;
         struct buffer_head *head, *bh;
+        struct page *page;
+        pgoff_t index;
+        unsigned from, to;
         unsigned block_in_page;
         unsigned block_start, block_end;
         sector_t block_in_file;
@@ -2397,8 +2426,23 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
         int ret = 0;
         int is_mapped_to_disk = 1;
 
-        if (page_has_buffers(page))
-                return block_prepare_write(page, from, to, get_block);
+        index = pos >> PAGE_CACHE_SHIFT;
+        from = pos & (PAGE_CACHE_SIZE - 1);
+        to = from + len;
+
+        page = __grab_cache_page(mapping, index);
+        if (!page)
+                return -ENOMEM;
+        *pagep = page;
+        *fsdata = NULL;
+
+        if (page_has_buffers(page)) {
+                unlock_page(page);
+                page_cache_release(page);
+                *pagep = NULL;
+                return block_write_begin(file, mapping, pos, len, flags, pagep,
+                                        fsdata, get_block);
+        }
 
         if (PageMappedToDisk(page))
                 return 0;
@@ -2413,8 +2457,10 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
          * than the circular one we're used to.
          */
         head = alloc_page_buffers(page, blocksize, 0);
-        if (!head)
-                return -ENOMEM;
+        if (!head) {
+                ret = -ENOMEM;
+                goto out_release;
+        }
 
         block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
 
@@ -2483,15 +2529,12 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
         if (is_mapped_to_disk)
                 SetPageMappedToDisk(page);
 
-        do {
-                bh = head;
-                head = head->b_this_page;
-                free_buffer_head(bh);
-        } while (head);
+        *fsdata = head; /* to be released by nobh_write_end */
 
         return 0;
 
 failed:
+        BUG_ON(!ret);
         /*
          * Error recovery is a bit difficult. We need to zero out blocks that
          * were newly allocated, and dirty them to ensure they get written out.
@@ -2499,64 +2542,57 @@ failed:
          * the handling of potential IO errors during writeout would be hard
          * (could try doing synchronous writeout, but what if that fails too?)
          */
-        spin_lock(&page->mapping->private_lock);
-        bh = head;
-        block_start = 0;
-        do {
-                if (PageUptodate(page))
-                        set_buffer_uptodate(bh);
-                if (PageDirty(page))
-                        set_buffer_dirty(bh);
-
-                block_end = block_start+blocksize;
-                if (block_end <= from)
-                        goto next;
-                if (block_start >= to)
-                        goto next;
-
-                if (buffer_new(bh)) {
-                        clear_buffer_new(bh);
-                        if (!buffer_uptodate(bh)) {
-                                zero_user_page(page, block_start, bh->b_size, KM_USER0);
-                                set_buffer_uptodate(bh);
-                        }
-                        mark_buffer_dirty(bh);
-                }
-next:
-                block_start = block_end;
-                if (!bh->b_this_page)
-                        bh->b_this_page = head;
-                bh = bh->b_this_page;
-        } while (bh != head);
-        attach_page_buffers(page, head);
-        spin_unlock(&page->mapping->private_lock);
+        attach_nobh_buffers(page, head);
+        page_zero_new_buffers(page, from, to);
+
+out_release:
+        unlock_page(page);
+        page_cache_release(page);
+        *pagep = NULL;
+
+        if (pos + len > inode->i_size)
+                vmtruncate(inode, inode->i_size);
 
         return ret;
 }
-EXPORT_SYMBOL(nobh_prepare_write);
+EXPORT_SYMBOL(nobh_write_begin);
 
-/*
- * Make sure any changes to nobh_commit_write() are reflected in
- * nobh_truncate_page(), since it doesn't call commit_write().
- */
-int nobh_commit_write(struct file *file, struct page *page,
-                unsigned from, unsigned to)
+int nobh_write_end(struct file *file, struct address_space *mapping,
+                        loff_t pos, unsigned len, unsigned copied,
+                        struct page *page, void *fsdata)
 {
         struct inode *inode = page->mapping->host;
-        loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+        struct buffer_head *head = NULL;
+        struct buffer_head *bh;
 
-        if (page_has_buffers(page))
-                return generic_commit_write(file, page, from, to);
+        if (!PageMappedToDisk(page)) {
+                if (unlikely(copied < len) && !page_has_buffers(page))
+                        attach_nobh_buffers(page, head);
+                if (page_has_buffers(page))
+                        return generic_write_end(file, mapping, pos, len,
+                                                copied, page, fsdata);
+        }
 
         SetPageUptodate(page);
         set_page_dirty(page);
-        if (pos > inode->i_size) {
-                i_size_write(inode, pos);
+        if (pos+copied > inode->i_size) {
+                i_size_write(inode, pos+copied);
                 mark_inode_dirty(inode);
         }
-        return 0;
+
+        unlock_page(page);
+        page_cache_release(page);
+
+        head = fsdata;
+        while (head) {
+                bh = head;
+                head = head->b_this_page;
+                free_buffer_head(bh);
+        }
+
+        return copied;
 }
-EXPORT_SYMBOL(nobh_commit_write);
+EXPORT_SYMBOL(nobh_write_end);
 
 /*
  * nobh_writepage() - based on block_full_write_page() except
@@ -2609,44 +2645,79 @@ out:
 }
 EXPORT_SYMBOL(nobh_writepage);
 
-/*
- * This function assumes that ->prepare_write() uses nobh_prepare_write().
- */
-int nobh_truncate_page(struct address_space *mapping, loff_t from)
+int nobh_truncate_page(struct address_space *mapping,
+                        loff_t from, get_block_t *get_block)
 {
-        struct inode *inode = mapping->host;
-        unsigned blocksize = 1 << inode->i_blkbits;
         pgoff_t index = from >> PAGE_CACHE_SHIFT;
         unsigned offset = from & (PAGE_CACHE_SIZE-1);
-        unsigned to;
+        unsigned blocksize;
+        sector_t iblock;
+        unsigned length, pos;
+        struct inode *inode = mapping->host;
         struct page *page;
-        const struct address_space_operations *a_ops = mapping->a_ops;
-        int ret = 0;
+        struct buffer_head map_bh;
+        int err;
 
-        if ((offset & (blocksize - 1)) == 0)
-                goto out;
+        blocksize = 1 << inode->i_blkbits;
+        length = offset & (blocksize - 1);
+
+        /* Block boundary? Nothing to do */
+        if (!length)
+                return 0;
+
+        length = blocksize - length;
+        iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
 
-        ret = -ENOMEM;
         page = grab_cache_page(mapping, index);
+        err = -ENOMEM;
         if (!page)
                 goto out;
 
-        to = (offset + blocksize) & ~(blocksize - 1);
-        ret = a_ops->prepare_write(NULL, page, offset, to);
-        if (ret == 0) {
-                zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
-                                KM_USER0);
-                /*
-                 * It would be more correct to call aops->commit_write()
-                 * here, but this is more efficient.
-                 */
-                SetPageUptodate(page);
-                set_page_dirty(page);
+        if (page_has_buffers(page)) {
+has_buffers:
+                unlock_page(page);
+                page_cache_release(page);
+                return block_truncate_page(mapping, from, get_block);
         }
+
+        /* Find the buffer that contains "offset" */
+        pos = blocksize;
+        while (offset >= pos) {
+                iblock++;
+                pos += blocksize;
+        }
+
+        err = get_block(inode, iblock, &map_bh, 0);
+        if (err)
+                goto unlock;
+        /* unmapped? It's a hole - nothing to do */
+        if (!buffer_mapped(&map_bh))
+                goto unlock;
+
+        /* Ok, it's mapped. Make sure it's up-to-date */
+        if (!PageUptodate(page)) {
+                err = mapping->a_ops->readpage(NULL, page);
+                if (err) {
+                        page_cache_release(page);
+                        goto out;
+                }
+                lock_page(page);
+                if (!PageUptodate(page)) {
+                        err = -EIO;
+                        goto unlock;
+                }
+                if (page_has_buffers(page))
+                        goto has_buffers;
+        }
+        zero_user_page(page, offset, length, KM_USER0);
+        set_page_dirty(page);
+        err = 0;
+
+unlock:
         unlock_page(page);
         page_cache_release(page);
 out:
-        return ret;
+        return err;
 }
 EXPORT_SYMBOL(nobh_truncate_page);
 
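The new nobh_write_begin()/nobh_write_end() pair follows the generic ->write_begin/->write_end contract, with fsdata carrying the singly-linked buffer_head list from begin to end. The sketch below (a hypothetical sketch_write() helper, not kernel code; it assumes the write fits inside one page and omits error handling and iov_iter details) shows roughly how a caller such as the generic write path is expected to drive the pair:

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/string.h>

/* Hypothetical caller sketch: drive one ->write_begin/->write_end cycle. */
static int sketch_write(struct address_space *mapping, loff_t pos,
                        const char *buf, unsigned len)
{
        const struct address_space_operations *a_ops = mapping->a_ops;
        unsigned offset = pos & (PAGE_CACHE_SIZE - 1);  /* assumes offset + len <= page size */
        struct page *page;
        void *fsdata;
        int status;

        status = a_ops->write_begin(NULL, mapping, pos, len, 0,
                                    &page, &fsdata);
        if (status)
                return status;

        /* the page comes back locked; copy the new data into it */
        memcpy(kmap(page) + offset, buf, len);
        kunmap(page);
        flush_dcache_page(page);

        /* write_end dirties the page, updates i_size, unlocks and releases
         * the page, and returns the number of bytes accepted */
        return a_ops->write_end(NULL, mapping, pos, len, len, page, fsdata);
}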
fs/ext2/inode.c

@@ -659,6 +659,20 @@ ext2_write_begin(struct file *file, struct address_space *mapping,
         return __ext2_write_begin(file, mapping, pos, len, flags, pagep,fsdata);
 }
 
+static int
+ext2_nobh_write_begin(struct file *file, struct address_space *mapping,
+                loff_t pos, unsigned len, unsigned flags,
+                struct page **pagep, void **fsdata)
+{
+        /*
+         * Dir-in-pagecache still uses ext2_write_begin. Would have to rework
+         * directory handling code to pass around offsets rather than struct
+         * pages in order to make this work easily.
+         */
+        return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+                                ext2_get_block);
+}
+
 static int ext2_nobh_writepage(struct page *page,
                         struct writeback_control *wbc)
 {
@@ -710,7 +724,8 @@ const struct address_space_operations ext2_nobh_aops = {
         .readpages              = ext2_readpages,
         .writepage              = ext2_nobh_writepage,
         .sync_page              = block_sync_page,
-        /* XXX: todo */
+        .write_begin            = ext2_nobh_write_begin,
+        .write_end              = nobh_write_end,
         .bmap                   = ext2_bmap,
         .direct_IO              = ext2_direct_IO,
         .writepages             = ext2_writepages,
@@ -927,7 +942,8 @@ void ext2_truncate (struct inode * inode)
         if (mapping_is_xip(inode->i_mapping))
                 xip_truncate_page(inode->i_mapping, inode->i_size);
         else if (test_opt(inode->i_sb, NOBH))
-                nobh_truncate_page(inode->i_mapping, inode->i_size);
+                nobh_truncate_page(inode->i_mapping,
+                                inode->i_size, ext2_get_block);
         else
                 block_truncate_page(inode->i_mapping,
                                 inode->i_size, ext2_get_block);
fs/jfs/inode.c

@@ -279,8 +279,7 @@ static int jfs_write_begin(struct file *file, struct address_space *mapping,
                                 loff_t pos, unsigned len, unsigned flags,
                                 struct page **pagep, void **fsdata)
 {
-        *pagep = NULL;
-        return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
+        return nobh_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                                 jfs_get_block);
 }
 
@@ -306,7 +305,7 @@ const struct address_space_operations jfs_aops = {
         .writepages     = jfs_writepages,
         .sync_page      = block_sync_page,
         .write_begin    = jfs_write_begin,
-        .write_end      = generic_write_end,
+        .write_end      = nobh_write_end,
         .bmap           = jfs_bmap,
         .direct_IO      = jfs_direct_IO,
 };
@@ -359,7 +358,7 @@ void jfs_truncate(struct inode *ip)
 {
         jfs_info("jfs_truncate: size = 0x%lx", (ulong) ip->i_size);
 
-        block_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
+        nobh_truncate_page(ip->i_mapping, ip->i_size, jfs_get_block);
 
         IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
         jfs_truncate_nolock(ip, ip->i_size);
include/linux/buffer_head.h

@@ -226,9 +226,13 @@ sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
 int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
 int block_truncate_page(struct address_space *, loff_t, get_block_t *);
 int file_fsync(struct file *, struct dentry *, int);
-int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
-int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
-int nobh_truncate_page(struct address_space *, loff_t);
+int nobh_write_begin(struct file *, struct address_space *,
+                                loff_t, unsigned, unsigned,
+                                struct page **, void **, get_block_t*);
+int nobh_write_end(struct file *, struct address_space *,
+                                loff_t, unsigned, unsigned,
+                                struct page *, void *);
+int nobh_truncate_page(struct address_space *, loff_t, get_block_t *);
 int nobh_writepage(struct page *page, get_block_t *get_block,
                                 struct writeback_control *wbc);
 