Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (47 commits)
  ext4: Fix potential fiemap deadlock (mmap_sem vs. i_data_sem)
  ext4: Do not override ext2 or ext3 if built they are built as modules
  jbd2: Export jbd2_log_start_commit to fix ext4 build
  ext4: Fix insufficient checks in EXT4_IOC_MOVE_EXT
  ext4: Wait for proper transaction commit on fsync
  ext4: fix incorrect block reservation on quota transfer.
  ext4: quota macros cleanup
  ext4: ext4_get_reserved_space() must return bytes instead of blocks
  ext4: remove blocks from inode prealloc list on failure
  ext4: wait for log to commit when umounting
  ext4: Avoid data / filesystem corruption when write fails to copy data
  ext4: Use ext4 file system driver for ext2/ext3 file system mounts
  ext4: Return the PTR_ERR of the correct pointer in setup_new_group_blocks()
  jbd2: Add ENOMEM checking in and for jbd2_journal_write_metadata_buffer()
  ext4: remove unused parameter wbc from __ext4_journalled_writepage()
  ext4: remove encountered_congestion trace
  ext4: move_extent_per_page() cleanup
  ext4: initialize moved_len before calling ext4_move_extents()
  ext4: Fix double-free of blocks with EXT4_IOC_MOVE_EXT
  ext4: use ext4_data_block_valid() in ext4_free_blocks()
  ...
This commit is contained in:
Linus Torvalds 2009-12-10 09:33:29 -08:00
commit 4515c3069d
21 changed files with 678 additions and 515 deletions

View file

@ -153,8 +153,8 @@ journal_dev=devnum When the external journal device's major/minor numbers
identified through its new major/minor numbers encoded
in devnum.
noload Don't load the journal on mounting. Note that
if the filesystem was not unmounted cleanly,
norecovery Don't load the journal on mounting. Note that
noload if the filesystem was not unmounted cleanly,
skipping the journal replay will lead to the
filesystem containing inconsistencies that can
lead to any number of problems.
@ -353,6 +353,12 @@ noauto_da_alloc replacing existing files via patterns such as
system crashes before the delayed allocation
blocks are forced to disk.
discard Controls whether ext4 should issue discard/TRIM
nodiscard(*) commands to the underlying block device when
blocks are freed. This is useful for SSD devices
and sparse/thinly-provisioned LUNs, but it is off
by default until sufficient testing has been done.
Data Mode
=========
There are 3 different data modes:

View file

@ -26,6 +26,16 @@ config EXT4_FS
If unsure, say N.
config EXT4_USE_FOR_EXT23
bool "Use ext4 for ext2/ext3 file systems"
depends on EXT3_FS=n || EXT2_FS=n
default y
help
Allow the ext4 file system driver code to be used for ext2 or
ext3 file system mounts. This allows users to reduce their
compiled kernel size by using one file system driver for
ext2, ext3, and ext4 file systems.
config EXT4_FS_XATTR
bool "Ext4 extended attributes"
depends on EXT4_FS

View file

@ -498,44 +498,6 @@ error_return:
return;
}
/**
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
* @block: start physical block to free
* @count: number of blocks to count
* @metadata: Are these metadata blocks
*/
void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count,
int metadata)
{
struct super_block *sb;
unsigned long dquot_freed_blocks;
/* this isn't the right place to decide whether block is metadata
* inode.c/extents.c knows better, but for safety ... */
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
metadata = 1;
/* We need to make sure we don't reuse
* block released untill the transaction commit.
* writeback mode have weak data consistency so
* don't force data as metadata when freeing block
* for writeback mode.
*/
if (metadata == 0 && !ext4_should_writeback_data(inode))
metadata = 1;
sb = inode->i_sb;
ext4_mb_free_blocks(handle, inode, block, count,
metadata, &dquot_freed_blocks);
if (dquot_freed_blocks)
vfs_dq_free_block(inode, dquot_freed_blocks);
return;
}
/**
* ext4_has_free_blocks()
* @sbi: in-core super block structure.
@ -761,7 +723,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
ext4_group_t group)
{
return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0;
if (!ext4_bg_has_super(sb, group))
return 0;
if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
else
return EXT4_SB(sb)->s_gdb_count;
}
/**

View file

@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
if (ext4_bg_has_super(sb, i) &&
((i < 5) || ((i % flex_size) == 0)))
add_system_zone(sbi, ext4_group_first_block_no(sb, i),
sbi->s_gdb_count + 1);
ext4_bg_num_gdb(sb, i) + 1);
gdp = ext4_get_group_desc(sb, i, NULL);
ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
if (ret)
@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
struct rb_node *n = sbi->system_blks.rb_node;
if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
(start_blk + count < start_blk) ||
(start_blk + count > ext4_blocks_count(sbi->s_es)))
return 0;
while (n) {

View file

@ -375,6 +375,12 @@ struct ext4_new_group_data {
#define EXT4_GET_BLOCKS_DIO_CONVERT_EXT (EXT4_GET_BLOCKS_CONVERT|\
EXT4_GET_BLOCKS_DIO_CREATE_EXT)
/*
* Flags used by ext4_free_blocks
*/
#define EXT4_FREE_BLOCKS_METADATA 0x0001
#define EXT4_FREE_BLOCKS_FORGET 0x0002
/*
* ioctl commands
*/
@ -703,6 +709,13 @@ struct ext4_inode_info {
struct list_head i_aio_dio_complete_list;
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
};
/*
@ -750,6 +763,7 @@ struct ext4_inode_info {
#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@ -1324,8 +1338,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t goal, unsigned long *count, int *errp);
extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count, int metadata);
extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
ext4_fsblk_t block, unsigned long count);
extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@ -1384,16 +1396,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern int __init init_ext4_mballoc(void);
extern void exit_ext4_mballoc(void);
extern void ext4_mb_free_blocks(handle_t *, struct inode *,
ext4_fsblk_t, unsigned long, int, unsigned long *);
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags);
extern int ext4_mb_add_groupinfo(struct super_block *sb,
ext4_group_t i, struct ext4_group_desc *desc);
extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
ext4_group_t, int);
/* inode.c */
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
ext4_lblk_t, int, int *);
struct buffer_head *ext4_bread(handle_t *, struct inode *,

View file

@ -4,6 +4,8 @@
#include "ext4_jbd2.h"
#include <trace/events/ext4.h>
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
return err;
}
int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh)
/*
* The ext4 forget function must perform a revoke if we are freeing data
* which has been journaled. Metadata (eg. indirect blocks) must be
* revoked in all cases.
*
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
*
* If the handle isn't valid we're not journaling, but we still need to
* call into ext4_journal_revoke() to put the buffer head.
*/
int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
struct inode *inode, struct buffer_head *bh,
ext4_fsblk_t blocknr)
{
int err = 0;
int err;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_forget(handle, bh);
if (err)
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
}
else
might_sleep();
trace_ext4_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
"data mode %x\n",
bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS));
/* In the no journal case, we can just do a bforget and return */
if (!ext4_handle_valid(handle)) {
bforget(bh);
return err;
}
int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh)
{
int err = 0;
if (ext4_handle_valid(handle)) {
err = jbd2_journal_revoke(handle, blocknr, bh);
if (err)
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
return 0;
}
else
bforget(bh);
/* Never use the revoke function if we are doing full data
* journaling: there is no need to, and a V1 superblock won't
* support it. Otherwise, only skip the revoke on un-journaled
* data blocks. */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
(!is_metadata && !ext4_should_journal_data(inode))) {
if (bh) {
BUFFER_TRACE(bh, "call jbd2_journal_forget");
err = jbd2_journal_forget(handle, bh);
if (err)
ext4_journal_abort_handle(where, __func__, bh,
handle, err);
return err;
}
return 0;
}
/*
* data!=journal && (is_metadata || should_journal_data(inode))
*/
BUFFER_TRACE(bh, "call jbd2_journal_revoke");
err = jbd2_journal_revoke(handle, blocknr, bh);
if (err) {
ext4_journal_abort_handle(where, __func__, bh, handle, err);
ext4_abort(inode->i_sb, __func__,
"error %d when attempting revoke", err);
}
BUFFER_TRACE(bh, "exit");
return err;
}

View file

@ -49,7 +49,7 @@
#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
EXT4_XATTR_TRANS_BLOCKS - 2 + \
2*EXT4_QUOTA_TRANS_BLOCKS(sb))
EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
/*
* Define the number of metadata blocks we need to account to modify data.
@ -57,7 +57,7 @@
* This include super block, inode block, quota blocks and xattr blocks
*/
#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
2*EXT4_QUOTA_TRANS_BLOCKS(sb))
EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
/* Delete operations potentially hit one directory's namespace plus an
* entire inode, plus arbitrary amounts of bitmap/indirection data. Be
@ -92,6 +92,7 @@
* but inode, sb and group updates are done only once */
#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
(EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
#else
@ -99,6 +100,9 @@
#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
#endif
#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
int
ext4_mark_iloc_dirty(handle_t *handle,
@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
/*
* Wrapper functions with which ext4 calls into JBD. The intent here is
* to allow these to be turned into appropriate stubs so ext4 can control
* ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
* been done yet.
* Wrapper functions with which ext4 calls into JBD.
*/
void ext4_journal_abort_handle(const char *caller, const char *err_fn,
struct buffer_head *bh, handle_t *handle, int err);
@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
int __ext4_journal_get_write_access(const char *where, handle_t *handle,
struct buffer_head *bh);
/* When called with an invalid handle, this will still do a put on the BH */
int __ext4_journal_forget(const char *where, handle_t *handle,
struct buffer_head *bh);
/* When called with an invalid handle, this will still do a put on the BH */
int __ext4_journal_revoke(const char *where, handle_t *handle,
ext4_fsblk_t blocknr, struct buffer_head *bh);
int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
struct inode *inode, struct buffer_head *bh,
ext4_fsblk_t blocknr);
int __ext4_journal_get_create_access(const char *where,
handle_t *handle, struct buffer_head *bh);
@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
__ext4_journal_get_undo_access(__func__, (handle), (bh))
#define ext4_journal_get_write_access(handle, bh) \
__ext4_journal_get_write_access(__func__, (handle), (bh))
#define ext4_journal_revoke(handle, blocknr, bh) \
__ext4_journal_revoke(__func__, (handle), (blocknr), (bh))
#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
__ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
(block_nr))
#define ext4_journal_get_create_access(handle, bh) \
__ext4_journal_get_create_access(__func__, (handle), (bh))
#define ext4_journal_forget(handle, bh) \
__ext4_journal_forget(__func__, (handle), (bh))
#define ext4_handle_dirty_metadata(handle, inode, bh) \
__ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
return 0;
}
static inline void ext4_update_inode_fsync_trans(handle_t *handle,
struct inode *inode,
int datasync)
{
struct ext4_inode_info *ei = EXT4_I(inode);
if (ext4_handle_valid(handle)) {
ei->i_sync_tid = handle->h_transaction->t_tid;
if (datasync)
ei->i_datasync_tid = handle->h_transaction->t_tid;
}
}
/* super.c */
int ext4_force_commit(struct super_block *sb);

View file

@ -1007,7 +1007,8 @@ cleanup:
for (i = 0; i < depth; i++) {
if (!ablocks[i])
continue;
ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
EXT4_FREE_BLOCKS_METADATA);
}
}
kfree(ablocks);
@ -1761,7 +1762,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
while (block < last && block != EXT_MAX_BLOCK) {
num = last - block;
/* find extent for this block */
down_read(&EXT4_I(inode)->i_data_sem);
path = ext4_ext_find_extent(inode, block, path);
up_read(&EXT4_I(inode)->i_data_sem);
if (IS_ERR(path)) {
err = PTR_ERR(path);
path = NULL;
@ -1957,7 +1960,6 @@ errout:
static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path)
{
struct buffer_head *bh;
int err;
ext4_fsblk_t leaf;
@ -1973,9 +1975,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
if (err)
return err;
ext_debug("index is empty, remove it, free block %llu\n", leaf);
bh = sb_find_get_block(inode->i_sb, leaf);
ext4_forget(handle, 1, inode, bh, leaf);
ext4_free_blocks(handle, inode, leaf, 1, 1);
ext4_free_blocks(handle, inode, 0, leaf, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return err;
}
@ -2042,12 +2043,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent *ex,
ext4_lblk_t from, ext4_lblk_t to)
{
struct buffer_head *bh;
unsigned short ee_len = ext4_ext_get_actual_len(ex);
int i, metadata = 0;
int flags = EXT4_FREE_BLOCKS_FORGET;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
metadata = 1;
flags |= EXT4_FREE_BLOCKS_METADATA;
#ifdef EXTENTS_STATS
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@ -2072,11 +2072,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
num = le32_to_cpu(ex->ee_block) + ee_len - from;
start = ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
for (i = 0; i < num; i++) {
bh = sb_find_get_block(inode->i_sb, start + i);
ext4_forget(handle, 0, inode, bh, start + i);
}
ext4_free_blocks(handle, inode, start, num, metadata);
ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
&& to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@ -2167,7 +2163,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
correct_index = 1;
credits += (ext_depth(inode)) + 1;
}
credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
err = ext4_ext_truncate_extend_restart(handle, inode, credits);
if (err)
@ -3064,6 +3060,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
ret = ext4_convert_unwritten_extents_dio(handle, inode,
path);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
goto out2;
}
/* buffered IO case */
@ -3091,6 +3089,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock,
max_blocks);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
out:
if (ret <= 0) {
err = ret;
@ -3319,8 +3319,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
ext4_free_blocks(handle, inode, ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
@ -3329,10 +3329,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
allocated = ext4_ext_get_actual_len(&newex);
set_buffer_new(bh_result);
/* Cache only when it is _not_ an uninitialized extent */
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
/*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
out:
if (allocated > max_blocks)
allocated = max_blocks;
@ -3720,10 +3726,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
* Walk the extent tree gathering extent information.
* ext4_ext_fiemap_cb will push extents back to user.
*/
down_read(&EXT4_I(inode)->i_data_sem);
error = ext4_ext_walk_space(inode, start_blk, len_blks,
ext4_ext_fiemap_cb, fieinfo);
up_read(&EXT4_I(inode)->i_data_sem);
}
return error;

View file

@ -51,25 +51,30 @@
int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int err, ret = 0;
int ret;
tid_t commit_tid;
J_ASSERT(ext4_journal_current_handle() == NULL);
trace_ext4_sync_file(file, dentry, datasync);
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
ret = flush_aio_dio_completed_IO(inode);
if (ret < 0)
goto out;
return ret;
if (!journal)
return simple_fsync(file, dentry, datasync);
/*
* data=writeback:
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
* sync_inode() will sync the metadata
*
* data=ordered:
* The caller's filemap_fdatawrite() will write the data and
* sync_inode() will write the inode if it is dirty. Then the caller's
* filemap_fdatawait() will wait on the pages.
* Metadata is in the journal, we wait for proper transaction to
* commit here.
*
* data=journal:
* filemap_fdatawrite won't do anything (the buffers are clean).
@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
if (ext4_should_journal_data(inode)) {
ret = ext4_force_commit(inode->i_sb);
goto out;
}
if (ext4_should_journal_data(inode))
return ext4_force_commit(inode->i_sb);
if (!journal)
ret = sync_mapping_buffers(inode->i_mapping);
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out;
/*
* The VFS has written the file data. If the inode is unaltered
* then we need not start a commit.
*/
if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* sys_fsync did this */
};
err = sync_inode(inode, &wbc);
if (ret == 0)
ret = err;
}
out:
if (journal && (journal->j_flags & JBD2_BARRIER))
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
if (jbd2_log_start_commit(journal, commit_tid))
jbd2_log_wait_commit(journal, commit_tid);
else if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
return ret;
}

View file

@ -70,58 +70,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
}
/*
* The ext4 forget function must perform a revoke if we are freeing data
* which has been journaled. Metadata (eg. indirect blocks) must be
* revoked in all cases.
*
* "bh" may be NULL: a metadata block may have been freed from memory
* but there may still be a record of it in the journal, and that record
* still needs to be revoked.
*
* If the handle isn't valid we're not journaling, but we still need to
* call into ext4_journal_revoke() to put the buffer head.
*/
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr)
{
int err;
might_sleep();
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
"data mode %x\n",
bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS));
/* Never use the revoke function if we are doing full data
* journaling: there is no need to, and a V1 superblock won't
* support it. Otherwise, only skip the revoke on un-journaled
* data blocks. */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
(!is_metadata && !ext4_should_journal_data(inode))) {
if (bh) {
BUFFER_TRACE(bh, "call jbd2_journal_forget");
return ext4_journal_forget(handle, bh);
}
return 0;
}
/*
* data!=journal && (is_metadata || should_journal_data(inode))
*/
BUFFER_TRACE(bh, "call ext4_journal_revoke");
err = ext4_journal_revoke(handle, blocknr, bh);
if (err)
ext4_abort(inode->i_sb, __func__,
"error %d when attempting revoke", err);
BUFFER_TRACE(bh, "exit");
return err;
}
/*
* Work out how many blocks we need to proceed with the next chunk of a
* truncate transaction.
@ -721,7 +669,7 @@ allocated:
return ret;
failed_out:
for (i = 0; i < index; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
return ret;
}
@ -817,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
return err;
failed:
/* Allocation failed, free what we already allocated */
ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
for (i = 1; i <= n ; i++) {
BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget");
ext4_journal_forget(handle, branch[i].bh);
/*
* branch[i].bh is newly allocated, so there is no
* need to revoke the block, which is why we don't
* need to set EXT4_FREE_BLOCKS_METADATA.
*/
ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
EXT4_FREE_BLOCKS_FORGET);
}
for (i = 0; i < indirect_blks; i++)
ext4_free_blocks(handle, inode, new_blocks[i], 1, 0);
for (i = n+1; i < indirect_blks; i++)
ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
ext4_free_blocks(handle, inode, new_blocks[i], num, 0);
ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
return err;
}
@ -903,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
err_out:
for (i = 1; i <= num; i++) {
BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget");
ext4_journal_forget(handle, where[i].bh);
ext4_free_blocks(handle, inode,
le32_to_cpu(where[i-1].key), 1, 0);
/*
* branch[i].bh is newly allocated, so there is no
* need to revoke the block, which is why we don't
* need to set EXT4_FREE_BLOCKS_METADATA.
*/
ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
EXT4_FREE_BLOCKS_FORGET);
}
ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0);
ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
blks, 0);
return err;
}
@ -1021,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
if (!err)
err = ext4_splice_branch(handle, inode, iblock,
partial, indirect_blks, count);
else
if (err)
goto cleanup;
set_buffer_new(bh_result);
ext4_update_inode_fsync_trans(handle, inode, 1);
got_it:
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
if (count > blocks_to_boundary)
@ -1052,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
EXT4_I(inode)->i_reserved_meta_blocks;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return total;
return (total << inode->i_blkbits);
}
/*
* Calculate the number of metadata blocks need to reserve
@ -1534,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle,
return ext4_journal_get_write_access(handle, bh);
}
/*
* Truncate blocks that were not used by write. We have to truncate the
* pagecache as well so that corresponding buffers get properly unmapped.
*/
static void ext4_truncate_failed_write(struct inode *inode)
{
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
}
static int ext4_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@ -1599,7 +1569,7 @@ retry:
ext4_journal_stop(handle);
if (pos + len > inode->i_size) {
ext4_truncate(inode);
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might
* still be on the orphan list; we need to
@ -1709,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
ext4_truncate(inode);
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
@ -1751,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file,
ret = ret2;
if (pos + len > inode->i_size) {
ext4_truncate(inode);
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
@ -1814,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file,
if (!ret)
ret = ret2;
if (pos + len > inode->i_size) {
ext4_truncate(inode);
ext4_truncate_failed_write(inode);
/*
* If truncate failed early the inode might still be
* on the orphan list; we need to make sure the inode
@ -2600,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
}
static int __ext4_journalled_writepage(struct page *page,
struct writeback_control *wbc,
unsigned int len)
{
struct address_space *mapping = page->mapping;
@ -2758,7 +2727,7 @@ static int ext4_writepage(struct page *page,
* doesn't seem much point in redirtying the page here.
*/
ClearPageChecked(page);
return __ext4_journalled_writepage(page, wbc, len);
return __ext4_journalled_writepage(page, len);
}
if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@ -2788,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
* number of contiguous block. So we will limit
* number of contiguous block to a sane value
*/
if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
(max_blocks > EXT4_MAX_TRANS_DATA))
max_blocks = EXT4_MAX_TRANS_DATA;
@ -3091,7 +3060,7 @@ retry:
* i_size_read because we hold i_mutex.
*/
if (pos + len > inode->i_size)
ext4_truncate(inode);
ext4_truncate_failed_write(inode);
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@ -4120,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
__le32 *last)
{
__le32 *p;
int flags = EXT4_FREE_BLOCKS_FORGET;
if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
flags |= EXT4_FREE_BLOCKS_METADATA;
if (try_to_extend_transaction(handle, inode)) {
if (bh) {
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@ -4134,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
}
}
/*
* Any buffers which are on the journal will be in memory. We
* find them on the hash table so jbd2_journal_revoke() will
* run jbd2_journal_forget() on them. We've already detached
* each block from the file, so bforget() in
* jbd2_journal_forget() should be safe.
*
* AKPM: turn on bforget in jbd2_journal_forget()!!!
*/
for (p = first; p < last; p++) {
u32 nr = le32_to_cpu(*p);
if (nr) {
struct buffer_head *tbh;
for (p = first; p < last; p++)
*p = 0;
*p = 0;
tbh = sb_find_get_block(inode->i_sb, nr);
ext4_forget(handle, 0, inode, tbh, nr);
}
}
ext4_free_blocks(handle, inode, block_to_free, count, 0);
ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
}
/**
@ -4342,7 +4299,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
blocks_for_truncate(inode));
}
ext4_free_blocks(handle, inode, nr, 1, 1);
ext4_free_blocks(handle, inode, 0, nr, 1,
EXT4_FREE_BLOCKS_METADATA);
if (parent_bh) {
/*
@ -4781,8 +4739,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
struct ext4_iloc iloc;
struct ext4_inode *raw_inode;
struct ext4_inode_info *ei;
struct buffer_head *bh;
struct inode *inode;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
int block;
@ -4793,11 +4751,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
return inode;
ei = EXT4_I(inode);
iloc.bh = 0;
ret = __ext4_get_inode_loc(inode, &iloc, 0);
if (ret < 0)
goto bad_inode;
bh = iloc.bh;
raw_inode = ext4_raw_inode(&iloc);
inode->i_mode = le16_to_cpu(raw_inode->i_mode);
inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@ -4820,7 +4778,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
if (inode->i_mode == 0 ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
/* this inode is deleted */
brelse(bh);
ret = -ESTALE;
goto bad_inode;
}
@ -4848,11 +4805,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);
/*
* Set transaction id's of transactions that have to be committed
* to finish f[data]sync. We set them to currently running transaction
* as we cannot be sure that the inode or some of its metadata isn't
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
if (journal) {
transaction_t *transaction;
tid_t tid;
spin_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
transaction = journal->j_committing_transaction;
if (transaction)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
spin_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid;
ei->i_datasync_tid = tid;
}
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
EXT4_INODE_SIZE(inode->i_sb)) {
brelse(bh);
ret = -EIO;
goto bad_inode;
}
@ -4884,10 +4865,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ret = 0;
if (ei->i_file_acl &&
((ei->i_file_acl <
(le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
EXT4_SB(sb)->s_gdb_count)) ||
(ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
!ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
ext4_error(sb, __func__,
"bad extended attribute block %llu in inode #%lu",
ei->i_file_acl, inode->i_ino);
@ -4905,10 +4883,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
/* Validate block references which are part of inode */
ret = ext4_check_inode_blockref(inode);
}
if (ret) {
brelse(bh);
if (ret)
goto bad_inode;
}
if (S_ISREG(inode->i_mode)) {
inode->i_op = &ext4_file_inode_operations;
@ -4936,7 +4912,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
init_special_inode(inode, inode->i_mode,
new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
} else {
brelse(bh);
ret = -EIO;
ext4_error(inode->i_sb, __func__,
"bogus i_mode (%o) for inode=%lu",
@ -4949,6 +4924,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
return inode;
bad_inode:
brelse(iloc.bh);
iget_failed(inode);
return ERR_PTR(ret);
}
@ -5108,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
err = rc;
ei->i_state &= ~EXT4_STATE_NEW;
ext4_update_inode_fsync_trans(handle, inode, 0);
out_brelse:
brelse(bh);
ext4_std_error(inode->i_sb, err);
@ -5227,8 +5204,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
/* (user+group)*(old+new) structure, inode write (sb,
* inode block, ? - but truncate inode update has it) */
handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+
EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;

View file

@ -221,31 +221,38 @@ setversion_out:
struct file *donor_filp;
int err;
if (!(filp->f_mode & FMODE_READ) ||
!(filp->f_mode & FMODE_WRITE))
return -EBADF;
if (copy_from_user(&me,
(struct move_extent __user *)arg, sizeof(me)))
return -EFAULT;
me.moved_len = 0;
donor_filp = fget(me.donor_fd);
if (!donor_filp)
return -EBADF;
if (!capable(CAP_DAC_OVERRIDE)) {
if ((current->real_cred->fsuid != inode->i_uid) ||
!(inode->i_mode & S_IRUSR) ||
!(donor_filp->f_dentry->d_inode->i_mode &
S_IRUSR)) {
fput(donor_filp);
return -EACCES;
}
if (!(donor_filp->f_mode & FMODE_WRITE)) {
err = -EBADF;
goto mext_out;
}
err = mnt_want_write(filp->f_path.mnt);
if (err)
goto mext_out;
err = ext4_move_extents(filp, donor_filp, me.orig_start,
me.donor_start, me.len, &me.moved_len);
fput(donor_filp);
mnt_drop_write(filp->f_path.mnt);
if (me.moved_len > 0)
file_remove_suid(donor_filp);
if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
return -EFAULT;
err = -EFAULT;
mext_out:
fput(donor_filp);
return err;
}

View file

@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
struct ext4_group_info *db;
int err, count = 0, count2 = 0;
struct ext4_free_data *entry;
ext4_fsblk_t discard_block;
struct list_head *l, *ltmp;
list_for_each_safe(l, ltmp, &txn->t_private_list) {
@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
page_cache_release(e4b.bd_bitmap_page);
}
ext4_unlock_group(sb, entry->group);
discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+ entry->start_blk
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
trace_ext4_discard_blocks(sb, (unsigned long long)discard_block,
entry->count);
sb_issue_discard(sb, discard_block, entry->count);
if (test_opt(sb, DISCARD)) {
ext4_fsblk_t discard_block;
struct ext4_super_block *es = EXT4_SB(sb)->s_es;
discard_block = (ext4_fsblk_t)entry->group *
EXT4_BLOCKS_PER_GROUP(sb)
+ entry->start_blk
+ le32_to_cpu(es->s_first_data_block);
trace_ext4_discard_blocks(sb,
(unsigned long long)discard_block,
entry->count);
sb_issue_discard(sb, discard_block, entry->count);
}
kmem_cache_free(ext4_free_ext_cachep, entry);
ext4_mb_release_desc(&e4b);
}
@ -3005,6 +3010,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
trace_ext4_mballoc_prealloc(ac);
}
/*
* Called on failure; free up any blocks from the inode PA for this
* context. We don't need this for MB_GROUP_PA because we only change
* pa_free in ext4_mb_release_context(), but on failure, we've already
* zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
*/
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
struct ext4_prealloc_space *pa = ac->ac_pa;
int len;
if (pa && pa->pa_type == MB_INODE_PA) {
len = ac->ac_b_ex.fe_len;
pa->pa_free += len;
}
}
/*
* use blocks preallocated to inode
*/
@ -4290,6 +4313,7 @@ repeat:
ac->ac_status = AC_STATUS_CONTINUE;
goto repeat;
} else if (*errp) {
ext4_discard_allocated_blocks(ac);
ac->ac_b_ex.fe_len = 0;
ar->len = 0;
ext4_mb_show_ac(ac);
@ -4422,18 +4446,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
return 0;
}
/*
* Main entry point into mballoc to free blocks
/**
* ext4_free_blocks() -- Free given blocks and update quota
* @handle: handle for this transaction
* @inode: inode
* @block: start physical block to free
* @count: number of blocks to count
* @metadata: Are these metadata blocks
*/
void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
ext4_fsblk_t block, unsigned long count,
int metadata, unsigned long *freed)
void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t block,
unsigned long count, int flags)
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
struct ext4_super_block *es;
unsigned long freed = 0;
unsigned int overflow;
ext4_grpblk_t bit;
struct buffer_head *gd_bh;
@ -4443,13 +4473,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
int err = 0;
int ret;
*freed = 0;
if (bh) {
if (block)
BUG_ON(block != bh->b_blocknr);
else
block = bh->b_blocknr;
}
sbi = EXT4_SB(sb);
es = EXT4_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
block + count > ext4_blocks_count(es)) {
if (!ext4_data_block_valid(sbi, block, count)) {
ext4_error(sb, __func__,
"Freeing blocks not in datazone - "
"block = %llu, count = %lu", block, count);
@ -4457,7 +4490,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
}
ext4_debug("freeing block %llu\n", block);
trace_ext4_free_blocks(inode, block, count, metadata);
trace_ext4_free_blocks(inode, block, count, flags);
if (flags & EXT4_FREE_BLOCKS_FORGET) {
struct buffer_head *tbh = bh;
int i;
BUG_ON(bh && (count > 1));
for (i = 0; i < count; i++) {
if (!bh)
tbh = sb_find_get_block(inode->i_sb,
block + i);
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i);
}
}
/*
* We need to make sure we don't reuse the freed block until
* after the transaction is committed, which we can do by
* treating the block as metadata, below. We make an
* exception if the inode is to be written in writeback mode
* since writeback mode has weak data consistency guarantees.
*/
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (ac) {
@ -4533,7 +4591,8 @@ do_more:
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
goto error_return;
if (metadata && ext4_handle_valid(handle)) {
if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
struct ext4_free_data *new_entry;
/*
* blocks being freed are metadata. these blocks shouldn't
@ -4572,7 +4631,7 @@ do_more:
ext4_mb_release_desc(&e4b);
*freed += count;
freed += count;
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@ -4592,6 +4651,8 @@ do_more:
}
sb->s_dirt = 1;
error_return:
if (freed)
vfs_dq_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
if (ac)

View file

@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
* So allocate a credit of 3. We may update
* quota (user and group).
*/
needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
if (ext4_journal_extend(handle, needed) != 0)
retval = ext4_journal_restart(handle, needed);
@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
for (i = 0; i < max_entries; i++) {
if (tmp_idata[i]) {
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode,
le32_to_cpu(tmp_idata[i]), 1, 1);
ext4_free_blocks(handle, inode, 0,
le32_to_cpu(tmp_idata[i]), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
}
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
return 0;
}
@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1);
ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
return 0;
}
@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
/* ei->i_data[EXT4_IND_BLOCK] */
if (i_data[0]) {
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode,
le32_to_cpu(i_data[0]), 1, 1);
ext4_free_blocks(handle, inode, 0,
le32_to_cpu(i_data[0]), 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
}
/* ei->i_data[EXT4_DIND_BLOCK] */
@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
}
put_bh(bh);
extend_credit_for_blkdel(handle, inode);
ext4_free_blocks(handle, inode, block, 1, 1);
ext4_free_blocks(handle, inode, 0, block, 1,
EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
return retval;
}
@ -477,7 +486,7 @@ int ext4_ext_migrate(struct inode *inode)
handle = ext4_journal_start(inode,
EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)
EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
+ 1);
if (IS_ERR(handle)) {
retval = PTR_ERR(handle);

View file

@ -77,12 +77,14 @@ static int
mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
struct ext4_extent **extent)
{
struct ext4_extent_header *eh;
int ppos, leaf_ppos = path->p_depth;
ppos = leaf_ppos;
if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
/* leaf block */
*extent = ++path[ppos].p_ext;
path[ppos].p_block = ext_pblock(path[ppos].p_ext);
return 0;
}
@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
ext_block_hdr(path[cur_ppos+1].p_bh);
}
path[leaf_ppos].p_ext = *extent = NULL;
eh = path[leaf_ppos].p_hdr;
if (le16_to_cpu(eh->eh_entries) == 0)
/* empty leaf is found */
return -ENODATA;
/* leaf block */
path[leaf_ppos].p_ext = *extent =
EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
path[leaf_ppos].p_block =
ext_pblock(path[leaf_ppos].p_ext);
return 0;
}
}
@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
}
/**
* mext_double_down_read - Acquire two inodes' read semaphore
* double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
* Acquire write lock of i_data_sem of the two inodes (orig and donor) by
* i_ino order.
*/
static void
mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
{
struct inode *first = orig_inode, *second = donor_inode;
/*
* Use the inode number to provide the stable locking order instead
* of its address, because the C language doesn't guarantee you can
* compare pointers that don't come from the same array.
*/
if (donor_inode->i_ino < orig_inode->i_ino) {
first = donor_inode;
second = orig_inode;
}
down_read(&EXT4_I(first)->i_data_sem);
down_read(&EXT4_I(second)->i_data_sem);
}
/**
* mext_double_down_write - Acquire two inodes' write semaphore
*
* @orig_inode: original inode structure
* @donor_inode: donor inode structure
* Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
*/
static void
mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
{
struct inode *first = orig_inode, *second = donor_inode;
@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
}
down_write(&EXT4_I(first)->i_data_sem);
down_write(&EXT4_I(second)->i_data_sem);
down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
}
/**
* mext_double_up_read - Release two inodes' read semaphore
* double_up_write_data_sem - Release two inodes' write lock of i_data_sem
*
* @orig_inode: original inode structure to be released its lock first
* @donor_inode: donor inode structure to be released its lock second
* Release read semaphore of two inodes (orig and donor).
* Release write lock of i_data_sem of two inodes (orig and donor).
*/
static void
mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
{
up_read(&EXT4_I(orig_inode)->i_data_sem);
up_read(&EXT4_I(donor_inode)->i_data_sem);
}
/**
* mext_double_up_write - Release two inodes' write semaphore
*
* @orig_inode: original inode structure to be released its lock first
* @donor_inode: donor inode structure to be released its lock second
* Release write semaphore of two inodes (orig and donor).
*/
static void
mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
{
up_write(&EXT4_I(orig_inode)->i_data_sem);
up_write(&EXT4_I(donor_inode)->i_data_sem);
@ -596,7 +568,7 @@ out:
* @tmp_oext: the extent that will belong to the donor inode
* @orig_off: block offset of original inode
* @donor_off: block offset of donor inode
* @max_count: the maximun length of extents
* @max_count: the maximum length of extents
*
* Return 0 on success, or a negative error value on failure.
*/
@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
* @donor_inode: donor inode
* @from: block offset of orig_inode
* @count: block count to be replaced
* @err: pointer to save return value
*
* Replace original inode extents and donor inode extents page by page.
* We implement this replacement in the following three steps:
@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
* 3. Change the block information of donor inode to point at the saved
* original inode blocks in the dummy extents.
*
* Return 0 on success, or a negative error value on failure.
* Return replaced block count.
*/
static int
mext_replace_branches(handle_t *handle, struct inode *orig_inode,
struct inode *donor_inode, ext4_lblk_t from,
ext4_lblk_t count)
ext4_lblk_t count, int *err)
{
struct ext4_ext_path *orig_path = NULL;
struct ext4_ext_path *donor_path = NULL;
struct ext4_extent *oext, *dext;
struct ext4_extent tmp_dext, tmp_oext;
ext4_lblk_t orig_off = from, donor_off = from;
int err = 0;
int depth;
int replaced_count = 0;
int dext_alen;
mext_double_down_write(orig_inode, donor_inode);
/* Protect extent trees against block allocations via delalloc */
double_down_write_data_sem(orig_inode, donor_inode);
/* Get the original extent for the block "orig_off" */
err = get_ext_path(orig_inode, orig_off, &orig_path);
if (err)
*err = get_ext_path(orig_inode, orig_off, &orig_path);
if (*err)
goto out;
/* Get the donor extent for the head */
err = get_ext_path(donor_inode, donor_off, &donor_path);
if (err)
*err = get_ext_path(donor_inode, donor_off, &donor_path);
if (*err)
goto out;
depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext;
@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
dext = donor_path[depth].p_ext;
tmp_dext = *dext;
err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off, count);
if (err)
if (*err)
goto out;
/* Loop for the donor extents */
@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
if (!dext) {
ext4_error(donor_inode->i_sb, __func__,
"The extent for donor must be found");
err = -EIO;
*err = -EIO;
goto out;
} else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
ext4_error(donor_inode->i_sb, __func__,
@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
"extent(%u) should be equal",
donor_off,
le32_to_cpu(tmp_dext.ee_block));
err = -EIO;
*err = -EIO;
goto out;
}
/* Set donor extent to orig extent */
err = mext_leaf_block(handle, orig_inode,
*err = mext_leaf_block(handle, orig_inode,
orig_path, &tmp_dext, &orig_off);
if (err < 0)
if (*err)
goto out;
/* Set orig extent to donor extent */
err = mext_leaf_block(handle, donor_inode,
*err = mext_leaf_block(handle, donor_inode,
donor_path, &tmp_oext, &donor_off);
if (err < 0)
if (*err)
goto out;
dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
if (orig_path)
ext4_ext_drop_refs(orig_path);
err = get_ext_path(orig_inode, orig_off, &orig_path);
if (err)
*err = get_ext_path(orig_inode, orig_off, &orig_path);
if (*err)
goto out;
depth = ext_depth(orig_inode);
oext = orig_path[depth].p_ext;
if (le32_to_cpu(oext->ee_block) +
ext4_ext_get_actual_len(oext) <= orig_off) {
err = 0;
goto out;
}
tmp_oext = *oext;
if (donor_path)
ext4_ext_drop_refs(donor_path);
err = get_ext_path(donor_inode, donor_off, &donor_path);
if (err)
*err = get_ext_path(donor_inode, donor_off, &donor_path);
if (*err)
goto out;
depth = ext_depth(donor_inode);
dext = donor_path[depth].p_ext;
if (le32_to_cpu(dext->ee_block) +
ext4_ext_get_actual_len(dext) <= donor_off) {
err = 0;
goto out;
}
tmp_dext = *dext;
err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
donor_off, count - replaced_count);
if (err)
if (*err)
goto out;
}
@ -795,8 +758,12 @@ out:
kfree(donor_path);
}
mext_double_up_write(orig_inode, donor_inode);
return err;
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
double_up_write_data_sem(orig_inode, donor_inode);
return replaced_count;
}
/**
@ -808,16 +775,17 @@ out:
* @data_offset_in_page: block index where data swapping starts
* @block_len_in_page: the number of blocks to be swapped
* @uninit: orig extent is uninitialized or not
* @err: pointer to save return value
*
* Save the data in original inode blocks and replace original inode extents
* with donor inode extents by calling mext_replace_branches().
* Finally, write out the saved data in new original inode blocks. Return 0
* on success, or a negative error value on failure.
* Finally, write out the saved data in new original inode blocks. Return
* replaced block count.
*/
static int
move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
pgoff_t orig_page_offset, int data_offset_in_page,
int block_len_in_page, int uninit)
int block_len_in_page, int uninit, int *err)
{
struct inode *orig_inode = o_filp->f_dentry->d_inode;
struct address_space *mapping = orig_inode->i_mapping;
@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
unsigned int w_flags = 0;
unsigned int tmp_data_len, data_len;
unsigned int tmp_data_size, data_size, replaced_size;
void *fsdata;
int ret, i, jblocks;
int i, jblocks;
int err2 = 0;
int replaced_count = 0;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
/*
@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
handle = ext4_journal_start(orig_inode, jblocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
return ret;
*err = PTR_ERR(handle);
return 0;
}
if (segment_eq(get_fs(), KERNEL_DS))
@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
* Just swap data blocks between orig and donor.
*/
if (uninit) {
ret = mext_replace_branches(handle, orig_inode,
donor_inode, orig_blk_offset,
block_len_in_page);
/* Clear the inode cache not to refer to the old data */
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
replaced_count = mext_replace_branches(handle, orig_inode,
donor_inode, orig_blk_offset,
block_len_in_page, err);
goto out2;
}
offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
/* Calculate data_len */
/* Calculate data_size */
if ((orig_blk_offset + block_len_in_page - 1) ==
((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
/* Replace the last block */
tmp_data_len = orig_inode->i_size & (blocksize - 1);
tmp_data_size = orig_inode->i_size & (blocksize - 1);
/*
* If data_len equal zero, it shows data_len is multiples of
* If data_size equal zero, it shows data_size is multiples of
* blocksize. So we set appropriate value.
*/
if (tmp_data_len == 0)
tmp_data_len = blocksize;
if (tmp_data_size == 0)
tmp_data_size = blocksize;
data_len = tmp_data_len +
data_size = tmp_data_size +
((block_len_in_page - 1) << orig_inode->i_blkbits);
} else {
data_len = block_len_in_page << orig_inode->i_blkbits;
}
} else
data_size = block_len_in_page << orig_inode->i_blkbits;
ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
replaced_size = data_size;
*err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
&page, &fsdata);
if (unlikely(ret < 0))
if (unlikely(*err < 0))
goto out;
if (!PageUptodate(page)) {
@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
/* Release old bh and drop refs */
try_to_release_page(page, 0);
ret = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset, block_len_in_page);
if (ret < 0)
goto out;
/* Clear the inode cache not to refer to the old data */
ext4_ext_invalidate_cache(orig_inode);
ext4_ext_invalidate_cache(donor_inode);
replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
orig_blk_offset, block_len_in_page,
&err2);
if (err2) {
if (replaced_count) {
block_len_in_page = replaced_count;
replaced_size =
block_len_in_page << orig_inode->i_blkbits;
} else
goto out;
}
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
bh = bh->b_this_page;
for (i = 0; i < block_len_in_page; i++) {
ret = ext4_get_block(orig_inode,
*err = ext4_get_block(orig_inode,
(sector_t)(orig_blk_offset + i), bh, 0);
if (ret < 0)
if (*err < 0)
goto out;
if (bh->b_this_page != NULL)
bh = bh->b_this_page;
}
ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
*err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
page, fsdata);
page = NULL;
@ -951,7 +921,10 @@ out:
out2:
ext4_journal_stop(handle);
return ret < 0 ? ret : 0;
if (err2)
*err = err2;
return replaced_count;
}
/**
@ -962,7 +935,6 @@ out2:
* @orig_start: logical start offset in block for orig
* @donor_start: logical start offset in block for donor
* @len: the number of blocks to be moved
* @moved_len: moved block length
*
* Check the arguments of ext4_move_extents() whether the files can be
* exchanged with each other.
@ -970,8 +942,8 @@ out2:
*/
static int
mext_check_arguments(struct inode *orig_inode,
struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len, __u64 moved_len)
struct inode *donor_inode, __u64 orig_start,
__u64 donor_start, __u64 *len)
{
ext4_lblk_t orig_blocks, donor_blocks;
unsigned int blkbits = orig_inode->i_blkbits;
@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
ext4_debug("ext4 move extent: suid or sgid is set"
" to donor file [ino:orig %lu, donor %lu]\n",
orig_inode->i_ino, donor_inode->i_ino);
return -EINVAL;
}
/* Ext4 move extent does not support swapfile */
if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
ext4_debug("ext4 move extent: The argument files should "
@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
return -EINVAL;
}
if (moved_len) {
ext4_debug("ext4 move extent: moved_len should be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
donor_inode->i_ino);
return -EINVAL;
}
if ((orig_start > EXT_MAX_BLOCK) ||
(donor_start > EXT_MAX_BLOCK) ||
(*len > EXT_MAX_BLOCK) ||
@ -1088,7 +1060,7 @@ mext_check_arguments(struct inode *orig_inode,
}
if (!*len) {
ext4_debug("ext4 move extent: len shoudld not be 0 "
ext4_debug("ext4 move extent: len should not be 0 "
"[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
donor_inode->i_ino);
return -EINVAL;
@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
return -EINVAL;
}
/* protect orig and donor against a truncate */
/* Protect orig and donor inodes against a truncate */
ret1 = mext_inode_double_lock(orig_inode, donor_inode);
if (ret1 < 0)
return ret1;
mext_double_down_read(orig_inode, donor_inode);
/* Protect extent tree against block allocations via delalloc */
double_down_write_data_sem(orig_inode, donor_inode);
/* Check the filesystem environment whether move_extent can be done */
ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
donor_start, &len, *moved_len);
mext_double_up_read(orig_inode, donor_inode);
donor_start, &len);
if (ret1)
goto out;
@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
seq_start = le32_to_cpu(ext_cur->ee_block);
rest_blocks = seq_blocks;
/* Discard preallocations of two inodes */
down_write(&EXT4_I(orig_inode)->i_data_sem);
ext4_discard_preallocations(orig_inode);
up_write(&EXT4_I(orig_inode)->i_data_sem);
down_write(&EXT4_I(donor_inode)->i_data_sem);
ext4_discard_preallocations(donor_inode);
up_write(&EXT4_I(donor_inode)->i_data_sem);
/*
* Up semaphore to avoid following problems:
* a. transaction deadlock among ext4_journal_start,
* ->write_begin via pagefault, and jbd2_journal_commit
* b. racing with ->readpage, ->write_begin, and ext4_get_block
* in move_extent_per_page
*/
double_up_write_data_sem(orig_inode, donor_inode);
while (orig_page_offset <= seq_end_page) {
/* Swap original branches with new branches */
ret1 = move_extent_per_page(o_filp, donor_inode,
block_len_in_page = move_extent_per_page(
o_filp, donor_inode,
orig_page_offset,
data_offset_in_page,
block_len_in_page, uninit);
if (ret1 < 0)
goto out;
orig_page_offset++;
block_len_in_page, uninit,
&ret1);
/* Count how many blocks we have exchanged */
*moved_len += block_len_in_page;
if (ret1 < 0)
break;
if (*moved_len > len) {
ext4_error(orig_inode->i_sb, __func__,
"We replaced blocks too much! "
"sum of replaced: %llu requested: %llu",
*moved_len, len);
ret1 = -EIO;
goto out;
break;
}
orig_page_offset++;
data_offset_in_page = 0;
rest_blocks -= block_len_in_page;
if (rest_blocks > blocks_per_page)
@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
block_len_in_page = rest_blocks;
}
double_down_write_data_sem(orig_inode, donor_inode);
if (ret1 < 0)
break;
/* Decrease buffer counter */
if (holecheck_path)
ext4_ext_drop_refs(holecheck_path);
@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
}
out:
if (*moved_len) {
ext4_discard_preallocations(orig_inode);
ext4_discard_preallocations(donor_inode);
}
if (orig_path) {
ext4_ext_drop_refs(orig_path);
kfree(orig_path);
@ -1422,7 +1406,7 @@ out:
ext4_ext_drop_refs(holecheck_path);
kfree(holecheck_path);
}
double_up_write_data_sem(orig_inode, donor_inode);
ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
if (ret1)

View file

@ -1292,9 +1292,6 @@ errout:
* add_dirent_to_buf will attempt search the directory block for
* space. It will return -ENOSPC if no space is available, and -EIO
* and -EEXIST if directory entry already exists.
*
* NOTE! bh is NOT released in the case where ENOSPC is returned. In
* all other cases bh is released.
*/
static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
struct inode *inode, struct ext4_dir_entry_2 *de,
@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
top = bh->b_data + blocksize - reclen;
while ((char *) de <= top) {
if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
bh, offset)) {
brelse(bh);
bh, offset))
return -EIO;
}
if (ext4_match(namelen, name, de)) {
brelse(bh);
if (ext4_match(namelen, name, de))
return -EEXIST;
}
nlen = EXT4_DIR_REC_LEN(de->name_len);
rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
if ((de->inode? rlen - nlen: rlen) >= reclen)
@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
err = ext4_journal_get_write_access(handle, bh);
if (err) {
ext4_std_error(dir->i_sb, err);
brelse(bh);
return err;
}
@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
err = ext4_handle_dirty_metadata(handle, dir, bh);
if (err)
ext4_std_error(dir->i_sb, err);
brelse(bh);
return 0;
}
@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
if (!(de))
return retval;
return add_dirent_to_buf(handle, dentry, inode, de, bh);
retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
brelse(bh);
return retval;
}
/*
@ -1514,8 +1507,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
if(!bh)
return retval;
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
if (retval != -ENOSPC)
if (retval != -ENOSPC) {
brelse(bh);
return retval;
}
if (blocks == 1 && !dx_fallback &&
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
@ -1528,7 +1523,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
de = (struct ext4_dir_entry_2 *) bh->b_data;
de->inode = 0;
de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
return add_dirent_to_buf(handle, dentry, inode, de, bh);
retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
brelse(bh);
return retval;
}
/*
@ -1561,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
goto journal_error;
err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
if (err != -ENOSPC) {
bh = NULL;
if (err != -ENOSPC)
goto cleanup;
}
/* Block full, should compress but for now just split */
dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
@ -1657,7 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (!de)
goto cleanup;
err = add_dirent_to_buf(handle, dentry, inode, de, bh);
bh = NULL;
goto cleanup;
journal_error:
@ -1775,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@ -1809,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@ -1846,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@ -2259,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb));
EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);

View file

@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
goto exit_bh;
if (IS_ERR(gdb = bclean(handle, sb, block))) {
err = PTR_ERR(bh);
err = PTR_ERR(gdb);
goto exit_bh;
}
ext4_handle_dirty_metadata(handle, NULL, gdb);

View file

@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
if (sb->s_dirt)
ext4_commit_super(sb, 1);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
if (sbi->s_journal) {
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
ext4_abort(sb, __func__,
"Couldn't clean up the journal");
}
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
es->s_state = cpu_to_le16(sbi->s_mount_state);
@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&(ei->i_block_reservation_lock));
INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
return &ei->vfs_inode;
}
@ -899,6 +903,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
if (test_opt(sb, NO_AUTO_DA_ALLOC))
seq_puts(seq, ",noauto_da_alloc");
if (test_opt(sb, DISCARD))
seq_puts(seq, ",discard");
if (test_opt(sb, NOLOAD))
seq_puts(seq, ",norecovery");
ext4_show_quota_options(seq, sb);
return 0;
@ -1079,7 +1089,8 @@ enum {
Opt_usrquota, Opt_grpquota, Opt_i_version,
Opt_stripe, Opt_delalloc, Opt_nodelalloc,
Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_discard, Opt_nodiscard,
};
static const match_table_t tokens = {
@ -1104,6 +1115,7 @@ static const match_table_t tokens = {
{Opt_acl, "acl"},
{Opt_noacl, "noacl"},
{Opt_noload, "noload"},
{Opt_noload, "norecovery"},
{Opt_nobh, "nobh"},
{Opt_bh, "bh"},
{Opt_commit, "commit=%u"},
@ -1144,6 +1156,8 @@ static const match_table_t tokens = {
{Opt_auto_da_alloc, "auto_da_alloc=%u"},
{Opt_auto_da_alloc, "auto_da_alloc"},
{Opt_noauto_da_alloc, "noauto_da_alloc"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
{Opt_err, NULL},
};
@ -1565,6 +1579,12 @@ set_qf_format:
else
set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
break;
case Opt_discard:
set_opt(sbi->s_mount_opt, DISCARD);
break;
case Opt_nodiscard:
clear_opt(sbi->s_mount_opt, DISCARD);
break;
default:
ext4_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
@ -1673,14 +1693,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
size_t size;
int i;
if (!sbi->s_es->s_log_groups_per_flex) {
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
groups_per_flex = 1 << sbi->s_log_groups_per_flex;
if (groups_per_flex < 2) {
sbi->s_log_groups_per_flex = 0;
return 1;
}
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
groups_per_flex = 1 << sbi->s_log_groups_per_flex;
/* We allocate both existing and potentially added groups */
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
@ -2721,26 +2741,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
if (ext4_load_journal(sb, es, journal_devnum))
goto failed_mount3;
if (!(sb->s_flags & MS_RDONLY) &&
EXT4_SB(sb)->s_journal->j_failed_commit) {
ext4_msg(sb, KERN_CRIT, "error: "
"ext4_fill_super: Journal transaction "
"%u is corrupt",
EXT4_SB(sb)->s_journal->j_failed_commit);
if (test_opt(sb, ERRORS_RO)) {
ext4_msg(sb, KERN_CRIT,
"Mounting filesystem read-only");
sb->s_flags |= MS_RDONLY;
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
}
if (test_opt(sb, ERRORS_PANIC)) {
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb, 1);
goto failed_mount4;
}
}
} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
@ -3668,13 +3668,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
ext4_free_blocks_count_set(es, buf->f_bfree);
buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
if (buf->f_bfree < ext4_r_blocks_count(es))
buf->f_bavail = 0;
buf->f_files = le32_to_cpu(es->s_inodes_count);
buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
buf->f_namelen = EXT4_NAME_LEN;
fsid = le64_to_cpup((void *)es->s_uuid) ^
le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@ -3966,6 +3964,58 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
}
#if !defined(CONTIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext2_fs_type = {
.owner = THIS_MODULE,
.name = "ext2",
.get_sb = ext4_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
static inline void register_as_ext2(void)
{
int err = register_filesystem(&ext2_fs_type);
if (err)
printk(KERN_WARNING
"EXT4-fs: Unable to register as ext2 (%d)\n", err);
}
static inline void unregister_as_ext2(void)
{
unregister_filesystem(&ext2_fs_type);
}
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
#endif
#if !defined(CONTIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
.owner = THIS_MODULE,
.name = "ext3",
.get_sb = ext4_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
static inline void register_as_ext3(void)
{
int err = register_filesystem(&ext3_fs_type);
if (err)
printk(KERN_WARNING
"EXT4-fs: Unable to register as ext3 (%d)\n", err);
}
static inline void unregister_as_ext3(void)
{
unregister_filesystem(&ext3_fs_type);
}
#else
static inline void register_as_ext3(void) { }
static inline void unregister_as_ext3(void) { }
#endif
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
.name = "ext4",
@ -3995,11 +4045,15 @@ static int __init init_ext4_fs(void)
err = init_inodecache();
if (err)
goto out1;
register_as_ext2();
register_as_ext3();
err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
return 0;
out:
unregister_as_ext2();
unregister_as_ext3();
destroy_inodecache();
out1:
exit_ext4_xattr();
@ -4015,6 +4069,8 @@ out4:
static void __exit exit_ext4_fs(void)
{
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
destroy_inodecache();
exit_ext4_xattr();

View file

@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ea_bdebug(bh, "refcount now=0; freeing");
if (ce)
mb_cache_entry_free(ce);
ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
get_bh(bh);
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
error = ext4_handle_dirty_metadata(handle, inode, bh);
@ -832,7 +833,8 @@ inserted:
new_bh = sb_getblk(sb, block);
if (!new_bh) {
getblk_failed:
ext4_free_blocks(handle, inode, block, 1, 1);
ext4_free_blocks(handle, inode, 0, block, 1,
EXT4_FREE_BLOCKS_METADATA);
error = -EIO;
goto cleanup;
}
@ -988,6 +990,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (error)
goto cleanup;
error = ext4_journal_get_write_access(handle, is.iloc.bh);
if (error)
goto cleanup;
if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@ -1013,9 +1019,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (flags & XATTR_CREATE)
goto cleanup;
}
error = ext4_journal_get_write_access(handle, is.iloc.bh);
if (error)
goto cleanup;
if (!value) {
if (!is.s.not_found)
error = ext4_xattr_ibody_set(handle, inode, &i, &is);

View file

@ -636,6 +636,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
JBUFFER_TRACE(jh, "ph3: write metadata");
flags = jbd2_journal_write_metadata_buffer(commit_transaction,
jh, &new_jh, blocknr);
if (flags < 0) {
jbd2_journal_abort(journal, flags);
continue;
}
set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
wbuf[bufs++] = jh2bh(new_jh);

View file

@ -78,6 +78,7 @@ EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err);
EXPORT_SYMBOL(jbd2_journal_clear_err);
EXPORT_SYMBOL(jbd2_log_wait_commit);
EXPORT_SYMBOL(jbd2_log_start_commit);
EXPORT_SYMBOL(jbd2_journal_start_commit);
EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
EXPORT_SYMBOL(jbd2_journal_wipe);
@ -358,6 +359,10 @@ repeat:
jbd_unlock_bh_state(bh_in);
tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
if (!tmp) {
jbd2_journal_put_journal_head(new_jh);
return -ENOMEM;
}
jbd_lock_bh_state(bh_in);
if (jh_in->b_frozen_data) {
jbd2_free(tmp, bh_in->b_size);
@ -1248,6 +1253,13 @@ int jbd2_journal_load(journal_t *journal)
if (jbd2_journal_recover(journal))
goto recovery_error;
if (journal->j_failed_commit) {
printk(KERN_ERR "JBD2: journal transaction %u on %s "
"is corrupt.\n", journal->j_failed_commit,
journal->j_devname);
return -EIO;
}
/* OK, we've finished with the dynamic journal bits:
* reinitialise the dynamic contents of the superblock in memory
* and reset them on disk. */

View file

@ -38,7 +38,7 @@ TRACE_EVENT(ext4_free_inode,
__entry->blocks = inode->i_blocks;
),
TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu",
TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, __entry->uid, __entry->gid,
(unsigned long long) __entry->blocks)
@ -61,7 +61,7 @@ TRACE_EVENT(ext4_request_inode,
__entry->mode = mode;
),
TP_printk("dev %s dir %lu mode %d",
TP_printk("dev %s dir %lu mode 0%o",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir,
__entry->mode)
);
@ -85,7 +85,7 @@ TRACE_EVENT(ext4_allocate_inode,
__entry->mode = mode;
),
TP_printk("dev %s ino %lu dir %lu mode %d",
TP_printk("dev %s ino %lu dir %lu mode 0%o",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
);
@ -305,7 +305,6 @@ TRACE_EVENT(ext4_da_writepages_result,
__field( int, ret )
__field( int, pages_written )
__field( long, pages_skipped )
__field( char, encountered_congestion )
__field( char, more_io )
__field( char, no_nrwrite_index_update )
__field( pgoff_t, writeback_index )
@ -317,17 +316,16 @@ TRACE_EVENT(ext4_da_writepages_result,
__entry->ret = ret;
__entry->pages_written = pages_written;
__entry->pages_skipped = wbc->pages_skipped;
__entry->encountered_congestion = wbc->encountered_congestion;
__entry->more_io = wbc->more_io;
__entry->no_nrwrite_index_update = wbc->no_nrwrite_index_update;
__entry->writeback_index = inode->i_mapping->writeback_index;
),
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d writeback_index %lu",
TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d no_nrwrite_index_update %d writeback_index %lu",
jbd2_dev_to_name(__entry->dev),
(unsigned long) __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
__entry->encountered_congestion, __entry->more_io,
__entry->more_io,
__entry->no_nrwrite_index_update,
(unsigned long) __entry->writeback_index)
);
@ -591,30 +589,32 @@ TRACE_EVENT(ext4_allocate_blocks,
TRACE_EVENT(ext4_free_blocks,
TP_PROTO(struct inode *inode, __u64 block, unsigned long count,
int metadata),
int flags),
TP_ARGS(inode, block, count, metadata),
TP_ARGS(inode, block, count, flags),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, block )
__field( unsigned long, count )
__field( int, metadata )
__field( int, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->block = block;
__entry->count = count;
__entry->metadata = metadata;
__entry->flags = flags;
),
TP_printk("dev %s ino %lu block %llu count %lu metadata %d",
TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->block, __entry->count, __entry->metadata)
__entry->mode, __entry->block, __entry->count,
__entry->flags)
);
TRACE_EVENT(ext4_sync_file,
@ -848,6 +848,32 @@ TRACE_EVENT(ext4_mballoc_free,
__entry->result_len, __entry->result_logical)
);
TRACE_EVENT(ext4_forget,
TP_PROTO(struct inode *inode, int is_metadata, __u64 block),
TP_ARGS(inode, is_metadata, block),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( umode_t, mode )
__field( int, is_metadata )
__field( __u64, block )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->is_metadata = is_metadata;
__entry->block = block;
),
TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu",
jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
__entry->mode, __entry->is_metadata, __entry->block)
);
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */