mirror of
https://github.com/adulau/aha.git
synced 2024-12-27 03:06:10 +00:00
truncate: new helpers
Introduce new truncate helpers truncate_pagecache and inode_newsize_ok. vmtruncate is also consolidated from mm/memory.c and mm/nommu.c into mm/truncate.c. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
eca6f534e6
commit
25d9e2d152
9 changed files with 120 additions and 108 deletions
|
@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
|
||||||
mm start up ... this is a loose form of stability on mm_users. For
|
mm start up ... this is a loose form of stability on mm_users. For
|
||||||
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
|
example, it is used in copy_mm to protect against a racing tlb_gather_mmu
|
||||||
single address space optimization, so that the zap_page_range (from
|
single address space optimization, so that the zap_page_range (from
|
||||||
vmtruncate) does not lose sending ipi's to cloned threads that might
|
truncate) does not lose sending ipi's to cloned threads that might
|
||||||
be spawned underneath it and go to user mode to drag in pte's into tlbs.
|
be spawned underneath it and go to user mode to drag in pte's into tlbs.
|
||||||
|
|
||||||
swap_lock
|
swap_lock
|
||||||
|
|
46
fs/attr.c
46
fs/attr.c
|
@ -18,7 +18,7 @@
|
||||||
/* Taken over from the old code... */
|
/* Taken over from the old code... */
|
||||||
|
|
||||||
/* POSIX UID/GID verification for setting inode attributes. */
|
/* POSIX UID/GID verification for setting inode attributes. */
|
||||||
int inode_change_ok(struct inode *inode, struct iattr *attr)
|
int inode_change_ok(const struct inode *inode, struct iattr *attr)
|
||||||
{
|
{
|
||||||
int retval = -EPERM;
|
int retval = -EPERM;
|
||||||
unsigned int ia_valid = attr->ia_valid;
|
unsigned int ia_valid = attr->ia_valid;
|
||||||
|
@ -60,9 +60,51 @@ fine:
|
||||||
error:
|
error:
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
EXPORT_SYMBOL(inode_change_ok);
|
EXPORT_SYMBOL(inode_change_ok);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* inode_newsize_ok - may this inode be truncated to a given size
|
||||||
|
* @inode: the inode to be truncated
|
||||||
|
* @offset: the new size to assign to the inode
|
||||||
|
* @Returns: 0 on success, negative errno on failure
|
||||||
|
*
|
||||||
|
* inode_newsize_ok will check filesystem limits and ulimits to verify that the
|
||||||
|
* new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
|
||||||
|
* when necessary. Caller must not proceed with inode size change if failure is
|
||||||
|
* returned. @inode must be a file (not directory), with appropriate
|
||||||
|
* permissions to allow truncate (inode_newsize_ok does NOT check these
|
||||||
|
* conditions).
|
||||||
|
*
|
||||||
|
* inode_newsize_ok must be called with i_mutex held.
|
||||||
|
*/
|
||||||
|
int inode_newsize_ok(const struct inode *inode, loff_t offset)
|
||||||
|
{
|
||||||
|
if (inode->i_size < offset) {
|
||||||
|
unsigned long limit;
|
||||||
|
|
||||||
|
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
||||||
|
if (limit != RLIM_INFINITY && offset > limit)
|
||||||
|
goto out_sig;
|
||||||
|
if (offset > inode->i_sb->s_maxbytes)
|
||||||
|
goto out_big;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* truncation of in-use swapfiles is disallowed - it would
|
||||||
|
* cause subsequent swapout to scribble on the now-freed
|
||||||
|
* blocks.
|
||||||
|
*/
|
||||||
|
if (IS_SWAPFILE(inode))
|
||||||
|
return -ETXTBSY;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
out_sig:
|
||||||
|
send_sig(SIGXFSZ, current, 0);
|
||||||
|
out_big:
|
||||||
|
return -EFBIG;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(inode_newsize_ok);
|
||||||
|
|
||||||
int inode_setattr(struct inode * inode, struct iattr * attr)
|
int inode_setattr(struct inode * inode, struct iattr * attr)
|
||||||
{
|
{
|
||||||
unsigned int ia_valid = attr->ia_valid;
|
unsigned int ia_valid = attr->ia_valid;
|
||||||
|
|
|
@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *,
|
||||||
#define buffer_migrate_page NULL
|
#define buffer_migrate_page NULL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern int inode_change_ok(struct inode *, struct iattr *);
|
extern int inode_change_ok(const struct inode *, struct iattr *);
|
||||||
|
extern int inode_newsize_ok(const struct inode *, loff_t offset);
|
||||||
extern int __must_check inode_setattr(struct inode *, struct iattr *);
|
extern int __must_check inode_setattr(struct inode *, struct iattr *);
|
||||||
|
|
||||||
extern void file_update_time(struct file *file);
|
extern void file_update_time(struct file *file);
|
||||||
|
|
|
@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
|
||||||
unmap_mapping_range(mapping, holebegin, holelen, 0);
|
unmap_mapping_range(mapping, holebegin, holelen, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int vmtruncate(struct inode * inode, loff_t offset);
|
extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
|
||||||
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
|
extern int vmtruncate(struct inode *inode, loff_t offset);
|
||||||
|
extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
|
||||||
|
|
||||||
#ifdef CONFIG_MMU
|
#ifdef CONFIG_MMU
|
||||||
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
|
||||||
|
|
|
@ -58,7 +58,7 @@
|
||||||
/*
|
/*
|
||||||
* Lock ordering:
|
* Lock ordering:
|
||||||
*
|
*
|
||||||
* ->i_mmap_lock (vmtruncate)
|
* ->i_mmap_lock (truncate_pagecache)
|
||||||
* ->private_lock (__free_pte->__set_page_dirty_buffers)
|
* ->private_lock (__free_pte->__set_page_dirty_buffers)
|
||||||
* ->swap_lock (exclusive_swap_page, others)
|
* ->swap_lock (exclusive_swap_page, others)
|
||||||
* ->mapping->tree_lock
|
* ->mapping->tree_lock
|
||||||
|
|
62
mm/memory.c
62
mm/memory.c
|
@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
|
||||||
unsigned long addr = vma->vm_start;
|
unsigned long addr = vma->vm_start;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hide vma from rmap and vmtruncate before freeing pgtables
|
* Hide vma from rmap and truncate_pagecache before freeing
|
||||||
|
* pgtables
|
||||||
*/
|
*/
|
||||||
anon_vma_unlink(vma);
|
anon_vma_unlink(vma);
|
||||||
unlink_file_vma(vma);
|
unlink_file_vma(vma);
|
||||||
|
@ -2407,7 +2408,7 @@ restart:
|
||||||
* @mapping: the address space containing mmaps to be unmapped.
|
* @mapping: the address space containing mmaps to be unmapped.
|
||||||
* @holebegin: byte in first page to unmap, relative to the start of
|
* @holebegin: byte in first page to unmap, relative to the start of
|
||||||
* the underlying file. This will be rounded down to a PAGE_SIZE
|
* the underlying file. This will be rounded down to a PAGE_SIZE
|
||||||
* boundary. Note that this is different from vmtruncate(), which
|
* boundary. Note that this is different from truncate_pagecache(), which
|
||||||
* must keep the partial page. In contrast, we must get rid of
|
* must keep the partial page. In contrast, we must get rid of
|
||||||
* partial pages.
|
* partial pages.
|
||||||
* @holelen: size of prospective hole in bytes. This will be rounded
|
* @holelen: size of prospective hole in bytes. This will be rounded
|
||||||
|
@ -2458,63 +2459,6 @@ void unmap_mapping_range(struct address_space *mapping,
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(unmap_mapping_range);
|
EXPORT_SYMBOL(unmap_mapping_range);
|
||||||
|
|
||||||
/**
|
|
||||||
* vmtruncate - unmap mappings "freed" by truncate() syscall
|
|
||||||
* @inode: inode of the file used
|
|
||||||
* @offset: file offset to start truncating
|
|
||||||
*
|
|
||||||
* NOTE! We have to be ready to update the memory sharing
|
|
||||||
* between the file and the memory map for a potential last
|
|
||||||
* incomplete page. Ugly, but necessary.
|
|
||||||
*/
|
|
||||||
int vmtruncate(struct inode * inode, loff_t offset)
|
|
||||||
{
|
|
||||||
if (inode->i_size < offset) {
|
|
||||||
unsigned long limit;
|
|
||||||
|
|
||||||
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
|
||||||
if (limit != RLIM_INFINITY && offset > limit)
|
|
||||||
goto out_sig;
|
|
||||||
if (offset > inode->i_sb->s_maxbytes)
|
|
||||||
goto out_big;
|
|
||||||
i_size_write(inode, offset);
|
|
||||||
} else {
|
|
||||||
struct address_space *mapping = inode->i_mapping;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* truncation of in-use swapfiles is disallowed - it would
|
|
||||||
* cause subsequent swapout to scribble on the now-freed
|
|
||||||
* blocks.
|
|
||||||
*/
|
|
||||||
if (IS_SWAPFILE(inode))
|
|
||||||
return -ETXTBSY;
|
|
||||||
i_size_write(inode, offset);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* unmap_mapping_range is called twice, first simply for
|
|
||||||
* efficiency so that truncate_inode_pages does fewer
|
|
||||||
* single-page unmaps. However after this first call, and
|
|
||||||
* before truncate_inode_pages finishes, it is possible for
|
|
||||||
* private pages to be COWed, which remain after
|
|
||||||
* truncate_inode_pages finishes, hence the second
|
|
||||||
* unmap_mapping_range call must be made for correctness.
|
|
||||||
*/
|
|
||||||
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
|
|
||||||
truncate_inode_pages(mapping, offset);
|
|
||||||
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (inode->i_op->truncate)
|
|
||||||
inode->i_op->truncate(inode);
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
out_sig:
|
|
||||||
send_sig(SIGXFSZ, current, 0);
|
|
||||||
out_big:
|
|
||||||
return -EFBIG;
|
|
||||||
}
|
|
||||||
EXPORT_SYMBOL(vmtruncate);
|
|
||||||
|
|
||||||
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
|
int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
|
||||||
{
|
{
|
||||||
struct address_space *mapping = inode->i_mapping;
|
struct address_space *mapping = inode->i_mapping;
|
||||||
|
|
|
@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
|
||||||
if (vma->vm_file) {
|
if (vma->vm_file) {
|
||||||
/*
|
/*
|
||||||
* Subtle point from Rajesh Venkatasubramanian: before
|
* Subtle point from Rajesh Venkatasubramanian: before
|
||||||
* moving file-based ptes, we must lock vmtruncate out,
|
* moving file-based ptes, we must lock truncate_pagecache
|
||||||
* since it might clean the dst vma before the src vma,
|
* out, since it might clean the dst vma before the src vma,
|
||||||
* and we propagate stale pages into the dst afterward.
|
* and we propagate stale pages into the dst afterward.
|
||||||
*/
|
*/
|
||||||
mapping = vma->vm_file->f_mapping;
|
mapping = vma->vm_file->f_mapping;
|
||||||
|
|
40
mm/nommu.c
40
mm/nommu.c
|
@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
|
||||||
struct vm_operations_struct generic_file_vm_ops = {
|
struct vm_operations_struct generic_file_vm_ops = {
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
* Handle all mappings that got truncated by a "truncate()"
|
|
||||||
* system call.
|
|
||||||
*
|
|
||||||
* NOTE! We have to be ready to update the memory sharing
|
|
||||||
* between the file and the memory map for a potential last
|
|
||||||
* incomplete page. Ugly, but necessary.
|
|
||||||
*/
|
|
||||||
int vmtruncate(struct inode *inode, loff_t offset)
|
|
||||||
{
|
|
||||||
struct address_space *mapping = inode->i_mapping;
|
|
||||||
unsigned long limit;
|
|
||||||
|
|
||||||
if (inode->i_size < offset)
|
|
||||||
goto do_expand;
|
|
||||||
i_size_write(inode, offset);
|
|
||||||
|
|
||||||
truncate_inode_pages(mapping, offset);
|
|
||||||
goto out_truncate;
|
|
||||||
|
|
||||||
do_expand:
|
|
||||||
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
|
|
||||||
if (limit != RLIM_INFINITY && offset > limit)
|
|
||||||
goto out_sig;
|
|
||||||
if (offset > inode->i_sb->s_maxbytes)
|
|
||||||
goto out;
|
|
||||||
i_size_write(inode, offset);
|
|
||||||
|
|
||||||
out_truncate:
|
|
||||||
if (inode->i_op->truncate)
|
|
||||||
inode->i_op->truncate(inode);
|
|
||||||
return 0;
|
|
||||||
out_sig:
|
|
||||||
send_sig(SIGXFSZ, current, 0);
|
|
||||||
out:
|
|
||||||
return -EFBIG;
|
|
||||||
}
|
|
||||||
|
|
||||||
EXPORT_SYMBOL(vmtruncate);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return the total memory allocated for this pointer, not
|
* Return the total memory allocated for this pointer, not
|
||||||
* just what the caller asked for.
|
* just what the caller asked for.
|
||||||
|
|
|
@ -465,3 +465,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
|
||||||
return invalidate_inode_pages2_range(mapping, 0, -1);
|
return invalidate_inode_pages2_range(mapping, 0, -1);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
|
EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* truncate_pagecache - unmap and remove pagecache that has been truncated
|
||||||
|
* @inode: inode
|
||||||
|
* @old: old file offset
|
||||||
|
* @new: new file offset
|
||||||
|
*
|
||||||
|
* inode's new i_size must already be written before truncate_pagecache
|
||||||
|
* is called.
|
||||||
|
*
|
||||||
|
* This function should typically be called before the filesystem
|
||||||
|
* releases resources associated with the freed range (eg. deallocates
|
||||||
|
* blocks). This way, pagecache will always stay logically coherent
|
||||||
|
* with on-disk format, and the filesystem would not have to deal with
|
||||||
|
* situations such as writepage being called for a page that has already
|
||||||
|
* had its underlying blocks deallocated.
|
||||||
|
*/
|
||||||
|
void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
|
||||||
|
{
|
||||||
|
if (new < old) {
|
||||||
|
struct address_space *mapping = inode->i_mapping;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* unmap_mapping_range is called twice, first simply for
|
||||||
|
* efficiency so that truncate_inode_pages does fewer
|
||||||
|
* single-page unmaps. However after this first call, and
|
||||||
|
* before truncate_inode_pages finishes, it is possible for
|
||||||
|
* private pages to be COWed, which remain after
|
||||||
|
* truncate_inode_pages finishes, hence the second
|
||||||
|
* unmap_mapping_range call must be made for correctness.
|
||||||
|
*/
|
||||||
|
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
|
||||||
|
truncate_inode_pages(mapping, new);
|
||||||
|
unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(truncate_pagecache);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* vmtruncate - unmap mappings "freed" by truncate() syscall
|
||||||
|
* @inode: inode of the file used
|
||||||
|
* @offset: file offset to start truncating
|
||||||
|
*
|
||||||
|
* NOTE! We have to be ready to update the memory sharing
|
||||||
|
* between the file and the memory map for a potential last
|
||||||
|
* incomplete page. Ugly, but necessary.
|
||||||
|
*/
|
||||||
|
int vmtruncate(struct inode *inode, loff_t offset)
|
||||||
|
{
|
||||||
|
loff_t oldsize;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
error = inode_newsize_ok(inode, offset);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
oldsize = inode->i_size;
|
||||||
|
i_size_write(inode, offset);
|
||||||
|
truncate_pagecache(inode, oldsize, offset);
|
||||||
|
if (inode->i_op->truncate)
|
||||||
|
inode->i_op->truncate(inode);
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL(vmtruncate);
|
||||||
|
|
Loading…
Reference in a new issue