truncate: new helpers

Introduce new truncate helpers truncate_pagecache and inode_newsize_ok. vmtruncate is also consolidated from mm/memory.c and mm/nommu.c into mm/truncate.c. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2024-12-27 03:06:10 +00:00 · 2009-08-21 02:35:05 +10:00 · 2009-08-21 02:35:05 +10:00 · 25d9e2d152
commit 25d9e2d152
parent eca6f534e6
9 changed files with 120 additions and 108 deletions
--- a/Documentation/vm/locking
+++ b/Documentation/vm/locking
@ -80,7 +80,7 @@ Note: PTL can also be used to guarantee that no new clones using the
 mm start up ... this is a loose form of stability on mm_users. For
 example, it is used in copy_mm to protect against a racing tlb_gather_mmu
 single address space optimization, so that the zap_page_range (from
-vmtruncate) does not lose sending ipi's to cloned threads that might 
+truncate) does not lose sending ipi's to cloned threads that might
 be spawned underneath it and go to user mode to drag in pte's into tlbs.
 swap_lock
--- a/fs/attr.c
+++ b/fs/attr.c
@ -18,7 +18,7 @@
 /* Taken over from the old code... */
 /* POSIX UID/GID verification for setting inode attributes. */
-int inode_change_ok(struct inode *inode, struct iattr *attr)
+int inode_change_ok(const struct inode *inode, struct iattr *attr)
 {
 	int retval = -EPERM;
 	unsigned int ia_valid = attr->ia_valid;
@ -60,9 +60,51 @@ fine:
 error:
 	return retval;
 }
 EXPORT_SYMBOL(inode_change_ok);
 /**
 * inode_newsize_ok - may this inode be truncated to a given size
 * @inode:	the inode to be truncated
 * @offset:	the new size to assign to the inode
 *
 * inode_newsize_ok will check filesystem limits and ulimits to check that the
 * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ
 * when necessary. Caller must not proceed with inode size change if failure is
 * returned. @inode must be a file (not directory), with appropriate
 * permissions to allow truncate (inode_newsize_ok does NOT check these
 * conditions).
 *
 * inode_newsize_ok must be called with i_mutex held.
 *
 * Return: 0 if the new size is acceptable; -EINVAL if @offset is negative;
 * -EFBIG if growing to @offset would exceed the RLIMIT_FSIZE soft limit
 * (SIGXFSZ is sent to current in that case) or the superblock's s_maxbytes;
 * -ETXTBSY if @offset does not exceed the current size and @inode is an
 * in-use swapfile.
 */
 int inode_newsize_ok(const struct inode *inode, loff_t offset)
 {
 	/*
 	 * A negative size is never valid. Without this check it would fail
 	 * the "growing" test below, fall into the shrink branch and be
 	 * reported as acceptable.
 	 */
 	if (offset < 0)
 		return -EINVAL;
 	if (inode->i_size < offset) {
 		unsigned long limit;
 		/* Growing: honour the caller's file size resource limit first. */
 		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
 		if (limit != RLIM_INFINITY && offset > limit)
 			goto out_sig;
 		/* The filesystem limit fails without raising a signal. */
 		if (offset > inode->i_sb->s_maxbytes)
 			goto out_big;
 	} else {
 		/*
 		 * truncation of in-use swapfiles is disallowed - it would
 		 * cause subsequent swapout to scribble on the now-freed
 		 * blocks.
 		 */
 		if (IS_SWAPFILE(inode))
 			return -ETXTBSY;
 	}
 	return 0;
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
 	/* fall through: exceeding the rlimit also reports -EFBIG */
 out_big:
 	return -EFBIG;
 }
 EXPORT_SYMBOL(inode_newsize_ok);
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
 	unsigned int ia_valid = attr->ia_valid;
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@ -2382,7 +2382,8 @@ extern int buffer_migrate_page(struct address_space *,
 #define buffer_migrate_page NULL
 #endif
-extern int inode_change_ok(struct inode *, struct iattr *);
+extern int inode_change_ok(const struct inode *, struct iattr *);
 extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern int __must_check inode_setattr(struct inode *, struct iattr *);
 extern void file_update_time(struct file *file);
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@ -791,8 +791,9 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 	unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
-extern int vmtruncate(struct inode * inode, loff_t offset);
+extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
-extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+extern int vmtruncate(struct inode *inode, loff_t offset);
 extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end);
 #ifdef CONFIG_MMU
 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
--- a/mm/filemap.c
+++ b/mm/filemap.c
@ -58,7 +58,7 @@
 /*
 * Lock ordering:
 *
- *  ->i_mmap_lock		(vmtruncate)
+ *  ->i_mmap_lock		(truncate_pagecache)
 *    ->private_lock		(__free_pte->__set_page_dirty_buffers)
 *      ->swap_lock		(exclusive_swap_page, others)
 *        ->mapping->tree_lock
--- a/mm/memory.c
+++ b/mm/memory.c
@ -297,7 +297,8 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		unsigned long addr = vma->vm_start;
 		/*
-		 * Hide vma from rmap and vmtruncate before freeing pgtables
+		 * Hide vma from rmap and truncate_pagecache before freeing
 		 * pgtables
 		 */
 		anon_vma_unlink(vma);
 		unlink_file_vma(vma);
@ -2407,7 +2408,7 @@ restart:
 * @mapping: the address space containing mmaps to be unmapped.
 * @holebegin: byte in first page to unmap, relative to the start of
 * the underlying file.  This will be rounded down to a PAGE_SIZE
- * boundary.  Note that this is different from vmtruncate(), which
+ * boundary.  Note that this is different from truncate_pagecache(), which
 * must keep the partial page.  In contrast, we must get rid of
 * partial pages.
 * @holelen: size of prospective hole in bytes.  This will be rounded
@ -2458,63 +2459,6 @@ void unmap_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 /**
 * vmtruncate - unmap mappings "freed" by truncate() syscall
 * @inode: inode of the file used
 * @offset: file offset to start truncating
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 *
 * Returns 0 on success. Returns -EFBIG if growing the file to @offset
 * would exceed the RLIMIT_FSIZE soft limit (SIGXFSZ is also sent to
 * current) or the superblock's s_maxbytes. Returns -ETXTBSY if @offset
 * does not exceed the current size and @inode is an in-use swapfile.
 */
 int vmtruncate(struct inode * inode, loff_t offset)
 {
 	/* Growing: only limit checks and the size update are required. */
 	if (inode->i_size < offset) {
 		unsigned long limit;
 		limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
 		if (limit != RLIM_INFINITY && offset > limit)
 			goto out_sig;
 		if (offset > inode->i_sb->s_maxbytes)
 			goto out_big;
 		i_size_write(inode, offset);
 	} else {
 		struct address_space *mapping = inode->i_mapping;
 		/*
 		 * truncation of in-use swapfiles is disallowed - it would
 		 * cause subsequent swapout to scribble on the now-freed
 		 * blocks.
 		 */
 		if (IS_SWAPFILE(inode))
 			return -ETXTBSY;
 		/* The new size is published before any page is dropped. */
 		i_size_write(inode, offset);
 		/*
 		 * unmap_mapping_range is called twice, first simply for
 		 * efficiency so that truncate_inode_pages does fewer
 		 * single-page unmaps.  However after this first call, and
 		 * before truncate_inode_pages finishes, it is possible for
 		 * private pages to be COWed, which remain after
 		 * truncate_inode_pages finishes, hence the second
 		 * unmap_mapping_range call must be made for correctness.
 		 */
 		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 		truncate_inode_pages(mapping, offset);
 		unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
 	}
 	/* Both the grow and shrink paths hand over to the filesystem, if it has a hook. */
 	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
 	/* falls through so the rlimit failure also returns -EFBIG */
 out_big:
 	return -EFBIG;
 }
 EXPORT_SYMBOL(vmtruncate);
 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
 {
 	struct address_space *mapping = inode->i_mapping;
--- a/mm/mremap.c
+++ b/mm/mremap.c
@ -86,8 +86,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	if (vma->vm_file) {
 		/*
 		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock vmtruncate out,
+		 * moving file-based ptes, we must lock truncate_pagecache
-		 * since it might clean the dst vma before the src vma,
+		 * out, since it might clean the dst vma before the src vma,
 		 * and we propagate stale pages into the dst afterward.
 		 */
 		mapping = vma->vm_file->f_mapping;
--- a/mm/nommu.c
+++ b/mm/nommu.c
@ -82,46 +82,6 @@ DECLARE_RWSEM(nommu_region_sem);
 struct vm_operations_struct generic_file_vm_ops = {
 };
 /*
 * Handle all mappings that got truncated by a "truncate()"
 * system call.
 *
 * NOTE! We have to be ready to update the memory sharing
 * between the file and the memory map for a potential last
 * incomplete page.  Ugly, but necessary.
 *
 * Returns 0 on success, or -EFBIG when growing the file to @offset
 * would exceed the RLIMIT_FSIZE soft limit (SIGXFSZ is also sent to
 * current) or the superblock's s_maxbytes.
 */
 int vmtruncate(struct inode *inode, loff_t offset)
 {
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long limit;
 	/* Growing is handled separately because it needs the limit checks. */
 	if (inode->i_size < offset)
 		goto do_expand;
 	/* Shrinking (or same size): publish the size, then drop the pages. */
 	i_size_write(inode, offset);
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
 do_expand:
 	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
 	if (limit != RLIM_INFINITY && offset > limit)
 		goto out_sig;
 	if (offset > inode->i_sb->s_maxbytes)
 		goto out;
 	i_size_write(inode, offset);
 out_truncate:
 	/* Common tail: hand over to the filesystem's truncate hook, if any. */
 	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 out_sig:
 	send_sig(SIGXFSZ, current, 0);
 	/* falls through so the rlimit failure also returns -EFBIG */
 out:
 	return -EFBIG;
 }
 EXPORT_SYMBOL(vmtruncate);
 /*
 * Return the total memory allocated for this pointer, not
 * just what the caller asked for.
--- a/mm/truncate.c
+++ b/mm/truncate.c
@ -465,3 +465,67 @@ int invalidate_inode_pages2(struct address_space *mapping)
 	return invalidate_inode_pages2_range(mapping, 0, -1);
 }
 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 /**
 * truncate_pagecache - drop mappings and pagecache beyond a reduced file size
 * @inode: inode whose pagecache is trimmed
 * @old: file size before the change
 * @new: file size after the change
 *
 * Does nothing unless @new is smaller than @old.
 *
 * The caller must have stored the new i_size in @inode before calling
 * this function.
 *
 * Filesystems should normally call this before giving up the resources
 * that back the truncated range (for example before freeing blocks).
 * Doing so keeps the pagecache logically coherent with the on-disk
 * state, so the filesystem never sees writepage for a page whose blocks
 * have already been deallocated.
 */
 void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
 {
 	struct address_space *mapping;
 	loff_t holebegin;
 	/* Only a shrinking file leaves pagecache beyond the new size. */
 	if (new >= old)
 		return;
 	mapping = inode->i_mapping;
 	holebegin = new + PAGE_SIZE - 1;
 	/*
 	 * The range is unmapped twice. The first pass is an optimisation
 	 * that spares truncate_inode_pages most single-page unmaps. Private
 	 * pages can still be COWed between that pass and the end of
 	 * truncate_inode_pages and would survive it, so the second pass is
 	 * needed for correctness.
 	 */
 	unmap_mapping_range(mapping, holebegin, 0, 1);
 	truncate_inode_pages(mapping, new);
 	unmap_mapping_range(mapping, holebegin, 0, 1);
 }
 EXPORT_SYMBOL(truncate_pagecache);
 /**
 * vmtruncate - set a new file size and drop pagecache beyond it
 * @inode: inode of the file being resized
 * @offset: new size of the file
 *
 * The requested size is validated with inode_newsize_ok() before the
 * inode is modified. On success i_size is updated, truncate_pagecache()
 * is called with the previous and new sizes, and the filesystem's
 * ->truncate method is invoked if it provides one.
 *
 * NOTE! The memory sharing between the file and its memory maps may
 * need updating for a final, incomplete page.  Ugly, but necessary.
 *
 * Returns 0 on success, or the error reported by inode_newsize_ok().
 */
 int vmtruncate(struct inode *inode, loff_t offset)
 {
 	loff_t prev_size;
 	int err = inode_newsize_ok(inode, offset);
 	if (err)
 		return err;
 	/* Remember the old size; truncate_pagecache needs both ends. */
 	prev_size = inode->i_size;
 	i_size_write(inode, offset);
 	truncate_pagecache(inode, prev_size, offset);
 	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 }
 EXPORT_SYMBOL(vmtruncate);