aha/fs
Jiri Pirko 1f10206cf8 getrusage: fill ru_maxrss value
Make ->ru_maxrss value in struct rusage filled accordingly to rss hiwater
mark.  This struct is filled as a parameter to getrusage syscall.
->ru_maxrss value is set to KBs which is the way it is done in BSD
systems.  /usr/bin/time (gnu time) application converts ->ru_maxrss to KBs
which seems to be incorrect behavior.  Maintainer of this util was
notified by me with the patch which corrects it and cc'ed.

To make this happen we extend struct signal_struct by two fields.  The
first one is ->maxrss which we use to store rss hiwater of the task.  The
second one is ->cmaxrss which we use to store highest rss hiwater of all
task childs.  These values are used in k_getrusage() to actually fill
->ru_maxrss.  k_getrusage() uses current rss hiwater value directly if mm
struct exists.

Note:
exec() clear mm->hiwater_rss, but doesn't clear sig->maxrss.
it is intetionally behavior. *BSD getrusage have exec() inheriting.

test programs
========================================================

getrusage.c
===========
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <signal.h>
 #include <sys/mman.h>

 #include "common.h"

 #define err(str) perror(str), exit(1)

int main(int argc, char** argv)
{
	int status;

	printf("allocate 100MB\n");
	consume(100);

	printf("testcase1: fork inherit? \n");
	printf("  expect: initial.self ~= child.self\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		show_rusage("fork child");
		_exit(0);
	}
	printf("\n");

	printf("testcase2: fork inherit? (cont.) \n");
	printf("  expect: initial.children ~= 100MB, but child.children = 0\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		show_rusage("child");
		_exit(0);
	}
	printf("\n");

	printf("testcase3: fork + malloc \n");
	printf("  expect: child.self ~= initial.self + 50MB\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
	} else {
		printf("allocate +50MB\n");
		consume(50);
		show_rusage("fork child");
		_exit(0);
	}
	printf("\n");

	printf("testcase4: grandchild maxrss\n");
	printf("  expect: post_wait.children ~= 300MB\n");
	show_rusage("initial");
	if (__fork()) {
		wait(&status);
		show_rusage("post_wait");
	} else {
		system("./child -n 0 -g 300");
		_exit(0);
	}
	printf("\n");

	printf("testcase5: zombie\n");
	printf("  expect: pre_wait ~= initial, IOW the zombie process is not accounted.\n");
	printf("          post_wait ~= 400MB, IOW wait() collect child's max_rss. \n");
	show_rusage("initial");
	if (__fork()) {
		sleep(1); /* children become zombie */
		show_rusage("pre_wait");
		wait(&status);
		show_rusage("post_wait");
	} else {
		system("./child -n 400");
		_exit(0);
	}
	printf("\n");

	printf("testcase6: SIG_IGN\n");
	printf("  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).\n");
	show_rusage("initial");
	signal(SIGCHLD, SIG_IGN);
	if (__fork()) {
		sleep(1); /* children become zombie */
		show_rusage("after_zombie");
	} else {
		system("./child -n 500");
		_exit(0);
	}
	printf("\n");
	signal(SIGCHLD, SIG_DFL);

	printf("testcase7: exec (without fork) \n");
	printf("  expect: initial ~= exec \n");
	show_rusage("initial");
	execl("./child", "child", "-v", NULL);

	return 0;
}

child.c
=======
 #include <sys/types.h>
 #include <unistd.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>

 #include "common.h"

int main(int argc, char** argv)
{
	int status;
	int c;
	long consume_size = 0;
	long grandchild_consume_size = 0;
	int show = 0;

	while ((c = getopt(argc, argv, "n:g:v")) != -1) {
		switch (c) {
		case 'n':
			consume_size = atol(optarg);
			break;
		case 'v':
			show = 1;
			break;
		case 'g':

			grandchild_consume_size = atol(optarg);
			break;
		default:
			break;
		}
	}

	if (show)
		show_rusage("exec");

	if (consume_size) {
		printf("child alloc %ldMB\n", consume_size);
		consume(consume_size);
	}

	if (grandchild_consume_size) {
		if (fork()) {
			wait(&status);
		} else {
			printf("grandchild alloc %ldMB\n", grandchild_consume_size);
			consume(grandchild_consume_size);

			exit(0);
		}
	}

	return 0;
}

common.c
========
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 #include <unistd.h>
 #include <signal.h>
 #include <sys/mman.h>

 #include "common.h"
 #define err(str) perror(str), exit(1)

void show_rusage(char *prefix)
{
    	int err, err2;
    	struct rusage rusage_self;
    	struct rusage rusage_children;

    	printf("%s: ", prefix);
    	err = getrusage(RUSAGE_SELF, &rusage_self);
    	if (!err)
    		printf("self %ld ", rusage_self.ru_maxrss);
    	err2 = getrusage(RUSAGE_CHILDREN, &rusage_children);
    	if (!err2)
    		printf("children %ld ", rusage_children.ru_maxrss);

    	printf("\n");
}

/* Some buggy OS need this worthless CPU waste. */
void make_pagefault(void)
{
	void *addr;
	int size = getpagesize();
	int i;

	for (i=0; i<1000; i++) {
		addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
		if (addr == MAP_FAILED)
			err("make_pagefault");
		memset(addr, 0, size);
		munmap(addr, size);
	}
}

void consume(int mega)
{
    	size_t sz = mega * 1024 * 1024;
    	void *ptr;

    	ptr = malloc(sz);
    	memset(ptr, 0, sz);
	make_pagefault();
}

pid_t __fork(void)
{
	pid_t pid;

	pid = fork();
	make_pagefault();

	return pid;
}

common.h
========
void show_rusage(char *prefix);
void make_pagefault(void);
void consume(int mega);
pid_t __fork(void);

FreeBSD result (expected result)
========================================================
allocate 100MB
testcase1: fork inherit?
  expect: initial.self ~= child.self
initial: self 103492 children 0
fork child: self 103540 children 0

testcase2: fork inherit? (cont.)
  expect: initial.children ~= 100MB, but child.children = 0
initial: self 103540 children 103540
child: self 103564 children 0

testcase3: fork + malloc
  expect: child.self ~= initial.self + 50MB
initial: self 103564 children 103564
allocate +50MB
fork child: self 154860 children 0

testcase4: grandchild maxrss
  expect: post_wait.children ~= 300MB
initial: self 103564 children 154860
grandchild alloc 300MB
post_wait: self 103564 children 308720

testcase5: zombie
  expect: pre_wait ~= initial, IOW the zombie process is not accounted.
          post_wait ~= 400MB, IOW wait() collect child's max_rss.
initial: self 103564 children 308720
child alloc 400MB
pre_wait: self 103564 children 308720
post_wait: self 103564 children 411312

testcase6: SIG_IGN
  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
initial: self 103564 children 411312
child alloc 500MB
after_zombie: self 103624 children 411312

testcase7: exec (without fork)
  expect: initial ~= exec
initial: self 103624 children 411312
exec: self 103624 children 411312

Linux result (actual test result)
========================================================
allocate 100MB
testcase1: fork inherit?
  expect: initial.self ~= child.self
initial: self 102848 children 0
fork child: self 102572 children 0

testcase2: fork inherit? (cont.)
  expect: initial.children ~= 100MB, but child.children = 0
initial: self 102876 children 102644
child: self 102572 children 0

testcase3: fork + malloc
  expect: child.self ~= initial.self + 50MB
initial: self 102876 children 102644
allocate +50MB
fork child: self 153804 children 0

testcase4: grandchild maxrss
  expect: post_wait.children ~= 300MB
initial: self 102876 children 153864
grandchild alloc 300MB
post_wait: self 102876 children 307536

testcase5: zombie
  expect: pre_wait ~= initial, IOW the zombie process is not accounted.
          post_wait ~= 400MB, IOW wait() collect child's max_rss.
initial: self 102876 children 307536
child alloc 400MB
pre_wait: self 102876 children 307536
post_wait: self 102876 children 410076

testcase6: SIG_IGN
  expect: initial ~= after_zombie (child's 500MB alloc should be ignored).
initial: self 102876 children 410076
child alloc 500MB
after_zombie: self 102880 children 410076

testcase7: exec (without fork)
  expect: initial ~= exec
initial: self 102880 children 410076
exec: self 102880 children 410076

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-09-23 07:39:30 -07:00
..
9p 9p: remove unnecessary v9fses->options which duplicates the mount string 2009-08-17 16:42:28 -05:00
adfs headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
affs affs: add ->sync_fs 2009-06-11 21:36:14 -04:00
afs seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
autofs trivial: remove unnecessary semicolons 2009-09-21 15:14:58 +02:00
autofs4 autofs4 - fix missed case when changing to use struct path 2009-08-31 17:44:05 -10:00
befs const: mark remaining super_operations const 2009-09-22 07:17:24 -07:00
bfs headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
btrfs Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
cachefiles enforce ->sync_fs is only called for rw superblock 2009-06-11 21:36:06 -04:00
cifs Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
coda splice: implement default splice_read method 2009-05-11 14:13:10 +02:00
configfs writeback: add name to backing_dev_info 2009-09-11 09:20:26 +02:00
cramfs
debugfs debugfs: use specified mode to possibly mark files read/write only 2009-06-15 21:30:28 -07:00
devpts Move magic numbers into magic.h 2009-09-23 07:39:28 -07:00
dlm seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
ecryptfs const: mark remaining address_space_operations const 2009-09-22 07:17:24 -07:00
efs get rid of BKL in fs/efs 2009-06-17 00:36:36 -04:00
exofs headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
exportfs
ext2 const: make block_device_operations const 2009-09-22 07:17:25 -07:00
ext3 const: make struct super_block::s_qcop const 2009-09-22 07:17:24 -07:00
ext4 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
fat fat: Opencode sync_page_range_nolock() 2009-09-14 17:08:17 +02:00
freevxfs headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
fscache FS-Cache: Fixup renamed filenames in comments in internal.h 2009-05-27 10:20:13 -07:00
fuse Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse 2009-09-18 09:23:03 -07:00
gfs2 trivial: fix typo "to to" in multiple files 2009-09-21 15:14:55 +02:00
hfs headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
hfsplus headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
hostfs hostfs: set maximum filesize in superblock for proper LFS support 2009-06-30 18:56:03 -07:00
hpfs headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
hppfs
hugetlbfs Move magic numbers into magic.h 2009-09-23 07:39:28 -07:00
isofs isofs: fix Joliet regression 2009-07-10 19:18:59 -07:00
jbd jbd: Annotate transaction start also for journal_restart() 2009-09-16 17:44:10 +02:00
jbd2 seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
jffs2 const: mark remaining export_operations const 2009-09-22 07:17:24 -07:00
jfs jffs2/jfs/xfs: switch over to 'check_acl' rather than 'permission()' 2009-09-08 11:09:04 -07:00
lockd Merge branch 'for-2.6.32' of git://linux-nfs.org/~bfields/linux 2009-09-22 07:54:33 -07:00
minix Making fs/minix/minix.h double including safe 2009-06-22 11:34:42 -07:00
ncpfs NLS: update handling of Unicode 2009-06-15 21:44:43 -07:00
nfs seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
nfs_common
nfsd seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
nilfs2 const: mark remaining inode_operations as const 2009-09-22 07:17:24 -07:00
nls NLS: update handling of Unicode 2009-06-15 21:44:43 -07:00
notify inotify: update the group mask on mark addition 2009-08-28 12:51:14 -04:00
ntfs ntfs: remove ntfs_file_write 2009-09-23 07:39:29 -07:00
ocfs2 seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
omfs const: mark remaining inode_operations as const 2009-09-22 07:17:24 -07:00
openpromfs
partitions const: make block_device_operations const 2009-09-22 07:17:25 -07:00
proc seq_file: constify seq_operations 2009-09-23 07:39:29 -07:00
qnx4 qnx4: remove write support 2009-09-23 07:39:30 -07:00
quota const: make struct super_block::s_qcop const 2009-09-22 07:17:24 -07:00
ramfs writeback: add name to backing_dev_info 2009-09-11 09:20:26 +02:00
reiserfs const: make struct super_block::s_qcop const 2009-09-22 07:17:24 -07:00
romfs const: mark remaining inode_operations as const 2009-09-22 07:17:24 -07:00
smbfs smbfs: read buffer overflow 2009-09-23 07:39:27 -07:00
squashfs const: mark remaining super_operations const 2009-09-22 07:17:24 -07:00
sysfs Merge branch 'writeback' of git://git.kernel.dk/linux-2.6-block 2009-09-11 09:17:05 -07:00
sysv get rid of BKL in fs/sysv 2009-06-17 00:36:37 -04:00
ubifs const: mark remaining address_space_operations const 2009-09-22 07:17:24 -07:00
udf udf: Fix possible corruption when close races with write 2009-09-14 19:13:01 +02:00
ufs ufs: sector_t cannot be negative 2009-06-18 13:03:46 -07:00
xfs Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jikos/trivial 2009-09-22 07:51:45 -07:00
aio.c aio.c: move EXPORT* macros to line after function 2009-09-23 07:39:29 -07:00
anon_inodes.c anonfd: split interface into file creation and install 2009-09-23 07:39:29 -07:00
attr.c
bad_inode.c
binfmt_aout.c
binfmt_elf.c mm: add get_dump_page 2009-09-22 07:17:40 -07:00
binfmt_elf_fdpic.c mm: add get_dump_page 2009-09-22 07:17:40 -07:00
binfmt_em86.c
binfmt_flat.c flat: fix uninitialized ptr with shared libs 2009-08-07 10:39:57 -07:00
binfmt_misc.c
binfmt_script.c
binfmt_som.c
bio-integrity.c block: Create bip slabs with embedded integrity vectors 2009-07-01 10:56:25 +02:00
bio.c block: fix sg SG_DXFER_TO_FROM_DEV regression 2009-07-10 20:31:53 +02:00
block_dev.c const: make block_device_operations const 2009-09-22 07:17:25 -07:00
buffer.c fs/buffer.c: clean up EXPORT* macros 2009-09-23 07:39:29 -07:00
char_dev.c Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound-2.6 2009-09-11 09:19:35 -07:00
compat.c fix compat_sys_utimensat() 2009-09-23 07:39:30 -07:00
compat_binfmt_elf.c
compat_ioctl.c compat_ioctl: hook up compat handler for FIEMAP ioctl 2009-08-07 10:39:56 -07:00
dcache.c sched: Pull up the might_sleep() check into cond_resched() 2009-07-18 15:51:44 +02:00
dcookies.c
direct-io.c block: Do away with the notion of hardsect_size 2009-05-22 23:22:54 +02:00
drop_caches.c mm: remove __invalidate_mapping_pages variant 2009-06-16 19:47:43 -07:00
eventfd.c anonfd: split interface into file creation and install 2009-09-23 07:39:29 -07:00
eventpoll.c epoll: fix nested calls support 2009-06-18 13:03:41 -07:00
exec.c getrusage: fill ru_maxrss value 2009-09-23 07:39:30 -07:00
fcntl.c headers: smp_lock.h redux 2009-07-12 12:22:34 -07:00
fifo.c
file.c
file_table.c fs: move mark_files_ro into file_table.c 2009-06-11 21:36:02 -04:00
filesystems.c fs: Mark get_filesystem_list() as __init function. 2009-04-20 23:02:52 -04:00
fs-writeback.c writeback: fix possible bdi writeback refcounting problem 2009-09-16 15:18:53 +02:00
fs_struct.c
generic_acl.c
inode.c fs: turn iprune_mutex into rwsem 2009-09-23 07:39:29 -07:00
internal.h Trim a bit of crap from fs.h 2009-06-11 21:36:07 -04:00
ioctl.c fs: Add new pre-allocation ioctls to vfs for compatibility with legacy xfs ioctls 2009-06-24 08:15:27 -04:00
ioprio.c
Kconfig tmpfs: depend on shmem 2009-09-22 07:17:41 -07:00
Kconfig.binfmt
libfs.c vfs: make get_sb_pseudo set s_maxbytes to value that can be cast to signed 2009-08-18 16:31:12 -07:00
locks.c const: make lock_manager_operations const 2009-09-22 07:17:25 -07:00
Makefile nilfs2: update makefile and Kconfig 2009-04-07 08:31:16 -07:00
mbcache.c
mpage.c ext4: Properly initialize the buffer_head state 2009-05-13 15:13:42 -04:00
namei.c Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6 2009-09-11 08:55:49 -07:00
namespace.c vfs: mnt_want_write_file(): fix special file handling 2009-08-07 10:39:56 -07:00
nfsctl.c
no-block.c
open.c CRED: Add some configurable debugging [try #6] 2009-09-02 21:29:01 +10:00
pipe.c lockdep: Fix lockdep annotation for pipe_double_lock() 2009-07-22 21:14:14 +02:00
pnode.c
pnode.h
posix_acl.c
read_write.c splice: implement default splice_read method 2009-05-11 14:13:10 +02:00
read_write.h
readdir.c
select.c poll/select: avoid arithmetic overflow in __estimate_accuracy() 2009-09-23 07:39:27 -07:00
seq_file.c seq_file: add function to write binary data 2009-06-18 13:03:57 -07:00
signalfd.c
splice.c Merge branch 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block 2009-09-14 17:55:15 -07:00
stack.c
stat.c kill vfs_stat_fd / vfs_lstat_fd 2009-04-20 23:02:52 -04:00
super.c const: mark remaining super_operations const 2009-09-22 07:17:24 -07:00
sync.c fs/buffer.c: clean up EXPORT* macros 2009-09-23 07:39:29 -07:00
timerfd.c
utimes.c
xattr.c VFS: Factor out part of vfs_setxattr so it can be called from the SELinux hook for inode_setsecctx. 2009-09-10 10:11:22 +10:00
xattr_acl.c