aha/fs/romfs/inode.c
Paul Jackson 4b6a9316fa [PATCH] cpuset memory spread: slab cache filesystems
Mark file system inode and similar slab caches subject to SLAB_MEM_SPREAD
memory spreading.

If a slab cache is marked SLAB_MEM_SPREAD, then anytime that a task that's
in a cpuset with the 'memory_spread_slab' option enabled goes to allocate
from such a slab cache, the allocations are spread evenly over all the
memory nodes (task->mems_allowed) allowed to that task, instead of favoring
allocation on the node local to the current cpu.

The following inode and similar caches are marked SLAB_MEM_SPREAD:

    file                               cache
    ====                               =====
    fs/adfs/super.c                    adfs_inode_cache
    fs/affs/super.c                    affs_inode_cache
    fs/befs/linuxvfs.c                 befs_inode_cache
    fs/bfs/inode.c                     bfs_inode_cache
    fs/block_dev.c                     bdev_cache
    fs/cifs/cifsfs.c                   cifs_inode_cache
    fs/coda/inode.c                    coda_inode_cache
    fs/dquot.c                         dquot
    fs/efs/super.c                     efs_inode_cache
    fs/ext2/super.c                    ext2_inode_cache
    fs/ext2/xattr.c (fs/mbcache.c)     ext2_xattr
    fs/ext3/super.c                    ext3_inode_cache
    fs/ext3/xattr.c (fs/mbcache.c)     ext3_xattr
    fs/fat/cache.c                     fat_cache
    fs/fat/inode.c                     fat_inode_cache
    fs/freevxfs/vxfs_super.c           vxfs_inode
    fs/hpfs/super.c                    hpfs_inode_cache
    fs/isofs/inode.c                   isofs_inode_cache
    fs/jffs/inode-v23.c                jffs_fm
    fs/jffs2/super.c                   jffs2_i
    fs/jfs/super.c                     jfs_ip
    fs/minix/inode.c                   minix_inode_cache
    fs/ncpfs/inode.c                   ncp_inode_cache
    fs/nfs/direct.c                    nfs_direct_cache
    fs/nfs/inode.c                     nfs_inode_cache
    fs/ntfs/super.c                    ntfs_big_inode_cache_name
    fs/ntfs/super.c                    ntfs_inode_cache
    fs/ocfs2/dlm/dlmfs.c               dlmfs_inode_cache
    fs/ocfs2/super.c                   ocfs2_inode_cache
    fs/proc/inode.c                    proc_inode_cache
    fs/qnx4/inode.c                    qnx4_inode_cache
    fs/reiserfs/super.c                reiser_inode_cache
    fs/romfs/inode.c                   romfs_inode_cache
    fs/smbfs/inode.c                   smb_inode_cache
    fs/sysv/inode.c                    sysv_inode_cache
    fs/udf/super.c                     udf_inode_cache
    fs/ufs/super.c                     ufs_inode_cache
    net/socket.c                       sock_inode_cache
    net/sunrpc/rpc_pipe.c              rpc_inode_cache

The choice of which slab caches to so mark was quite simple.  I marked
those already marked SLAB_RECLAIM_ACCOUNT, except for fs/xfs, dentry_cache,
inode_cache, and buffer_head, which were marked in a previous patch.  Even
though SLAB_RECLAIM_ACCOUNT is for a different purpose, it marks the same
potentially large file system i/o related slab caches as we need for memory
spreading.

Given that the rule now becomes "wherever you would have used a
SLAB_RECLAIM_ACCOUNT slab cache flag before (usually the inode cache), use
the SLAB_MEM_SPREAD flag too", this should be easy enough to maintain.
Future file system writers will just copy one of the existing file system
slab cache setups and tend to get it right without thinking.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-03-24 07:33:23 -08:00

648 lines
15 KiB
C

/*
* ROMFS file system, Linux implementation
*
* Copyright (C) 1997-1999 Janos Farkas <chexum@shadow.banki.hu>
*
* Using parts of the minix filesystem
* Copyright (C) 1991, 1992 Linus Torvalds
*
* and parts of the affs filesystem additionally
* Copyright (C) 1993 Ray Burr
* Copyright (C) 1996 Hans-Joachim Widmaier
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Changes
* Changed for 2.1.19 modules
* Jan 1997 Initial release
* Jun 1997 2.1.43+ changes
* Proper page locking in readpage
* Changed to work with 2.1.45+ fs
* Jul 1997 Fixed follow_link
* 2.1.47
* lookup shouldn't return -ENOENT
* from Horst von Brand:
* fail on wrong checksum
* double unlock_super was possible
* correct namelen for statfs
* spotted by Bill Hawes:
* readlink shouldn't iput()
* Jun 1998 2.1.106 from Avery Pennarun: glibc scandir()
* exposed a problem in readdir
* 2.1.107 code-freeze spellchecker run
* Aug 1998 2.1.118+ VFS changes
* Sep 1998 2.1.122 another VFS change (follow_link)
* Apr 1999 2.2.7 no more EBADF checking in
* lookup/readdir, use ERR_PTR
* Jun 1999 2.3.6 d_alloc_root use changed
* 2.3.9 clean up usage of ENOENT/negative
* dentries in lookup
* clean up page flags setting
* (error, uptodate, locking) in
* in readpage
* use init_special_inode for
* fifos/sockets (and streamline) in
* read_inode, fix _ops table order
* Aug 1999 2.3.16 __initfunc() => __init change
* Oct 1999 2.3.24 page->owner hack obsoleted
* Nov 1999 2.3.27 2.3.25+ page->offset => index change
*/
/* todo:
* - see Documentation/filesystems/romfs.txt
* - use allocated, not stack memory for file names?
* - considering write access...
* - network (tftp) files?
* - merge back some _op tables
*/
/*
* Sorry about some optimizations and for some goto's. I just wanted
* to squeeze some more bytes out of this code.. :)
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/romfs_fs.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/vfs.h>
#include <asm/uaccess.h>
struct romfs_inode_info {
unsigned long i_metasize; /* size of non-data area */
unsigned long i_dataoffset; /* from the start of fs */
struct inode vfs_inode;
};
/* instead of private superblock data */
static inline unsigned long romfs_maxsize(struct super_block *sb)
{
return (unsigned long)sb->s_fs_info;
}
static inline struct romfs_inode_info *ROMFS_I(struct inode *inode)
{
return list_entry(inode, struct romfs_inode_info, vfs_inode);
}
static __u32
romfs_checksum(void *data, int size)
{
__u32 sum;
__be32 *ptr;
sum = 0; ptr = data;
size>>=2;
while (size>0) {
sum += be32_to_cpu(*ptr++);
size--;
}
return sum;
}
static struct super_operations romfs_ops;
static int romfs_fill_super(struct super_block *s, void *data, int silent)
{
struct buffer_head *bh;
struct romfs_super_block *rsb;
struct inode *root;
int sz;
/* I would parse the options here, but there are none.. :) */
sb_set_blocksize(s, ROMBSIZE);
s->s_maxbytes = 0xFFFFFFFF;
bh = sb_bread(s, 0);
if (!bh) {
/* XXX merge with other printk? */
printk ("romfs: unable to read superblock\n");
goto outnobh;
}
rsb = (struct romfs_super_block *)bh->b_data;
sz = be32_to_cpu(rsb->size);
if (rsb->word0 != ROMSB_WORD0 || rsb->word1 != ROMSB_WORD1
|| sz < ROMFH_SIZE) {
if (!silent)
printk ("VFS: Can't find a romfs filesystem on dev "
"%s.\n", s->s_id);
goto out;
}
if (romfs_checksum(rsb, min_t(int, sz, 512))) {
printk ("romfs: bad initial checksum on dev "
"%s.\n", s->s_id);
goto out;
}
s->s_magic = ROMFS_MAGIC;
s->s_fs_info = (void *)(long)sz;
s->s_flags |= MS_RDONLY;
/* Find the start of the fs */
sz = (ROMFH_SIZE +
strnlen(rsb->name, ROMFS_MAXFN) + 1 + ROMFH_PAD)
& ROMFH_MASK;
s->s_op = &romfs_ops;
root = iget(s, sz);
if (!root)
goto out;
s->s_root = d_alloc_root(root);
if (!s->s_root)
goto outiput;
brelse(bh);
return 0;
outiput:
iput(root);
out:
brelse(bh);
outnobh:
return -EINVAL;
}
/* That's simple too. */
static int
romfs_statfs(struct super_block *sb, struct kstatfs *buf)
{
buf->f_type = ROMFS_MAGIC;
buf->f_bsize = ROMBSIZE;
buf->f_bfree = buf->f_bavail = buf->f_ffree;
buf->f_blocks = (romfs_maxsize(sb)+ROMBSIZE-1)>>ROMBSBITS;
buf->f_namelen = ROMFS_MAXFN;
return 0;
}
/* some helper routines */
static int
romfs_strnlen(struct inode *i, unsigned long offset, unsigned long count)
{
struct buffer_head *bh;
unsigned long avail, maxsize, res;
maxsize = romfs_maxsize(i->i_sb);
if (offset >= maxsize)
return -1;
/* strnlen is almost always valid */
if (count > maxsize || offset+count > maxsize)
count = maxsize-offset;
bh = sb_bread(i->i_sb, offset>>ROMBSBITS);
if (!bh)
return -1; /* error */
avail = ROMBSIZE - (offset & ROMBMASK);
maxsize = min_t(unsigned long, count, avail);
res = strnlen(((char *)bh->b_data)+(offset&ROMBMASK), maxsize);
brelse(bh);
if (res < maxsize)
return res; /* found all of it */
while (res < count) {
offset += maxsize;
bh = sb_bread(i->i_sb, offset>>ROMBSBITS);
if (!bh)
return -1;
maxsize = min_t(unsigned long, count - res, ROMBSIZE);
avail = strnlen(bh->b_data, maxsize);
res += avail;
brelse(bh);
if (avail < maxsize)
return res;
}
return res;
}
static int
romfs_copyfrom(struct inode *i, void *dest, unsigned long offset, unsigned long count)
{
struct buffer_head *bh;
unsigned long avail, maxsize, res;
maxsize = romfs_maxsize(i->i_sb);
if (offset >= maxsize || count > maxsize || offset+count>maxsize)
return -1;
bh = sb_bread(i->i_sb, offset>>ROMBSBITS);
if (!bh)
return -1; /* error */
avail = ROMBSIZE - (offset & ROMBMASK);
maxsize = min_t(unsigned long, count, avail);
memcpy(dest, ((char *)bh->b_data) + (offset & ROMBMASK), maxsize);
brelse(bh);
res = maxsize; /* all of it */
while (res < count) {
offset += maxsize;
dest += maxsize;
bh = sb_bread(i->i_sb, offset>>ROMBSBITS);
if (!bh)
return -1;
maxsize = min_t(unsigned long, count - res, ROMBSIZE);
memcpy(dest, bh->b_data, maxsize);
brelse(bh);
res += maxsize;
}
return res;
}
static unsigned char romfs_dtype_table[] = {
DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_SOCK, DT_FIFO
};
static int
romfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct inode *i = filp->f_dentry->d_inode;
struct romfs_inode ri;
unsigned long offset, maxoff;
int j, ino, nextfh;
int stored = 0;
char fsname[ROMFS_MAXFN]; /* XXX dynamic? */
lock_kernel();
maxoff = romfs_maxsize(i->i_sb);
offset = filp->f_pos;
if (!offset) {
offset = i->i_ino & ROMFH_MASK;
if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0)
goto out;
offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
}
/* Not really failsafe, but we are read-only... */
for(;;) {
if (!offset || offset >= maxoff) {
offset = maxoff;
filp->f_pos = offset;
goto out;
}
filp->f_pos = offset;
/* Fetch inode info */
if (romfs_copyfrom(i, &ri, offset, ROMFH_SIZE) <= 0)
goto out;
j = romfs_strnlen(i, offset+ROMFH_SIZE, sizeof(fsname)-1);
if (j < 0)
goto out;
fsname[j]=0;
romfs_copyfrom(i, fsname, offset+ROMFH_SIZE, j);
ino = offset;
nextfh = be32_to_cpu(ri.next);
if ((nextfh & ROMFH_TYPE) == ROMFH_HRD)
ino = be32_to_cpu(ri.spec);
if (filldir(dirent, fsname, j, offset, ino,
romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0) {
goto out;
}
stored++;
offset = nextfh & ROMFH_MASK;
}
out:
unlock_kernel();
return stored;
}
static struct dentry *
romfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
{
unsigned long offset, maxoff;
int fslen, res;
struct inode *inode;
char fsname[ROMFS_MAXFN]; /* XXX dynamic? */
struct romfs_inode ri;
const char *name; /* got from dentry */
int len;
res = -EACCES; /* placeholder for "no data here" */
offset = dir->i_ino & ROMFH_MASK;
lock_kernel();
if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0)
goto out;
maxoff = romfs_maxsize(dir->i_sb);
offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
/* OK, now find the file whose name is in "dentry" in the
* directory specified by "dir". */
name = dentry->d_name.name;
len = dentry->d_name.len;
for(;;) {
if (!offset || offset >= maxoff)
goto out0;
if (romfs_copyfrom(dir, &ri, offset, ROMFH_SIZE) <= 0)
goto out;
/* try to match the first 16 bytes of name */
fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, ROMFH_SIZE);
if (len < ROMFH_SIZE) {
if (len == fslen) {
/* both are shorter, and same size */
romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1);
if (strncmp (name, fsname, len) == 0)
break;
}
} else if (fslen >= ROMFH_SIZE) {
/* both are longer; XXX optimize max size */
fslen = romfs_strnlen(dir, offset+ROMFH_SIZE, sizeof(fsname)-1);
if (len == fslen) {
romfs_copyfrom(dir, fsname, offset+ROMFH_SIZE, len+1);
if (strncmp(name, fsname, len) == 0)
break;
}
}
/* next entry */
offset = be32_to_cpu(ri.next) & ROMFH_MASK;
}
/* Hard link handling */
if ((be32_to_cpu(ri.next) & ROMFH_TYPE) == ROMFH_HRD)
offset = be32_to_cpu(ri.spec) & ROMFH_MASK;
if ((inode = iget(dir->i_sb, offset)))
goto outi;
/*
* it's a bit funky, _lookup needs to return an error code
* (negative) or a NULL, both as a dentry. ENOENT should not
* be returned, instead we need to create a negative dentry by
* d_add(dentry, NULL); and return 0 as no error.
* (Although as I see, it only matters on writable file
* systems).
*/
out0: inode = NULL;
outi: res = 0;
d_add (dentry, inode);
out: unlock_kernel();
return ERR_PTR(res);
}
/*
* Ok, we do readpage, to be able to execute programs. Unfortunately,
* we can't use bmap, since we may have looser alignments.
*/
static int
romfs_readpage(struct file *file, struct page * page)
{
struct inode *inode = page->mapping->host;
loff_t offset, avail, readlen;
void *buf;
int result = -EIO;
page_cache_get(page);
lock_kernel();
buf = kmap(page);
if (!buf)
goto err_out;
/* 32 bit warning -- but not for us :) */
offset = page_offset(page);
if (offset < i_size_read(inode)) {
avail = inode->i_size-offset;
readlen = min_t(unsigned long, avail, PAGE_SIZE);
if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
if (readlen < PAGE_SIZE) {
memset(buf + readlen,0,PAGE_SIZE-readlen);
}
SetPageUptodate(page);
result = 0;
}
}
if (result) {
memset(buf, 0, PAGE_SIZE);
SetPageError(page);
}
flush_dcache_page(page);
unlock_page(page);
kunmap(page);
err_out:
page_cache_release(page);
unlock_kernel();
return result;
}
/* Mapping from our types to the kernel */
static struct address_space_operations romfs_aops = {
.readpage = romfs_readpage
};
static struct file_operations romfs_dir_operations = {
.read = generic_read_dir,
.readdir = romfs_readdir,
};
static struct inode_operations romfs_dir_inode_operations = {
.lookup = romfs_lookup,
};
static mode_t romfs_modemap[] =
{
0, S_IFDIR+0644, S_IFREG+0644, S_IFLNK+0777,
S_IFBLK+0600, S_IFCHR+0600, S_IFSOCK+0644, S_IFIFO+0644
};
static void
romfs_read_inode(struct inode *i)
{
int nextfh, ino;
struct romfs_inode ri;
ino = i->i_ino & ROMFH_MASK;
i->i_mode = 0;
/* Loop for finding the real hard link */
for(;;) {
if (romfs_copyfrom(i, &ri, ino, ROMFH_SIZE) <= 0) {
printk("romfs: read error for inode 0x%x\n", ino);
return;
}
/* XXX: do romfs_checksum here too (with name) */
nextfh = be32_to_cpu(ri.next);
if ((nextfh & ROMFH_TYPE) != ROMFH_HRD)
break;
ino = be32_to_cpu(ri.spec) & ROMFH_MASK;
}
i->i_nlink = 1; /* Hard to decide.. */
i->i_size = be32_to_cpu(ri.size);
i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
i->i_uid = i->i_gid = 0;
/* Precalculate the data offset */
ino = romfs_strnlen(i, ino+ROMFH_SIZE, ROMFS_MAXFN);
if (ino >= 0)
ino = ((ROMFH_SIZE+ino+1+ROMFH_PAD)&ROMFH_MASK);
else
ino = 0;
ROMFS_I(i)->i_metasize = ino;
ROMFS_I(i)->i_dataoffset = ino+(i->i_ino&ROMFH_MASK);
/* Compute permissions */
ino = romfs_modemap[nextfh & ROMFH_TYPE];
/* only "normal" files have ops */
switch (nextfh & ROMFH_TYPE) {
case 1:
i->i_size = ROMFS_I(i)->i_metasize;
i->i_op = &romfs_dir_inode_operations;
i->i_fop = &romfs_dir_operations;
if (nextfh & ROMFH_EXEC)
ino |= S_IXUGO;
i->i_mode = ino;
break;
case 2:
i->i_fop = &generic_ro_fops;
i->i_data.a_ops = &romfs_aops;
if (nextfh & ROMFH_EXEC)
ino |= S_IXUGO;
i->i_mode = ino;
break;
case 3:
i->i_op = &page_symlink_inode_operations;
i->i_data.a_ops = &romfs_aops;
i->i_mode = ino | S_IRWXUGO;
break;
default:
/* depending on MBZ for sock/fifos */
nextfh = be32_to_cpu(ri.spec);
init_special_inode(i, ino,
MKDEV(nextfh>>16,nextfh&0xffff));
}
}
static kmem_cache_t * romfs_inode_cachep;
static struct inode *romfs_alloc_inode(struct super_block *sb)
{
struct romfs_inode_info *ei;
ei = (struct romfs_inode_info *)kmem_cache_alloc(romfs_inode_cachep, SLAB_KERNEL);
if (!ei)
return NULL;
return &ei->vfs_inode;
}
static void romfs_destroy_inode(struct inode *inode)
{
kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
}
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
struct romfs_inode_info *ei = (struct romfs_inode_info *) foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR)
inode_init_once(&ei->vfs_inode);
}
static int init_inodecache(void)
{
romfs_inode_cachep = kmem_cache_create("romfs_inode_cache",
sizeof(struct romfs_inode_info),
0, SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
init_once, NULL);
if (romfs_inode_cachep == NULL)
return -ENOMEM;
return 0;
}
static void destroy_inodecache(void)
{
if (kmem_cache_destroy(romfs_inode_cachep))
printk(KERN_INFO "romfs_inode_cache: not all structures were freed\n");
}
static int romfs_remount(struct super_block *sb, int *flags, char *data)
{
*flags |= MS_RDONLY;
return 0;
}
static struct super_operations romfs_ops = {
.alloc_inode = romfs_alloc_inode,
.destroy_inode = romfs_destroy_inode,
.read_inode = romfs_read_inode,
.statfs = romfs_statfs,
.remount_fs = romfs_remount,
};
static struct super_block *romfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super);
}
static struct file_system_type romfs_fs_type = {
.owner = THIS_MODULE,
.name = "romfs",
.get_sb = romfs_get_sb,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
static int __init init_romfs_fs(void)
{
int err = init_inodecache();
if (err)
goto out1;
err = register_filesystem(&romfs_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
out1:
return err;
}
static void __exit exit_romfs_fs(void)
{
unregister_filesystem(&romfs_fs_type);
destroy_inodecache();
}
/* Yes, works even as a module... :) */
module_init(init_romfs_fs)
module_exit(exit_romfs_fs)
MODULE_LICENSE("GPL");