mempolicy: use struct mempolicy pointer in shmem_sb_info

This patch replaces the mempolicy mode, mode_flags, and nodemask in the
shmem_sb_info struct with a struct mempolicy pointer, initialized to NULL.
This removes the dependency on the details of mempolicy from shmem.c and the
hugetlbfs inode.c, and simplifies the interfaces.

mpol_parse_str() in mempolicy.c is changed to return, via a pointer to a
pointer arg, a struct mempolicy pointer on success.  For MPOL_DEFAULT, the
returned pointer is NULL.  Further, mpol_parse_str() now takes a 'no_context'
argument that causes the input nodemask to be stored in the w.user_nodemask of
the created mempolicy for use when the mempolicy is installed in a tmpfs inode
shared policy tree.  At that time, any cpuset contextualization is applied to
the original input nodemask.  This preserves the previous behavior where the
input nodemask was stored in the superblock.  We can think of the returned
mempolicy as "context free".

Because mpol_parse_str() is now calling mpol_new(), we can remove from
mpol_parse_str() the semantic checks that mpol_new() already performs.

Add 'no_context' parameter to mpol_to_str() to specify that it should format
the nodemask in w.user_nodemask for 'bind' and 'interleave' policies.

Change mpol_shared_policy_init() to take a pointer to a "context free" struct
mempolicy and to create a new, "contextualized" mempolicy using the mode,
mode_flags and user_nodemask from the input mempolicy.

  Note: we know that the mempolicy passed to mpol_to_str() or
  mpol_shared_policy_init() from a tmpfs superblock is "context free".  This
  is currently the only instance thereof.  However, if we found more uses for
  this concept, and introduced any ambiguity as to whether a mempolicy was
  context free or not, we could add another internal mode flag to identify
  context free mempolicies.  Then, we could remove the 'no_context' argument
  from mpol_to_str().

Add shmem_get_sbmpol() to return a reference counted superblock mempolicy,
if one exists, to pass to mpol_shared_policy_init().  We must add the
reference under the sb stat_lock to prevent races with replacement of the mpol
by remount.  This reference is dropped in mpol_shared_policy_init().

[akpm@linux-foundation.org: build fix]
[akpm@linux-foundation.org: another build fix]
[akpm@linux-foundation.org: yet another build fix]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Lee Schermerhorn 2008-04-28 02:13:26 -07:00 committed by Linus Torvalds
parent 3f226aa1cb
commit 71fe804b6d
5 changed files with 130 additions and 91 deletions

View file

@ -504,7 +504,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
INIT_LIST_HEAD(&inode->i_mapping->private_list); INIT_LIST_HEAD(&inode->i_mapping->private_list);
info = HUGETLBFS_I(inode); info = HUGETLBFS_I(inode);
mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0, NULL); mpol_shared_policy_init(&info->policy, NULL);
switch (mode & S_IFMT) { switch (mode & S_IFMT) {
default: default:
init_special_inode(inode, mode, dev); init_special_inode(inode, mode, dev);

View file

@ -182,8 +182,7 @@ struct shared_policy {
spinlock_t lock; spinlock_t lock;
}; };
void mpol_shared_policy_init(struct shared_policy *info, unsigned short mode, void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
unsigned short flags, nodemask_t *nodes);
int mpol_set_shared_policy(struct shared_policy *info, int mpol_set_shared_policy(struct shared_policy *info,
struct vm_area_struct *vma, struct vm_area_struct *vma,
struct mempolicy *new); struct mempolicy *new);
@ -216,10 +215,10 @@ int do_migrate_pages(struct mm_struct *mm,
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
extern int mpol_parse_str(char *str, unsigned short *mode, extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);
unsigned short *mode_flags, nodemask_t *policy_nodes);
extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
int no_context);
#endif #endif
#else #else
@ -262,8 +261,8 @@ static inline int mpol_set_shared_policy(struct shared_policy *info,
return -EINVAL; return -EINVAL;
} }
static inline void mpol_shared_policy_init(struct shared_policy *info, static inline void mpol_shared_policy_init(struct shared_policy *sp,
unsigned short mode, unsigned short flags, nodemask_t *nodes) struct mempolicy *mpol)
{ {
} }
@ -322,13 +321,14 @@ static inline void check_highest_zone(int k)
} }
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
static inline int mpol_parse_str(char *value, unsigned short *policy, static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
unsigned short flags, nodemask_t *policy_nodes) int no_context)
{ {
return 1; return 1; /* error */
} }
static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
int no_context)
{ {
return 0; return 0;
} }

View file

@ -34,9 +34,7 @@ struct shmem_sb_info {
uid_t uid; /* Mount uid for root directory */ uid_t uid; /* Mount uid for root directory */
gid_t gid; /* Mount gid for root directory */ gid_t gid; /* Mount gid for root directory */
mode_t mode; /* Mount mode for root directory */ mode_t mode; /* Mount mode for root directory */
unsigned short policy; /* Default NUMA memory alloc policy */ struct mempolicy *mpol; /* default memory policy for mappings */
unsigned short flags; /* Optional mempolicy flags */
nodemask_t policy_nodes; /* nodemask for preferred and bind */
}; };
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode) static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)

View file

@ -1828,27 +1828,35 @@ restart:
return 0; return 0;
} }
void mpol_shared_policy_init(struct shared_policy *info, unsigned short policy, /**
unsigned short flags, nodemask_t *policy_nodes) * mpol_shared_policy_init - initialize shared policy for inode
* @sp: pointer to inode shared policy
* @mpol: struct mempolicy to install
*
* Install non-NULL @mpol in inode's shared policy rb-tree.
* On entry, the current task has a reference on a non-NULL @mpol.
* This must be released on exit.
*/
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol)
{ {
info->root = RB_ROOT; sp->root = RB_ROOT; /* empty tree == default mempolicy */
spin_lock_init(&info->lock); spin_lock_init(&sp->lock);
if (policy != MPOL_DEFAULT) { if (mpol) {
struct mempolicy *newpol; struct vm_area_struct pvma;
struct mempolicy *new;
/* Falls back to NULL policy [MPOL_DEFAULT] on any error */ /* contextualize the tmpfs mount point mempolicy */
newpol = mpol_new(policy, flags, policy_nodes); new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask);
if (!IS_ERR(newpol)) { mpol_put(mpol); /* drop our ref on sb mpol */
/* Create pseudo-vma that contains just the policy */ if (IS_ERR(new))
struct vm_area_struct pvma; return; /* no valid nodemask intersection */
memset(&pvma, 0, sizeof(struct vm_area_struct)); /* Create pseudo-vma that contains just the policy */
/* Policy covers entire file */ memset(&pvma, 0, sizeof(struct vm_area_struct));
pvma.vm_end = TASK_SIZE; pvma.vm_end = TASK_SIZE; /* policy covers entire file */
mpol_set_shared_policy(info, &pvma, newpol); mpol_set_shared_policy(sp, &pvma, new); /* adds ref */
mpol_put(newpol); mpol_put(new); /* drop initial ref */
}
} }
} }
@ -1962,18 +1970,27 @@ static const char * const policy_types[] =
/** /**
* mpol_parse_str - parse string to mempolicy * mpol_parse_str - parse string to mempolicy
* @str: string containing mempolicy to parse * @str: string containing mempolicy to parse
* @mode: pointer to returned policy mode * @mpol: pointer to struct mempolicy pointer, returned on success.
* @mode_flags: pointer to returned flags * @no_context: flag whether to "contextualize" the mempolicy
* @policy_nodes: pointer to returned nodemask
* *
* Format of input: * Format of input:
* <mode>[=<flags>][:<nodelist>] * <mode>[=<flags>][:<nodelist>]
* *
* Currently only used for tmpfs/shmem mount options * if @no_context is true, save the input nodemask in w.user_nodemask in
* the returned mempolicy. This will be used to "clone" the mempolicy in
* a specific context [cpuset] at a later time. Used to parse tmpfs mpol
* mount option. Note that if 'static' or 'relative' mode flags were
* specified, the input nodemask will already have been saved. Saving
* it again is redundant, but safe.
*
* On success, returns 0, else 1
*/ */
int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags, int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
nodemask_t *policy_nodes)
{ {
struct mempolicy *new = NULL;
unsigned short uninitialized_var(mode);
unsigned short uninitialized_var(mode_flags);
nodemask_t nodes;
char *nodelist = strchr(str, ':'); char *nodelist = strchr(str, ':');
char *flags = strchr(str, '='); char *flags = strchr(str, '=');
int i; int i;
@ -1982,26 +1999,30 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
if (nodelist) { if (nodelist) {
/* NUL-terminate mode or flags string */ /* NUL-terminate mode or flags string */
*nodelist++ = '\0'; *nodelist++ = '\0';
if (nodelist_parse(nodelist, *policy_nodes)) if (nodelist_parse(nodelist, nodes))
goto out; goto out;
if (!nodes_subset(*policy_nodes, node_states[N_HIGH_MEMORY])) if (!nodes_subset(nodes, node_states[N_HIGH_MEMORY]))
goto out; goto out;
} } else
nodes_clear(nodes);
if (flags) if (flags)
*flags++ = '\0'; /* terminate mode string */ *flags++ = '\0'; /* terminate mode string */
for (i = 0; i <= MPOL_LOCAL; i++) { for (i = 0; i <= MPOL_LOCAL; i++) {
if (!strcmp(str, policy_types[i])) { if (!strcmp(str, policy_types[i])) {
*mode = i; mode = i;
break; break;
} }
} }
if (i > MPOL_LOCAL) if (i > MPOL_LOCAL)
goto out; goto out;
switch (*mode) { switch (mode) {
case MPOL_PREFERRED: case MPOL_PREFERRED:
/* Insist on a nodelist of one node only */ /*
* Insist on a nodelist of one node only
*/
if (nodelist) { if (nodelist) {
char *rest = nodelist; char *rest = nodelist;
while (isdigit(*rest)) while (isdigit(*rest))
@ -2010,63 +2031,73 @@ int mpol_parse_str(char *str, unsigned short *mode, unsigned short *mode_flags,
err = 0; err = 0;
} }
break; break;
case MPOL_BIND:
/* Insist on a nodelist */
if (nodelist)
err = 0;
break;
case MPOL_INTERLEAVE: case MPOL_INTERLEAVE:
/* /*
* Default to online nodes with memory if no nodelist * Default to online nodes with memory if no nodelist
*/ */
if (!nodelist) if (!nodelist)
*policy_nodes = node_states[N_HIGH_MEMORY]; nodes = node_states[N_HIGH_MEMORY];
err = 0; err = 0;
break; break;
default: case MPOL_LOCAL:
/* /*
* MPOL_DEFAULT or MPOL_LOCAL * Don't allow a nodelist; mpol_new() checks flags
* Don't allow a nodelist nor flags
*/ */
if (!nodelist && !flags) if (nodelist)
err = 0;
if (*mode == MPOL_DEFAULT)
goto out; goto out;
/* else MPOL_LOCAL */ mode = MPOL_PREFERRED;
*mode = MPOL_PREFERRED;
nodes_clear(*policy_nodes);
break; break;
/*
* case MPOL_BIND: mpol_new() enforces non-empty nodemask.
* case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags.
*/
} }
*mode_flags = 0; mode_flags = 0;
if (flags) { if (flags) {
/* /*
* Currently, we only support two mutually exclusive * Currently, we only support two mutually exclusive
* mode flags. * mode flags.
*/ */
if (!strcmp(flags, "static")) if (!strcmp(flags, "static"))
*mode_flags |= MPOL_F_STATIC_NODES; mode_flags |= MPOL_F_STATIC_NODES;
else if (!strcmp(flags, "relative")) else if (!strcmp(flags, "relative"))
*mode_flags |= MPOL_F_RELATIVE_NODES; mode_flags |= MPOL_F_RELATIVE_NODES;
else else
err = 1; err = 1;
} }
new = mpol_new(mode, mode_flags, &nodes);
if (IS_ERR(new))
err = 1;
else if (no_context)
new->w.user_nodemask = nodes; /* save for contextualization */
out: out:
/* Restore string for error message */ /* Restore string for error message */
if (nodelist) if (nodelist)
*--nodelist = ':'; *--nodelist = ':';
if (flags) if (flags)
*--flags = '='; *--flags = '=';
if (!err)
*mpol = new;
return err; return err;
} }
#endif /* CONFIG_TMPFS */ #endif /* CONFIG_TMPFS */
/* /**
* mpol_to_str - format a mempolicy structure for printing
* @buffer: to contain formatted mempolicy string
* @maxlen: length of @buffer
* @pol: pointer to mempolicy to be formatted
* @no_context: "context free" mempolicy - use nodemask in w.user_nodemask
*
* Convert a mempolicy into a string. * Convert a mempolicy into a string.
* Returns the number of characters in buffer (if positive) * Returns the number of characters in buffer (if positive)
* or an error (negative) * or an error (negative)
*/ */
int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, int no_context)
{ {
char *p = buffer; char *p = buffer;
int l; int l;
@ -2100,7 +2131,10 @@ int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
case MPOL_BIND: case MPOL_BIND:
/* Fall through */ /* Fall through */
case MPOL_INTERLEAVE: case MPOL_INTERLEAVE:
nodes = pol->v.nodes; if (no_context)
nodes = pol->w.user_nodemask;
else
nodes = pol->v.nodes;
break; break;
default: default:
@ -2231,7 +2265,7 @@ int show_numa_map(struct seq_file *m, void *v)
return 0; return 0;
pol = get_vma_policy(priv->task, vma, vma->vm_start); pol = get_vma_policy(priv->task, vma, vma->vm_start);
mpol_to_str(buffer, sizeof(buffer), pol); mpol_to_str(buffer, sizeof(buffer), pol, 0);
mpol_cond_put(pol); mpol_cond_put(pol);
seq_printf(m, "%08lx %s", vma->vm_start, buffer); seq_printf(m, "%08lx %s", vma->vm_start, buffer);

View file

@ -1079,23 +1079,29 @@ redirty:
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
static void shmem_show_mpol(struct seq_file *seq, unsigned short mode, static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
unsigned short flags, const nodemask_t policy_nodes)
{ {
struct mempolicy temp;
char buffer[64]; char buffer[64];
if (mode == MPOL_DEFAULT) if (!mpol || mpol->mode == MPOL_DEFAULT)
return; /* show nothing */ return; /* show nothing */
temp.mode = mode; mpol_to_str(buffer, sizeof(buffer), mpol, 1);
temp.flags = flags;
temp.v.nodes = policy_nodes;
mpol_to_str(buffer, sizeof(buffer), &temp);
seq_printf(seq, ",mpol=%s", buffer); seq_printf(seq, ",mpol=%s", buffer);
} }
static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
struct mempolicy *mpol = NULL;
if (sbinfo->mpol) {
spin_lock(&sbinfo->stat_lock); /* prevent replace/use races */
mpol = sbinfo->mpol;
mpol_get(mpol);
spin_unlock(&sbinfo->stat_lock);
}
return mpol;
}
#endif /* CONFIG_TMPFS */ #endif /* CONFIG_TMPFS */
static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp, static struct page *shmem_swapin(swp_entry_t entry, gfp_t gfp,
@ -1135,8 +1141,7 @@ static struct page *shmem_alloc_page(gfp_t gfp,
} }
#else /* !CONFIG_NUMA */ #else /* !CONFIG_NUMA */
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS
static inline void shmem_show_mpol(struct seq_file *seq, unsigned short policy, static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *p)
unsigned short flags, const nodemask_t policy_nodes)
{ {
} }
#endif /* CONFIG_TMPFS */ #endif /* CONFIG_TMPFS */
@ -1154,6 +1159,13 @@ static inline struct page *shmem_alloc_page(gfp_t gfp,
} }
#endif /* CONFIG_NUMA */ #endif /* CONFIG_NUMA */
#if !defined(CONFIG_NUMA) || !defined(CONFIG_TMPFS)
static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
return NULL;
}
#endif
/* /*
* shmem_getpage - either get the page from swap or allocate a new one * shmem_getpage - either get the page from swap or allocate a new one
* *
@ -1508,8 +1520,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
case S_IFREG: case S_IFREG:
inode->i_op = &shmem_inode_operations; inode->i_op = &shmem_inode_operations;
inode->i_fop = &shmem_file_operations; inode->i_fop = &shmem_file_operations;
mpol_shared_policy_init(&info->policy, sbinfo->policy, mpol_shared_policy_init(&info->policy,
sbinfo->flags, &sbinfo->policy_nodes); shmem_get_sbmpol(sbinfo));
break; break;
case S_IFDIR: case S_IFDIR:
inc_nlink(inode); inc_nlink(inode);
@ -1523,8 +1535,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
* Must not load anything in the rbtree, * Must not load anything in the rbtree,
* mpol_free_shared_policy will not be called. * mpol_free_shared_policy will not be called.
*/ */
mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, 0, mpol_shared_policy_init(&info->policy, NULL);
NULL);
break; break;
} }
} else } else
@ -2139,8 +2150,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
if (*rest) if (*rest)
goto bad_val; goto bad_val;
} else if (!strcmp(this_char,"mpol")) { } else if (!strcmp(this_char,"mpol")) {
if (mpol_parse_str(value, &sbinfo->policy, if (mpol_parse_str(value, &sbinfo->mpol, 1))
&sbinfo->flags, &sbinfo->policy_nodes))
goto bad_val; goto bad_val;
} else { } else {
printk(KERN_ERR "tmpfs: Bad mount option %s\n", printk(KERN_ERR "tmpfs: Bad mount option %s\n",
@ -2191,9 +2201,9 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
sbinfo->free_blocks = config.max_blocks - blocks; sbinfo->free_blocks = config.max_blocks - blocks;
sbinfo->max_inodes = config.max_inodes; sbinfo->max_inodes = config.max_inodes;
sbinfo->free_inodes = config.max_inodes - inodes; sbinfo->free_inodes = config.max_inodes - inodes;
sbinfo->policy = config.policy;
sbinfo->flags = config.flags; mpol_put(sbinfo->mpol);
sbinfo->policy_nodes = config.policy_nodes; sbinfo->mpol = config.mpol; /* transfers initial ref */
out: out:
spin_unlock(&sbinfo->stat_lock); spin_unlock(&sbinfo->stat_lock);
return error; return error;
@ -2214,8 +2224,7 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_printf(seq, ",uid=%u", sbinfo->uid); seq_printf(seq, ",uid=%u", sbinfo->uid);
if (sbinfo->gid != 0) if (sbinfo->gid != 0)
seq_printf(seq, ",gid=%u", sbinfo->gid); seq_printf(seq, ",gid=%u", sbinfo->gid);
shmem_show_mpol(seq, sbinfo->policy, sbinfo->flags, shmem_show_mpol(seq, sbinfo->mpol);
sbinfo->policy_nodes);
return 0; return 0;
} }
#endif /* CONFIG_TMPFS */ #endif /* CONFIG_TMPFS */
@ -2245,9 +2254,7 @@ static int shmem_fill_super(struct super_block *sb,
sbinfo->mode = S_IRWXUGO | S_ISVTX; sbinfo->mode = S_IRWXUGO | S_ISVTX;
sbinfo->uid = current->fsuid; sbinfo->uid = current->fsuid;
sbinfo->gid = current->fsgid; sbinfo->gid = current->fsgid;
sbinfo->policy = MPOL_DEFAULT; sbinfo->mpol = NULL;
sbinfo->flags = 0;
sbinfo->policy_nodes = node_states[N_HIGH_MEMORY];
sb->s_fs_info = sbinfo; sb->s_fs_info = sbinfo;
#ifdef CONFIG_TMPFS #ifdef CONFIG_TMPFS