Staging: pohmelfs: distributed locking and cache coherency protocol.

POHMELFS uses a writeback cache built on top of a MO(E)SI-like
coherency protocol. This patch includes the protocol implementation and the
cache object processing helpers (such as allocation and completion callbacks).

POHMELFS uses scalable cached read/write locking. No additional requests
are performed once a lock has been granted to the filesystem. The same
protocol is used by the server for on-demand flushing of the client's cache
(for example, when the server wants to update local data).

Signed-off-by: Evgeniy Polyakov <zbr@ioremap.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
Evgeniy Polyakov 2009-02-09 17:02:39 +03:00 committed by Greg Kroah-Hartman
parent b3f08cad3d
commit ac7036c131
2 changed files with 353 additions and 0 deletions

View file

@ -0,0 +1,182 @@
/*
* 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include "netfs.h"
/*
 * Build and submit a NETFS_LOCK transaction for @pi.
 *
 * The transaction payload is laid out right after the command header as:
 *   [path string][struct netfs_lock][struct netfs_inode_info (optional)]
 * The inode info is appended only when POHMELFS_LOCK_GRAB is NOT set in
 * @type.
 *
 * @pi:    inode the lock request refers to
 * @id:    value stored in the command's id field
 * @start: value stored in the lock's start field
 * @size:  value stored in the lock's size field
 * @type:  lock type flags, stored in the lock's type field
 *
 * Returns the result of netfs_trans_finish() once the transaction has been
 * built, or a negative error code (which is also logged) when the path
 * length cannot be obtained, the transaction cannot be allocated, or the
 * path string cannot be constructed.
 */
static int pohmelfs_send_lock_trans(struct pohmelfs_inode *pi,
		u64 id, u64 start, u32 size, int type)
{
	struct inode *vfs = &pi->vfs_inode;
	struct pohmelfs_sb *psb = POHMELFS_SB(vfs->i_sb);
	struct netfs_trans *trans;
	struct netfs_cmd *hdr;
	struct netfs_lock *lock;
	void *payload;
	int info_len, plen, ret;

	/* Inode attributes travel with the request only when not grabbing. */
	if (type & POHMELFS_LOCK_GRAB)
		info_len = 0;
	else
		info_len = sizeof(struct netfs_inode_info);

	ret = pohmelfs_path_length(pi);
	if (ret < 0)
		goto out_report;
	plen = ret;

	/* Size the transaction using the path length estimate. */
	trans = netfs_trans_alloc(psb, plen + sizeof(struct netfs_lock) + info_len, 0, 0);
	if (!trans) {
		ret = -ENOMEM;
		goto out_report;
	}

	hdr = netfs_trans_current(trans);
	payload = hdr + 1;

	/* From here on the length actually written is used, not the estimate. */
	ret = pohmelfs_construct_path_string(pi, payload, plen);
	if (ret < 0)
		goto out_free_trans;
	plen = ret;

	/* The lock descriptor immediately follows the path string. */
	lock = payload + plen;
	lock->start = start;
	lock->size = size;
	lock->type = type;
	lock->ino = pi->ino;

	hdr->cmd = NETFS_LOCK;
	hdr->start = 0;
	hdr->id = id;
	hdr->size = sizeof(struct netfs_lock) + plen + info_len;
	hdr->ext = plen;
	hdr->csize = 0;

	netfs_convert_cmd(hdr);
	netfs_convert_lock(lock);

	if (info_len) {
		/* Snapshot of the local inode attributes, placed after the lock. */
		struct netfs_inode_info *attrs = (struct netfs_inode_info *)(lock + 1);

		attrs->mode = vfs->i_mode;
		attrs->nlink = vfs->i_nlink;
		attrs->uid = vfs->i_uid;
		attrs->gid = vfs->i_gid;
		attrs->blocks = vfs->i_blocks;
		attrs->rdev = vfs->i_rdev;
		attrs->size = vfs->i_size;
		attrs->version = vfs->i_version;

		netfs_convert_inode_info(attrs);
	}

	netfs_trans_update(hdr, trans, plen + sizeof(struct netfs_lock) + info_len);

	return netfs_trans_finish(trans, psb);

out_free_trans:
	netfs_trans_free(trans);
out_report:
	printk("%s: err: %d.\n", __func__, ret);
	return ret;
}
/*
 * Obtain a lock of the given @type on @pi from the server.
 *
 * Returns 0 immediately when pohmelfs_need_lock() reports that nothing has
 * to be done. Otherwise an mcache object is allocated, a NETFS_LOCK request
 * carrying the object's generation number as its id is sent with
 * POHMELFS_LOCK_GRAB set, and the caller waits on the object's completion
 * for at most psb->mcache_timeout.
 *
 * When the reply status (m->err) is 0, the attributes found in m->info are
 * applied to the inode; a failure of pohmelfs_setattr_raw() only suppresses
 * the fsnotify event and is not reported to the caller. A reply status of
 * -ENOENT is tolerated: the attributes are left untouched, but the lock is
 * still recorded as held.
 *
 * NOTE(review): m->err and m->info are presumably filled in by the reply
 * handler before it signals m->complete - confirm against the receiver.
 *
 * @pi:    inode to lock
 * @start: start of the range, forwarded in the request
 * @size:  size of the range, forwarded in the request
 * @type:  lock type to acquire; stored in pi->lock_type on success
 *
 * Returns 0 on success, -ETIMEDOUT when no reply arrived in time, or another
 * negative error code from allocation, sending or the reply itself.
 */
int pohmelfs_data_lock(struct pohmelfs_inode *pi, u64 start, u32 size, int type)
{
	struct pohmelfs_sb *psb = POHMELFS_SB(pi->vfs_inode.i_sb);
	struct pohmelfs_mcache *m;
	struct inode *inode = &pi->vfs_inode;
	struct iattr iattr;
	unsigned long left;
	int err;

	dprintk("%s: %p: ino: %llu, start: %llu, size: %u, "
			"type: %d, locked as: %d, owned: %d.\n",
			__func__, &pi->vfs_inode, pi->ino,
			start, size, type, pi->lock_type,
			!!test_bit(NETFS_INODE_OWNED, &pi->state));

	if (!pohmelfs_need_lock(pi, type))
		return 0;

	m = pohmelfs_mcache_alloc(psb, start, size, NULL);
	if (IS_ERR(m))
		return PTR_ERR(m);

	err = pohmelfs_send_lock_trans(pi, m->gen, start, size,
			type | POHMELFS_LOCK_GRAB);
	if (err)
		goto err_out_put;

	/*
	 * wait_for_completion_timeout() returns an unsigned long (0 on
	 * timeout, remaining jiffies otherwise). Keep it in a variable of
	 * that type: squeezing it into an int could turn a non-zero
	 * remainder into 0 and be misread as a timeout.
	 */
	left = wait_for_completion_timeout(&m->complete, psb->mcache_timeout);
	if (left)
		err = m->err;
	else
		err = -ETIMEDOUT;

	if (err) {
		printk("%s: %p: ino: %llu, mgen: %llu, start: %llu, size: %u, err: %d.\n",
			__func__, &pi->vfs_inode, pi->ino, m->gen, start, size, err);
	}

	/* -ENOENT is the only reply error that still lets the lock be taken. */
	if (err && (err != -ENOENT))
		goto err_out_put;

	if (!err) {
		netfs_convert_inode_info(&m->info);

		iattr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_SIZE | ATTR_ATIME;
		iattr.ia_mode = m->info.mode;
		iattr.ia_uid = m->info.uid;
		iattr.ia_gid = m->info.gid;
		iattr.ia_size = m->info.size;
		iattr.ia_atime = CURRENT_TIME;

		dprintk("%s: %p: ino: %llu, mgen: %llu, start: %llu, isize: %llu -> %llu.\n",
			__func__, &pi->vfs_inode, pi->ino, m->gen, start, inode->i_size, m->info.size);

		/* A setattr failure is not propagated; it only skips the notify. */
		err = pohmelfs_setattr_raw(inode, &iattr);
		if (!err) {
			struct dentry *dentry = d_find_alias(inode);

			if (dentry) {
				fsnotify_change(dentry, iattr.ia_valid);
				dput(dentry);
			}
		}
	}

	pi->lock_type = type;
	set_bit(NETFS_INODE_OWNED, &pi->state);

	pohmelfs_mcache_put(psb, m);

	return 0;

err_out_put:
	pohmelfs_mcache_put(psb, m);
	return err;
}
/*
 * Drop the locally recorded lock state of @pi and send a lock transaction
 * of the given @type for the range [@start, @start + @size).
 *
 * The local state (lock type, NETFS_INODE_REMOTE_DIR_SYNCED and
 * NETFS_INODE_OWNED bits) is cleared before the request is sent and is not
 * restored if sending fails. The inode number is used as the command id.
 *
 * Returns whatever pohmelfs_send_lock_trans() returns.
 */
int pohmelfs_data_unlock(struct pohmelfs_inode *pi, u64 start, u32 size, int type)
{
	int ret;

	dprintk("%s: %p: ino: %llu, start: %llu, size: %u, type: %d.\n",
			__func__, &pi->vfs_inode, pi->ino, start, size, type);

	pi->lock_type = 0;

	clear_bit(NETFS_INODE_REMOTE_DIR_SYNCED, &pi->state);
	clear_bit(NETFS_INODE_OWNED, &pi->state);

	ret = pohmelfs_send_lock_trans(pi, pi->ino, start, size, type);

	return ret;
}

View file

@ -0,0 +1,171 @@
/*
* 2007+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include "netfs.h"
/* Slab cache for struct pohmelfs_mcache objects; set up by pohmelfs_mcache_init(). */
static struct kmem_cache *pohmelfs_mcache_cache;
/* Mempool built on the slab cache above; pohmelfs_mcache_alloc() draws objects from it. */
static mempool_t *pohmelfs_mcache_pool;
/*
 * Three-way comparison of two generation numbers.
 *
 * Returns 0 when they are equal, 1 when @gen is smaller than @new and
 * -1 when @gen is larger than @new.
 */
static inline int pohmelfs_mcache_cmp(u64 gen, u64 new)
{
	if (gen == new)
		return 0;

	return (gen < new) ? 1 : -1;
}
/*
 * Look up the mcache object with generation number @gen in the
 * per-superblock red-black tree.
 *
 * Returns the matching object with a reference taken through
 * pohmelfs_mcache_get(), or NULL when no object has that generation.
 *
 * No locking is done here.
 * NOTE(review): presumably the caller holds psb->mcache_lock - confirm.
 */
struct pohmelfs_mcache *pohmelfs_mcache_search(struct pohmelfs_sb *psb, u64 gen)
{
	struct rb_node *node = psb->mcache_root.rb_node;

	while (node) {
		struct pohmelfs_mcache *cur =
			rb_entry(node, struct pohmelfs_mcache, mcache_entry);
		int dir = pohmelfs_mcache_cmp(cur->gen, gen);

		if (dir == 0) {
			pohmelfs_mcache_get(cur);
			return cur;
		}

		/* dir < 0: current generation is larger than the one wanted. */
		node = (dir < 0) ? node->rb_left : node->rb_right;
	}

	return NULL;
}
/*
 * Link @m into the per-superblock red-black tree, keyed by m->gen.
 *
 * Returns 0 on success or -EEXIST when an object with the same generation
 * number is already present (the tree is left unchanged in that case).
 *
 * No locking is done here; pohmelfs_mcache_alloc() calls this with
 * psb->mcache_lock held.
 */
static int pohmelfs_mcache_insert(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
{
	struct rb_root *tree = &psb->mcache_root;
	struct rb_node **link = &tree->rb_node;
	struct rb_node *up = NULL;

	/* Walk down to the empty slot where the new node belongs. */
	while (*link) {
		struct pohmelfs_mcache *cur;
		int dir;

		up = *link;
		cur = rb_entry(up, struct pohmelfs_mcache, mcache_entry);
		dir = pohmelfs_mcache_cmp(cur->gen, m->gen);

		if (dir == 0)
			return -EEXIST;

		link = (dir < 0) ? &up->rb_left : &up->rb_right;
	}

	rb_link_node(&m->mcache_entry, up, link);
	rb_insert_color(&m->mcache_entry, tree);

	return 0;
}
/*
 * Unlink @m from the per-superblock tree if it is currently linked.
 *
 * A zero rb_parent_color is used as the "not in the tree" marker; it is
 * written back after erasing so that a repeated call becomes a no-op.
 *
 * Returns 1 when the object was erased, 0 when @m is NULL or was not marked
 * as linked. No locking is done here.
 */
static int pohmelfs_mcache_remove(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
{
	if (!m || !m->mcache_entry.rb_parent_color)
		return 0;

	rb_erase(&m->mcache_entry, &psb->mcache_root);
	m->mcache_entry.rb_parent_color = 0;

	return 1;
}
/*
 * Unlink @m from the per-superblock tree while holding psb->mcache_lock.
 * Whether the object was actually linked is not reported to the caller.
 */
void pohmelfs_mcache_remove_locked(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
{
	mutex_lock(&psb->mcache_lock);

	/* The helper's "was linked" result is deliberately discarded. */
	(void)pohmelfs_mcache_remove(psb, m);

	mutex_unlock(&psb->mcache_lock);
}
/*
 * Allocate an mcache object from the mempool, initialise it and link it
 * into the per-superblock tree.
 *
 * The object starts with a reference count of 1, a zero error status, a
 * fresh completion and a generation number taken by incrementing
 * psb->mcache_gen. @start, @size and @data are stored as given.
 *
 * Returns the new object, ERR_PTR(-ENOMEM) when the pool allocation fails,
 * or ERR_PTR() of the insertion error (the object is returned to the pool
 * in that case).
 */
struct pohmelfs_mcache *pohmelfs_mcache_alloc(struct pohmelfs_sb *psb, u64 start,
		unsigned int size, void *data)
{
	struct pohmelfs_mcache *obj;
	int ret;

	obj = mempool_alloc(pohmelfs_mcache_pool, GFP_KERNEL);
	if (!obj)
		return ERR_PTR(-ENOMEM);

	init_completion(&obj->complete);
	atomic_set(&obj->refcnt, 1);
	obj->err = 0;
	obj->data = data;
	obj->start = start;
	obj->size = size;
	obj->gen = atomic_long_inc_return(&psb->mcache_gen);

	/* The tree is only modified under mcache_lock. */
	mutex_lock(&psb->mcache_lock);
	ret = pohmelfs_mcache_insert(psb, obj);
	mutex_unlock(&psb->mcache_lock);

	if (ret) {
		mempool_free(obj, pohmelfs_mcache_pool);
		return ERR_PTR(ret);
	}

	return obj;
}
void pohmelfs_mcache_free(struct pohmelfs_sb *psb, struct pohmelfs_mcache *m)
{
pohmelfs_mcache_remove_locked(psb, m);
mempool_free(m, pohmelfs_mcache_pool);
}
/*
 * Create the slab cache for struct pohmelfs_mcache and a mempool with a
 * minimum of 256 elements on top of it.
 *
 * Returns 0 on success or -ENOMEM when either allocation fails; the slab
 * cache is destroyed again if the mempool cannot be created.
 */
int __init pohmelfs_mcache_init(void)
{
	pohmelfs_mcache_cache = kmem_cache_create("pohmelfs_mcache_cache",
			sizeof(struct pohmelfs_mcache),
			0, (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), NULL);
	if (!pohmelfs_mcache_cache)
		return -ENOMEM;

	pohmelfs_mcache_pool = mempool_create_slab_pool(256, pohmelfs_mcache_cache);
	if (pohmelfs_mcache_pool)
		return 0;

	/* Pool creation failed: undo the slab cache creation. */
	kmem_cache_destroy(pohmelfs_mcache_cache);
	return -ENOMEM;
}
/*
 * Tear down what pohmelfs_mcache_init() created: first the mempool, then
 * the slab cache the pool was built on.
 */
void pohmelfs_mcache_exit(void)
{
	/* The pool sits on top of the slab cache, so it is destroyed first. */
	mempool_destroy(pohmelfs_mcache_pool);

	kmem_cache_destroy(pohmelfs_mcache_cache);
}