cifs: convert oplock breaks to use slow_work facility (try #4)

This is the fourth respin of the patch to convert oplock breaks to
use the slow_work facility.

A customer of ours was testing a backport of one of the earlier
patchsets, and hit a "Busy inodes after umount..." problem. An oplock
break job had raced with a umount, and the superblock got torn down and
its memory reused. When the oplock break job tried to dereference the
inode->i_sb, the kernel oopsed.

This patchset has the oplock break job hold an inode and vfsmount
reference until the oplock break completes.  With this, there should be
no need to take a tcon reference (the vfsmount implicitly holds one
already).

Currently, when an oplock break comes in there's a chance that the
oplock break job won't occur if the allocation of the oplock_q_entry
fails. There are also some rather nasty races in the allocation and
handling of these structs.

Rather than allocating oplock queue entries when an oplock break comes
in, add a few extra fields to the cifsFileInfo struct. Get rid of the
dedicated cifs_oplock_thread as well and queue the oplock break job to
the slow_work thread pool.

This approach also has the advantage that the oplock break jobs can
potentially run in parallel rather than being serialized as they are
today.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Steve French <sfrench@us.ibm.com>
This commit is contained in:
Jeff Layton 2009-09-21 06:47:50 -04:00 committed by Steve French
parent 48541bd3dd
commit 3bc303c254
10 changed files with 119 additions and 175 deletions

View file

@ -2,6 +2,7 @@ config CIFS
tristate "CIFS support (advanced network filesystem, SMBFS successor)"
depends on INET
select NLS
select SLOW_WORK
help
This is the client VFS module for the Common Internet File System
(CIFS) protocol which is the successor to the Server Message Block

View file

@ -64,9 +64,6 @@ unsigned int multiuser_mount = 0;
unsigned int extended_security = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
unsigned int sign_CIFS_PDUs = 1;
extern struct task_struct *oplockThread; /* remove sparse warning */
struct task_struct *oplockThread = NULL;
/* extern struct task_struct * dnotifyThread; remove sparse warning */
static const struct super_operations cifs_super_ops;
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
module_param(CIFSMaxBufSize, int, 0);
@ -973,89 +970,12 @@ cifs_destroy_mids(void)
kmem_cache_destroy(cifs_oplock_cachep);
}
/*
 * Body of the dedicated oplock break kernel thread.
 *
 * Loops until kthread_should_stop() reports true. On each pass it takes the
 * first entry from cifs_oplock_list (if any), flushes dirty pages of the
 * affected inode when it is a regular file, and then sends an oplock release
 * to the server for the recorded file handle.
 *
 * @dummyarg: not used by this function.
 *
 * Always returns 0.
 *
 * NOTE(review): no reference is taken here on the inode or tcon copied out of
 * the queue entry, so nothing visible in this function keeps them alive while
 * they are used after the lock is dropped.
 */
static int cifs_oplock_thread(void *dummyarg)
{
struct oplock_q_entry *oplock_item;
struct cifsTconInfo *pTcon;
struct inode *inode;
__u16 netfid;
int rc, waitrc = 0;
set_freezable();
do {
/* if we were frozen, re-check the stop condition before doing work */
if (try_to_freeze())
continue;
spin_lock(&cifs_oplock_lock);
if (list_empty(&cifs_oplock_list)) {
/* nothing queued: sleep for up to 39*HZ jiffies or until woken */
spin_unlock(&cifs_oplock_lock);
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(39*HZ);
} else {
oplock_item = list_entry(cifs_oplock_list.next,
struct oplock_q_entry, qhead);
cFYI(1, ("found oplock item to write out"));
/* copy out what we need while the list lock is still held */
pTcon = oplock_item->tcon;
inode = oplock_item->pinode;
netfid = oplock_item->netfid;
spin_unlock(&cifs_oplock_lock);
/* unlinks the entry and frees it; only the local copies are used below */
DeleteOplockQEntry(oplock_item);
/* can not grab inode sem here since it would
deadlock when oplock received on delete
since vfs_unlink holds the i_mutex across
the call */
/* mutex_lock(&inode->i_mutex);*/
if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_CIFS_EXPERIMENTAL
if (CIFS_I(inode)->clientCanCacheAll == 0)
break_lease(inode, FMODE_READ);
else if (CIFS_I(inode)->clientCanCacheRead == 0)
break_lease(inode, FMODE_WRITE);
#endif
/* start writeback; only wait for it and drop cached pages when
read caching has been lost as well */
rc = filemap_fdatawrite(inode->i_mapping);
if (CIFS_I(inode)->clientCanCacheRead == 0) {
waitrc = filemap_fdatawait(
inode->i_mapping);
invalidate_remote_inode(inode);
}
/* report the first error: the write error wins over the wait error */
if (rc == 0)
rc = waitrc;
} else
rc = 0;
/* mutex_unlock(&inode->i_mutex);*/
/* remember a flush failure in the inode's write_behind_rc field */
if (rc)
CIFS_I(inode)->write_behind_rc = rc;
cFYI(1, ("Oplock flush inode %p rc %d",
inode, rc));
/* releasing stale oplock after recent reconnect
of smb session using a now incorrect file
handle is not a data integrity issue but do
not bother sending an oplock release if session
to server still is disconnected since oplock
already released by the server in that case */
if (!pTcon->need_reconnect) {
rc = CIFSSMBLock(0, pTcon, netfid,
0 /* len */ , 0 /* offset */, 0,
0, LOCKING_ANDX_OPLOCK_RELEASE,
false /* wait flag */);
cFYI(1, ("Oplock release rc = %d", rc));
}
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1); /* yield in case q were corrupt */
}
} while (!kthread_should_stop());
return 0;
}
static int __init
init_cifs(void)
{
int rc = 0;
cifs_proc_init();
INIT_LIST_HEAD(&cifs_tcp_ses_list);
INIT_LIST_HEAD(&cifs_oplock_list);
#ifdef CONFIG_CIFS_EXPERIMENTAL
INIT_LIST_HEAD(&GlobalDnotifyReqList);
INIT_LIST_HEAD(&GlobalDnotifyRsp_Q);
@ -1084,7 +1004,6 @@ init_cifs(void)
rwlock_init(&GlobalSMBSeslock);
rwlock_init(&cifs_tcp_ses_lock);
spin_lock_init(&GlobalMid_Lock);
spin_lock_init(&cifs_oplock_lock);
if (cifs_max_pending < 2) {
cifs_max_pending = 2;
@ -1119,18 +1038,15 @@ init_cifs(void)
if (rc)
goto out_unregister_key_type;
#endif
oplockThread = kthread_run(cifs_oplock_thread, NULL, "cifsoplockd");
if (IS_ERR(oplockThread)) {
rc = PTR_ERR(oplockThread);
cERROR(1, ("error %d create oplock thread", rc));
goto out_unregister_dfs_key_type;
}
rc = slow_work_register_user();
if (rc)
goto out_unregister_resolver_key;
return 0;
out_unregister_dfs_key_type:
#ifdef CONFIG_CIFS_DFS_UPCALL
out_unregister_resolver_key:
unregister_key_type(&key_type_dns_resolver);
#ifdef CONFIG_CIFS_DFS_UPCALL
out_unregister_key_type:
#endif
#ifdef CONFIG_CIFS_UPCALL
@ -1165,7 +1081,6 @@ exit_cifs(void)
cifs_destroy_inodecache();
cifs_destroy_mids();
cifs_destroy_request_bufs();
kthread_stop(oplockThread);
}
MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");

View file

@ -18,6 +18,7 @@
*/
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/slow-work.h>
#include "cifs_fs_sb.h"
#include "cifsacl.h"
/*
@ -346,14 +347,16 @@ struct cifsFileInfo {
/* lock scope id (0 if none) */
struct file *pfile; /* needed for writepage */
struct inode *pInode; /* needed for oplock break */
struct vfsmount *mnt;
struct mutex lock_mutex;
struct list_head llist; /* list of byte range locks we have. */
bool closePend:1; /* file is marked to close */
bool invalidHandle:1; /* file closed via session abend */
bool messageMode:1; /* for pipes: message vs byte mode */
bool oplock_break_cancelled:1;
atomic_t count; /* reference count */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
struct slow_work oplock_break; /* slow_work job for oplock breaks */
};
/* Take a reference on the file private data */
@ -670,12 +673,6 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock;
*/
GLOBAL_EXTERN rwlock_t GlobalSMBSeslock;
/* Global list of oplocks */
GLOBAL_EXTERN struct list_head cifs_oplock_list;
/* Protects the cifs_oplock_list */
GLOBAL_EXTERN spinlock_t cifs_oplock_lock;
/* Outstanding dir notify requests */
GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
/* DirNotify response queue */
@ -726,3 +723,4 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/
extern const struct slow_work_ops cifs_oplock_break_ops;

View file

@ -86,17 +86,13 @@ extern int CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
const int stage,
const struct nls_table *nls_cp);
extern __u16 GetNextMid(struct TCP_Server_Info *server);
extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16,
struct cifsTconInfo *);
extern void DeleteOplockQEntry(struct oplock_q_entry *);
extern void DeleteTconOplockQEntries(struct cifsTconInfo *);
extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
extern u64 cifs_UnixTimeToNT(struct timespec);
extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
int offset);
extern int cifs_posix_open(char *full_path, struct inode **pinode,
struct super_block *sb, int mode, int oflags,
struct vfsmount *mnt, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid);
extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr,
FILE_UNIX_BASIC_INFO *info,

View file

@ -94,6 +94,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
open_file = list_entry(tmp, struct cifsFileInfo, tlist);
open_file->invalidHandle = true;
open_file->oplock_break_cancelled = true;
}
write_unlock(&GlobalSMBSeslock);
/* BB Add call to invalidate_inodes(sb) for all superblocks mounted

View file

@ -1670,7 +1670,6 @@ cifs_put_tcon(struct cifsTconInfo *tcon)
CIFSSMBTDis(xid, tcon);
_FreeXid(xid);
DeleteTconOplockQEntries(tcon);
tconInfoFree(tcon);
cifs_put_smb_ses(ses);
}

View file

@ -24,6 +24,7 @@
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
@ -131,11 +132,12 @@ cifs_bp_rename_retry:
static void
cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
struct cifsTconInfo *tcon, bool write_only)
struct vfsmount *mnt, bool write_only)
{
int oplock = 0;
struct cifsFileInfo *pCifsFile;
struct cifsInodeInfo *pCifsInode;
struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
@ -148,17 +150,19 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
pCifsFile->netfid = fileHandle;
pCifsFile->pid = current->tgid;
pCifsFile->pInode = igrab(newinode);
pCifsFile->mnt = mnt;
pCifsFile->invalidHandle = false;
pCifsFile->closePend = false;
mutex_init(&pCifsFile->fh_mutex);
mutex_init(&pCifsFile->lock_mutex);
INIT_LIST_HEAD(&pCifsFile->llist);
atomic_set(&pCifsFile->count, 1);
slow_work_init(&pCifsFile->oplock_break, &cifs_oplock_break_ops);
/* set the following in open now
pCifsFile->pfile = file; */
write_lock(&GlobalSMBSeslock);
list_add(&pCifsFile->tlist, &tcon->openFileList);
list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList);
pCifsInode = CIFS_I(newinode);
if (pCifsInode) {
/* if readable file instance put first in list*/
@ -179,14 +183,14 @@ cifs_fill_fileinfo(struct inode *newinode, __u16 fileHandle,
}
int cifs_posix_open(char *full_path, struct inode **pinode,
struct super_block *sb, int mode, int oflags,
struct vfsmount *mnt, int mode, int oflags,
__u32 *poplock, __u16 *pnetfid, int xid)
{
int rc;
bool write_only = false;
FILE_UNIX_BASIC_INFO *presp_data;
__u32 posix_flags = 0;
struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb);
struct cifs_fattr fattr;
cFYI(1, ("posix open %s", full_path));
@ -243,7 +247,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
/* get new inode and set it up */
if (*pinode == NULL) {
*pinode = cifs_iget(sb, &fattr);
*pinode = cifs_iget(mnt->mnt_sb, &fattr);
if (!*pinode) {
rc = -ENOMEM;
goto posix_open_ret;
@ -252,7 +256,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
cifs_fattr_to_inode(*pinode, &fattr);
}
cifs_fill_fileinfo(*pinode, *pnetfid, cifs_sb->tcon, write_only);
cifs_fill_fileinfo(*pinode, *pnetfid, mnt, write_only);
posix_open_ret:
kfree(presp_data);
@ -322,7 +326,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
(CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = cifs_posix_open(full_path, &newinode, inode->i_sb,
rc = cifs_posix_open(full_path, &newinode, nd->path.mnt,
mode, oflags, &oplock, &fileHandle, xid);
/* EIO could indicate that (posix open) operation is not
supported, despite what server claimed in capability
@ -469,8 +473,8 @@ cifs_create_set_dentry:
/* mknod case - do not leave file open */
CIFSSMBClose(xid, tcon, fileHandle);
} else if (!(posix_create) && (newinode)) {
cifs_fill_fileinfo(newinode, fileHandle,
cifs_sb->tcon, write_only);
cifs_fill_fileinfo(newinode, fileHandle, nd->path.mnt,
write_only);
}
cifs_create_out:
kfree(buf);
@ -682,8 +686,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
(nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
(nd->intent.open.flags & O_CREAT)) {
rc = cifs_posix_open(full_path, &newInode,
parent_dir_inode->i_sb,
rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
nd->intent.open.create_mode,
nd->intent.open.flags, &oplock,
&fileHandle, xid);

View file

@ -30,6 +30,7 @@
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
@ -51,11 +52,13 @@ static inline struct cifsFileInfo *cifs_init_private(
INIT_LIST_HEAD(&private_data->llist);
private_data->pfile = file; /* needed for writepage */
private_data->pInode = igrab(inode);
private_data->mnt = file->f_path.mnt;
private_data->invalidHandle = false;
private_data->closePend = false;
/* Initialize reference count to one. The private data is
freed on the release of the last reference */
atomic_set(&private_data->count, 1);
slow_work_init(&private_data->oplock_break, &cifs_oplock_break_ops);
return private_data;
}
@ -327,7 +330,7 @@ int cifs_open(struct inode *inode, struct file *file)
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
rc = cifs_posix_open(full_path, &inode, inode->i_sb,
rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@ -547,7 +550,7 @@ reopen_error_exit:
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
int oflags = (int) cifs_posix_convert_flags(file->f_flags);
/* can not refresh inode info since size could be stale */
rc = cifs_posix_open(full_path, NULL, inode->i_sb,
rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
cifs_sb->mnt_file_mode /* ignored */,
oflags, &oplock, &netfid, xid);
if (rc == 0) {
@ -2312,6 +2315,73 @@ out:
return rc;
}
/*
 * slow_work execute handler for an oplock break on an open file.
 *
 * @work: the slow_work item embedded in a struct cifsFileInfo.
 *
 * For a regular file this starts writeback of the inode's mapping and, when
 * read caching is no longer permitted, also waits for that writeback and
 * invalidates the cached pages. A writeback error is stored in the inode's
 * write_behind_rc. Afterwards the oplock release is sent to the server unless
 * the file is pending close or the break has been marked as cancelled.
 */
static void
cifs_oplock_break(struct slow_work *work)
{
	struct cifsFileInfo *open_file = container_of(work,
						      struct cifsFileInfo,
						      oplock_break);
	struct inode *ino = open_file->pInode;
	struct cifsInodeInfo *cifs_ino = CIFS_I(ino);
	struct cifs_sb_info *sb_info = CIFS_SB(open_file->mnt->mnt_sb);
	int rc;
	int wait_rc = 0;

	if (ino && S_ISREG(ino->i_mode)) {
#ifdef CONFIG_CIFS_EXPERIMENTAL
		if (!cifs_ino->clientCanCacheAll)
			break_lease(ino, FMODE_READ);
		else if (!cifs_ino->clientCanCacheRead)
			break_lease(ino, FMODE_WRITE);
#endif
		rc = filemap_fdatawrite(ino->i_mapping);
		if (!cifs_ino->clientCanCacheRead) {
			wait_rc = filemap_fdatawait(ino->i_mapping);
			invalidate_remote_inode(ino);
		}
		/* the write error, if any, takes precedence over the wait error */
		if (rc == 0)
			rc = wait_rc;
		if (rc != 0)
			cifs_ino->write_behind_rc = rc;
		cFYI(1, ("Oplock flush inode %p rc %d", ino, rc));
	}

	/*
	 * Releasing a stale oplock after a recent reconnect of the smb
	 * session, using a file handle that is now incorrect, is not a data
	 * integrity issue. Still, skip the release when the file is about to
	 * be closed or the break was cancelled, since the server has already
	 * released the oplock when the session was disconnected.
	 */
	if (open_file->closePend || open_file->oplock_break_cancelled)
		return;

	rc = CIFSSMBLock(0, sb_info->tcon, open_file->netfid, 0, 0, 0, 0,
			 LOCKING_ANDX_OPLOCK_RELEASE, false);
	cFYI(1, ("Oplock release rc = %d", rc));
}
/*
 * slow_work get_ref handler for the oplock break job.
 *
 * Takes a reference on the vfsmount recorded in the open file and on the
 * open file's private data itself. Always returns 0.
 */
static int
cifs_oplock_break_get(struct slow_work *work)
{
	struct cifsFileInfo *open_file;

	open_file = container_of(work, struct cifsFileInfo, oplock_break);
	mntget(open_file->mnt);
	cifsFileInfo_get(open_file);

	return 0;
}
/*
 * slow_work put_ref handler for the oplock break job.
 *
 * Drops the vfsmount reference and then the reference on the open file's
 * private data, mirroring cifs_oplock_break_get().
 */
static void
cifs_oplock_break_put(struct slow_work *work)
{
	struct cifsFileInfo *open_file;

	open_file = container_of(work, struct cifsFileInfo, oplock_break);

	/* read ->mnt before dropping our reference on the structure holding it */
	mntput(open_file->mnt);
	cifsFileInfo_put(open_file);
}
/* slow_work callbacks used for the oplock_break item of struct cifsFileInfo */
const struct slow_work_ops cifs_oplock_break_ops = {
	.execute	= cifs_oplock_break,		/* performs the break */
	.get_ref	= cifs_oplock_break_get,	/* pins mnt and file info */
	.put_ref	= cifs_oplock_break_put,	/* drops both references */
};
const struct address_space_operations cifs_addr_ops = {
.readpage = cifs_readpage,
.readpages = cifs_readpages,

View file

@ -32,7 +32,6 @@
extern mempool_t *cifs_sm_req_poolp;
extern mempool_t *cifs_req_poolp;
extern struct task_struct *oplockThread;
/* The xid serves as a useful identifier for each incoming vfs request,
in a similar way to the mid which is useful to track each sent smb,
@ -500,6 +499,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
struct cifsTconInfo *tcon;
struct cifsInodeInfo *pCifsInode;
struct cifsFileInfo *netfile;
int rc;
cFYI(1, ("Checking for oplock break or dnotify response"));
if ((pSMB->hdr.Command == SMB_COM_NT_TRANSACT) &&
@ -569,19 +569,30 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
if (pSMB->Fid != netfile->netfid)
continue;
read_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
/*
* don't do anything if file is about to be
* closed anyway.
*/
if (netfile->closePend) {
read_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
return true;
}
cFYI(1, ("file id match, oplock break"));
pCifsInode = CIFS_I(netfile->pInode);
pCifsInode->clientCanCacheAll = false;
if (pSMB->OplockLevel == 0)
pCifsInode->clientCanCacheRead = false;
AllocOplockQEntry(netfile->pInode,
netfile->netfid, tcon);
cFYI(1, ("about to wake up oplock thread"));
if (oplockThread)
wake_up_process(oplockThread);
rc = slow_work_enqueue(&netfile->oplock_break);
if (rc) {
cERROR(1, ("failed to enqueue oplock "
"break: %d\n", rc));
} else {
netfile->oplock_break_cancelled = false;
}
read_unlock(&GlobalSMBSeslock);
read_unlock(&cifs_tcp_ses_lock);
return true;
}
read_unlock(&GlobalSMBSeslock);

View file

@ -103,56 +103,6 @@ DeleteMidQEntry(struct mid_q_entry *midEntry)
mempool_free(midEntry, cifs_mid_poolp);
}
/*
 * Allocate an oplock queue entry describing (@pinode, @fid, @tcon) and append
 * it to the tail of cifs_oplock_list.
 *
 * Returns the new entry, or NULL if @pinode or @tcon is NULL or if the
 * allocation from cifs_oplock_cachep fails.
 */
struct oplock_q_entry *
AllocOplockQEntry(struct inode *pinode, __u16 fid, struct cifsTconInfo *tcon)
{
	struct oplock_q_entry *entry;

	if (!pinode || !tcon) {
		cERROR(1, ("Null parms passed to AllocOplockQEntry"));
		return NULL;
	}

	entry = kmem_cache_alloc(cifs_oplock_cachep, GFP_KERNEL);
	if (!entry)
		return NULL;

	entry->pinode = pinode;
	entry->tcon = tcon;
	entry->netfid = fid;

	/* publish the fully initialised entry under the list lock */
	spin_lock(&cifs_oplock_lock);
	list_add_tail(&entry->qhead, &cifs_oplock_list);
	spin_unlock(&cifs_oplock_lock);

	return entry;
}
/*
 * Unlink @oplockEntry from the oplock list under cifs_oplock_lock and return
 * it to cifs_oplock_cachep.
 */
void DeleteOplockQEntry(struct oplock_q_entry *oplockEntry)
{
	struct list_head *link = &oplockEntry->qhead;

	spin_lock(&cifs_oplock_lock);
	/* open question from the original author: check for an empty list first? */
	list_del(link);
	spin_unlock(&cifs_oplock_lock);

	kmem_cache_free(cifs_oplock_cachep, oplockEntry);
}
/*
 * Remove and free every queued oplock entry that refers to @tcon.
 *
 * Does nothing when @tcon is NULL. The whole walk is done with
 * cifs_oplock_lock held.
 */
void DeleteTconOplockQEntries(struct cifsTconInfo *tcon)
{
	struct oplock_q_entry *temp;
	struct oplock_q_entry *next;

	if (tcon == NULL)
		return;

	spin_lock(&cifs_oplock_lock);
	/*
	 * Entries are unlinked and freed inside the loop, so the successor
	 * must be fetched before the body runs; the plain iterator would read
	 * the next pointer out of the entry that was just freed.
	 */
	list_for_each_entry_safe(temp, next, &cifs_oplock_list, qhead) {
		/* tcon is non-NULL here, so a match implies temp->tcon is set */
		if (temp->tcon == tcon) {
			list_del(&temp->qhead);
			kmem_cache_free(cifs_oplock_cachep, temp);
		}
	}
	spin_unlock(&cifs_oplock_lock);
}
static int
smb_sendv(struct TCP_Server_Info *server, struct kvec *iov, int n_vec)
{