From 96f287b0cf512ee537826943c15b0b8647472f70 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:09:56 -0500 Subject: [PATCH 01/75] NFS: BKL removal from the mount code... None of the code in nfs_umount_begin() or nfs_remount() has any BKL dependency. Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 90be551b80c..f0188eaf372 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -714,8 +714,6 @@ static void nfs_umount_begin(struct super_block *sb) struct nfs_server *server; struct rpc_clnt *rpc; - lock_kernel(); - server = NFS_SB(sb); /* -EIO all pending I/O */ rpc = server->client_acl; @@ -724,8 +722,6 @@ static void nfs_umount_begin(struct super_block *sb) rpc = server->client; if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); - - unlock_kernel(); } static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int version) @@ -1881,7 +1877,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) if (data == NULL) return -ENOMEM; - lock_kernel(); /* fill out struct with values from existing mount */ data->flags = nfss->flags; data->rsize = nfss->rsize; @@ -1907,7 +1902,6 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) error = nfs_compare_remount_data(nfss, data); out: kfree(data); - unlock_kernel(); return error; } From feb8ca37cc3d83c07fd042509ef1e176cfeb2cfa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: [PATCH 02/75] SUNRPC: Ensure that we honour autoclose before attempting to reconnect If the XPRT_CLOSE_WAIT flag is set, we need to ensure that we call xprt->ops->close() while holding xprt_lock_write() before we can start reconnecting. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index fd46d42afa8..469de292c23 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -700,6 +700,10 @@ void xprt_connect(struct rpc_task *task) } if (!xprt_lock_write(xprt, task)) return; + + if (test_and_clear_bit(XPRT_CLOSE_WAIT, &xprt->state)) + xprt->ops->close(xprt); + if (xprt_connected(xprt)) xprt_release_write(xprt, task); else { From f0380f3d16df8f9e2fcd1d8c16fb0d94370bea99 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: [PATCH 03/75] RPC: Fix two potential races in put_rpccred It is possible for rpcauth_destroy_credcache() to cause the rpc credentials to be unhashed while put_rpccred is waiting for the rpc_credcache_lock on another cpu. Should this happen, then we can end up calling hlist_del_rcu(&cred->cr_hash) a second time in put_rpccred, thus causing list corruption. Should the credential actually be hashed, it is also possible for rpcauth_lookup_credcache to find and reference it before we get round to unhashing it. In this case, the call to rpcauth_unhash_cred will fail, and so we should just exit without destroying the cred. Reported-by: Neil Brown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 54a4e042f10..7ee6f7eaddf 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -123,16 +123,19 @@ rpcauth_unhash_cred_locked(struct rpc_cred *cred) clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags); } -static void +static int rpcauth_unhash_cred(struct rpc_cred *cred) { spinlock_t *cache_lock; + int ret; cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); - if (atomic_read(&cred->cr_count) == 0) + ret = atomic_read(&cred->cr_count) == 0; + if (ret) rpcauth_unhash_cred_locked(cred); spin_unlock(cache_lock); + return ret; } /* @@ -446,31 +449,35 @@ void put_rpccred(struct rpc_cred *cred) { /* Fast path for unhashed credentials */ - if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - goto need_lock; - - if (!atomic_dec_and_test(&cred->cr_count)) + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) { + if (atomic_dec_and_test(&cred->cr_count)) + cred->cr_ops->crdestroy(cred); return; - goto out_destroy; -need_lock: + } + if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) return; if (!list_empty(&cred->cr_lru)) { number_cred_unused--; list_del_init(&cred->cr_lru); } - if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) - rpcauth_unhash_cred(cred); if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { - cred->cr_expire = jiffies; - list_add_tail(&cred->cr_lru, &cred_unused); - number_cred_unused++; - spin_unlock(&rpc_credcache_lock); - return; + if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) { + cred->cr_expire = jiffies; + list_add_tail(&cred->cr_lru, &cred_unused); + number_cred_unused++; + goto out_nodestroy; + } + if (!rpcauth_unhash_cred(cred)) { + /* We were hashed and someone looked us up... */ + goto out_nodestroy; + } } spin_unlock(&rpc_credcache_lock); -out_destroy: cred->cr_ops->crdestroy(cred); + return; +out_nodestroy: + spin_unlock(&rpc_credcache_lock); } EXPORT_SYMBOL_GPL(put_rpccred); From d327cf7449e6fd5cbac784c641770e9366faa386 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: [PATCH 04/75] Re: acl trouble after upgrading ubuntu Subject: [PATCH] nfs: fix acl decoding Commit 28f566942c6b1d929f5e240e69e7081b77b238d3 "NFS: use dynamically computed compound_hdr.replen for xdr_inline_pages offset" accidentally changed the amount of space to allow for the acl reply, resulting in an IO error on attempts to get an acl. Reported-by: Paul Rudin Cc: Benny Halevy Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 20b4e30e6c8..3a71b40a990 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -2096,7 +2096,7 @@ nfs4_xdr_enc_getacl(struct rpc_rqst *req, __be32 *p, encode_compound_hdr(&xdr, req, &hdr); encode_sequence(&xdr, &args->seq_args, &hdr); encode_putfh(&xdr, args->fh, &hdr); - replen = hdr.replen + nfs4_fattr_bitmap_maxsz + 1; + replen = hdr.replen + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz + 1; encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0, &hdr); xdr_inline_pages(&req->rq_rcv_buf, replen << 2, From d18cc1fda25295416a2855d44c2936db01df9eec Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 08:10:17 -0500 Subject: [PATCH 05/75] NFSv4: Fix a potential state manager deadlock when returning delegations The nfsv4 state manager could potentially deadlock inside __nfs_inode_return_delegation() if the server reboots, so that the calls to nfs_msync_inode() end up waiting on state recovery to complete. Also ensure that if a server reboot or network partition causes us to have to stop returning delegations, that NFS4CLNT_DELEGRETURN is set so that the state manager can resume any outstanding delegation returns after it has dealt with the state recovery situation. Finally, ensure that the state manager doesn't wait for the DELEGRETURN call to complete. It doesn't need to, and that too can cause a deadlock. Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 42 ++++++++++++++++++++++++++++-------------- fs/nfs/delegation.h | 2 +- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6dd48a4405b..eeecd69c130 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -92,7 +92,7 @@ out: return status; } -static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) +static int nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_open_context *ctx; @@ -116,10 +116,11 @@ again: err = nfs_delegation_claim_locks(ctx, state); put_nfs_open_context(ctx); if (err != 0) - return; + return err; goto again; } spin_unlock(&inode->i_lock); + return 0; } /* @@ -261,30 +262,34 @@ static void nfs_msync_inode(struct inode *inode) /* * Basic procedure for returning a delegation to the server */ -static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation) +static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) { struct nfs_inode *nfsi = NFS_I(inode); + int err; - nfs_msync_inode(inode); /* * Guard against new delegated open/lock/unlock calls and against * state recovery */ down_write(&nfsi->rwsem); - nfs_delegation_claim_opens(inode, &delegation->stateid); + err = nfs_delegation_claim_opens(inode, &delegation->stateid); up_write(&nfsi->rwsem); - nfs_msync_inode(inode); + if (err) + goto out; - return nfs_do_return_delegation(inode, delegation, 1); + err = nfs_do_return_delegation(inode, delegation, issync); +out: + return err; } /* * Return all delegations that have been marked for return */ -void nfs_client_return_marked_delegations(struct nfs_client *clp) +int nfs_client_return_marked_delegations(struct nfs_client *clp) { struct nfs_delegation *delegation; struct inode *inode; + int err = 0; restart: rcu_read_lock(); @@ -298,12 +303,18 @@ restart: delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); spin_unlock(&clp->cl_lock); rcu_read_unlock(); - if (delegation != NULL) - __nfs_inode_return_delegation(inode, delegation); + if (delegation != NULL) { + filemap_flush(inode->i_mapping); + err = __nfs_inode_return_delegation(inode, delegation, 0); + } iput(inode); - goto restart; + if (!err) + goto restart; + set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + return err; } rcu_read_unlock(); + return 0; } /* @@ -338,8 +349,10 @@ int nfs_inode_return_delegation(struct inode *inode) spin_lock(&clp->cl_lock); delegation = nfs_detach_delegation_locked(nfsi, NULL); spin_unlock(&clp->cl_lock); - if (delegation != NULL) - err = __nfs_inode_return_delegation(inode, delegation); + if (delegation != NULL) { + nfs_msync_inode(inode); + err = __nfs_inode_return_delegation(inode, delegation, 1); + } } return err; } @@ -368,7 +381,8 @@ void nfs_super_return_all_delegations(struct super_block *sb) spin_unlock(&delegation->lock); } rcu_read_unlock(); - nfs_client_return_marked_delegations(clp); + if (nfs_client_return_marked_delegations(clp) != 0) + nfs4_schedule_state_manager(clp); } static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index 09f38379517..e225a129012 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -42,7 +42,7 @@ void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); -void nfs_client_return_marked_delegations(struct nfs_client *clp); +int nfs_client_return_marked_delegations(struct nfs_client *clp); void nfs_delegation_mark_reclaim(struct nfs_client *clp); void nfs_delegation_reap_unclaimed(struct nfs_client *clp); From e345e88a774875cec26e097ea3ff2dc40c4f9da2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:52:41 -0500 Subject: [PATCH 06/75] NFSv4: Fix up the callers of nfs4_state_end_reclaim_reboot In practice, we need to ensure that we call nfs4_state_end_reclaim_reboot in 2 cases: - If we lose the lease while we were reclaiming state OR - After we're done with reboot recovery Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2ef4fecf398..456631969a5 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1060,6 +1060,7 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_end_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); break; case -NFS4ERR_EXPIRED: @@ -1263,7 +1264,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } } /* First recover reboot state... */ - if (test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { + if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { status = nfs4_do_reclaim(clp, nfs4_reboot_recovery_ops[clp->cl_minorversion]); if (status == -NFS4ERR_STALE_CLIENTID) @@ -1309,8 +1310,6 @@ static void nfs4_state_manager(struct nfs_client *clp) out_error: printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" " with error %d\n", clp->cl_hostname, -status); - if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) - nfs4_state_end_reclaim_reboot(clp); nfs4_clear_state_manager_bit(clp); } From 4f7cdf18e14f81860b856ef7694ef58eb1a751c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:53:20 -0500 Subject: [PATCH 07/75] NFSv4: The state manager shouldn't exit on errors that were handled nfs4_recovery_handle_error() will correctly handle errors such as NFS4ERR_CB_PATH_DOWN, however because they are still passed back to the main loop in nfs4_state_manager(), they can cause the latter to exit prematurely. Fix this by letting nfs4_recovery_handle_error() change the error value in cases where there is no action required by the caller. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 456631969a5..8f27b4f0a53 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1051,12 +1051,12 @@ static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp) clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); } -static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) +static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { case -NFS4ERR_CB_PATH_DOWN: nfs_handle_cb_pathdown(clp); - break; + return 0; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); @@ -1075,6 +1075,7 @@ static void nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_SEQ_MISORDERED: set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); } + return error; } static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops) @@ -1094,8 +1095,7 @@ restart: if (status < 0) { set_bit(ops->owner_flag_bit, &sp->so_flags); nfs4_put_state_owner(sp); - nfs4_recovery_handle_error(clp, status); - return status; + return nfs4_recovery_handle_error(clp, status); } nfs4_put_state_owner(sp); goto restart; @@ -1125,8 +1125,7 @@ static int nfs4_check_lease(struct nfs_client *clp) status = ops->renew_lease(clp, cred); put_rpccred(cred); out: - nfs4_recovery_handle_error(clp, status); - return status; + return nfs4_recovery_handle_error(clp, status); } static int nfs4_reclaim_lease(struct nfs_client *clp) From c8b7ae3d3221536228260757444ee10c6d71793f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:53:21 -0500 Subject: [PATCH 08/75] NFSv4: Ensure the state manager handles NFS4ERR_NO_GRACE correctly Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 8f27b4f0a53..b53e37fb030 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1057,6 +1057,9 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_CB_PATH_DOWN: nfs_handle_cb_pathdown(clp); return 0; + case -NFS4ERR_NO_GRACE: + nfs4_state_end_reclaim_reboot(clp); + return 0; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); From a9ed2e2583747fb3139a764c317fac58893b968f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:53:21 -0500 Subject: [PATCH 09/75] NFSv4: Handle NFS4ERR_GRACE when recovering an expired lease. If our lease expires, and the server reboots while we're recovering, we need to be able to wait until the grace period is over. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 741a562177f..c7b8d39a3ce 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1488,7 +1488,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s return ret; } -static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) +static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; @@ -1496,10 +1496,16 @@ static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4 do { err = _nfs4_open_expired(ctx, state); - if (err != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, err, &exception); + switch (err) { + default: + goto out; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + nfs4_handle_exception(server, err, &exception); + err = 0; + } } while (exception.retry); +out: return err; } @@ -4049,10 +4055,16 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) return 0; err = _nfs4_do_setlk(state, F_SETLK, request, 0); - if (err != -NFS4ERR_DELAY) - break; - nfs4_handle_exception(server, err, &exception); + switch (err) { + default: + goto out; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + nfs4_handle_exception(server, err, &exception); + err = 0; + } } while (exception.retry); +out: return err; } From b6d408ba8c8be3646dea6f80a2fe55ac403119c8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:53:22 -0500 Subject: [PATCH 10/75] NFSv4: Fix up error handling in the state manager main loop. The nfs4_state_manager should not be looking at the error values when deciding whether or not to loop round in order to handle a higher priority state recovery task. It should rather be looking at the clp->cl_state. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 48 ++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index b53e37fb030..62927879572 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1046,11 +1046,6 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); } -static void nfs4_state_end_reclaim_nograce(struct nfs_client *clp) -{ - clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); -} - static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) { switch (error) { @@ -1237,7 +1232,8 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_reclaim_lease(clp); if (status) { nfs4_set_lease_expired(clp, status); - if (status == -EAGAIN) + if (test_bit(NFS4CLNT_LEASE_EXPIRED, + &clp->cl_state)) continue; if (clp->cl_cons_state == NFS_CS_SESSION_INITING) @@ -1249,9 +1245,12 @@ static void nfs4_state_manager(struct nfs_client *clp) if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { status = nfs4_check_lease(clp); - if (status != 0) + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) continue; + if (status < 0 && status != -NFS4ERR_CB_PATH_DOWN) + goto out_error; } + /* Initialize or reset the session */ if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) && nfs4_has_session(clp)) { @@ -1259,41 +1258,36 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_initialize_session(clp); else status = nfs4_reset_session(clp); - if (status) { - if (status == -NFS4ERR_STALE_CLIENTID) - continue; + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) + continue; + if (status < 0) goto out_error; - } } + /* First recover reboot state... */ if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) { status = nfs4_do_reclaim(clp, nfs4_reboot_recovery_ops[clp->cl_minorversion]); - if (status == -NFS4ERR_STALE_CLIENTID) - continue; - if (test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || + test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) continue; nfs4_state_end_reclaim_reboot(clp); - continue; + if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) + continue; + if (status < 0) + goto out_error; } /* Now recover expired state... */ if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { status = nfs4_do_reclaim(clp, nfs4_nograce_recovery_ops[clp->cl_minorversion]); - if (status < 0) { - set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state); - if (status == -NFS4ERR_STALE_CLIENTID) - continue; - if (status == -NFS4ERR_EXPIRED) - continue; - if (test_bit(NFS4CLNT_SESSION_SETUP, - &clp->cl_state)) - continue; + if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || + test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) || + test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) + continue; + if (status < 0) goto out_error; - } else - nfs4_state_end_reclaim_nograce(clp); - continue; } if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) { From 0a6566ecd3afc74aad11c2e07794ff5529c13862 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:54:01 -0500 Subject: [PATCH 11/75] NFSv4: Ensure nfs_dns_lookup() and nfs_dns_update() are declared static Fix two 'sparse' warnings in fs/nfs/dns_resolve.c Signed-off-by: Trond Myklebust --- fs/nfs/dns_resolve.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index f4d54ba97cc..95e1ca765d4 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -146,7 +146,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, return 0; } -struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd, +static struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd, struct nfs_dns_ent *key) { struct cache_head *ch; @@ -159,7 +159,7 @@ struct nfs_dns_ent *nfs_dns_lookup(struct cache_detail *cd, return container_of(ch, struct nfs_dns_ent, h); } -struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd, +static struct nfs_dns_ent *nfs_dns_update(struct cache_detail *cd, struct nfs_dns_ent *new, struct nfs_dns_ent *key) { From 1185a552e3d78807031f4021c5edb60d3e8838f1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:54:02 -0500 Subject: [PATCH 12/75] NFSv4: Ensure nfs4_close_context() is declared as static Fix another 'sparse' warning in fs/nfs/nfs4proc.c Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c7b8d39a3ce..df8a734f1c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1987,7 +1987,7 @@ out_drop: return 0; } -void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) +static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) { if (ctx->state == NULL) return; From d4e935bd67ca05db4119b67801d9ece6ae139f05 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 3 Dec 2009 15:58:33 -0500 Subject: [PATCH 13/75] The rpc server does not require that service threads take the BKL. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 293fa0528a6..e66ec5d169f 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -78,11 +78,6 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); - /* - * FIXME: do we really need to run this under the BKL? If so, please - * add a comment about what it's intended to protect. - */ - lock_kernel(); while (!kthread_should_stop()) { /* * Listen for a request on the socket @@ -104,7 +99,6 @@ nfs4_callback_svc(void *vrqstp) preverr = err; svc_process(rqstp); } - unlock_kernel(); return 0; } @@ -160,11 +154,6 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); - /* - * FIXME: do we really need to run this under the BKL? If so, please - * add a comment about what it's intended to protect. - */ - lock_kernel(); while (!kthread_should_stop()) { prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); spin_lock_bh(&serv->sv_cb_lock); @@ -183,7 +172,6 @@ nfs41_callback_svc(void *vrqstp) } finish_wait(&serv->sv_cb_waitq, &wq); } - unlock_kernel(); return 0; } From ee671b016fbfc26d69c3fe02e28706222beb1149 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 14/75] NFS: convert proto= option to use netids rather than a protoname Solaris uses netids as values for the proto= option, so that when someone specifies "tcp6" they get traffic over TCP + IPv6. Until recently, this has never really been an issue for Linux since it didn't support NFS over IPv6. The netid and the protocol name were generally always the same (modulo any strange configuration in /etc/netconfig). The solaris manpage documents their proto= option as: proto= _netid_ | rdma This patch is intended to bring Linux closer to how the Solaris proto= option works, by declaring a static netid mapping in the kernel and converting the proto= and mountproto= options to follow it and display the proper values in /proc/mounts. Much of this functionality will need to be provided by a userspace mount.nfs patch. Chuck Lever has a patch to change mount.nfs in the same way. In principle, we could do *all* of this in userspace but that would mean that the options in /proc/mounts may not match the options used by userspace. The alternative to the static mapping here is to add a mechanism to upcall to userspace for netid's. I'm not opposed to that option, but it'll probably mean more overhead (and quite a bit more code). Rather than shoot for that at first, I figured it was probably better to start simply. Comments welcome. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 94 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 13 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f0188eaf372..bfad7464875 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -175,14 +175,16 @@ static const match_table_t nfs_mount_option_tokens = { }; enum { - Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma, + Opt_xprt_udp, Opt_xprt_udp6, Opt_xprt_tcp, Opt_xprt_tcp6, Opt_xprt_rdma, Opt_xprt_err }; static const match_table_t nfs_xprt_protocol_tokens = { { Opt_xprt_udp, "udp" }, + { Opt_xprt_udp6, "udp6" }, { Opt_xprt_tcp, "tcp" }, + { Opt_xprt_tcp6, "tcp6" }, { Opt_xprt_rdma, "rdma" }, { Opt_xprt_err, NULL } @@ -492,6 +494,45 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) return sec_flavours[i].str; } +static void nfs_show_mountd_netid(struct seq_file *m, struct nfs_server *nfss, + int showdefaults) +{ + struct sockaddr *sap = (struct sockaddr *) &nfss->mountd_address; + + seq_printf(m, ",mountproto="); + switch (sap->sa_family) { + case AF_INET: + switch (nfss->mountd_protocol) { + case IPPROTO_UDP: + seq_printf(m, RPCBIND_NETID_UDP); + break; + case IPPROTO_TCP: + seq_printf(m, RPCBIND_NETID_TCP); + break; + default: + if (showdefaults) + seq_printf(m, "auto"); + } + break; + case AF_INET6: + switch (nfss->mountd_protocol) { + case IPPROTO_UDP: + seq_printf(m, RPCBIND_NETID_UDP6); + break; + case IPPROTO_TCP: + seq_printf(m, RPCBIND_NETID_TCP6); + break; + default: + if (showdefaults) + seq_printf(m, "auto"); + } + break; + default: + if (showdefaults) + seq_printf(m, "auto"); + } +} + static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { @@ -518,17 +559,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->mountd_port || showdefaults) seq_printf(m, ",mountport=%u", nfss->mountd_port); - switch (nfss->mountd_protocol) { - case IPPROTO_UDP: - seq_printf(m, ",mountproto=udp"); - break; - case IPPROTO_TCP: - seq_printf(m, ",mountproto=tcp"); - break; - default: - if (showdefaults) - seq_printf(m, ",mountproto=auto"); - } + nfs_show_mountd_netid(m, nfss, showdefaults); } /* @@ -578,7 +609,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_puts(m, nfs_infop->nostr); } seq_printf(m, ",proto=%s", - rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); + rpc_peeraddr2str(nfss->client, RPC_DISPLAY_NETID)); if (version == 4) { if (nfss->port != NFS_PORT) seq_printf(m, ",port=%u", nfss->port); @@ -883,6 +914,8 @@ static int nfs_parse_mount_options(char *raw, { char *p, *string, *secdata; int rc, sloppy = 0, invalid_option = 0; + unsigned short protofamily = AF_UNSPEC; + unsigned short mountfamily = AF_UNSPEC; if (!raw) { dfprintk(MOUNT, "NFS: mount options string was NULL.\n"); @@ -1228,12 +1261,17 @@ static int nfs_parse_mount_options(char *raw, token = match_token(string, nfs_xprt_protocol_tokens, args); + protofamily = AF_INET; switch (token) { + case Opt_xprt_udp6: + protofamily = AF_INET6; case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; kfree(string); break; + case Opt_xprt_tcp6: + protofamily = AF_INET6; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; @@ -1261,10 +1299,15 @@ static int nfs_parse_mount_options(char *raw, nfs_xprt_protocol_tokens, args); kfree(string); + mountfamily = AF_INET; switch (token) { + case Opt_xprt_udp6: + mountfamily = AF_INET6; case Opt_xprt_udp: mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; break; + case Opt_xprt_tcp6: + mountfamily = AF_INET6; case Opt_xprt_tcp: mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; break; @@ -1363,8 +1406,33 @@ static int nfs_parse_mount_options(char *raw, if (!sloppy && invalid_option) return 0; + /* + * verify that any proto=/mountproto= options match the address + * familiies in the addr=/mountaddr= options. + */ + if (protofamily != AF_UNSPEC && + protofamily != mnt->nfs_server.address.ss_family) + goto out_proto_mismatch; + + if (mountfamily != AF_UNSPEC) { + if (mnt->mount_server.addrlen) { + if (mountfamily != mnt->mount_server.address.ss_family) + goto out_mountproto_mismatch; + } else { + if (mountfamily != mnt->nfs_server.address.ss_family) + goto out_mountproto_mismatch; + } + } + return 1; +out_mountproto_mismatch: + printk(KERN_INFO "NFS: mount server address does not match mountproto= " + "option\n"); + return 0; +out_proto_mismatch: + printk(KERN_INFO "NFS: server address does not match proto= option\n"); + return 0; out_invalid_address: printk(KERN_INFO "NFS: bad IP address specified: %s\n", p); return 0; From a01878aac57eac6eb4bf194788ab2cc440490d0f Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 15/75] NFS: reorder nfs4_sequence_regs to remove 8 bytes of padding on 64 bits reorder nfs4_sequence_args to remove 8 bytes of padding on 64 bit builds. The size of this structure drops to 24 bytes from 32 and reduces the text size of nfs.ko. On my x86_64 size reports text data bss 2.6.32-rc5 200996 8512 432 209940 33414 nfs.ko +patch 200884 8512 432 209828 333a4 nfs.ko Signed-off-by: Richard Kennedy Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 62f63fb0c4c..8c880372536 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -170,8 +170,8 @@ struct nfs4_sequence_args { struct nfs4_sequence_res { struct nfs4_session *sr_session; u8 sr_slotid; /* slot used to send request */ - unsigned long sr_renewal_time; int sr_status; /* sequence operation status */ + unsigned long sr_renewal_time; }; struct nfs4_get_lease_time_args { From dd1fd90fe65e2e642f0e58e2ff4849f317a6c43d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 16/75] SUNRPC: Display compressed (shorthand) IPv6 presentation addresses Recent changes to snprintf() introduced the %pI6c formatter, which can display an IPv6 address with standard shorthanding. Using a shorthanded address can save us a few bytes of memory for each stored presentation address, or a few bytes on the wire when sending these in a universal address. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/addr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index c7450c8f0a7..6dcdd251781 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -55,16 +55,8 @@ static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, /* * RFC 4291, Section 2.2.1 - * - * To keep the result as short as possible, especially - * since we don't shorthand, we don't want leading zeros - * in each halfword, so avoid %pI6. */ - return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", - ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), - ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), - ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), - ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); + return snprintf(buf, buflen, "%pI6c", addr); } static size_t rpc_ntop6(const struct sockaddr *sap, From d250e190fb9b06f4c595eade88b3d0b705fb330a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 17/75] NFS: Display compressed (shorthand) IPv6 in /proc/mounts Recent changes to snprintf() introduced the %pI6c formatter, which can display an IPv6 address with standard shorthanding. Use this new formatter when displaying IPv6 server addresses in /proc/mounts. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index bfad7464875..837032731bb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -546,7 +546,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss, } case AF_INET6: { struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; - seq_printf(m, ",mountaddr=%pI6", &sin6->sin6_addr); + seq_printf(m, ",mountaddr=%pI6c", &sin6->sin6_addr); break; } default: From dd47f96c077b4516727e497e4b6fd47a06778c0a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 18/75] NFS: Revert default r/wsize behavior When the "rsize=" or "wsize=" mount options are not specified, text-based mounts have slightly different behavior than legacy binary mounts. Text-based mounts use the smaller of the server's maximum and the client's maximum, but binary mounts use the smaller of the server's _preferred_ size and the client's maximum. This difference is actually pretty subtle. Most servers advertise the same value as their maximum and their preferred transfer size, so the end result is the same in most cases. The reason for this difference is that for text-based mounts, if r/wsize are not specified, they are set to the largest value supported by the client. For legacy mounts, the values are set to zero if these options are not specified. nfs_server_set_fsinfo() can negotiate the transfer size defaults correctly in any case. There's no need to specify any particular value as default in the text-based option parsing logic. Note that nfs4 doesn't use nfs_server_set_fsinfo(), but the mount.nfs4 command does set rsize and wsize to 0 if the user didn't specify these options. So, make the same change for text-based NFSv4 mounts. Thanks to James Pearson for reporting and diagnosing the problem. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 837032731bb..ce907efc550 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -761,8 +761,6 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve data = kzalloc(sizeof(*data), GFP_KERNEL); if (data) { - data->rsize = NFS_MAX_FILE_IO_SIZE; - data->wsize = NFS_MAX_FILE_IO_SIZE; data->acregmin = NFS_DEF_ACREGMIN; data->acregmax = NFS_DEF_ACREGMAX; data->acdirmin = NFS_DEF_ACDIRMIN; From 206a134b4d8abf57cd34dffacf993869355b9aac Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 19/75] SUNRPC: Check explicitly for tk_status == 0 in call_transmit_status() The success case, where task->tk_status == 0, is by far the most frequent case in call_transmit_status(). The default: arm of the switch statement in call_transmit_status() handles the 0 case. default: was moved close to the top of the switch statement in call_transmit_status() under the theory that the compiler places object code for the earliest arms of a switch statement first, making the CPU do less work. The default: arm of a switch statement, however, is executed only after all the other cases have been checked. Even if the compiler rearranges the object code, the default: arm is the "last resort", meaning all of the other cases have been explicitly exhausted. That makes the current arrangement about as inefficient as it gets for the common case. To fix this, add an explicit check for zero before the switch statement. That forces the compiler to do the zero check first, no matter what optimizations it might try to do to the switch statement. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 38829e20500..7bcd931e06e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1180,10 +1180,22 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; + + /* + * Common case: success. Force the compiler to put this + * test first. + */ + if (task->tk_status == 0) { + xprt_end_transmit(task); + rpc_task_force_reencode(task); + return; + } + switch (task->tk_status) { case -EAGAIN: break; default: + dprint_status(task); xprt_end_transmit(task); /* * Special cases: if we've been waiting on the From 09a21c4102c8f7893368553273d39c0cadedf9af Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 20/75] SUNRPC: Allow RPCs to fail quickly if the server is unreachable The kernel sometimes makes RPC calls to services that aren't running. Because the kernel's RPC client always assumes the hard retry semantic when reconnecting a connection-oriented RPC transport, the underlying reconnect logic takes a long while to time out, even though the remote may have responded immediately with ECONNREFUSED. In certain cases, like upcalls to our local rpcbind daemon, or for NFS mount requests, we'd like the kernel to fail immediately if the remote service isn't reachable. This allows another transport to be tried immediately, or the pending request can be abandoned quickly. Introduce a per-request flag which controls how call_transmit_status() behaves when request transmission fails because the server cannot be reached. We don't want soft connection semantics to apply to other errors. The default case of the switch statement in call_transmit_status() no longer falls through; the fall through code is copied to the default case, and a "break;" is added. The transport's connection re-establishment timeout is also ignored for such requests. We want the request to fail immediately, so the reconnect delay is skipped. Additionally, we don't want a connect failure here to further increase the reconnect timeout value, since this request will not be retried. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/clnt.c | 11 +++++++++-- net/sunrpc/xprtsock.c | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 401097781fc..1906782ec86 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -130,12 +130,14 @@ struct rpc_task_setup { #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ +#define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) +#define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7bcd931e06e..68a23583f44 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1197,6 +1197,8 @@ call_transmit_status(struct rpc_task *task) default: dprint_status(task); xprt_end_transmit(task); + rpc_task_force_reencode(task); + break; /* * Special cases: if we've been waiting on the * socket's write_space() callback, or if the @@ -1204,11 +1206,16 @@ call_transmit_status(struct rpc_task *task) * then hold onto the transport lock. */ case -ECONNREFUSED: - case -ECONNRESET: - case -ENOTCONN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + if (RPC_IS_SOFTCONN(task)) { + xprt_end_transmit(task); + rpc_exit(task, task->tk_status); + break; + } + case -ECONNRESET: + case -ENOTCONN: case -EPIPE: rpc_task_force_reencode(task); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37c5475ba25..ff312f8b018 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2033,7 +2033,7 @@ static void xs_connect(struct rpc_task *task) if (xprt_test_and_set_connecting(xprt)) return; - if (transport->sock != NULL) { + if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { dprintk("RPC: xs_connect delayed xprt %p for %lu " "seconds\n", xprt, xprt->reestablish_timeout / HZ); From 5a46211540a83871196489247f57da2bdde58d87 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 21/75] SUNRPC: Simplify synopsis of rpcb_local_clnt() Clean up: At one point, rpcb_local_clnt() handled IPv6 loopback addresses too, but it doesn't any more; only IPv4 loopback is used now. Get rid of the @addr and @addrlen arguments to rpcb_local_clnt(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 830faf4d999..28f50da60ce 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -163,13 +163,12 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, - size_t addrlen, u32 version) +static struct rpc_clnt *rpcb_create_local(u32 version) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, - .address = addr, - .addrsize = addrlen, + .address = (struct sockaddr *)&rpcb_inaddr_loopback, + .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, .version = version, @@ -211,14 +210,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, static int rpcb_register_call(const u32 version, struct rpc_message *msg) { - struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; - size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(addr, addrlen, version); + rpcb_clnt = rpcb_create_local(version); if (!IS_ERR(rpcb_clnt)) { error = rpc_call_sync(rpcb_clnt, msg, 0); rpc_shutdown_client(rpcb_clnt); From c526611dd631b2802b6b0221ffb306c5fa25c86c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 22/75] SUNRPC: Use a cached RPC client and transport for rpcbind upcalls The kernel's rpcbind client creates and deletes an rpc_clnt and its underlying transport socket for every upcall to the local rpcbind daemon. When starting a typical NFS server on IPv4 and IPv6, the NFS service itself does three upcalls (one per version) times two upcalls (one per transport) times two upcalls (one per address family), making 12, plus another one for the initial call to unregister previous NFS services. Starting the NLM service adds an additional 13 upcalls, for similar reasons. (Currently the NFS service doesn't start IPv6 listeners, but it will soon enough). Instead, let's create an rpc_clnt for rpcbind upcalls during the first local rpcbind query, and cache it. This saves the overhead of creating and destroying an rpc_clnt and a socket for every upcall. The new logic also prevents the kernel from attempting an RPCB_SET or RPCB_UNSET if it knows from the start that the local portmapper does not support rpcbind protocol version 4. This will cut down on the number of rpcbind upcalls in legacy environments. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 93 +++++++++++++++++++++++++++++++++------- net/sunrpc/sunrpc_syms.c | 3 ++ 2 files changed, 80 insertions(+), 16 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 28f50da60ce..116db74dd94 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -110,6 +111,9 @@ static void rpcb_getport_done(struct rpc_task *, void *); static void rpcb_map_release(void *data); static struct rpc_program rpcb_program; +static struct rpc_clnt * rpcb_local_clnt; +static struct rpc_clnt * rpcb_local_clnt4; + struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -163,7 +167,13 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static struct rpc_clnt *rpcb_create_local(u32 version) +static DEFINE_MUTEX(rpcb_create_local_mutex); + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) { struct rpc_create_args args = { .protocol = XPRT_TRANSPORT_UDP, @@ -171,12 +181,46 @@ static struct rpc_clnt *rpcb_create_local(u32 version) .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", .program = &rpcb_program, - .version = version, + .version = RPCBVERS_2, .authflavor = RPC_AUTH_UNIX, .flags = RPC_CLNT_CREATE_NOPING, }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; - return rpc_create(&args); + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create local rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } + + /* + * This results in an RPC ping. On systems running portmapper, + * the v4 ping will fail. Proceed anyway, but disallow rpcb + * v4 upcalls. + */ + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to create local rpcbind v4 " + "cleint (errno %ld).\n", PTR_ERR(clnt4)); + clnt4 = NULL; + } + + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; + +out: + mutex_unlock(&rpcb_create_local_mutex); + return result; } static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, @@ -208,20 +252,13 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(const u32 version, struct rpc_message *msg) +static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) { - struct rpc_clnt *rpcb_clnt; int result, error = 0; msg->rpc_resp = &result; - rpcb_clnt = rpcb_create_local(version); - if (!IS_ERR(rpcb_clnt)) { - error = rpc_call_sync(rpcb_clnt, msg, 0); - rpc_shutdown_client(rpcb_clnt); - } else - error = PTR_ERR(rpcb_clnt); - + error = rpc_call_sync(clnt, msg, 0); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -276,6 +313,11 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), @@ -285,7 +327,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_2, &msg); + return rpcb_register_call(rpcb_local_clnt, &msg); } /* @@ -310,7 +352,7 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -337,7 +379,7 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - result = rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(rpcb_local_clnt4, msg); kfree(map->r_addr); return result; } @@ -353,7 +395,7 @@ static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(RPCBVERS_4, msg); + return rpcb_register_call(rpcb_local_clnt4, msg); } /** @@ -411,6 +453,13 @@ int rpcb_v4_register(const u32 program, const u32 version, struct rpc_message msg = { .rpc_argp = &map, }; + int error; + + error = rpcb_create_local(); + if (error) + return error; + if (rpcb_local_clnt4 == NULL) + return -EPROTONOSUPPORT; if (address == NULL) return rpcb_unregister_all_protofamilies(&msg); @@ -1024,3 +1073,15 @@ static struct rpc_program rpcb_program = { .version = rpcb_version, .stats = &rpcb_stats, }; + +/** + * cleanup_rpcb_clnt - remove xprtsock's sysctls, unregister + * + */ +void cleanup_rpcb_clnt(void) +{ + if (rpcb_local_clnt4) + rpc_shutdown_client(rpcb_local_clnt4); + if (rpcb_local_clnt) + rpc_shutdown_client(rpcb_local_clnt); +} diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 8cce9218901..f438347d817 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -24,6 +24,8 @@ extern struct cache_detail ip_map_cache, unix_gid_cache; +extern void cleanup_rpcb_clnt(void); + static int __init init_sunrpc(void) { @@ -53,6 +55,7 @@ out: static void __exit cleanup_sunrpc(void) { + cleanup_rpcb_clnt(); rpcauth_remove_module(); cleanup_socket_xprt(); svc_cleanup_xprt_sock(); From 2a76b3bfa22993fc09166bd6a8db0dbe902b6813 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 23/75] SUNRPC: Use TCP for local rpcbind upcalls Use TCP with the soft connect semantic for local rpcbind upcalls so the kernel can detect immediately if the local rpcbind daemon is not running. Signed-off-by: Chuck Lever --- net/sunrpc/rpcb_clnt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 116db74dd94..401154094ee 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -176,7 +176,7 @@ static DEFINE_MUTEX(rpcb_create_local_mutex); static int rpcb_create_local(void) { struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_UDP, + .protocol = XPRT_TRANSPORT_TCP, .address = (struct sockaddr *)&rpcb_inaddr_loopback, .addrsize = sizeof(rpcb_inaddr_loopback), .servername = "localhost", @@ -258,7 +258,7 @@ static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, 0); + error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); From 012da158f636347c4eb28fd1e1cca020fef5e54d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 24/75] SUNRPC: Use soft connects for autobinding over TCP Autobinding is handled by the rpciod process, not in user processes that are generating regular RPC requests. Thus autobinding is usually not affected by signals targetting user processes, such as KILL or timer expiration events. In addition, an RPC request generated by a user process that has RPC_TASK_SOFTCONN set and needs to perform an autobind will hang if the remote rpcbind service is not available. For rpcbind queries on connection-oriented transports, let's use the new soft connect semantic to return control to the user's process quickly, if the kernel's rpcbind client can't connect to the remote rpcbind service. Logic is introduced in call_bind_status() to handle connection errors that occurred during an asynchronous rpcbind query. The logic abandons the rpcbind query if the RPC request has SOFTCONN set, and retries after a few seconds in the normal case. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 17 ++++++++++++++++- net/sunrpc/rpcb_clnt.c | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 68a23583f44..4b76ef9336c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1060,7 +1060,7 @@ call_bind_status(struct rpc_task *task) goto retry_timeout; case -EPFNOSUPPORT: /* server doesn't support any rpcbind version we know of */ - dprintk("RPC: %5u remote rpcbind service unavailable\n", + dprintk("RPC: %5u unrecognized remote rpcbind service\n", task->tk_pid); break; case -EPROTONOSUPPORT: @@ -1069,6 +1069,21 @@ call_bind_status(struct rpc_task *task) task->tk_status = 0; task->tk_action = call_bind; return; + case -ECONNREFUSED: /* connection problems */ + case -ECONNRESET: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + case -EPIPE: + dprintk("RPC: %5u remote rpcbind unreachable: %d\n", + task->tk_pid, task->tk_status); + if (!RPC_IS_SOFTCONN(task)) { + rpc_delay(task, 5*HZ); + goto retry_timeout; + } + status = task->tk_status; + break; default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 401154094ee..3e3772d8eb9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -537,7 +537,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi .rpc_message = &msg, .callback_ops = &rpcb_getport_ops, .callback_data = map, - .flags = RPC_TASK_ASYNC, + .flags = RPC_TASK_ASYNC | RPC_TASK_SOFTCONN, }; return rpc_run_task(&task_setup_data); From caabea8a565fb4e16f8e58e16bb9d535e591b709 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 25/75] SUNRPC: Use soft connect semantics when performing RPC ping Currently, if a remote RPC service is unreachable, an RPC ping will hang until the underlying transport connect attempt times out. A more desirable behavior might be to have the ping fail immediately so upper layers can recover appropriately. In the case of an NFS mount, for instance, this would mean the mount(2) system call could fail immediately if the server isn't listening, rather than hanging uninterruptibly for more than 3 minutes. Change rpc_ping() so that it fails immediately for connection-oriented transports. rpc_create() will then fail immediately for such transports if an RPC ping was requested. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4b76ef9336c..97931d9f857 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,7 +79,7 @@ static void call_connect_status(struct rpc_task *task); static __be32 *rpc_encode_header(struct rpc_task *task); static __be32 *rpc_verify_header(struct rpc_task *task); -static int rpc_ping(struct rpc_clnt *clnt, int flags); +static int rpc_ping(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -340,7 +340,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) return clnt; if (!(args->flags & RPC_CLNT_CREATE_NOPING)) { - int err = rpc_ping(clnt, RPC_TASK_SOFT); + int err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); return ERR_PTR(err); @@ -528,7 +528,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; - err = rpc_ping(clnt, RPC_TASK_SOFT); + err = rpc_ping(clnt); if (err != 0) { rpc_shutdown_client(clnt); clnt = ERR_PTR(err); @@ -1709,14 +1709,14 @@ static struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -static int rpc_ping(struct rpc_clnt *clnt, int flags) +static int rpc_ping(struct rpc_clnt *clnt) { struct rpc_message msg = { .rpc_proc = &rpcproc_null, }; int err; msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); - err = rpc_call_sync(clnt, &msg, flags); + err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN); put_rpccred(msg.rpc_cred); return err; } From 3a28becc35e5c8f1fabb707bcd8a473712653de6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 26/75] SUNRPC: soft connect semantics for UDP Introduce soft connect behavior for UDP transports. In this case, a major timeout returns ETIMEDOUT instead of EIO. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 97931d9f857..154034b675b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1380,6 +1380,10 @@ call_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; + if (RPC_IS_SOFTCONN(task)) { + rpc_exit(task, -ETIMEDOUT); + return; + } if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", From 9c4c761a629caa5572c1a29a8288416070d5d6b7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 27/75] NFSv4.1: Handle NFSv4.1 session errors in the lock recovery code Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2ef4fecf398..3004089e97b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -877,6 +877,10 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_EXPIRED: case -NFS4ERR_NO_GRACE: case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", @@ -959,6 +963,10 @@ restart: case -NFS4ERR_NO_GRACE: nfs4_state_mark_reclaim_nograce(sp->so_client, state); case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out_err; } nfs4_put_open_state(state); From e48de5ec25b37d42292c876c1d3337766aae89bd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 28/75] nfs: remove unnecessary check from nfs_rename() VFS already checks if both source and target are directories. Signed-off-by: Miklos Szeredi Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7cb298525ee..b5fae1953e9 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1601,13 +1601,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * silly-rename. If the silly-rename succeeds, the * copied dentry is hashed and becomes the new target. */ - if (!new_inode) - goto go_ahead; - if (S_ISDIR(new_inode->i_mode)) { - error = -EISDIR; - if (!S_ISDIR(old_inode->i_mode)) - goto out; - } else if (atomic_read(&new_dentry->d_count) > 2) { + if (new_inode && !S_ISDIR(new_inode->i_mode) && + atomic_read(&new_dentry->d_count) > 2) { int err; /* copy the target dentry's name */ dentry = d_alloc(new_dentry->d_parent, @@ -1627,7 +1622,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out; } -go_ahead: /* * ... prune child dentries and writebacks if needed. */ From 28f79a1a695e7a5b00af3b6713b449e08581ffbb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 29/75] nfs: fix comments in nfs_rename() Comments are wrong or out of date. In particular d_drop() doesn't free the inode it just unhashes the dentry. And if target is a directory then it is not checked for being busy. Signed-off-by: Miklos Szeredi Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b5fae1953e9..11d0c4cffff 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1581,7 +1581,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* * To prevent any new references to the target during the rename, - * we unhash the dentry and free the inode in advance. + * we unhash the dentry in advance. */ if (!d_unhashed(new_dentry)) { d_drop(new_dentry); @@ -1594,12 +1594,10 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, atomic_read(&new_dentry->d_count)); /* - * First check whether the target is busy ... we can't - * safely do _any_ rename if the target is in use. - * - * For files, make a copy of the dentry and then do a - * silly-rename. If the silly-rename succeeds, the - * copied dentry is hashed and becomes the new target. + * For non-directories, check whether the target is busy and if so, + * make a copy of the dentry and then do a silly-rename. If the + * silly-rename succeeds, the copied dentry is hashed and becomes + * the new target. */ if (new_inode && !S_ISDIR(new_inode->i_mode) && atomic_read(&new_dentry->d_count) > 2) { From 27226104e60964f21717e0f452cecd45c85a64c6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 30/75] nfs: dont unhash target if renaming a directory Move unhashing the target to after the check for existence and being a non-directory. If renaming a directory then the VFS already unhashes the target if it is not busy. If it's busy then acquiring more references during the rename makes no difference. Signed-off-by: Miklos Szeredi Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 56 +++++++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 11d0c4cffff..76b7f539d76 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1579,15 +1579,6 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *dentry = NULL, *rehash = NULL; int error = -EBUSY; - /* - * To prevent any new references to the target during the rename, - * we unhash the dentry in advance. - */ - if (!d_unhashed(new_dentry)) { - d_drop(new_dentry); - rehash = new_dentry; - } - dfprintk(VFS, "NFS: rename(%s/%s -> %s/%s, ct=%d)\n", old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name, @@ -1599,25 +1590,36 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, * silly-rename succeeds, the copied dentry is hashed and becomes * the new target. */ - if (new_inode && !S_ISDIR(new_inode->i_mode) && - atomic_read(&new_dentry->d_count) > 2) { - int err; - /* copy the target dentry's name */ - dentry = d_alloc(new_dentry->d_parent, - &new_dentry->d_name); - if (!dentry) - goto out; + if (new_inode && !S_ISDIR(new_inode->i_mode)) { + /* + * To prevent any new references to the target during the + * rename, we unhash the dentry in advance. + */ + if (!d_unhashed(new_dentry)) { + d_drop(new_dentry); + rehash = new_dentry; + } - /* silly-rename the existing target ... */ - err = nfs_sillyrename(new_dir, new_dentry); - if (!err) { - new_dentry = rehash = dentry; - new_inode = NULL; - /* instantiate the replacement target */ - d_instantiate(new_dentry, NULL); - } else if (atomic_read(&new_dentry->d_count) > 1) - /* dentry still busy? */ - goto out; + if (atomic_read(&new_dentry->d_count) > 2) { + int err; + + /* copy the target dentry's name */ + dentry = d_alloc(new_dentry->d_parent, + &new_dentry->d_name); + if (!dentry) + goto out; + + /* silly-rename the existing target ... */ + err = nfs_sillyrename(new_dir, new_dentry); + if (!err) { + new_dentry = rehash = dentry; + new_inode = NULL; + /* instantiate the replacement target */ + d_instantiate(new_dentry, NULL); + } else if (atomic_read(&new_dentry->d_count) > 1) + /* dentry still busy? */ + goto out; + } } /* From 24e93025ee434a58d35e5abb283c5bcc9a13e477 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 31/75] nfs: clean up sillyrenaming in nfs_rename() The d_instantiate(new_dentry, NULL) is superfluous, the dentry is already negative. Rehashing this dummy dentry isn't needed either, d_move() works fine on an unhashed target. The re-checking for busy after a failed nfs_sillyrename() is bogus too: new_dentry->d_count < 2 would be a bug here. Signed-off-by: Miklos Szeredi Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 76b7f539d76..2c5ace4f00a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1611,14 +1611,11 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* silly-rename the existing target ... */ err = nfs_sillyrename(new_dir, new_dentry); - if (!err) { - new_dentry = rehash = dentry; - new_inode = NULL; - /* instantiate the replacement target */ - d_instantiate(new_dentry, NULL); - } else if (atomic_read(&new_dentry->d_count) > 1) - /* dentry still busy? */ + if (err) goto out; + + new_dentry = dentry; + new_inode = NULL; } } From 44ed3556bad809797f7b06a4a88918fd8a23d6fe Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 3 Dec 2009 15:58:56 -0500 Subject: [PATCH 32/75] NFS4ERR_FILE_OPEN handling in Linux/NFS NFS4ERR_FILE_OPEN is return by the server when an operation cannot be performed because the file is currently open and local (to the server) semantics prohibit the operation while the file is open. A typical case is a RENAME operation on an MS-Windows platform, which prevents rename while the file is open. While it is possible that such a condition is transitory, it is also very possible that the file will be held open for an extended period of time thus preventing the operation. The current behaviour of Linux/NFS is to retry the operation indefinitely. This is not appropriate - we do not expect a rename to take an arbitrary amount of time to complete. Rather, and error should be returned. The most obvious error code would be EBUSY, which is a legal at least for 'rename' and 'unlink', and accurately captures the reason for the error. This patch allows a few retries until about 2 seconds have elapsed, then returns EBUSY. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 741a562177f..40da0d5bc5f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -275,6 +275,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, /* FALLTHROUGH */ #endif /* !defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: + if (exception->timeout > HZ) { + /* We have retried a decent amount, time to + * fail + */ + ret = -EBUSY; + break; + } case -NFS4ERR_GRACE: case -NFS4ERR_DELAY: ret = nfs4_delay(server->client, &exception->timeout); From 4d643d1dfa9349164fe928e255f68020d91dbfe0 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:52:24 -0500 Subject: [PATCH 33/75] nfs41: add create session into establish_clid Reported-by: Trond Myklebust Resetting the clientid from the state manager could result in not confirming the clientid due to create session not being called. Move the create session call from the NFS4CLNT_SESSION_SETUP state manager initialize session case into the NFS4CLNT_LEASE_EXPIRED case establish_clid call. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 7 +++---- fs/nfs/nfs4state.c | 35 ++++++++++++++--------------------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6ea07a3c75d..99d507d22f2 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -200,9 +200,11 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *); +extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); +extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0f9b7541e04..528b60a23ed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4315,7 +4315,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, * NFS4ERR_BADSESSION in the sequence operation, and will therefore * be in some phase of session reset. */ -static int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) +int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) { nfs4_verifier verifier; struct nfs41_exchange_id_args args = { @@ -4601,7 +4601,6 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) if (!session) return NULL; - set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); /* * The create session reply races with the server back * channel probe. Mark the client NFS_CS_SESSION_INITING @@ -4967,7 +4966,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .state_flag_bit = NFS_STATE_RECLAIM_REBOOT, .recover_open = nfs4_open_reclaim, .recover_lock = nfs4_lock_reclaim, - .establish_clid = nfs4_proc_exchange_id, + .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, }; #endif /* CONFIG_NFS_V4_1 */ @@ -4987,7 +4986,7 @@ struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, .recover_open = nfs4_open_expired, .recover_lock = nfs4_lock_expired, - .establish_clid = nfs4_proc_exchange_id, + .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, }; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 2eb0059bd69..967fc76a246 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -116,6 +116,19 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) #if defined(CONFIG_NFS_V4_1) +int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) +{ + int status; + + status = nfs4_proc_exchange_id(clp, cred); + if (status == 0) + /* create session schedules state renewal upon success */ + status = nfs4_proc_create_session(clp, 0); + if (status == 0) + nfs_mark_client_ready(clp, NFS_CS_READY); + return status; +} + struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) { struct rpc_cred *cred; @@ -1162,7 +1175,6 @@ static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) switch (err) { case -NFS4ERR_STALE_CLIENTID: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); } } @@ -1188,24 +1200,8 @@ out: return status; } -static int nfs4_initialize_session(struct nfs_client *clp) -{ - int status; - - status = nfs4_proc_create_session(clp, 0); - if (!status) { - nfs_mark_client_ready(clp, NFS_CS_READY); - } else if (status == -NFS4ERR_STALE_CLIENTID) { - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); - } else { - nfs_mark_client_ready(clp, status); - } - return status; -} #else /* CONFIG_NFS_V4_1 */ static int nfs4_reset_session(struct nfs_client *clp) { return 0; } -static int nfs4_initialize_session(struct nfs_client *clp) { return 0; } #endif /* CONFIG_NFS_V4_1 */ /* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors @@ -1262,10 +1258,7 @@ static void nfs4_state_manager(struct nfs_client *clp) /* Initialize or reset the session */ if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) && nfs4_has_session(clp)) { - if (clp->cl_cons_state == NFS_CS_SESSION_INITING) - status = nfs4_initialize_session(clp); - else - status = nfs4_reset_session(clp); + status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) continue; if (status < 0) From 6df08189ffd33d8357759561dba49d25c0335858 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:05 -0500 Subject: [PATCH 34/75] nfs41: rename cl_state session SETUP bit to RESET The bit is no longer used for session setup, only for session reset. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 +- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 8 ++++---- fs/nfs/nfs4state.c | 8 ++++---- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e21b1bb9972..ebcd3795389 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -183,7 +183,7 @@ static inline void nfs4_restart_rpc(struct rpc_task *task, { #ifdef CONFIG_NFS_V4_1 if (nfs4_has_session(clp) && - test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) { + test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) { rpc_restart_call_prepare(task); return; } diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 99d507d22f2..e9ecd6bf925 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -44,7 +44,7 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_REBOOT, NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, - NFS4CLNT_SESSION_SETUP, + NFS4CLNT_SESSION_RESET, }; /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 528b60a23ed..ff1c7344e08 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -270,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); - set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); exception->retry = 1; /* FALLTHROUGH */ #endif /* !defined(CONFIG_NFS_V4_1) */ @@ -446,7 +446,7 @@ static int nfs4_recover_session(struct nfs4_session *session) ret = nfs4_wait_clnt_recover(clp); if (ret != 0) break; - if (!test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + if (!test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) break; nfs4_schedule_state_manager(clp); ret = -EIO; @@ -475,7 +475,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, tbl = &session->fc_slot_table; spin_lock(&tbl->slot_tbl_lock); - if (test_bit(NFS4CLNT_SESSION_SETUP, &session->clp->cl_state)) { + if (test_bit(NFS4CLNT_SESSION_RESET, &session->clp->cl_state)) { if (tbl->highest_used_slotid != -1) { rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); @@ -3363,7 +3363,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); - set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); task->tk_status = 0; return -EAGAIN; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 967fc76a246..902d8749bde 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1092,7 +1092,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: case -NFS4ERR_SEQ_FALSE_RETRY: case -NFS4ERR_SEQ_MISORDERED: - set_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state); + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); } return error; } @@ -1256,7 +1256,7 @@ static void nfs4_state_manager(struct nfs_client *clp) } /* Initialize or reset the session */ - if (test_and_clear_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) + if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) && nfs4_has_session(clp)) { status = nfs4_reset_session(clp); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) @@ -1270,7 +1270,7 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_do_reclaim(clp, nfs4_reboot_recovery_ops[clp->cl_minorversion]); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || - test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state)) + test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) continue; nfs4_state_end_reclaim_reboot(clp); if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) @@ -1284,7 +1284,7 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_do_reclaim(clp, nfs4_nograce_recovery_ops[clp->cl_minorversion]); if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) || - test_bit(NFS4CLNT_SESSION_SETUP, &clp->cl_state) || + test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) || test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) continue; if (status < 0) From 1d9ddde94aed01c4618cf6f70883cc511c3b2b62 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:27 -0500 Subject: [PATCH 35/75] nfs41: nfs4_get_lease_time will never session reset Make this clear by calling rpc_restart-call. Prepare for nfs4_restart_rpc() to free slots. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ff1c7344e08..a23110d1d2a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4408,7 +4408,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; - nfs4_restart_rpc(task, data->clp); + rpc_restart_call(task); return; } nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res); From e608e79f1bf4b967afcf57777e63b5f0939b00e8 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:29 -0500 Subject: [PATCH 36/75] nfs41: call free slot from nfs4_restart_rpc nfs41_sequence_free_slot can be called multiple times on SEQUENCE operation errors. No reason to inline nfs4_restart_rpc Reported-by: Trond Myklebust nfs_writeback_done and nfs_readpage_retry call nfs4_restart_rpc outside the error handler, and the slot is not freed prior to restarting in the rpc_prepare state during session reset. Fix this by moving the call to nfs41_sequence_free_slot from the error path of nfs41_sequence_done into nfs4_restart_rpc, and by removing the test for NFS4CLNT_SESSION_SETUP. Always free slot and goto the rpc prepare state on async errors. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 16 ++-------------- fs/nfs/nfs4proc.c | 29 +++++++++++++++++++++++------ fs/nfs/read.c | 3 ++- fs/nfs/unlink.c | 4 +++- fs/nfs/write.c | 3 ++- 5 files changed, 32 insertions(+), 23 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ebcd3795389..a6f7b6cbfd0 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -177,26 +177,14 @@ extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); extern struct rpc_procinfo nfs3_procedures[]; extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); -/* nfs4proc.c */ -static inline void nfs4_restart_rpc(struct rpc_task *task, - const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp) && - test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) { - rpc_restart_call_prepare(task); - return; - } -#endif /* CONFIG_NFS_V4_1 */ - rpc_restart_call(task); -} - /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); #endif /* nfs4proc.c */ +extern void nfs4_restart_rpc(struct rpc_task *, const struct nfs_client *, + struct nfs4_sequence_res *); #ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; #endif diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a23110d1d2a..6ef50aa785d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -644,6 +644,19 @@ static void nfs4_sequence_done(const struct nfs_server *server, #endif /* CONFIG_NFS_V4_1 */ } +void nfs4_restart_rpc(struct rpc_task *task, const struct nfs_client *clp, + struct nfs4_sequence_res *res) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) { + nfs41_sequence_free_slot(clp, res); + rpc_restart_call_prepare(task); + return; + } +#endif /* CONFIG_NFS_V4_1 */ + rpc_restart_call(task); +} + /* no restart, therefore free slot here */ static void nfs4_sequence_done_free_slot(const struct nfs_server *server, struct nfs4_sequence_res *res, @@ -1750,7 +1763,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { - nfs4_restart_rpc(task, server->nfs_client); + nfs4_restart_rpc(task, server->nfs_client, + &calldata->res.seq_res); return; } } @@ -2988,7 +3002,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - nfs4_restart_rpc(task, server->nfs_client); + nfs4_restart_rpc(task, server->nfs_client, &data->res.seq_res); return -EAGAIN; } @@ -3013,7 +3027,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client, + &data->res.seq_res); return -EAGAIN; } if (task->tk_status >= 0) { @@ -3041,7 +3056,8 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { - nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client, + &data->res.seq_res); return -EAGAIN; } nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client, @@ -3755,7 +3771,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) default: if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) nfs4_restart_rpc(task, - calldata->server->nfs_client); + calldata->server->nfs_client, + &calldata->res.seq_res); } nfs4_sequence_free_slot(calldata->server->nfs_client, &calldata->res.seq_res); @@ -4890,7 +4907,7 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (_nfs4_async_handle_error(task, NULL, clp, NULL) == -EAGAIN) { - nfs4_restart_rpc(task, clp); + nfs4_restart_rpc(task, clp, task->tk_msg.rpc_resp); return; } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 12c9e66d3f1..3e04fb9ea64 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -368,7 +368,8 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; - nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); + nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client, + &data->res.seq_res); return; out: nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client, diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 1064c91ae81..52f7bdb12c8 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -81,9 +81,11 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; struct inode *dir = data->dir; + struct nfs_removeres *res = task->tk_msg.rpc_resp; if (!NFS_PROTO(dir)->unlink_done(task, dir)) - nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client); + nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client, + &res->seq_res); } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 53eb26c16b5..556668ff022 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1216,7 +1216,8 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - nfs4_restart_rpc(task, server->nfs_client); + nfs4_restart_rpc(task, server->nfs_client, + &data->res.seq_res); return -EAGAIN; } if (time_before(complain, jiffies)) { From 36bbe34239f63377b5179ad32fd13cd71d6e1ba7 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:30 -0500 Subject: [PATCH 37/75] nfs41: free the slot on unhandled read errors nfs4_read_done returns zero on unhandled errors. nfs_readpage_result will return on a negative tk_status without freeing the slot. Call nfs4_sequence_free_slot on unhandled errors in nfs4_read_done. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6ef50aa785d..f0b6d569a50 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3009,6 +3009,9 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) nfs_invalidate_atime(data->inode); if (task->tk_status > 0) renew_lease(server, data->timestamp); + else if (task->tk_status < 0) + nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res); + return 0; } From b9179237e2b2b4d34b5821cca3db280ebbc8694a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:32 -0500 Subject: [PATCH 38/75] nfs41: fix switch in nfs4_handle_exception Do not fall through and call nfs4_delay on session error handling. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f0b6d569a50..d0cb7cb367c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -272,7 +272,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, errorcode); set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); exception->retry = 1; - /* FALLTHROUGH */ + break; #endif /* !defined(CONFIG_NFS_V4_1) */ case -NFS4ERR_FILE_OPEN: if (exception->timeout > HZ) { From 8ba9bf8e5160b0b4ebc56f64ad445c6c0ecdac1d Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:34 -0500 Subject: [PATCH 39/75] nfs41: fix switch in nfs4_recovery_handle_error Do not fall through and set NFS4CLNT_SESSION_RESET bit on NFS4ERR_EXPIRED Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 902d8749bde..f56f6be5e31 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1085,6 +1085,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_EXPIRED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs4_state_start_reclaim_nograce(clp); + break; case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: case -NFS4ERR_BAD_HIGH_SLOT: From 2628eddff15cb38b7246d5c18e2f7dc3e2c0d0c2 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:35 -0500 Subject: [PATCH 40/75] nfs41: don't clear tk_action on success Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d0cb7cb367c..68463d0ba9b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -534,7 +534,7 @@ int nfs4_setup_sequence(struct nfs_client *clp, goto out; ret = nfs41_setup_sequence(clp->cl_session, args, res, cache_reply, task); - if (ret != -EAGAIN) { + if (ret && ret != -EAGAIN) { /* terminate rpc task */ task->tk_status = ret; task->tk_action = NULL; From 05f0d2364726c92f6b870db654967088349379fe Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:37 -0500 Subject: [PATCH 41/75] nfs41: remove nfs4_recover_session nfs4_recover_session can put rpciod to sleep. Just use nfs4_schedule_recovery. Reported-by: Trond Myklebust Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 68463d0ba9b..fb62919f7f2 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -436,24 +436,6 @@ out: return ret_id; } -static int nfs4_recover_session(struct nfs4_session *session) -{ - struct nfs_client *clp = session->clp; - unsigned int loop; - int ret; - - for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) { - ret = nfs4_wait_clnt_recover(clp); - if (ret != 0) - break; - if (!test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) - break; - nfs4_schedule_state_manager(clp); - ret = -EIO; - } - return ret; -} - static int nfs41_setup_sequence(struct nfs4_session *session, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, @@ -462,7 +444,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, { struct nfs4_slot *slot; struct nfs4_slot_table *tbl; - int status = 0; u8 slotid; dprintk("--> %s\n", __func__); @@ -485,11 +466,10 @@ static int nfs41_setup_sequence(struct nfs4_session *session, /* The slot table is empty; start the reset thread */ dprintk("%s Session Reset\n", __func__); + rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); + nfs4_schedule_state_manager(session->clp); spin_unlock(&tbl->slot_tbl_lock); - status = nfs4_recover_session(session); - if (status) - return status; - spin_lock(&tbl->slot_tbl_lock); + return -EAGAIN; } slotid = nfs4_find_slot(tbl, task); From ea028ac92541ac30bf202ed94cb53eec2ea0c9d6 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:38 -0500 Subject: [PATCH 42/75] nfs41: nfs41: fix state manager deadlock in session reset If the session is reset during state recovery, the state manager thread can sleep on the slot_tbl_waitq causing a deadlock. Add a completion framework to the session. Have the state manager thread set a new session state (NFS4CLNT_SESSION_DRAINING) and wait for the session slot table to drain. Signal the state manager thread in nfs41_sequence_free_slot when the NFS4CLNT_SESSION_DRAINING bit is set and the session is drained. Reported-by: Trond Myklebust Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 26 +++++++++++++++++--------- fs/nfs/nfs4state.c | 15 +++++++++++++++ include/linux/nfs_fs_sb.h | 1 + 4 files changed, 34 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e9ecd6bf925..5c7740178a0 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -45,6 +45,7 @@ enum nfs4_client_state { NFS4CLNT_RECLAIM_NOGRACE, NFS4CLNT_DELEGRETURN, NFS4CLNT_SESSION_RESET, + NFS4CLNT_SESSION_DRAINING, }; /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fb62919f7f2..c1bc9cad5e8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -361,6 +361,16 @@ void nfs41_sequence_free_slot(const struct nfs_client *clp, } nfs4_free_slot(tbl, res->sr_slotid); res->sr_slotid = NFS4_MAX_SLOT_TABLE; + + /* Signal state manager thread if session is drained */ + if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { + spin_lock(&tbl->slot_tbl_lock); + if (tbl->highest_used_slotid == -1) { + dprintk("%s COMPLETE: Session Drained\n", __func__); + complete(&clp->cl_session->complete); + } + spin_unlock(&tbl->slot_tbl_lock); + } } static void nfs41_sequence_done(struct nfs_client *clp, @@ -457,15 +467,11 @@ static int nfs41_setup_sequence(struct nfs4_session *session, spin_lock(&tbl->slot_tbl_lock); if (test_bit(NFS4CLNT_SESSION_RESET, &session->clp->cl_state)) { - if (tbl->highest_used_slotid != -1) { - rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - spin_unlock(&tbl->slot_tbl_lock); - dprintk("<-- %s: Session reset: draining\n", __func__); - return -EAGAIN; - } - - /* The slot table is empty; start the reset thread */ - dprintk("%s Session Reset\n", __func__); + /* + * The state manager will wait until the slot table is empty. + * Schedule the reset thread + */ + dprintk("%s Schedule Session Reset\n", __func__); rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); nfs4_schedule_state_manager(session->clp); spin_unlock(&tbl->slot_tbl_lock); @@ -4506,6 +4512,7 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session) 1); if (status) return status; + init_completion(&session->complete); status = nfs4_reset_slot_table(&session->bc_slot_table, session->bc_attrs.max_reqs, @@ -4608,6 +4615,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) * nfs_client struct */ clp->cl_cons_state = NFS_CS_SESSION_INITING; + init_completion(&session->complete); tbl = &session->fc_slot_table; spin_lock_init(&tbl->slot_tbl_lock); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f56f6be5e31..3c1433598b6 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1181,8 +1181,23 @@ static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) static int nfs4_reset_session(struct nfs_client *clp) { + struct nfs4_session *ses = clp->cl_session; + struct nfs4_slot_table *tbl = &ses->fc_slot_table; int status; + INIT_COMPLETION(ses->complete); + spin_lock(&tbl->slot_tbl_lock); + if (tbl->highest_used_slotid != -1) { + set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); + spin_unlock(&tbl->slot_tbl_lock); + status = wait_for_completion_interruptible(&ses->complete); + clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); + if (status) /* -ERESTARTSYS */ + goto out; + } else { + spin_unlock(&tbl->slot_tbl_lock); + } + status = nfs4_proc_destroy_session(clp->cl_session); if (status && status != -NFS4ERR_BADSESSION && status != -NFS4ERR_DEADSESSION) { diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 320569eabe3..34fc6be5bfc 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -209,6 +209,7 @@ struct nfs4_session { unsigned long session_state; u32 hash_alg; u32 ssv_len; + struct completion complete; /* The fore and back channel */ struct nfs4_channel_attrs fc_attrs; From 691daf3b0c410c8bcab6735796be03ea446e1924 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 15:55:39 -0500 Subject: [PATCH 43/75] nfs41: drain session cleanup Do not wake up the next slot_tbl_waitq task in nfs4_free_slot because we may be draining the slot. Either signal the state manager that the session is drained (the state manager wakes up tasks) OR wake up the next task. In nfs41_sequence_done, the slot dereference is only needed in the sequence operation success case. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c1bc9cad5e8..a0f73e99ff3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -336,7 +336,6 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) else tbl->highest_used_slotid = -1; } - rpc_wake_up_next(&tbl->slot_tbl_waitq); spin_unlock(&tbl->slot_tbl_lock); dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, free_slotid, tbl->highest_used_slotid); @@ -353,14 +352,13 @@ void nfs41_sequence_free_slot(const struct nfs_client *clp, } tbl = &clp->cl_session->fc_slot_table; if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { - dprintk("%s: No slot\n", __func__); /* just wake up the next guy waiting since * we may have not consumed a slot after all */ - rpc_wake_up_next(&tbl->slot_tbl_waitq); - return; + dprintk("%s: No slot\n", __func__); + } else { + nfs4_free_slot(tbl, res->sr_slotid); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; } - nfs4_free_slot(tbl, res->sr_slotid); - res->sr_slotid = NFS4_MAX_SLOT_TABLE; /* Signal state manager thread if session is drained */ if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { @@ -370,6 +368,8 @@ void nfs41_sequence_free_slot(const struct nfs_client *clp, complete(&clp->cl_session->complete); } spin_unlock(&tbl->slot_tbl_lock); + } else { + rpc_wake_up_next(&tbl->slot_tbl_waitq); } } @@ -394,10 +394,10 @@ static void nfs41_sequence_done(struct nfs_client *clp, if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) goto out; - tbl = &clp->cl_session->fc_slot_table; - slot = tbl->slots + res->sr_slotid; - + /* Check the SEQUENCE operation status */ if (res->sr_status == 0) { + tbl = &clp->cl_session->fc_slot_table; + slot = tbl->slots + res->sr_slotid; /* Update the slot's sequence and clientid lease timer */ ++slot->seq_nr; timestamp = res->sr_renewal_time; From 0b9e2d41f1f0360be08809d4e3bb56b67be6241a Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 4 Dec 2009 16:02:14 -0500 Subject: [PATCH 44/75] nfs41: only state manager sets NFS4CLNT_SESSION_SETUP Replace sync and async handlers setting of the NFS4CLNT_SESSION_SETUP bit with setting NFS4CLNT_CHECK_LEASE, and let the state manager decide to reset the session. Signed-off-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++--- fs/nfs/nfs4state.c | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a0f73e99ff3..637a6b476bd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -270,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR: %d Reset session\n", __func__, errorcode); - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_state_recovery(clp); exception->retry = 1; break; #endif /* !defined(CONFIG_NFS_V4_1) */ @@ -466,7 +466,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, tbl = &session->fc_slot_table; spin_lock(&tbl->slot_tbl_lock); - if (test_bit(NFS4CLNT_SESSION_RESET, &session->clp->cl_state)) { + if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state)) { /* * The state manager will wait until the slot table is empty. * Schedule the reset thread @@ -3368,7 +3368,7 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, case -NFS4ERR_SEQ_MISORDERED: dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); - set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_state_recovery(clp); task->tk_status = 0; return -EAGAIN; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3c1433598b6..46c69e2248e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1094,6 +1094,8 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) case -NFS4ERR_SEQ_FALSE_RETRY: case -NFS4ERR_SEQ_MISORDERED: set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + /* Zero session reset errors */ + return 0; } return error; } @@ -1191,7 +1193,6 @@ static int nfs4_reset_session(struct nfs_client *clp) set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); spin_unlock(&tbl->slot_tbl_lock); status = wait_for_completion_interruptible(&ses->complete); - clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); if (status) /* -ERESTARTSYS */ goto out; } else { @@ -1212,6 +1213,7 @@ static int nfs4_reset_session(struct nfs_client *clp) /* fall through*/ out: /* Wake up the next rpc task even on error */ + clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq); return status; } From 07bccc2dd4e8745859f0fa7d120ea39320fbcdbf Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:19:01 -0500 Subject: [PATCH 45/75] nfs41: add support for callback with RPC version number 4 The NFSv4.1 spec-29 (18.36.3) says that the server MUST use an ONC RPC (program) version number equal to 4 in callbacks sent to the client. For now we allow both versions 1 and 4. Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/callback.c | 1 + fs/nfs/callback_xdr.c | 7 +++++++ fs/nfs/internal.h | 1 + fs/nfs/nfs4_fs.h | 1 + 4 files changed, 10 insertions(+) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index e66ec5d169f..73ab220354d 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -385,6 +385,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) */ static struct svc_version *nfs4_callback_version[] = { [1] = &nfs4_callback_version1, + [4] = &nfs4_callback_version4, }; static struct svc_stat nfs4_callback_stats; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 76b0aa0f73b..0fda5e66241 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -718,3 +718,10 @@ struct svc_version nfs4_callback_version1 = { .vs_dispatch = NULL, }; +struct svc_version nfs4_callback_version4 = { + .vs_vers = 4, + .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), + .vs_proc = nfs4_callback_procedures1, + .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, + .vs_dispatch = NULL, +}; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a6f7b6cbfd0..7b890ba37fa 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -156,6 +156,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; +extern struct svc_version nfs4_callback_version4; /* pagelist.c */ extern int __init nfs_init_nfspagecache(void); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5c7740178a0..0d25a82451d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -290,6 +290,7 @@ struct nfs4_mount_data; /* callback_xdr.c */ extern struct svc_version nfs4_callback_version1; +extern struct svc_version nfs4_callback_version4; #else From b4a6f4966efc7e70dc8d8a9e60744de6845b14bf Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:19:11 -0500 Subject: [PATCH 46/75] nfs4: minor delegation cleaning Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index eeecd69c130..6fc9fe0af3d 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -391,8 +391,7 @@ static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + nfs_mark_return_delegation(clp, delegation); } rcu_read_unlock(); } @@ -427,8 +426,7 @@ static void nfs_client_mark_return_unreferenced_delegations(struct nfs_client *c list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + nfs_mark_return_delegation(clp, delegation); } rcu_read_unlock(); } From c79571a508801e055a0be583d6dc70bddad7bb64 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:20:52 -0500 Subject: [PATCH 47/75] nfs4: V2 return/expire delegations depending on their type Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 6fc9fe0af3d..98dbc8f5ced 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -385,27 +385,41 @@ void nfs_super_return_all_delegations(struct super_block *sb) nfs4_schedule_state_manager(clp); } -static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) +static +void nfs_client_mark_return_all_delegation_types(struct nfs_client *clp, fmode_t flags) { struct nfs_delegation *delegation; rcu_read_lock(); list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) { - nfs_mark_return_delegation(clp, delegation); + if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) + continue; + if (delegation->type & flags) + nfs_mark_return_delegation(clp, delegation); } rcu_read_unlock(); } +static void nfs_client_mark_return_all_delegations(struct nfs_client *clp) +{ + nfs_client_mark_return_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); +} + static void nfs_delegation_run_state_manager(struct nfs_client *clp) { if (test_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) nfs4_schedule_state_manager(clp); } +static void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) +{ + nfs_client_mark_return_all_delegation_types(clp, flags); + nfs_delegation_run_state_manager(clp); +} + void nfs_expire_all_delegations(struct nfs_client *clp) { - nfs_client_mark_return_all_delegations(clp); - nfs_delegation_run_state_manager(clp); + nfs_expire_all_delegation_types(clp, FMODE_READ|FMODE_WRITE); } /* From 31f0960778c78198957cf02cc970d92b72b929e4 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:27:02 -0500 Subject: [PATCH 48/75] nfs41: V2 initial support for CB_RECALL_ANY For now the clients returns _all_ the delegations of the specificed type it holds Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 11 +++++++++++ fs/nfs/callback_proc.c | 29 +++++++++++++++++++++++++++++ fs/nfs/callback_xdr.c | 27 ++++++++++++++++++++++++++- fs/nfs/delegation.c | 2 +- fs/nfs/delegation.h | 1 + 5 files changed, 68 insertions(+), 2 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 07baa8254ca..0ca830984c4 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -106,6 +106,17 @@ struct cb_sequenceres { extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res); + +#define RCA4_TYPE_MASK_RDATA_DLG 0 +#define RCA4_TYPE_MASK_WDATA_DLG 1 + +struct cb_recallanyargs { + struct sockaddr *craa_addr; + uint32_t craa_objs_to_keep; + uint32_t craa_type_mask; +}; + +extern unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy); #endif /* CONFIG_NFS_V4_1 */ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b7da1f54da6..61b85306bb2 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -227,4 +227,33 @@ out: return res->csr_status; } +unsigned nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy) +{ + struct nfs_client *clp; + int status; + fmode_t flags = 0; + + status = htonl(NFS4ERR_OP_NOT_IN_SESSION); + clp = nfs_find_client(args->craa_addr, 4); + if (clp == NULL) + goto out; + + dprintk("NFS: RECALL_ANY callback request from %s\n", + rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR)); + + if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *) + &args->craa_type_mask)) + flags = FMODE_READ; + if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *) + &args->craa_type_mask)) + flags |= FMODE_WRITE; + + if (flags) + nfs_expire_all_delegation_types(clp, flags); + status = htonl(NFS4_OK); +out: + dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); + return status; +} + #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 0fda5e66241..8e1a2511c8b 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -23,6 +23,7 @@ #if defined(CONFIG_NFS_V4_1) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) +#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #endif /* CONFIG_NFS_V4_1 */ #define NFSDBG_FACILITY NFSDBG_CALLBACK @@ -326,6 +327,25 @@ out_free: goto out; } +static unsigned decode_recallany_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_recallanyargs *args) +{ + uint32_t *p; + + args->craa_addr = svc_addr(rqstp); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->craa_objs_to_keep = ntohl(*p++); + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_BADXDR); + args->craa_type_mask = ntohl(*p); + + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ static __be32 encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) @@ -533,6 +553,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_GETATTR: case OP_CB_RECALL: case OP_CB_SEQUENCE: + case OP_CB_RECALL_ANY: *op = &callback_ops[op_nr]; break; @@ -540,7 +561,6 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_NOTIFY_DEVICEID: case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: - case OP_CB_RECALL_ANY: case OP_CB_RECALLABLE_OBJ_AVAIL: case OP_CB_RECALL_SLOT: case OP_CB_WANTS_CANCELLED: @@ -688,6 +708,11 @@ static struct callback_op callback_ops[] = { .encode_res = (callback_encode_res_t)encode_cb_sequence_res, .res_maxsize = CB_OP_SEQUENCE_RES_MAXSZ, }, + [OP_CB_RECALL_ANY] = { + .process_op = (callback_process_op_t)nfs4_callback_recallany, + .decode_args = (callback_decode_arg_t)decode_recallany_args, + .res_maxsize = CB_OP_RECALLANY_RES_MAXSZ, + }, #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 98dbc8f5ced..f4758ec4213 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -411,7 +411,7 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } -static void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) +void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags) { nfs_client_mark_return_all_delegation_types(clp, flags); nfs_delegation_run_state_manager(clp); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index e225a129012..f6d0731a8cf 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -40,6 +40,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); void nfs_super_return_all_delegations(struct super_block *sb); void nfs_expire_all_delegations(struct nfs_client *clp); +void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags); void nfs_expire_unreferenced_delegations(struct nfs_client *clp); void nfs_handle_cb_pathdown(struct nfs_client *clp); int nfs_client_return_marked_delegations(struct nfs_client *clp); From d8cb1a7ce36d44602946f06af4267da304fb4011 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:29:53 -0500 Subject: [PATCH 49/75] nfs41: check if session exists and if it is persistent Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7b890ba37fa..7466d24893f 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -30,6 +30,15 @@ static inline int nfs4_has_session(const struct nfs_client *clp) return 0; } +static inline int nfs4_has_persistent_session(const struct nfs_client *clp) +{ +#ifdef CONFIG_NFS_V4_1 + if (nfs4_has_session(clp)) + return (clp->cl_session->flags & SESSION4_PERSIST); +#endif /* CONFIG_NFS_V4_1 */ + return 0; +} + struct nfs_clone_mount { const struct super_block *sb; const struct dentry *dentry; From 4882ef72cd9a5c006087ca94a228323018eac29f Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:30:21 -0500 Subject: [PATCH 50/75] nfs41: add support for the exclusive create flags In v4.1 the client MUST/SHOULD use the EXCLUSIVE4_1 flag instead of EXCLUSIVE4, and GUARDED when the server supports persistent sessions. For now (and until we support suppattr_exclcreat), we don't send any attributes with EXCLUSIVE4_1 relying in the subsequent SETATTR as in v4.0 Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 12 +++++++++--- fs/nfs/nfs4xdr.c | 23 ++++++++++++++++++++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 637a6b476bd..e07f6c7f5b9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -726,9 +726,15 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, p->o_arg.bitmask = server->attr_bitmask; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (flags & O_EXCL) { - u32 *s = (u32 *) p->o_arg.u.verifier.data; - s[0] = jiffies; - s[1] = current->pid; + if (nfs4_has_persistent_session(server->nfs_client)) { + /* GUARDED */ + p->o_arg.u.attrs = &p->attrs; + memcpy(&p->attrs, attrs, sizeof(p->attrs)); + } else { /* EXCLUSIVE4_1 */ + u32 *s = (u32 *) p->o_arg.u.verifier.data; + s[0] = jiffies; + s[1] = current->pid; + } } else if (flags & O_CREAT) { p->o_arg.u.attrs = &p->attrs; memcpy(&p->attrs, attrs, sizeof(p->attrs)); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3a71b40a990..0b1f3fcdd28 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include #include #include "nfs4_fs.h" +#include "internal.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -134,7 +135,7 @@ static int nfs4_stat_to_errno(int); #define decode_lookup_maxsz (op_decode_hdr_maxsz) #define encode_share_access_maxsz \ (2) -#define encode_createmode_maxsz (1 + encode_attrs_maxsz) +#define encode_createmode_maxsz (1 + encode_attrs_maxsz + encode_verifier_maxsz) #define encode_opentype_maxsz (1 + encode_createmode_maxsz) #define encode_claim_null_maxsz (1 + nfs4_name_maxsz) #define encode_open_maxsz (op_encode_hdr_maxsz + \ @@ -1140,6 +1141,7 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) { __be32 *p; + struct nfs_client *clp; p = reserve_space(xdr, 4); switch(arg->open_flags & O_EXCL) { @@ -1148,8 +1150,23 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op encode_attrs(xdr, arg->u.attrs, arg->server); break; default: - *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); - encode_nfs4_verifier(xdr, &arg->u.verifier); + clp = arg->server->nfs_client; + if (clp->cl_minorversion > 0) { + if (nfs4_has_persistent_session(clp)) { + *p = cpu_to_be32(NFS4_CREATE_GUARDED); + encode_attrs(xdr, arg->u.attrs, arg->server); + } else { + struct iattr dummy; + + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); + encode_nfs4_verifier(xdr, &arg->u.verifier); + dummy.ia_valid = 0; + encode_attrs(xdr, &dummy, arg->server); + } + } else { + *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); + } } } From 7b183d0d432ab3525ae29511a5348ead3e790620 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:33:25 -0500 Subject: [PATCH 51/75] nfs41: remove server-only EXCHGID4_FLAG_CONFIRMED_R flag from exchange_id Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e07f6c7f5b9..dfd1ea99e2d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4349,6 +4349,9 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) dprintk("--> %s\n", __func__); BUG_ON(clp == NULL); + /* Remove server-only flags */ + args.flags &= ~EXCHGID4_FLAG_CONFIRMED_R; + p = (u32 *)verifier.data; *p++ = htonl((u32)clp->cl_boot_time.tv_sec); *p = htonl((u32)clp->cl_boot_time.tv_nsec); From 2449ea2e191123729b2dc37a06fcb9d6ea7e2736 Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:36:55 -0500 Subject: [PATCH 52/75] nfs41: V2 adjust max_rqst_sz, max_resp_sz w.r.t to rsize, wsize The v4.1 client should take into account the desired rsize, wsize when negotiating the max size in CREATE_SESSION. Accordingly, it should use rsize, wsize that are smaller than the session negotiated values. Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 14 ++++++++++++-- fs/nfs/internal.h | 4 ++++ fs/nfs/nfs4proc.c | 7 +++++-- fs/nfs/nfs4xdr.c | 14 ++++++++++++++ 4 files changed, 35 insertions(+), 4 deletions(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 99ea196f071..ee77713ce68 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1260,10 +1260,20 @@ error: static void nfs4_session_set_rwsize(struct nfs_server *server) { #ifdef CONFIG_NFS_V4_1 + struct nfs4_session *sess; + u32 server_resp_sz; + u32 server_rqst_sz; + if (!nfs4_has_session(server->nfs_client)) return; - server->rsize = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - server->wsize = server->nfs_client->cl_session->fc_attrs.max_rqst_sz; + sess = server->nfs_client->cl_session; + server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead; + server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead; + + if (server->rsize > server_resp_sz) + server->rsize = server_resp_sz; + if (server->wsize > server_rqst_sz) + server->wsize = server_rqst_sz; #endif /* CONFIG_NFS_V4_1 */ } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7466d24893f..83a9284b83c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -191,6 +191,10 @@ extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); #ifdef CONFIG_NFS_V4 extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); #endif +#ifdef CONFIG_NFS_V4_1 +extern const u32 nfs41_maxread_overhead; +extern const u32 nfs41_maxwrite_overhead; +#endif /* nfs4proc.c */ extern void nfs4_restart_rpc(struct rpc_task *, const struct nfs_client *, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index dfd1ea99e2d..d897b9e34f1 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4862,13 +4862,16 @@ int nfs4_proc_destroy_session(struct nfs4_session *session) int nfs4_init_session(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; + struct nfs4_session *session; int ret; if (!nfs4_has_session(clp)) return 0; - clp->cl_session->fc_attrs.max_rqst_sz = server->wsize; - clp->cl_session->fc_attrs.max_resp_sz = server->rsize; + session = clp->cl_session; + session->fc_attrs.max_rqst_sz = server->wsize + nfs41_maxwrite_overhead; + session->fc_attrs.max_resp_sz = server->rsize + nfs41_maxread_overhead; + ret = nfs4_recover_expired_lease(server); if (!ret) ret = nfs4_check_client_ready(clp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0b1f3fcdd28..4ddd04a1113 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -677,6 +678,19 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_fsinfo_maxsz) + +const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_encode_hdr_maxsz + + encode_sequence_maxsz + + encode_putfh_maxsz + + encode_getattr_maxsz) * + XDR_UNIT); + +const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH + + compound_decode_hdr_maxsz + + decode_sequence_maxsz + + decode_putfh_maxsz) * + XDR_UNIT); #endif /* CONFIG_NFS_V4_1 */ static const umode_t nfs_type2fmt[] = { From 0629e370dd5819efa5cf8d418a8e6729efe388ef Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:46:14 -0500 Subject: [PATCH 53/75] nfs41: check SEQUENCE status flag the server can indicate a number of error conditions by setting the appropriate bits in the SEQUENCE operation. The client re-establishes state with the server when it receives one of those, with the action depending on the specific case. Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4state.c | 22 ++++++++++++++++++++++ fs/nfs/nfs4xdr.c | 4 ++-- include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 1 + 6 files changed, 30 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 0d25a82451d..9a866368896 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -270,6 +270,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_state_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); +extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d897b9e34f1..71993f12221 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -405,6 +405,8 @@ static void nfs41_sequence_done(struct nfs_client *clp, if (time_before(clp->cl_last_renewal, timestamp)) clp->cl_last_renewal = timestamp; spin_unlock(&clp->cl_lock); + /* Check sequence flags */ + nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); return; } out: diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 46c69e2248e..a86f3acf321 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1173,6 +1173,28 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) } #ifdef CONFIG_NFS_V4_1 +void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) +{ + if (!flags) + return; + else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_reboot(clp); + nfs4_schedule_state_recovery(clp); + } else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | + SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | + SEQ4_STATUS_ADMIN_STATE_REVOKED | + SEQ4_STATUS_RECALLABLE_STATE_REVOKED | + SEQ4_STATUS_LEASE_MOVED)) { + set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); + nfs4_state_start_reclaim_nograce(clp); + nfs4_schedule_state_recovery(clp); + } else if (flags & (SEQ4_STATUS_CB_PATH_DOWN | + SEQ4_STATUS_BACKCHANNEL_FAULT | + SEQ4_STATUS_CB_PATH_DOWN_SESSION)) + nfs_expire_all_delegations(clp); +} + static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) { switch (err) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4ddd04a1113..f740472370a 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4614,8 +4614,8 @@ static int decode_sequence(struct xdr_stream *xdr, dummy = be32_to_cpup(p++); /* target highest slot id - currently not processed */ dummy = be32_to_cpup(p++); - /* result flags - currently not processed */ - dummy = be32_to_cpup(p); + /* result flags */ + res->sr_status_flags = be32_to_cpup(p); status = 0; out_err: res->sr_status = status; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c4c06020810..89404bfb72b 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -128,6 +128,8 @@ #define SEQ4_STATUS_RECALLABLE_STATE_REVOKED 0x00000040 #define SEQ4_STATUS_LEASE_MOVED 0x00000080 #define SEQ4_STATUS_RESTART_RECLAIM_NEEDED 0x00000100 +#define SEQ4_STATUS_CB_PATH_DOWN_SESSION 0x00000200 +#define SEQ4_STATUS_BACKCHANNEL_FAULT 0x00000400 #define NFS4_MAX_UINT64 (~(u64)0) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8c880372536..ea32db30c28 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -172,6 +172,7 @@ struct nfs4_sequence_res { u8 sr_slotid; /* slot used to send request */ int sr_status; /* sequence operation status */ unsigned long sr_renewal_time; + u32 sr_status_flags; }; struct nfs4_get_lease_time_args { From 2597641deae82c9a95e255518da189ab557da0af Mon Sep 17 00:00:00 2001 From: Alexandros Batsakis Date: Sat, 5 Dec 2009 13:48:55 -0500 Subject: [PATCH 54/75] nfs41: v2 fix cb_recall bug in NFSv4.1 the seqid part of a stateid in CB_RECALL must be 0 Signed-off-by: Alexandros Batsakis Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 5 ++++- fs/nfs/callback_proc.c | 37 +++++++++++++++++++++++++++++++++++-- fs/nfs/delegation.c | 9 ++++++--- fs/nfs/delegation.h | 4 +++- 4 files changed, 48 insertions(+), 7 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 0ca830984c4..d4036be0b58 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -106,6 +106,8 @@ struct cb_sequenceres { extern unsigned nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res); +extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, + const nfs4_stateid *stateid); #define RCA4_TYPE_MASK_RDATA_DLG 0 #define RCA4_TYPE_MASK_WDATA_DLG 1 @@ -125,8 +127,9 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy); #ifdef CONFIG_NFS_V4 extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); extern void nfs_callback_down(int minorversion); +extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, + const nfs4_stateid *stateid); #endif /* CONFIG_NFS_V4 */ - /* * nfs41: Callbacks are expected to not cause substantial latency, * so we limit their concurrency to 1 by setting up the maximum number diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 61b85306bb2..defa9b4c470 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -61,6 +61,16 @@ out: return res->status; } +static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs_delegation *, const nfs4_stateid *) +{ +#if defined(CONFIG_NFS_V4_1) + if (clp->cl_minorversion > 0) + return nfs41_validate_delegation_stateid; +#endif + return nfs4_validate_delegation_stateid; +} + + __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) { struct nfs_client *clp; @@ -81,7 +91,8 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) inode = nfs_delegation_find_inode(clp, &args->fh); if (inode != NULL) { /* Set up a helper thread to actually return the delegation */ - switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { + switch (nfs_async_inode_return_delegation(inode, &args->stateid, + nfs_validate_delegation_stateid(clp))) { case 0: res = 0; break; @@ -102,8 +113,31 @@ out: return res; } +int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) +{ + if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, + sizeof(delegation->stateid.data)) != 0) + return 0; + return 1; +} + #if defined(CONFIG_NFS_V4_1) +int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) +{ + if (delegation == NULL) + return 0; + + /* seqid is 4-bytes long */ + if (((u32 *) &stateid->data)[0] != 0) + return 0; + if (memcmp(&delegation->stateid.data[4], &stateid->data[4], + sizeof(stateid->data)-4)) + return 0; + + return 1; +} + /* * Validate the sequenceID sent by the server. * Return success if the sequenceID is one more than what we last saw on @@ -255,5 +289,4 @@ out: dprintk("%s: exit with status = %d\n", __func__, ntohl(status)); return status; } - #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index f4758ec4213..2563bebc4c6 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -454,18 +454,21 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) /* * Asynchronous delegation recall! */ -int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, + int (*validate_stateid)(struct nfs_delegation *delegation, + const nfs4_stateid *stateid)) { struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; struct nfs_delegation *delegation; rcu_read_lock(); delegation = rcu_dereference(NFS_I(inode)->delegation); - if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data, - sizeof(delegation->stateid.data)) != 0) { + + if (!validate_stateid(delegation, stateid)) { rcu_read_unlock(); return -ENOENT; } + nfs_mark_return_delegation(clp, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index f6d0731a8cf..944b627ec6e 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -34,7 +34,9 @@ enum { int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); int nfs_inode_return_delegation(struct inode *inode); -int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid, + int (*validate_stateid)(struct nfs_delegation *delegation, + const nfs4_stateid *stateid)); void nfs_inode_return_delegation_noreclaim(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle); From 0556d1a6958ba15659ac2089ebc0a3c5e71f08a8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 5 Dec 2009 15:03:20 -0500 Subject: [PATCH 55/75] NFSv41: nfs4_reset_session must always set NFS4CLNT_SESSION_DRAINING Otherwise we have no guarantees that other processes won't start another RPC call while we're resetting the session. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index a86f3acf321..bc4ca6f4d8c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1211,8 +1211,8 @@ static int nfs4_reset_session(struct nfs_client *clp) INIT_COMPLETION(ses->complete); spin_lock(&tbl->slot_tbl_lock); + set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); if (tbl->highest_used_slotid != -1) { - set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); spin_unlock(&tbl->slot_tbl_lock); status = wait_for_completion_interruptible(&ses->complete); if (status) /* -ERESTARTSYS */ From 8b173218bd7dfa5723ab96cc37b32dc380446bab Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Sat, 5 Dec 2009 16:08:39 -0500 Subject: [PATCH 56/75] Cleanup some NFSv4 XDR decode comments Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f740472370a..bc8711d3029 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5340,7 +5340,7 @@ out: } /* - * FSINFO request + * Decode FSINFO response */ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs4_fsinfo_res *res) @@ -5361,7 +5361,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, } /* - * PATHCONF request + * Decode PATHCONF response */ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, struct nfs4_pathconf_res *res) @@ -5382,7 +5382,7 @@ static int nfs4_xdr_dec_pathconf(struct rpc_rqst *req, __be32 *p, } /* - * STATFS request + * Decode STATFS response */ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, struct nfs4_statfs_res *res) @@ -5403,7 +5403,7 @@ static int nfs4_xdr_dec_statfs(struct rpc_rqst *req, __be32 *p, } /* - * GETATTR_BITMAP request + * Decode GETATTR_BITMAP response */ static int nfs4_xdr_dec_server_caps(struct rpc_rqst *req, __be32 *p, struct nfs4_server_caps_res *res) { @@ -5442,7 +5442,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy) } /* - * a SETCLIENTID request + * Decode SETCLIENTID response */ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp) @@ -5459,7 +5459,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p, } /* - * a SETCLIENTID_CONFIRM request + * Decode SETCLIENTID_CONFIRM response */ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *fsinfo) { @@ -5479,7 +5479,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str } /* - * DELEGRETURN request + * Decode DELEGRETURN response */ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_delegreturnres *res) { @@ -5505,7 +5505,7 @@ out: } /* - * FS_LOCATIONS request + * Decode FS_LOCATIONS response */ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs4_fs_locations_res *res) @@ -5535,7 +5535,7 @@ out: #if defined(CONFIG_NFS_V4_1) /* - * EXCHANGE_ID request + * Decode EXCHANGE_ID response */ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, void *res) @@ -5552,7 +5552,7 @@ static int nfs4_xdr_dec_exchange_id(struct rpc_rqst *rqstp, uint32_t *p, } /* - * a CREATE_SESSION request + * Decode CREATE_SESSION response */ static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, struct nfs41_create_session_res *res) @@ -5569,7 +5569,7 @@ static int nfs4_xdr_dec_create_session(struct rpc_rqst *rqstp, uint32_t *p, } /* - * a DESTROY_SESSION request + * Decode DESTROY_SESSION response */ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) @@ -5586,7 +5586,7 @@ static int nfs4_xdr_dec_destroy_session(struct rpc_rqst *rqstp, uint32_t *p, } /* - * a SEQUENCE request + * Decode SEQUENCE response */ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_sequence_res *res) @@ -5603,7 +5603,7 @@ static int nfs4_xdr_dec_sequence(struct rpc_rqst *rqstp, uint32_t *p, } /* - * a GET_LEASE_TIME request + * Decode GET_LEASE_TIME response */ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_get_lease_time_res *res) From 180197536b15d5862b389ce90b46ec8d004056f6 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Sat, 5 Dec 2009 16:08:40 -0500 Subject: [PATCH 57/75] nfs41: RECLAIM_COMPLETE XDR functionality XDR encoding and decoding for RECLAIM_COMPLETE. Implements the necessary encoding to indicate reclaim complete for the entire client. In the future, it can be extended to provide reclaim complete functionality for a single file system after migration. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 66 +++++++++++++++++++++++++++++++++++++++++ include/linux/nfs4.h | 1 + include/linux/nfs_xdr.h | 10 +++++++ 3 files changed, 77 insertions(+) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index bc8711d3029..e437fd6a819 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -301,6 +301,8 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 4) #define decode_sequence_maxsz (op_decode_hdr_maxsz + \ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) +#define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) +#define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -678,6 +680,12 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putrootfh_maxsz + \ decode_fsinfo_maxsz) +#define NFS4_enc_reclaim_complete_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_reclaim_complete_maxsz) +#define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_reclaim_complete_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1623,6 +1631,19 @@ static void encode_destroy_session(struct xdr_stream *xdr, hdr->nops++; hdr->replen += decode_destroy_session_maxsz; } + +static void encode_reclaim_complete(struct xdr_stream *xdr, + struct nfs41_reclaim_complete_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_RECLAIM_COMPLETE); + *p++ = cpu_to_be32(args->one_fs); + hdr->nops++; + hdr->replen += decode_reclaim_complete_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ static void encode_sequence(struct xdr_stream *xdr, @@ -2451,6 +2472,26 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p, encode_nops(&hdr); return 0; } + +/* + * a RECLAIM_COMPLETE request + */ +static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, + struct nfs41_reclaim_complete_args *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args) + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, req, &hdr); + encode_sequence(&xdr, &args->seq_args, &hdr); + encode_reclaim_complete(&xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -4559,6 +4600,11 @@ static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) { return decode_op_hdr(xdr, OP_DESTROY_SESSION); } + +static int decode_reclaim_complete(struct xdr_stream *xdr, void *dummy) +{ + return decode_op_hdr(xdr, OP_RECLAIM_COMPLETE); +} #endif /* CONFIG_NFS_V4_1 */ static int decode_sequence(struct xdr_stream *xdr, @@ -5622,6 +5668,25 @@ static int nfs4_xdr_dec_get_lease_time(struct rpc_rqst *rqstp, uint32_t *p, status = decode_fsinfo(&xdr, res->lr_fsinfo); return status; } + +/* + * Decode RECLAIM_COMPLETE response + */ +static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, + struct nfs41_reclaim_complete_res *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_sequence(&xdr, &res->seq_res, rqstp); + if (!status) + status = decode_reclaim_complete(&xdr, (void *)NULL); + return status; +} #endif /* CONFIG_NFS_V4_1 */ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) @@ -5798,6 +5863,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(DESTROY_SESSION, enc_destroy_session, dec_destroy_session), PROC(SEQUENCE, enc_sequence, dec_sequence), PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), + PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 89404bfb72b..9b8299af374 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -530,6 +530,7 @@ enum { NFSPROC4_CLNT_DESTROY_SESSION, NFSPROC4_CLNT_SEQUENCE, NFSPROC4_CLNT_GET_LEASE_TIME, + NFSPROC4_CLNT_RECLAIM_COMPLETE, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ea32db30c28..51071b33575 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -939,6 +939,16 @@ struct nfs41_create_session_args { struct nfs41_create_session_res { struct nfs_client *client; }; + +struct nfs41_reclaim_complete_args { + /* In the future extend to include curr_fh for use with migration */ + unsigned char one_fs:1; + struct nfs4_sequence_args seq_args; +}; + +struct nfs41_reclaim_complete_res { + struct nfs4_sequence_res seq_res; +}; #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; From fce5c838e13392cc88a1330d1471fe6419e02ed7 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Sat, 5 Dec 2009 16:08:41 -0500 Subject: [PATCH 58/75] nfs41: RECLAIM_COMPLETE functionality Implements RECLAIM_COMPLETE as an asynchronous RPC. NFS4ERR_DELAY is retried, NFS4ERR_DEADSESSION invokes the error handling but does not result in a retry, since we don't want to have a lingering RECLAIM_COMPLETE call sent in the middle of a possible new state recovery cycle. If a session reset occurs, a new wave of reclaim operations will follow, containing their own RECLAIM_COMPLETE call. We don't want a retry to get on the way of recovery by incorrectly indicating to the server that we're done reclaiming state. A subsequent patch invokes the functionality. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/nfs4state.c | 8 ++++ 3 files changed, 114 insertions(+) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 9a866368896..90923223a7c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -181,6 +181,7 @@ struct nfs4_state_recovery_ops { int (*recover_lock)(struct nfs4_state *, struct file_lock *); int (*establish_clid)(struct nfs_client *, struct rpc_cred *); struct rpc_cred * (*get_clid_cred)(struct nfs_client *); + int (*reclaim_complete)(struct nfs_client *); }; struct nfs4_state_maintenance_ops { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 71993f12221..206ce30a595 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4971,6 +4971,110 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, &nfs41_sequence_ops, (void *)clp); } +struct nfs4_reclaim_complete_data { + struct nfs_client *clp; + struct nfs41_reclaim_complete_args arg; + struct nfs41_reclaim_complete_res res; +}; + +static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data) +{ + struct nfs4_reclaim_complete_data *calldata = data; + + if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args, + &calldata->res.seq_res, 0, task)) + return; + + rpc_call_start(task); +} + +static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) +{ + struct nfs4_reclaim_complete_data *calldata = data; + struct nfs_client *clp = calldata->clp; + struct nfs4_sequence_res *res = &calldata->res.seq_res; + + dprintk("--> %s\n", __func__); + nfs41_sequence_done(clp, res, task->tk_status); + switch (task->tk_status) { + case 0: + case -NFS4ERR_COMPLETE_ALREADY: + break; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + /* + * Handle the session error, but do not retry the operation, as + * we have no way of telling whether the clientid had to be + * reset before we got our reply. If reset, a new wave of + * reclaim operations will follow, containing their own reclaim + * complete. We don't want our retry to get on the way of + * recovery by incorrectly indicating to the server that we're + * done reclaiming state since the process had to be restarted. + */ + _nfs4_async_handle_error(task, NULL, clp, NULL); + break; + default: + if (_nfs4_async_handle_error( + task, NULL, clp, NULL) == -EAGAIN) { + rpc_restart_call_prepare(task); + return; + } + } + nfs41_sequence_free_slot(clp, res); + + dprintk("<-- %s\n", __func__); +} + +static void nfs4_free_reclaim_complete_data(void *data) +{ + struct nfs4_reclaim_complete_data *calldata = data; + + kfree(calldata); +} + +static const struct rpc_call_ops nfs4_reclaim_complete_call_ops = { + .rpc_call_prepare = nfs4_reclaim_complete_prepare, + .rpc_call_done = nfs4_reclaim_complete_done, + .rpc_release = nfs4_free_reclaim_complete_data, +}; + +/* + * Issue a global reclaim complete. + */ +static int nfs41_proc_reclaim_complete(struct nfs_client *clp) +{ + struct nfs4_reclaim_complete_data *calldata; + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RECLAIM_COMPLETE], + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_reclaim_complete_call_ops, + .flags = RPC_TASK_ASYNC, + }; + int status = -ENOMEM; + + dprintk("--> %s\n", __func__); + calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + if (calldata == NULL) + goto out; + calldata->clp = clp; + calldata->arg.one_fs = 0; + calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; + + msg.rpc_argp = &calldata->arg; + msg.rpc_resp = &calldata->res; + task_setup_data.callback_data = calldata; + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + status = PTR_ERR(task); + rpc_put_task(task); +out: + dprintk("<-- %s status=%d\n", __func__, status); + return status; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { @@ -4990,6 +5094,7 @@ struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = { .recover_lock = nfs4_lock_reclaim, .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, + .reclaim_complete = nfs41_proc_reclaim_complete, }; #endif /* CONFIG_NFS_V4_1 */ diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index bc4ca6f4d8c..f2d2dc49755 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1032,6 +1032,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); } +static void nfs4_reclaim_complete(struct nfs_client *clp, + const struct nfs4_state_recovery_ops *ops) +{ + /* Notify the server we're done reclaiming our state */ + if (ops->reclaim_complete) + (void)ops->reclaim_complete(clp); +} + static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { struct nfs4_state_owner *sp; From da6ebfe34a3921cfb47b938fb819abc78c6080e5 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Sat, 5 Dec 2009 16:08:41 -0500 Subject: [PATCH 59/75] nfs41: Invoke RECLAIM_COMPLETE This patch invokes RECLAIM_COMPLETE after the client is done reclaiming state. There are interpretations of the spec that suggest that RECLAIM_COMPLETE should also be issued after a new clientid has been obtained from the server and even if there is no state to reclaim. This tells the server that the client has no state to reclaim even if the client isn't aware the server may have rebooted. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index f2d2dc49755..873dda7fec3 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1046,6 +1046,9 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) struct rb_node *pos; struct nfs4_state *state; + nfs4_reclaim_complete(clp, + nfs4_reboot_recovery_ops[clp->cl_minorversion]); + if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) return; From f26468fb9384e73fb357d2e84d3e9c88c7d1129d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 5 Dec 2009 19:32:11 -0500 Subject: [PATCH 60/75] NFSv41: Fix nfs4_proc_create_session We should not assume that nfs41_init_clientid() will always want to initialise the session. If it is being called due to a server reboot, then we just want to reset the session after re-establishing the clientid. Fix this by getting rid of the 'reset' parameter in nfs4_proc_create_session(), and instead relying on whether or not the session slot table pointer is non-NULL. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 60 +++++++++++++++++++--------------------------- fs/nfs/nfs4state.c | 4 ++-- 3 files changed, 27 insertions(+), 39 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 90923223a7c..50dd55069d3 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -222,7 +222,7 @@ extern int nfs4_setup_sequence(struct nfs_client *clp, int cache_reply, struct rpc_task *task); extern void nfs4_destroy_session(struct nfs4_session *session); extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); -extern int nfs4_proc_create_session(struct nfs_client *, int reset); +extern int nfs4_proc_create_session(struct nfs_client *); extern int nfs4_proc_destroy_session(struct nfs4_session *); extern int nfs4_init_session(struct nfs_server *server); #else /* CONFIG_NFS_v4_1 */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 206ce30a595..be96d28bacc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4501,7 +4501,6 @@ static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, int max_slots, spin_lock(&tbl->slot_tbl_lock); for (i = 0; i < max_slots; ++i) tbl->slots[i].seq_nr = ivalue; - tbl->highest_used_slotid = -1; spin_unlock(&tbl->slot_tbl_lock); dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__, tbl, tbl->slots, tbl->max_slots); @@ -4552,7 +4551,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session) static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, int max_slots, int ivalue) { - int i; struct nfs4_slot *slot; int ret = -ENOMEM; @@ -4563,18 +4561,9 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); if (!slot) goto out; - for (i = 0; i < max_slots; ++i) - slot[i].seq_nr = ivalue; ret = 0; spin_lock(&tbl->slot_tbl_lock); - if (tbl->slots != NULL) { - spin_unlock(&tbl->slot_tbl_lock); - dprintk("%s: slot table already initialized. tbl=%p slots=%p\n", - __func__, tbl, tbl->slots); - WARN_ON(1); - goto out_free; - } tbl->max_slots = max_slots; tbl->slots = slot; tbl->highest_used_slotid = -1; /* no slot is currently used */ @@ -4584,10 +4573,6 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, out: dprintk("<-- %s: return %d\n", __func__, ret); return ret; - -out_free: - kfree(slot); - goto out; } /* @@ -4595,17 +4580,24 @@ out_free: */ static int nfs4_init_slot_tables(struct nfs4_session *session) { - int status; + struct nfs4_slot_table *tbl; + int status = 0; - status = nfs4_init_slot_table(&session->fc_slot_table, - session->fc_attrs.max_reqs, 1); - if (status) - return status; + tbl = &session->fc_slot_table; + if (tbl->slots == NULL) { + status = nfs4_init_slot_table(tbl, + session->fc_attrs.max_reqs, 1); + if (status) + return status; + } - status = nfs4_init_slot_table(&session->bc_slot_table, - session->bc_attrs.max_reqs, 0); - if (status) - nfs4_destroy_slot_tables(session); + tbl = &session->bc_slot_table; + if (tbl->slots == NULL) { + status = nfs4_init_slot_table(tbl, + session->bc_attrs.max_reqs, 0); + if (status) + nfs4_destroy_slot_tables(session); + } return status; } @@ -4784,7 +4776,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp) * It is the responsibility of the caller to verify the session is * expired before calling this routine. */ -int nfs4_proc_create_session(struct nfs_client *clp, int reset) +int nfs4_proc_create_session(struct nfs_client *clp) { int status; unsigned *ptr; @@ -4797,12 +4789,13 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset) if (status) goto out; - /* Init or reset the fore channel */ - if (reset) - status = nfs4_reset_slot_tables(session); - else - status = nfs4_init_slot_tables(session); - dprintk("fore channel slot table initialization returned %d\n", status); + /* Init and reset the fore channel */ + status = nfs4_init_slot_tables(session); + dprintk("slot table initialization returned %d\n", status); + if (status) + goto out; + status = nfs4_reset_slot_tables(session); + dprintk("slot table reset returned %d\n", status); if (status) goto out; @@ -4810,10 +4803,6 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset) dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); - if (reset) - /* Lease time is aleady set */ - goto out; - /* Get the lease time */ status = nfs4_proc_get_lease_time(clp, &fsinfo); if (status == 0) { @@ -4821,7 +4810,6 @@ int nfs4_proc_create_session(struct nfs_client *clp, int reset) spin_lock(&clp->cl_lock); clp->cl_lease_time = fsinfo.lease_time * HZ; clp->cl_last_renewal = jiffies; - clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); spin_unlock(&clp->cl_lock); nfs4_schedule_state_renewal(clp); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 873dda7fec3..37f020eb92f 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -123,7 +123,7 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_proc_exchange_id(clp, cred); if (status == 0) /* create session schedules state renewal upon success */ - status = nfs4_proc_create_session(clp, 0); + status = nfs4_proc_create_session(clp); if (status == 0) nfs_mark_client_ready(clp, NFS_CS_READY); return status; @@ -1240,7 +1240,7 @@ static int nfs4_reset_session(struct nfs_client *clp) } memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); - status = nfs4_proc_create_session(clp, 1); + status = nfs4_proc_create_session(clp); if (status) nfs4_session_recovery_handle_error(clp, status); /* fall through*/ From d61e612a728fb9bf848c4383f8f6645e822d5b57 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 5 Dec 2009 19:32:19 -0500 Subject: [PATCH 61/75] NFSv41: Clean up slot table management We no longer need to maintain a distinction between nfs41_sequence_done and nfs41_sequence_free_slot. This fixes a number of slot table leakages in the NFSv4.1 code. Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 17 +------------ fs/nfs/nfs4proc.c | 65 +++++++++++------------------------------------ fs/nfs/read.c | 13 +++------- fs/nfs/unlink.c | 4 +-- fs/nfs/write.c | 4 +-- 5 files changed, 21 insertions(+), 82 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 83a9284b83c..b1a020c1172 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -197,8 +197,7 @@ extern const u32 nfs41_maxwrite_overhead; #endif /* nfs4proc.c */ -extern void nfs4_restart_rpc(struct rpc_task *, const struct nfs_client *, - struct nfs4_sequence_res *); +extern void nfs4_restart_rpc(struct rpc_task *, const struct nfs_client *); #ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; #endif @@ -275,20 +274,6 @@ extern int _nfs4_call_sync_session(struct nfs_server *server, struct nfs4_sequence_res *res, int cache_reply); -#ifdef CONFIG_NFS_V4_1 -extern void nfs41_sequence_free_slot(const struct nfs_client *, - struct nfs4_sequence_res *res); -#endif /* CONFIG_NFS_V4_1 */ - -static inline void nfs4_sequence_free_slot(const struct nfs_client *clp, - struct nfs4_sequence_res *res) -{ -#ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp)) - nfs41_sequence_free_slot(clp, res); -#endif /* CONFIG_NFS_V4_1 */ -} - /* * Determine the device name as a string */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index be96d28bacc..c06a2bade59 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -341,15 +341,11 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) free_slotid, tbl->highest_used_slotid); } -void nfs41_sequence_free_slot(const struct nfs_client *clp, +static void nfs41_sequence_free_slot(const struct nfs_client *clp, struct nfs4_sequence_res *res) { struct nfs4_slot_table *tbl; - if (!nfs4_has_session(clp)) { - dprintk("%s: No session\n", __func__); - return; - } tbl = &clp->cl_session->fc_slot_table; if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { /* just wake up the next guy waiting since @@ -407,7 +403,6 @@ static void nfs41_sequence_done(struct nfs_client *clp, spin_unlock(&clp->cl_lock); /* Check sequence flags */ nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); - return; } out: /* The session may be reset by one of the error handlers. */ @@ -556,7 +551,6 @@ static void nfs41_call_sync_done(struct rpc_task *task, void *calldata) struct nfs41_call_sync_data *data = calldata; nfs41_sequence_done(data->clp, data->seq_res, task->tk_status); - nfs41_sequence_free_slot(data->clp, data->seq_res); } struct rpc_call_ops nfs41_call_sync_ops = { @@ -632,12 +626,10 @@ static void nfs4_sequence_done(const struct nfs_server *server, #endif /* CONFIG_NFS_V4_1 */ } -void nfs4_restart_rpc(struct rpc_task *task, const struct nfs_client *clp, - struct nfs4_sequence_res *res) +void nfs4_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) { #ifdef CONFIG_NFS_V4_1 if (nfs4_has_session(clp)) { - nfs41_sequence_free_slot(clp, res); rpc_restart_call_prepare(task); return; } @@ -645,15 +637,6 @@ void nfs4_restart_rpc(struct rpc_task *task, const struct nfs_client *clp, rpc_restart_call(task); } -/* no restart, therefore free slot here */ -static void nfs4_sequence_done_free_slot(const struct nfs_server *server, - struct nfs4_sequence_res *res, - int rpc_status) -{ - nfs4_sequence_done(server, res, rpc_status); - nfs4_sequence_free_slot(server->nfs_client, res); -} - static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) { struct nfs_inode *nfsi = NFS_I(dir); @@ -1350,8 +1333,8 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata) data->rpc_status = task->tk_status; - nfs4_sequence_done_free_slot(data->o_arg.server, &data->o_res.seq_res, - task->tk_status); + nfs4_sequence_done(data->o_arg.server, &data->o_res.seq_res, + task->tk_status); if (RPC_ASSASSINATED(task)) return; @@ -1757,12 +1740,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { - nfs4_restart_rpc(task, server->nfs_client, - &calldata->res.seq_res); + nfs4_restart_rpc(task, server->nfs_client); return; } } - nfs4_sequence_free_slot(server->nfs_client, &calldata->res.seq_res); nfs_refresh_inode(calldata->inode, calldata->res.fattr); } @@ -2553,7 +2534,6 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) nfs4_sequence_done(res->server, &res->seq_res, task->tk_status); if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) return 0; - nfs4_sequence_free_slot(res->server->nfs_client, &res->seq_res); update_changeattr(dir, &res->cinfo); nfs_post_op_update_inode(dir, &res->dir_attr); return 1; @@ -2992,20 +2972,16 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) dprintk("--> %s\n", __func__); - /* nfs4_sequence_free_slot called in the read rpc_call_done */ nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - nfs4_restart_rpc(task, server->nfs_client, &data->res.seq_res); + nfs4_restart_rpc(task, server->nfs_client); return -EAGAIN; } nfs_invalidate_atime(data->inode); if (task->tk_status > 0) renew_lease(server, data->timestamp); - else if (task->tk_status < 0) - nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res); - return 0; } @@ -3019,13 +2995,11 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { struct inode *inode = data->inode; - /* slot is freed in nfs_writeback_done */ nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client, - &data->res.seq_res); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } if (task->tk_status >= 0) { @@ -3053,12 +3027,9 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { - nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client, - &data->res.seq_res); + nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } - nfs4_sequence_free_slot(NFS_SERVER(inode)->nfs_client, - &data->res.seq_res); nfs_refresh_inode(inode, data->res.fattr); return 0; } @@ -3509,8 +3480,8 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_delegreturndata *data = calldata; - nfs4_sequence_done_free_slot(data->res.server, &data->res.seq_res, - task->tk_status); + nfs4_sequence_done(data->res.server, &data->res.seq_res, + task->tk_status); data->rpc_status = task->tk_status; if (data->rpc_status == 0) @@ -3768,11 +3739,8 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) default: if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) nfs4_restart_rpc(task, - calldata->server->nfs_client, - &calldata->res.seq_res); + calldata->server->nfs_client); } - nfs4_sequence_free_slot(calldata->server->nfs_client, - &calldata->res.seq_res); } static void nfs4_locku_prepare(struct rpc_task *task, void *data) @@ -3954,8 +3922,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata) dprintk("%s: begin!\n", __func__); - nfs4_sequence_done_free_slot(data->server, &data->res.seq_res, - task->tk_status); + nfs4_sequence_done(data->server, &data->res.seq_res, + task->tk_status); data->rpc_status = task->tk_status; if (RPC_ASSASSINATED(task)) @@ -4425,10 +4393,9 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; - rpc_restart_call(task); + nfs4_restart_rpc(task, data->clp); return; } - nfs41_sequence_free_slot(data->clp, &data->res->lr_seq_res); dprintk("<-- %s\n", __func__); } @@ -4900,11 +4867,10 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (_nfs4_async_handle_error(task, NULL, clp, NULL) == -EAGAIN) { - nfs4_restart_rpc(task, clp, task->tk_msg.rpc_resp); + nfs4_restart_rpc(task, clp); return; } } - nfs41_sequence_free_slot(clp, task->tk_msg.rpc_resp); dprintk("%s rpc_cred %p\n", __func__, task->tk_msg.rpc_cred); kfree(task->tk_msg.rpc_argp); @@ -5008,7 +4974,6 @@ static void nfs4_reclaim_complete_done(struct rpc_task *task, void *data) return; } } - nfs41_sequence_free_slot(clp, res); dprintk("<-- %s\n", __func__); } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 3e04fb9ea64..d319bfbe513 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -356,26 +356,19 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data struct nfs_readres *resp = &data->res; if (resp->eof || resp->count == argp->count) - goto out; + return; /* This is a short read! */ nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); /* Has the server at least made some progress? */ if (resp->count == 0) - goto out; + return; /* Yes, so retry the read at the end of the data */ argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; - nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client, - &data->res.seq_res); - return; -out: - nfs4_sequence_free_slot(NFS_SERVER(data->inode)->nfs_client, - &data->res.seq_res); - return; - + nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); } /* diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 52f7bdb12c8..1064c91ae81 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -81,11 +81,9 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; struct inode *dir = data->dir; - struct nfs_removeres *res = task->tk_msg.rpc_resp; if (!NFS_PROTO(dir)->unlink_done(task, dir)) - nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client, - &res->seq_res); + nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client); } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 556668ff022..d546c607de0 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1216,8 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - nfs4_restart_rpc(task, server->nfs_client, - &data->res.seq_res); + nfs4_restart_rpc(task, server->nfs_client); return -EAGAIN; } if (time_before(complain, jiffies)) { @@ -1229,7 +1228,6 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) /* Can't do anything about it except throw an error. */ task->tk_status = -EIO; } - nfs4_sequence_free_slot(server->nfs_client, &data->res.seq_res); return 0; } From 35dc1d74a8d97a302a202ccb6751bf2bdbf5173e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 5 Dec 2009 19:32:19 -0500 Subject: [PATCH 62/75] NFSv41: Fix up some bugs in the NFS4CLNT_SESSION_DRAINING code Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 17 ++++++++--------- fs/nfs/nfs4state.c | 4 ++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c06a2bade59..9da7a872ee0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -318,13 +318,14 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp * so we need to scan down from highest_used_slotid to 0 looking for the now * highest slotid in use. * If none found, highest_used_slotid is set to -1. + * + * Must be called while holding tbl->slot_tbl_lock */ static void nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) { int slotid = free_slotid; - spin_lock(&tbl->slot_tbl_lock); /* clear used bit in bitmap */ __clear_bit(slotid, tbl->used_slots); @@ -336,7 +337,6 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) else tbl->highest_used_slotid = -1; } - spin_unlock(&tbl->slot_tbl_lock); dprintk("%s: free_slotid %u highest_used_slotid %d\n", __func__, free_slotid, tbl->highest_used_slotid); } @@ -351,22 +351,23 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp, /* just wake up the next guy waiting since * we may have not consumed a slot after all */ dprintk("%s: No slot\n", __func__); - } else { - nfs4_free_slot(tbl, res->sr_slotid); - res->sr_slotid = NFS4_MAX_SLOT_TABLE; + return; } + spin_lock(&tbl->slot_tbl_lock); + nfs4_free_slot(tbl, res->sr_slotid); + /* Signal state manager thread if session is drained */ if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) { - spin_lock(&tbl->slot_tbl_lock); if (tbl->highest_used_slotid == -1) { dprintk("%s COMPLETE: Session Drained\n", __func__); complete(&clp->cl_session->complete); } - spin_unlock(&tbl->slot_tbl_lock); } else { rpc_wake_up_next(&tbl->slot_tbl_waitq); } + spin_unlock(&tbl->slot_tbl_lock); + res->sr_slotid = NFS4_MAX_SLOT_TABLE; } static void nfs41_sequence_done(struct nfs_client *clp, @@ -470,7 +471,6 @@ static int nfs41_setup_sequence(struct nfs4_session *session, */ dprintk("%s Schedule Session Reset\n", __func__); rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); - nfs4_schedule_state_manager(session->clp); spin_unlock(&tbl->slot_tbl_lock); return -EAGAIN; } @@ -4489,7 +4489,6 @@ static int nfs4_reset_slot_tables(struct nfs4_session *session) 1); if (status) return status; - init_completion(&session->complete); status = nfs4_reset_slot_table(&session->bc_slot_table, session->bc_attrs.max_reqs, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 37f020eb92f..ef9622e500e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1220,10 +1220,10 @@ static int nfs4_reset_session(struct nfs_client *clp) struct nfs4_slot_table *tbl = &ses->fc_slot_table; int status; - INIT_COMPLETION(ses->complete); spin_lock(&tbl->slot_tbl_lock); set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); if (tbl->highest_used_slotid != -1) { + INIT_COMPLETION(ses->complete); spin_unlock(&tbl->slot_tbl_lock); status = wait_for_completion_interruptible(&ses->complete); if (status) /* -ERESTARTSYS */ @@ -1247,7 +1247,7 @@ static int nfs4_reset_session(struct nfs_client *clp) out: /* Wake up the next rpc task even on error */ clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); - rpc_wake_up_next(&clp->cl_session->fc_slot_table.slot_tbl_waitq); + rpc_wake_up(&clp->cl_session->fc_slot_table.slot_tbl_waitq); return status; } From bcb56164ceb21317208eee89c829580d51b84a6d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 5 Dec 2009 19:32:19 -0500 Subject: [PATCH 63/75] NFSv41: More cleanups Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9da7a872ee0..b4ef570eb53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -332,7 +332,7 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) /* update highest_used_slotid when it is freed */ if (slotid == tbl->highest_used_slotid) { slotid = find_last_bit(tbl->used_slots, tbl->max_slots); - if (slotid >= 0 && slotid < tbl->max_slots) + if (slotid < tbl->max_slots) tbl->highest_used_slotid = slotid; else tbl->highest_used_slotid = -1; @@ -363,9 +363,8 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp, dprintk("%s COMPLETE: Session Drained\n", __func__); complete(&clp->cl_session->complete); } - } else { + } else rpc_wake_up_next(&tbl->slot_tbl_waitq); - } spin_unlock(&tbl->slot_tbl_lock); res->sr_slotid = NFS4_MAX_SLOT_TABLE; } @@ -469,9 +468,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session, * The state manager will wait until the slot table is empty. * Schedule the reset thread */ - dprintk("%s Schedule Session Reset\n", __func__); rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL); spin_unlock(&tbl->slot_tbl_lock); + dprintk("%s Schedule Session Reset\n", __func__); return -EAGAIN; } From 9430fb6b5315f7bc94b05be915c64ebfefc55bbc Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Sun, 6 Dec 2009 12:23:46 -0500 Subject: [PATCH 64/75] nfs41: nfs41_setup_state_renewal Move call to get the lease time and the setup of the state renewal out of nfs4_create_session so that it can be called after clearing the DRAINING flag. We use the getattr RPC to obtain the lease time, which requires a sequence slot. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 ++ fs/nfs/nfs4proc.c | 13 ------------- fs/nfs/nfs4state.c | 34 +++++++++++++++++++++++++++++----- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 50dd55069d3..7e57b04e401 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -225,6 +225,8 @@ extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); extern int nfs4_proc_create_session(struct nfs_client *); extern int nfs4_proc_destroy_session(struct nfs4_session *); extern int nfs4_init_session(struct nfs_server *server); +extern int nfs4_proc_get_lease_time(struct nfs_client *clp, + struct nfs_fsinfo *fsinfo); #else /* CONFIG_NFS_v4_1 */ static inline int nfs4_setup_sequence(struct nfs_client *clp, struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b4ef570eb53..4be03694256 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4745,7 +4745,6 @@ int nfs4_proc_create_session(struct nfs_client *clp) { int status; unsigned *ptr; - struct nfs_fsinfo fsinfo; struct nfs4_session *session = clp->cl_session; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); @@ -4767,18 +4766,6 @@ int nfs4_proc_create_session(struct nfs_client *clp) ptr = (unsigned *)&session->sess_id.data[0]; dprintk("%s client>seqid %d sessionid %u:%u:%u:%u\n", __func__, clp->cl_seqid, ptr[0], ptr[1], ptr[2], ptr[3]); - - /* Get the lease time */ - status = nfs4_proc_get_lease_time(clp, &fsinfo); - if (status == 0) { - /* Update lease time and schedule renewal */ - spin_lock(&clp->cl_lock); - clp->cl_lease_time = fsinfo.lease_time * HZ; - clp->cl_last_renewal = jiffies; - spin_unlock(&clp->cl_lock); - - nfs4_schedule_state_renewal(clp); - } out: dprintk("<-- %s\n", __func__); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index ef9622e500e..9cfe6864d9e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -116,16 +116,38 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp) #if defined(CONFIG_NFS_V4_1) +static int nfs41_setup_state_renewal(struct nfs_client *clp) +{ + int status; + struct nfs_fsinfo fsinfo; + + status = nfs4_proc_get_lease_time(clp, &fsinfo); + if (status == 0) { + /* Update lease time and schedule renewal */ + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = jiffies; + spin_unlock(&clp->cl_lock); + + nfs4_schedule_state_renewal(clp); + } + + return status; +} + int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; status = nfs4_proc_exchange_id(clp, cred); - if (status == 0) - /* create session schedules state renewal upon success */ - status = nfs4_proc_create_session(clp); - if (status == 0) - nfs_mark_client_ready(clp, NFS_CS_READY); + if (status != 0) + goto out; + status = nfs4_proc_create_session(clp); + if (status != 0) + goto out; + nfs41_setup_state_renewal(clp); + nfs_mark_client_ready(clp, NFS_CS_READY); +out: return status; } @@ -1248,6 +1270,8 @@ out: /* Wake up the next rpc task even on error */ clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); rpc_wake_up(&clp->cl_session->fc_slot_table.slot_tbl_waitq); + if (status == 0) + nfs41_setup_state_renewal(clp); return status; } From 9dfdf404c99347e2e224e25f8626e7b6399a05cd Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Sun, 6 Dec 2009 12:57:34 -0500 Subject: [PATCH 65/75] nfs41: Don't clear DRAINING flag on NFS4ERR_STALE_CLIENTID If CREATE_SESSION fails with NFS4ERR_STALE_CLIENTID, don't clear the NFS4CLNT_SESSION_DRAINING flag and don't wake RPCs waiting for the session to be reestablished. We don't have a session yet, so there is no reason to wake other RPCs. This avoids sending spurious compounds with bogus sequenceID during session and state recovery. Signed-off-by: Ricardo Labiaga [Trond.Myklebust@netapp.com: cleaned up patch by adding the nfs41_begin/end_drain_session() helpers] Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4state.c | 59 ++++++++++++++++++++++++++++++++-------------- 2 files changed, 43 insertions(+), 18 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4be03694256..fbae2c94dbc 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4586,10 +4586,12 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) init_completion(&session->complete); tbl = &session->fc_slot_table; + tbl->highest_used_slotid = -1; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table"); tbl = &session->bc_slot_table; + tbl->highest_used_slotid = -1; spin_lock_init(&tbl->slot_tbl_lock); rpc_init_wait_queue(&tbl->slot_tbl_waitq, "BackChannel Slot table"); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 9cfe6864d9e..1b629cca707 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -135,16 +135,43 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) return status; } +static void nfs41_end_drain_session(struct nfs_client *clp, + struct nfs4_session *ses) +{ + if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) + rpc_wake_up(&ses->fc_slot_table.slot_tbl_waitq); +} + +static int nfs41_begin_drain_session(struct nfs_client *clp, + struct nfs4_session *ses) +{ + struct nfs4_slot_table *tbl = &ses->fc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); + set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); + if (tbl->highest_used_slotid != -1) { + INIT_COMPLETION(ses->complete); + spin_unlock(&tbl->slot_tbl_lock); + return wait_for_completion_interruptible(&ses->complete); + } + spin_unlock(&tbl->slot_tbl_lock); + return 0; +} + int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; + status = nfs41_begin_drain_session(clp, clp->cl_session); + if (status != 0) + goto out; status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; status = nfs4_proc_create_session(clp); if (status != 0) goto out; + nfs41_end_drain_session(clp, clp->cl_session); nfs41_setup_state_renewal(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: @@ -1239,20 +1266,11 @@ static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) static int nfs4_reset_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; - struct nfs4_slot_table *tbl = &ses->fc_slot_table; int status; - spin_lock(&tbl->slot_tbl_lock); - set_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); - if (tbl->highest_used_slotid != -1) { - INIT_COMPLETION(ses->complete); - spin_unlock(&tbl->slot_tbl_lock); - status = wait_for_completion_interruptible(&ses->complete); - if (status) /* -ERESTARTSYS */ - goto out; - } else { - spin_unlock(&tbl->slot_tbl_lock); - } + status = nfs41_begin_drain_session(clp, ses); + if (status != 0) + return status; status = nfs4_proc_destroy_session(clp->cl_session); if (status && status != -NFS4ERR_BADSESSION && @@ -1265,13 +1283,18 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_proc_create_session(clp); if (status) nfs4_session_recovery_handle_error(clp, status); - /* fall through*/ + out: - /* Wake up the next rpc task even on error */ - clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state); - rpc_wake_up(&clp->cl_session->fc_slot_table.slot_tbl_waitq); - if (status == 0) - nfs41_setup_state_renewal(clp); + /* + * Let the state manager reestablish state + * without waking other tasks yet. + */ + if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { + /* Wake up the next rpc task */ + nfs41_end_drain_session(clp, ses); + if (status == 0) + nfs41_setup_state_renewal(clp); + } return status; } From 0110ee152b69f8cbde19d8bc1dd59e197e419d76 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Dec 2009 09:00:24 -0500 Subject: [PATCH 66/75] NFS: Fix up the declaration of nfs4_restart_rpc when NFSv4 not configured Also rename it: it is used in generic code, and so should not have a 'nfs4' prefix. Reported-by: Stephen Rothwell Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 13 ++++++++++++- fs/nfs/nfs4proc.c | 25 +++++++------------------ fs/nfs/read.c | 2 +- fs/nfs/unlink.c | 2 +- fs/nfs/write.c | 2 +- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b1a020c1172..29e464d23b3 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -197,7 +197,6 @@ extern const u32 nfs41_maxwrite_overhead; #endif /* nfs4proc.c */ -extern void nfs4_restart_rpc(struct rpc_task *, const struct nfs_client *); #ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; #endif @@ -367,3 +366,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) return ((unsigned long)len + (unsigned long)base + PAGE_SIZE - 1) >> PAGE_SHIFT; } + +/* + * Helper for restarting RPC calls in the possible presence of NFSv4.1 + * sessions. + */ +static inline void nfs_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) +{ + if (nfs4_has_session(clp)) + rpc_restart_call_prepare(task); + else + rpc_restart_call(task); +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index fbae2c94dbc..acde7765452 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -625,17 +625,6 @@ static void nfs4_sequence_done(const struct nfs_server *server, #endif /* CONFIG_NFS_V4_1 */ } -void nfs4_restart_rpc(struct rpc_task *task, const struct nfs_client *clp) -{ -#ifdef CONFIG_NFS_V4_1 - if (nfs4_has_session(clp)) { - rpc_restart_call_prepare(task); - return; - } -#endif /* CONFIG_NFS_V4_1 */ - rpc_restart_call(task); -} - static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) { struct nfs_inode *nfsi = NFS_I(dir); @@ -1739,7 +1728,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, server, state) == -EAGAIN) { - nfs4_restart_rpc(task, server->nfs_client); + nfs_restart_rpc(task, server->nfs_client); return; } } @@ -2974,7 +2963,7 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) nfs4_sequence_done(server, &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { - nfs4_restart_rpc(task, server->nfs_client); + nfs_restart_rpc(task, server->nfs_client); return -EAGAIN; } @@ -2998,7 +2987,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { - nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } if (task->tk_status >= 0) { @@ -3026,7 +3015,7 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) nfs4_sequence_done(NFS_SERVER(inode), &data->res.seq_res, task->tk_status); if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { - nfs4_restart_rpc(task, NFS_SERVER(inode)->nfs_client); + nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); return -EAGAIN; } nfs_refresh_inode(inode, data->res.fattr); @@ -3737,7 +3726,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data) break; default: if (nfs4_async_handle_error(task, calldata->server, NULL) == -EAGAIN) - nfs4_restart_rpc(task, + nfs_restart_rpc(task, calldata->server->nfs_client); } } @@ -4392,7 +4381,7 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); rpc_delay(task, NFS4_POLL_RETRY_MIN); task->tk_status = 0; - nfs4_restart_rpc(task, data->clp); + nfs_restart_rpc(task, data->clp); return; } dprintk("<-- %s\n", __func__); @@ -4854,7 +4843,7 @@ void nfs41_sequence_call_done(struct rpc_task *task, void *data) if (_nfs4_async_handle_error(task, NULL, clp, NULL) == -EAGAIN) { - nfs4_restart_rpc(task, clp); + nfs_restart_rpc(task, clp); return; } } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index d319bfbe513..db9b360ae19 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -368,7 +368,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data argp->offset += resp->count; argp->pgbase += resp->count; argp->count -= resp->count; - nfs4_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); + nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); } /* diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 1064c91ae81..6da3d3ff6ed 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -83,7 +83,7 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) struct inode *dir = data->dir; if (!NFS_PROTO(dir)->unlink_done(task, dir)) - nfs4_restart_rpc(task, NFS_SERVER(dir)->nfs_client); + nfs_restart_rpc(task, NFS_SERVER(dir)->nfs_client); } /** diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d546c607de0..a28123be08a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1216,7 +1216,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) */ argp->stable = NFS_FILE_SYNC; } - nfs4_restart_rpc(task, server->nfs_client); + nfs_restart_rpc(task, server->nfs_client); return -EAGAIN; } if (time_before(complain, jiffies)) { From f455848a11cbbf15989609a46b24e81a6f13a08e Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Mon, 7 Dec 2009 09:16:09 -0500 Subject: [PATCH 67/75] nfs41: Mark stateids in need of reclaim if state manager gets stale clientid The state manager was not marking the stateids as needing to be reclaimed after reestablishing the clientid. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1b629cca707..1936036f629 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1255,14 +1255,6 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs_expire_all_delegations(clp); } -static void nfs4_session_recovery_handle_error(struct nfs_client *clp, int err) -{ - switch (err) { - case -NFS4ERR_STALE_CLIENTID: - set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); - } -} - static int nfs4_reset_session(struct nfs_client *clp) { struct nfs4_session *ses = clp->cl_session; @@ -1275,14 +1267,14 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_proc_destroy_session(clp->cl_session); if (status && status != -NFS4ERR_BADSESSION && status != -NFS4ERR_DEADSESSION) { - nfs4_session_recovery_handle_error(clp, status); + status = nfs4_recovery_handle_error(clp, status); goto out; } memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN); status = nfs4_proc_create_session(clp); if (status) - nfs4_session_recovery_handle_error(clp, status); + status = nfs4_recovery_handle_error(clp, status); out: /* From bcfa49f6f931ce4097309ca8501d842a6f0ac860 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Mon, 7 Dec 2009 09:22:29 -0500 Subject: [PATCH 68/75] nfs41: Handle session errors during delegation return Add session error handling to nfs4_open_delegation_recall() Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index acde7765452..96dfff12736 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1168,6 +1168,14 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state case -ENOENT: case -ESTALE: goto out; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: + nfs4_schedule_state_recovery( + server->nfs_client); + goto out; case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: case -NFS4ERR_EXPIRED: From 79708861189eb89dea6711bd0464b097b69e7c79 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Mon, 7 Dec 2009 09:23:21 -0500 Subject: [PATCH 69/75] nfs41: Retry delegation return if it failed with session error Update nfs4_delegreturn_done() to retry the operation after setting the NFS4CLNT_SESSION_SETUP bit to indicate the need to reset the session. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 96dfff12736..d8c2ceb303d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3479,9 +3479,20 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) nfs4_sequence_done(data->res.server, &data->res.seq_res, task->tk_status); - data->rpc_status = task->tk_status; - if (data->rpc_status == 0) + switch (task->tk_status) { + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + case 0: renew_lease(data->res.server, data->timestamp); + break; + default: + if (nfs4_async_handle_error(task, data->res.server, NULL) == + -EAGAIN) { + nfs_restart_rpc(task, data->res.server->nfs_client); + return; + } + } + data->rpc_status = task->tk_status; } static void nfs4_delegreturn_release(void *calldata) From 74e7bb73a3e0d15a7db10b0f2b2efdeeef36609e Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Mon, 7 Dec 2009 09:48:30 -0500 Subject: [PATCH 70/75] nfs41: Handle NFSv4.1 session errors in the delegation recall code Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d8c2ceb303d..cdf17d62845 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4180,6 +4180,11 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -NFS4ERR_EXPIRED: case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_BADSESSION: + case -NFS4ERR_BADSLOT: + case -NFS4ERR_BAD_HIGH_SLOT: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + case -NFS4ERR_DEADSESSION: nfs4_schedule_state_recovery(server->nfs_client); goto out; case -ERESTARTSYS: From 88069f77e1ac580a495762ce7a631c251c52cb90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2009 08:33:16 -0500 Subject: [PATCH 71/75] NFSv41: Fix a potential state leakage when restarting nfs4_close_prepare Currently, if the call to nfs4_setup_sequence() in nfs4_close_prepare fails, any later retries will fail to launch an RPC call, due to the fact that the &state->flags will have been cleared. Ditto if nfs4_close_done() triggers a call to the NFSv4.1 version of nfs_restart_rpc(). We therefore move the actual clearing of the state->flags to nfs4_close_done(), when we know that the RPC call was successful. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 52 +++++++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 17 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cdf17d62845..9f5f11ecfd9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -761,13 +761,16 @@ static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode goto out; switch (mode & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: - ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0 + && state->n_rdonly != 0; break; case FMODE_WRITE: - ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0 + && state->n_wronly != 0; break; case FMODE_READ|FMODE_WRITE: - ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0; + ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0 + && state->n_rdwr != 0; } out: return ret; @@ -1711,6 +1714,18 @@ static void nfs4_free_closedata(void *data) kfree(calldata); } +static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, + fmode_t fmode) +{ + spin_lock(&state->owner->so_lock); + if (!(fmode & FMODE_READ)) + clear_bit(NFS_O_RDONLY_STATE, &state->flags); + if (!(fmode & FMODE_WRITE)) + clear_bit(NFS_O_WRONLY_STATE, &state->flags); + clear_bit(NFS_O_RDWR_STATE, &state->flags); + spin_unlock(&state->owner->so_lock); +} + static void nfs4_close_done(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; @@ -1727,6 +1742,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data) case 0: nfs_set_open_stateid(state, &calldata->res.stateid, 0); renew_lease(server, calldata->timestamp); + nfs4_close_clear_stateid_flags(state, + calldata->arg.fmode); break; case -NFS4ERR_STALE_STATEID: case -NFS4ERR_OLD_STATEID: @@ -1747,38 +1764,39 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) { struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; - int clear_rd, clear_wr, clear_rdwr; + int call_close = 0; if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) return; - clear_rd = clear_wr = clear_rdwr = 0; + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; + calldata->arg.fmode = FMODE_READ|FMODE_WRITE; spin_lock(&state->owner->so_lock); /* Calculate the change in open mode */ if (state->n_rdwr == 0) { if (state->n_rdonly == 0) { - clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags); - clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + calldata->arg.fmode &= ~FMODE_READ; } if (state->n_wronly == 0) { - clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags); - clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags); + call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags); + call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags); + calldata->arg.fmode &= ~FMODE_WRITE; } } spin_unlock(&state->owner->so_lock); - if (!clear_rd && !clear_wr && !clear_rdwr) { + + if (!call_close) { /* Note: exit _without_ calling nfs4_close_done */ task->tk_action = NULL; return; } + + if (calldata->arg.fmode == 0) + task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; + nfs_fattr_init(calldata->res.fattr); - if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) { - task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.fmode = FMODE_READ; - } else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) { - task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE]; - calldata->arg.fmode = FMODE_WRITE; - } calldata->timestamp = jiffies; if (nfs4_setup_sequence((NFS_SERVER(calldata->inode))->nfs_client, &calldata->arg.seq_args, &calldata->res.seq_res, From 480e3243df156e39eea6c91057e2ae612a6bbe19 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 8 Dec 2009 13:13:03 -0500 Subject: [PATCH 72/75] SUNRPC: IS_ERR/PTR_ERR confusion IS_ERR returns 1 or 0, PTR_ERR returns the error value. Signed-off-by: Roel Kluin Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index fc6a43ccd95..129d75ea25d 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -485,7 +485,7 @@ gss_refresh_upcall(struct rpc_task *task) dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); - if (IS_ERR(gss_msg) == -EAGAIN) { + if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); From 7cab89b275fd5647e72b781ac41b58a214640334 Mon Sep 17 00:00:00 2001 From: Ricardo Labiaga Date: Tue, 8 Dec 2009 14:35:28 -0500 Subject: [PATCH 73/75] nfs41: Invoke RECLAIM_COMPLETE on all new client ids The NFSv4.1 spec indicates RECLAIM_COMPLETE is to be issued whenever a client establishes a new client id, not only after detecting the server has rebooted. Set the NFS4CLNT_RECLAIM_REBOOT bit after every new client id has been established. This enables us to issue RECLAIM_COMPLETE during the wrap up of the NFS4CLNT_RECLAIM_REBOOT state. Signed-off-by: Ricardo Labiaga Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1936036f629..e76427e6346 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1095,12 +1095,12 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) struct rb_node *pos; struct nfs4_state *state; - nfs4_reclaim_complete(clp, - nfs4_reboot_recovery_ops[clp->cl_minorversion]); - if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) return; + nfs4_reclaim_complete(clp, + nfs4_reboot_recovery_ops[clp->cl_minorversion]); + for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); spin_lock(&sp->so_lock); @@ -1335,6 +1335,7 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); } if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { From 053e324f67b9921fe7de0c4cbc720d29cb4bf207 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Wed, 9 Dec 2009 23:15:22 +0530 Subject: [PATCH 74/75] rpc: remove unneeded function parameter in gss_add_msg() The pointer to struct gss_auth parameter in gss_add_msg is not really needed after commit 5b7ddd4a. Zap it. Signed-off-by: Suresh Jayaraman Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 129d75ea25d..3c3c50f38a1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -304,7 +304,7 @@ __gss_find_upcall(struct rpc_inode *rpci, uid_t uid) * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * -gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) +gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_inode *rpci = gss_msg->inode; struct inode *inode = &rpci->vfs_inode; @@ -445,7 +445,7 @@ gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cr gss_new = gss_alloc_msg(gss_auth, uid, clnt, gss_cred->gc_machine_cred); if (IS_ERR(gss_new)) return gss_new; - gss_msg = gss_add_msg(gss_auth, gss_new); + gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { struct inode *inode = &gss_new->inode->vfs_inode; int res = rpc_queue_upcall(inode, &gss_new->msg); From 190f38e5cedc910940b1da9015f00458c18f97b4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Dec 2009 09:05:55 -0500 Subject: [PATCH 75/75] NFS: Fix nfs_migrate_page() The call to migrate_page() will cause the page->private field to be cleared. Also fix up the locking around the page->private transfer, so that we ensure that calls to nfs_page_find_request() don't end up racing. Finally, fix up a double free bug: nfs_unlock_request() already calls nfs_release_request() for us... Reported-by: Wu Fengguang Tested-by: Andi Kleen Cc: stable@kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a28123be08a..6d40a538e3d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1611,15 +1611,16 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage, if (ret) goto out_unlock; page_cache_get(newpage); + spin_lock(&mapping->host->i_lock); req->wb_page = newpage; SetPagePrivate(newpage); - set_page_private(newpage, page_private(page)); + set_page_private(newpage, (unsigned long)req); ClearPagePrivate(page); set_page_private(page, 0); + spin_unlock(&mapping->host->i_lock); page_cache_release(page); out_unlock: nfs_clear_page_tag_locked(req); - nfs_release_request(req); out: return ret; }