rtime.felk.cvut.cz Git - linux-imx.git/blobdiff - fs/nfs/nfs4proc.c
NFSv4.1: Use CLAIM_DELEG_CUR_FH opens when available
[linux-imx.git] / fs / nfs / nfs4proc.c
index 5d864fb3657885ff9eb8a043d880f2c1490f8d0e..06e5a72b5fa233dd6acbf614de8d5d721d2c427c 100644 (file)
@@ -93,6 +93,8 @@ static int nfs4_map_errors(int err)
                return err;
        switch (err) {
        case -NFS4ERR_RESOURCE:
+       case -NFS4ERR_LAYOUTTRYLATER:
+       case -NFS4ERR_RECALLCONFLICT:
                return -EREMOTEIO;
        case -NFS4ERR_WRONGSEC:
                return -EPERM;
@@ -293,7 +295,9 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
                        }
                        if (state == NULL)
                                break;
-                       nfs4_schedule_stateid_recovery(server, state);
+                       ret = nfs4_schedule_stateid_recovery(server, state);
+                       if (ret < 0)
+                               break;
                        goto wait_on_recovery;
                case -NFS4ERR_DELEG_REVOKED:
                case -NFS4ERR_ADMIN_REVOKED:
@@ -301,11 +305,16 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
                        if (state == NULL)
                                break;
                        nfs_remove_bad_delegation(state->inode);
-                       nfs4_schedule_stateid_recovery(server, state);
+                       ret = nfs4_schedule_stateid_recovery(server, state);
+                       if (ret < 0)
+                               break;
                        goto wait_on_recovery;
                case -NFS4ERR_EXPIRED:
-                       if (state != NULL)
-                               nfs4_schedule_stateid_recovery(server, state);
+                       if (state != NULL) {
+                               ret = nfs4_schedule_stateid_recovery(server, state);
+                               if (ret < 0)
+                                       break;
+                       }
                case -NFS4ERR_STALE_STATEID:
                case -NFS4ERR_STALE_CLIENTID:
                        nfs4_schedule_lease_recovery(clp);
@@ -758,6 +767,35 @@ struct nfs4_opendata {
        int cancelled;
 };
 
+static bool nfs4_clear_cap_atomic_open_v1(struct nfs_server *server,
+               int err, struct nfs4_exception *exception)
+{
+       if (err != -EINVAL)
+               return false;
+       if (!(server->caps & NFS_CAP_ATOMIC_OPEN_V1))
+               return false;
+       server->caps &= ~NFS_CAP_ATOMIC_OPEN_V1;
+       exception->retry = 1;
+       return true;
+}
+
+static enum open_claim_type4
+nfs4_map_atomic_open_claim(struct nfs_server *server,
+               enum open_claim_type4 claim)
+{
+       if (server->caps & NFS_CAP_ATOMIC_OPEN_V1)
+               return claim;
+       switch (claim) {
+       default:
+               return claim;
+       case NFS4_OPEN_CLAIM_FH:
+               return NFS4_OPEN_CLAIM_NULL;
+       case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+               return NFS4_OPEN_CLAIM_DELEGATE_CUR;
+       case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+               return NFS4_OPEN_CLAIM_DELEGATE_PREV;
+       }
+}
 
 static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 {
@@ -773,6 +811,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
                struct nfs4_state_owner *sp, fmode_t fmode, int flags,
                const struct iattr *attrs,
+               enum open_claim_type4 claim,
                gfp_t gfp_mask)
 {
        struct dentry *parent = dget_parent(dentry);
@@ -791,7 +830,6 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->dir = parent;
        p->owner = sp;
        atomic_inc(&sp->so_count);
-       p->o_arg.fh = NFS_FH(dir);
        p->o_arg.open_flags = flags;
        p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
        /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
@@ -809,7 +847,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
        p->o_arg.server = server;
        p->o_arg.bitmask = server->attr_bitmask;
        p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
-       p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+       p->o_arg.claim = nfs4_map_atomic_open_claim(server, claim);
+       switch (p->o_arg.claim) {
+       case NFS4_OPEN_CLAIM_NULL:
+       case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+       case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+               p->o_arg.fh = NFS_FH(dir);
+               break;
+       case NFS4_OPEN_CLAIM_PREVIOUS:
+       case NFS4_OPEN_CLAIM_FH:
+       case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
+       case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
+               p->o_arg.fh = NFS_FH(dentry->d_inode);
+       }
        if (attrs != NULL && attrs->ia_valid != 0) {
                __be32 verf[2];
 
@@ -896,6 +946,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
                return 0;
        if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
                return 0;
+       if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+               return 0;
        nfs_mark_delegation_referenced(delegation);
        return 1;
 }
@@ -973,6 +1025,7 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
 
        spin_lock(&deleg_cur->lock);
        if (nfsi->delegation != deleg_cur ||
+          test_bit(NFS_DELEGATION_RETURNING, &deleg_cur->flags) ||
            (deleg_cur->type & fmode) != fmode)
                goto no_delegation_unlock;
 
@@ -1155,6 +1208,7 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
                        data->o_arg.fmode);
        iput(inode);
 out:
+       nfs_release_seqid(data->o_arg.seqid);
        return state;
 err_put_inode:
        iput(inode);
@@ -1187,11 +1241,13 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
        return ERR_PTR(-ENOENT);
 }
 
-static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx, struct nfs4_state *state)
+static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
+               struct nfs4_state *state, enum open_claim_type4 claim)
 {
        struct nfs4_opendata *opendata;
 
-       opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS);
+       opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0,
+                       NULL, claim, GFP_NOFS);
        if (opendata == NULL)
                return ERR_PTR(-ENOMEM);
        opendata->state = state;
@@ -1277,11 +1333,10 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
        fmode_t delegation_type = 0;
        int status;
 
-       opendata = nfs4_open_recoverdata_alloc(ctx, state);
+       opendata = nfs4_open_recoverdata_alloc(ctx, state,
+                       NFS4_OPEN_CLAIM_PREVIOUS);
        if (IS_ERR(opendata))
                return PTR_ERR(opendata);
-       opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
-       opendata->o_arg.fh = NFS_FH(state->inode);
        rcu_read_lock();
        delegation = rcu_dereference(NFS_I(state->inode)->delegation);
        if (delegation != NULL && test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) != 0)
@@ -1300,6 +1355,8 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state
        int err;
        do {
                err = _nfs4_do_open_reclaim(ctx, state);
+               if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
+                       continue;
                if (err != -NFS4ERR_DELAY)
                        break;
                nfs4_handle_exception(server, err, &exception);
@@ -1325,10 +1382,10 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
        struct nfs4_opendata *opendata;
        int ret;
 
-       opendata = nfs4_open_recoverdata_alloc(ctx, state);
+       opendata = nfs4_open_recoverdata_alloc(ctx, state,
+                       NFS4_OPEN_CLAIM_DELEG_CUR_FH);
        if (IS_ERR(opendata))
                return PTR_ERR(opendata);
-       opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
        nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
        ret = nfs4_open_recover(opendata, state);
        nfs4_opendata_put(opendata);
@@ -1352,19 +1409,18 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
                        case -NFS4ERR_BAD_HIGH_SLOT:
                        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
                        case -NFS4ERR_DEADSESSION:
+                               set_bit(NFS_DELEGATED_STATE, &state->flags);
                                nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+                               err = -EAGAIN;
                                goto out;
                        case -NFS4ERR_STALE_CLIENTID:
                        case -NFS4ERR_STALE_STATEID:
+                               set_bit(NFS_DELEGATED_STATE, &state->flags);
                        case -NFS4ERR_EXPIRED:
                                /* Don't recall a delegation if it was lost */
                                nfs4_schedule_lease_recovery(server->nfs_client);
+                               err = -EAGAIN;
                                goto out;
-                       case -ERESTARTSYS:
-                               /*
-                                * The show must go on: exit, but mark the
-                                * stateid as needing recovery.
-                                */
                        case -NFS4ERR_DELEG_REVOKED:
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_BAD_STATEID:
@@ -1375,6 +1431,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
                                err = 0;
                                goto out;
                }
+               set_bit(NFS_DELEGATED_STATE, &state->flags);
                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
 out:
@@ -1463,7 +1520,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
        struct nfs4_state_owner *sp = data->owner;
 
        if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
-               return;
+               goto out_wait;
        /*
         * Check if we still need to send an OPEN call, or if we can use
         * a delegation instead.
@@ -1498,6 +1555,7 @@ unlock_no_action:
        rcu_read_unlock();
 out_no_action:
        task->tk_action = NULL;
+out_wait:
        nfs4_sequence_done(task, &data->o_res.seq_res);
 }
 
@@ -1626,7 +1684,8 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
 
 static int nfs4_opendata_access(struct rpc_cred *cred,
                                struct nfs4_opendata *opendata,
-                               struct nfs4_state *state, fmode_t fmode)
+                               struct nfs4_state *state, fmode_t fmode,
+                               int openflags)
 {
        struct nfs_access_entry cache;
        u32 mask;
@@ -1638,11 +1697,14 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
 
        mask = 0;
        /* don't check MAY_WRITE - a newly created file may not have
-        * write mode bits, but POSIX allows the creating process to write */
-       if (fmode & FMODE_READ)
-               mask |= MAY_READ;
-       if (fmode & FMODE_EXEC)
-               mask |= MAY_EXEC;
+        * write mode bits, but POSIX allows the creating process to write.
+        * use openflags to check for exec, because fmode won't
+        * always have FMODE_EXEC set when file open for exec. */
+       if (openflags & __FMODE_EXEC) {
+               /* ONLY check for exec rights */
+               mask = MAY_EXEC;
+       } else if (fmode & FMODE_READ)
+               mask = MAY_READ;
 
        cache.cred = cred;
        cache.jiffies = jiffies;
@@ -1709,7 +1771,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
        struct nfs4_opendata *opendata;
        int ret;
 
-       opendata = nfs4_open_recoverdata_alloc(ctx, state);
+       opendata = nfs4_open_recoverdata_alloc(ctx, state,
+                       NFS4_OPEN_CLAIM_FH);
        if (IS_ERR(opendata))
                return PTR_ERR(opendata);
        ret = nfs4_open_recover(opendata, state);
@@ -1727,6 +1790,8 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state
 
        do {
                err = _nfs4_open_expired(ctx, state);
+               if (nfs4_clear_cap_atomic_open_v1(server, err, &exception))
+                       continue;
                switch (err) {
                default:
                        goto out;
@@ -1841,6 +1906,43 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
                sattr->ia_valid |= ATTR_MTIME;
 }
 
+static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
+               fmode_t fmode,
+               int flags,
+               struct nfs4_state **res)
+{
+       struct nfs4_state_owner *sp = opendata->owner;
+       struct nfs_server *server = sp->so_server;
+       struct nfs4_state *state;
+       unsigned int seq;
+       int ret;
+
+       seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+
+       ret = _nfs4_proc_open(opendata);
+       if (ret != 0)
+               goto out;
+
+       state = nfs4_opendata_to_nfs4_state(opendata);
+       ret = PTR_ERR(state);
+       if (IS_ERR(state))
+               goto out;
+       if (server->caps & NFS_CAP_POSIX_LOCK)
+               set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
+
+       ret = nfs4_opendata_access(sp->so_cred, opendata, state, fmode, flags);
+       if (ret != 0)
+               goto out;
+
+       if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
+               nfs4_schedule_stateid_recovery(server, state);
+               nfs4_wait_clnt_recover(server->nfs_client);
+       }
+       *res = state;
+out:
+       return ret;
+}
+
 /*
  * Returns a referenced nfs4_state
  */
@@ -1857,6 +1959,7 @@ static int _nfs4_do_open(struct inode *dir,
        struct nfs4_state     *state = NULL;
        struct nfs_server       *server = NFS_SERVER(dir);
        struct nfs4_opendata *opendata;
+       enum open_claim_type4 claim = NFS4_OPEN_CLAIM_NULL;
        int status;
 
        /* Protect against reboot recovery conflicts */
@@ -1872,7 +1975,10 @@ static int _nfs4_do_open(struct inode *dir,
        if (dentry->d_inode != NULL)
                nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
        status = -ENOMEM;
-       opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL);
+       if (dentry->d_inode)
+               claim = NFS4_OPEN_CLAIM_FH;
+       opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr,
+                       claim, GFP_KERNEL);
        if (opendata == NULL)
                goto err_put_state_owner;
 
@@ -1885,18 +1991,7 @@ static int _nfs4_do_open(struct inode *dir,
        if (dentry->d_inode != NULL)
                opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
 
-       status = _nfs4_proc_open(opendata);
-       if (status != 0)
-               goto err_opendata_put;
-
-       state = nfs4_opendata_to_nfs4_state(opendata);
-       status = PTR_ERR(state);
-       if (IS_ERR(state))
-               goto err_opendata_put;
-       if (server->caps & NFS_CAP_POSIX_LOCK)
-               set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
-
-       status = nfs4_opendata_access(cred, opendata, state, fmode);
+       status = _nfs4_open_and_get_state(opendata, fmode, flags, &state);
        if (status != 0)
                goto err_opendata_put;
 
@@ -1941,6 +2036,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
                                        struct rpc_cred *cred,
                                        struct nfs4_threshold **ctx_th)
 {
+       struct nfs_server *server = NFS_SERVER(dir);
        struct nfs4_exception exception = { };
        struct nfs4_state *res;
        int status;
@@ -1984,7 +2080,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
                        exception.retry = 1;
                        continue;
                }
-               res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
+               if (nfs4_clear_cap_atomic_open_v1(server, status, &exception))
+                       continue;
+               res = ERR_PTR(nfs4_handle_exception(server,
                                        status, &exception));
        } while (exception.retry);
        return res;
@@ -2016,7 +2114,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
 
        nfs_fattr_init(fattr);
 
-       if (state != NULL) {
+       if (state != NULL && nfs4_valid_open_stateid(state)) {
                struct nfs_lockowner lockowner = {
                        .l_owner = current->files,
                        .l_pid = current->tgid,
@@ -2084,7 +2182,7 @@ static void nfs4_free_closedata(void *data)
        nfs4_put_open_state(calldata->state);
        nfs_free_seqid(calldata->arg.seqid);
        nfs4_put_state_owner(sp);
-       nfs_sb_deactive_async(sb);
+       nfs_sb_deactive(sb);
        kfree(calldata);
 }
 
@@ -2146,7 +2244,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 
        dprintk("%s: begin!\n", __func__);
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
-               return;
+               goto out_wait;
 
        task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
        calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
@@ -2164,20 +2262,20 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                        calldata->arg.fmode &= ~FMODE_WRITE;
                }
        }
+       if (!nfs4_valid_open_stateid(state))
+               call_close = 0;
        spin_unlock(&state->owner->so_lock);
 
        if (!call_close) {
                /* Note: exit _without_ calling nfs4_close_done */
-               task->tk_action = NULL;
-               nfs4_sequence_done(task, &calldata->res.seq_res);
-               goto out;
+               goto out_no_action;
        }
 
        if (calldata->arg.fmode == 0) {
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
                if (calldata->roc &&
                    pnfs_roc_drain(inode, &calldata->roc_barrier, task))
-                       goto out;
+                       goto out_wait;
        }
 
        nfs_fattr_init(calldata->res.fattr);
@@ -2187,8 +2285,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                                &calldata->res.seq_res,
                                task) != 0)
                nfs_release_seqid(calldata->arg.seqid);
-out:
        dprintk("%s: done!\n", __func__);
+       return;
+out_no_action:
+       task->tk_action = NULL;
+out_wait:
+       nfs4_sequence_done(task, &calldata->res.seq_res);
 }
 
 static const struct rpc_call_ops nfs4_close_ops = {
@@ -2593,7 +2695,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
        int status;
 
        if (pnfs_ld_layoutret_on_setattr(inode))
-               pnfs_return_layout(inode);
+               pnfs_commit_and_return_layout(inode);
 
        nfs_fattr_init(fattr);
        
@@ -3406,6 +3508,46 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
        return err;
 }
 
+int nfs4_set_rw_stateid(nfs4_stateid *stateid,
+               const struct nfs_open_context *ctx,
+               const struct nfs_lock_context *l_ctx,
+               fmode_t fmode)
+{
+       const struct nfs_lockowner *lockowner = NULL;
+
+       if (l_ctx != NULL)
+               lockowner = &l_ctx->lockowner;
+       return nfs4_select_rw_stateid(stateid, ctx->state, fmode, lockowner);
+}
+EXPORT_SYMBOL_GPL(nfs4_set_rw_stateid);
+
+static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
+               const struct nfs_open_context *ctx,
+               const struct nfs_lock_context *l_ctx,
+               fmode_t fmode)
+{
+       nfs4_stateid current_stateid;
+
+       if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode))
+               return false;
+       return nfs4_stateid_match(stateid, &current_stateid);
+}
+
+static bool nfs4_error_stateid_expired(int err)
+{
+       switch (err) {
+       case -NFS4ERR_DELEG_REVOKED:
+       case -NFS4ERR_ADMIN_REVOKED:
+       case -NFS4ERR_BAD_STATEID:
+       case -NFS4ERR_STALE_STATEID:
+       case -NFS4ERR_OLD_STATEID:
+       case -NFS4ERR_OPENMODE:
+       case -NFS4ERR_EXPIRED:
+               return true;
+       }
+       return false;
+}
+
 void __nfs4_read_done_cb(struct nfs_read_data *data)
 {
        nfs_invalidate_atime(data->header->inode);
@@ -3426,6 +3568,20 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
        return 0;
 }
 
+static bool nfs4_read_stateid_changed(struct rpc_task *task,
+               struct nfs_readargs *args)
+{
+
+       if (!nfs4_error_stateid_expired(task->tk_status) ||
+               nfs4_stateid_is_current(&args->stateid,
+                               args->context,
+                               args->lock_context,
+                               FMODE_READ))
+               return false;
+       rpc_restart_call_prepare(task);
+       return true;
+}
+
 static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 {
 
@@ -3433,7 +3589,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 
        if (!nfs4_sequence_done(task, &data->res.seq_res))
                return -EAGAIN;
-
+       if (nfs4_read_stateid_changed(task, &data->args))
+               return -EAGAIN;
        return data->read_done_cb ? data->read_done_cb(task, data) :
                                    nfs4_read_done_cb(task, data);
 }
@@ -3448,10 +3605,13 @@ static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message
 
 static void nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
 {
-       nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+       if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
                        &data->args.seq_args,
                        &data->res.seq_res,
-                       task);
+                       task))
+               return;
+       nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
+                       data->args.lock_context, FMODE_READ);
 }
 
 static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
@@ -3469,10 +3629,26 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
        return 0;
 }
 
+static bool nfs4_write_stateid_changed(struct rpc_task *task,
+               struct nfs_writeargs *args)
+{
+
+       if (!nfs4_error_stateid_expired(task->tk_status) ||
+               nfs4_stateid_is_current(&args->stateid,
+                               args->context,
+                               args->lock_context,
+                               FMODE_WRITE))
+               return false;
+       rpc_restart_call_prepare(task);
+       return true;
+}
+
 static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 {
        if (!nfs4_sequence_done(task, &data->res.seq_res))
                return -EAGAIN;
+       if (nfs4_write_stateid_changed(task, &data->args))
+               return -EAGAIN;
        return data->write_done_cb ? data->write_done_cb(task, data) :
                nfs4_write_done_cb(task, data);
 }
@@ -3512,10 +3688,13 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
 
 static void nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
 {
-       nfs4_setup_sequence(NFS_SERVER(data->header->inode),
+       if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
                        &data->args.seq_args,
                        &data->res.seq_res,
-                       task);
+                       task))
+               return;
+       nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
+                       data->args.lock_context, FMODE_WRITE);
 }
 
 static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -3941,11 +4120,14 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
                case -NFS4ERR_OPENMODE:
                        if (state == NULL)
                                break;
-                       nfs4_schedule_stateid_recovery(server, state);
+                       if (nfs4_schedule_stateid_recovery(server, state) < 0)
+                               goto stateid_invalid;
                        goto wait_on_recovery;
                case -NFS4ERR_EXPIRED:
-                       if (state != NULL)
-                               nfs4_schedule_stateid_recovery(server, state);
+                       if (state != NULL) {
+                               if (nfs4_schedule_stateid_recovery(server, state) < 0)
+                                       goto stateid_invalid;
+                       }
                case -NFS4ERR_STALE_STATEID:
                case -NFS4ERR_STALE_CLIENTID:
                        nfs4_schedule_lease_recovery(clp);
@@ -3977,6 +4159,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
        }
        task->tk_status = nfs4_map_errors(task->tk_status);
        return 0;
+stateid_invalid:
+       task->tk_status = -EIO;
+       return 0;
 wait_on_recovery:
        rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
        if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
@@ -4419,12 +4604,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
        struct nfs4_unlockdata *calldata = data;
 
        if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
-               return;
+               goto out_wait;
        if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
                /* Note: exit _without_ running nfs4_locku_done */
-               task->tk_action = NULL;
-               nfs4_sequence_done(task, &calldata->res.seq_res);
-               return;
+               goto out_no_action;
        }
        calldata->timestamp = jiffies;
        if (nfs4_setup_sequence(calldata->server,
@@ -4432,6 +4615,11 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
                                &calldata->res.seq_res,
                                task) != 0)
                nfs_release_seqid(calldata->arg.seqid);
+       return;
+out_no_action:
+       task->tk_action = NULL;
+out_wait:
+       nfs4_sequence_done(task, &calldata->res.seq_res);
 }
 
 static const struct rpc_call_ops nfs4_locku_ops = {
@@ -4478,7 +4666,9 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-       struct nfs_inode *nfsi = NFS_I(state->inode);
+       struct inode *inode = state->inode;
+       struct nfs4_state_owner *sp = state->owner;
+       struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_seqid *seqid;
        struct nfs4_lock_state *lsp;
        struct rpc_task *task;
@@ -4488,12 +4678,17 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
        status = nfs4_set_lock_state(state, request);
        /* Unlock _before_ we do the RPC call */
        request->fl_flags |= FL_EXISTS;
+       /* Exclude nfs_delegation_claim_locks() */
+       mutex_lock(&sp->so_delegreturn_mutex);
+       /* Exclude nfs4_reclaim_open_stateid() - note nesting! */
        down_read(&nfsi->rwsem);
        if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
                up_read(&nfsi->rwsem);
+               mutex_unlock(&sp->so_delegreturn_mutex);
                goto out;
        }
        up_read(&nfsi->rwsem);
+       mutex_unlock(&sp->so_delegreturn_mutex);
        if (status != 0)
                goto out;
        /* Is this a delegated lock? */
@@ -4572,7 +4767,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 
        dprintk("%s: begin!\n", __func__);
        if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
-               return;
+               goto out_wait;
        /* Do we need to do an open_to_lock_owner? */
        if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
                if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0) {
@@ -4583,15 +4778,23 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
                data->res.open_seqid = data->arg.open_seqid;
        } else
                data->arg.new_lock_owner = 0;
+       if (!nfs4_valid_open_stateid(state)) {
+               data->rpc_status = -EBADF;
+               task->tk_action = NULL;
+               goto out_release_open_seqid;
+       }
        data->timestamp = jiffies;
        if (nfs4_setup_sequence(data->server,
                                &data->arg.seq_args,
                                &data->res.seq_res,
                                task) == 0)
                return;
+out_release_open_seqid:
        nfs_release_seqid(data->arg.open_seqid);
 out_release_lock_seqid:
        nfs_release_seqid(data->arg.lock_seqid);
+out_wait:
+       nfs4_sequence_done(task, &data->res.seq_res);
        dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
 }
 
@@ -4809,8 +5012,10 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
 
 static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
+       struct nfs4_state_owner *sp = state->owner;
        struct nfs_inode *nfsi = NFS_I(state->inode);
        unsigned char fl_flags = request->fl_flags;
+       unsigned int seq;
        int status = -ENOLCK;
 
        if ((fl_flags & FL_POSIX) &&
@@ -4832,9 +5037,16 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
                status = do_vfs_lock(request->fl_file, request);
                goto out_unlock;
        }
+       seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
+       up_read(&nfsi->rwsem);
        status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
        if (status != 0)
+               goto out;
+       down_read(&nfsi->rwsem);
+       if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) {
+               status = -NFS4ERR_DELAY;
                goto out_unlock;
+       }
        /* Note: we always want to sleep here! */
        request->fl_flags = fl_flags | FL_SLEEP;
        if (do_vfs_lock(request->fl_file, request) < 0)
@@ -4941,24 +5153,22 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
                        case 0:
                        case -ESTALE:
                                goto out;
-                       case -NFS4ERR_EXPIRED:
-                               nfs4_schedule_stateid_recovery(server, state);
                        case -NFS4ERR_STALE_CLIENTID:
                        case -NFS4ERR_STALE_STATEID:
+                               set_bit(NFS_DELEGATED_STATE, &state->flags);
+                       case -NFS4ERR_EXPIRED:
                                nfs4_schedule_lease_recovery(server->nfs_client);
+                               err = -EAGAIN;
                                goto out;
                        case -NFS4ERR_BADSESSION:
                        case -NFS4ERR_BADSLOT:
                        case -NFS4ERR_BAD_HIGH_SLOT:
                        case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
                        case -NFS4ERR_DEADSESSION:
+                               set_bit(NFS_DELEGATED_STATE, &state->flags);
                                nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
+                               err = -EAGAIN;
                                goto out;
-                       case -ERESTARTSYS:
-                               /*
-                                * The show must go on: exit, but mark the
-                                * stateid as needing recovery.
-                                */
                        case -NFS4ERR_DELEG_REVOKED:
                        case -NFS4ERR_ADMIN_REVOKED:
                        case -NFS4ERR_BAD_STATEID:
@@ -4971,9 +5181,8 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
                                /* kill_proc(fl->fl_pid, SIGLOST, 1); */
                                err = 0;
                                goto out;
-                       case -NFS4ERR_DELAY:
-                               break;
                }
+               set_bit(NFS_DELEGATED_STATE, &state->flags);
                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
 out:
@@ -5991,6 +6200,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
        struct nfs_server *server = NFS_SERVER(inode);
        struct pnfs_layout_hdr *lo;
        struct nfs4_state *state = NULL;
+       unsigned long timeo, giveup;
 
        dprintk("--> %s\n", __func__);
 
@@ -6002,7 +6212,10 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
                goto out;
        case -NFS4ERR_LAYOUTTRYLATER:
        case -NFS4ERR_RECALLCONFLICT:
-               task->tk_status = -NFS4ERR_DELAY;
+               timeo = rpc_get_timeout(task->tk_client);
+               giveup = lgp->args.timestamp + timeo;
+               if (time_after(giveup, jiffies))
+                       task->tk_status = -NFS4ERR_DELAY;
                break;
        case -NFS4ERR_EXPIRED:
        case -NFS4ERR_BAD_STATEID:
@@ -6075,11 +6288,13 @@ static struct page **nfs4_alloc_pages(size_t size, gfp_t gfp_flags)
 static void nfs4_layoutget_release(void *calldata)
 {
        struct nfs4_layoutget *lgp = calldata;
-       struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+       struct inode *inode = lgp->args.inode;
+       struct nfs_server *server = NFS_SERVER(inode);
        size_t max_pages = max_response_pages(server);
 
        dprintk("--> %s\n", __func__);
        nfs4_free_pages(lgp->args.layout.pages, max_pages);
+       pnfs_put_layout_hdr(NFS_I(inode)->layout);
        put_nfs_open_context(lgp->args.ctx);
        kfree(calldata);
        dprintk("<-- %s\n", __func__);
@@ -6094,7 +6309,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
 struct pnfs_layout_segment *
 nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
 {
-       struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+       struct inode *inode = lgp->args.inode;
+       struct nfs_server *server = NFS_SERVER(inode);
        size_t max_pages = max_response_pages(server);
        struct rpc_task *task;
        struct rpc_message msg = {
@@ -6120,17 +6336,23 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
                return ERR_PTR(-ENOMEM);
        }
        lgp->args.layout.pglen = max_pages * PAGE_SIZE;
+       lgp->args.timestamp = jiffies;
 
        lgp->res.layoutp = &lgp->args.layout;
        lgp->res.seq_res.sr_slot = NULL;
        nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
+
+       /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+       pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
        task = rpc_run_task(&task_setup_data);
        if (IS_ERR(task))
                return ERR_CAST(task);
        status = nfs4_wait_for_completion_rpc_task(task);
        if (status == 0)
                status = task->tk_status;
-       if (status == 0)
+       /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+       if (status == 0 && lgp->res.layoutp->len)
                lseg = pnfs_layout_process(lgp);
        rpc_put_task(task);
        dprintk("<-- %s status=%d\n", __func__, status);
@@ -6346,22 +6568,8 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
 static void nfs4_layoutcommit_release(void *calldata)
 {
        struct nfs4_layoutcommit_data *data = calldata;
-       struct pnfs_layout_segment *lseg, *tmp;
-       unsigned long *bitlock = &NFS_I(data->args.inode)->flags;
 
        pnfs_cleanup_layoutcommit(data);
-       /* Matched by references in pnfs_set_layoutcommit */
-       list_for_each_entry_safe(lseg, tmp, &data->lseg_list, pls_lc_list) {
-               list_del_init(&lseg->pls_lc_list);
-               if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
-                                      &lseg->pls_flags))
-                       pnfs_put_lseg(lseg);
-       }
-
-       clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
-       smp_mb__after_clear_bit();
-       wake_up_bit(bitlock, NFS_INO_LAYOUTCOMMITTING);
-
        put_rpccred(data->cred);
        kfree(data);
 }
@@ -6669,6 +6877,10 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = {
 
 static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
        .minor_version = 0,
+       .init_caps = NFS_CAP_READDIRPLUS
+               | NFS_CAP_ATOMIC_OPEN
+               | NFS_CAP_CHANGE_ATTR
+               | NFS_CAP_POSIX_LOCK,
        .call_sync = _nfs4_call_sync,
        .match_stateid = nfs4_match_stateid,
        .find_root_sec = nfs4_find_root_sec,
@@ -6680,6 +6892,12 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
 #if defined(CONFIG_NFS_V4_1)
 static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
        .minor_version = 1,
+       .init_caps = NFS_CAP_READDIRPLUS
+               | NFS_CAP_ATOMIC_OPEN
+               | NFS_CAP_CHANGE_ATTR
+               | NFS_CAP_POSIX_LOCK
+               | NFS_CAP_STATEID_NFSV41
+               | NFS_CAP_ATOMIC_OPEN_V1,
        .call_sync = nfs4_call_sync_sequence,
        .match_stateid = nfs41_match_stateid,
        .find_root_sec = nfs41_find_root_sec,