NFS: Never use user credentials for lease renewal
[linux-2.6-block.git] / fs / nfs / nfs4state.c
1 /*
2  *  fs/nfs/nfs4state.c
3  *
4  *  Client-side XDR for NFSv4.
5  *
6  *  Copyright (c) 2002 The Regents of the University of Michigan.
7  *  All rights reserved.
8  *
9  *  Kendrick Smith <kmsmith@umich.edu>
10  *
11  *  Redistribution and use in source and binary forms, with or without
12  *  modification, are permitted provided that the following conditions
13  *  are met:
14  *
15  *  1. Redistributions of source code must retain the above copyright
16  *     notice, this list of conditions and the following disclaimer.
17  *  2. Redistributions in binary form must reproduce the above copyright
18  *     notice, this list of conditions and the following disclaimer in the
19  *     documentation and/or other materials provided with the distribution.
20  *  3. Neither the name of the University nor the names of its
21  *     contributors may be used to endorse or promote products derived
22  *     from this software without specific prior written permission.
23  *
24  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
25  *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
26  *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
27  *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31  *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32  *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33  *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34  *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35  *
36  * Implementation of the NFSv4 state model.  For the time being,
37  * this is minimal, but will be made much more complex in a
38  * subsequent patch.
39  */
40
41 #include <linux/kernel.h>
42 #include <linux/slab.h>
43 #include <linux/fs.h>
44 #include <linux/nfs_fs.h>
45 #include <linux/nfs_idmap.h>
46 #include <linux/kthread.h>
47 #include <linux/module.h>
48 #include <linux/random.h>
49 #include <linux/ratelimit.h>
50 #include <linux/workqueue.h>
51 #include <linux/bitops.h>
52 #include <linux/jiffies.h>
53
54 #include <linux/sunrpc/clnt.h>
55
56 #include "nfs4_fs.h"
57 #include "callback.h"
58 #include "delegation.h"
59 #include "internal.h"
60 #include "nfs4session.h"
61 #include "pnfs.h"
62 #include "netns.h"
63
/* NFS debug facility selected for this file. */
#define NFSDBG_FACILITY         NFSDBG_STATE

/* NOTE(review): not referenced in the visible part of this file - confirm it is still needed. */
#define OPENOWNER_POOL_SIZE     8

/* Shared constant stateid; static storage with no initializer, so every byte is zero. */
const nfs4_stateid zero_stateid;
/* NOTE(review): presumably serializes client ID setup; its users are outside the visible part of this file. */
static DEFINE_MUTEX(nfs_clid_init_mutex);
70
71 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
72 {
73         struct nfs4_setclientid_res clid = {
74                 .clientid = clp->cl_clientid,
75                 .confirm = clp->cl_confirm,
76         };
77         unsigned short port;
78         int status;
79         struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
80
81         if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
82                 goto do_confirm;
83         port = nn->nfs_callback_tcpport;
84         if (clp->cl_addr.ss_family == AF_INET6)
85                 port = nn->nfs_callback_tcpport6;
86
87         status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
88         if (status != 0)
89                 goto out;
90         clp->cl_clientid = clid.clientid;
91         clp->cl_confirm = clid.confirm;
92         set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
93 do_confirm:
94         status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
95         if (status != 0)
96                 goto out;
97         clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
98         nfs4_schedule_state_renewal(clp);
99 out:
100         return status;
101 }
102
103 /**
104  * nfs40_discover_server_trunking - Detect server IP address trunking (mv0)
105  *
106  * @clp: nfs_client under test
107  * @result: OUT: found nfs_client, or clp
108  * @cred: credential to use for trunking test
109  *
110  * Returns zero, a negative errno, or a negative NFS4ERR status.
111  * If zero is returned, an nfs_client pointer is planted in
112  * "result".
113  *
114  * Note: The returned client may not yet be marked ready.
115  */
116 int nfs40_discover_server_trunking(struct nfs_client *clp,
117                                    struct nfs_client **result,
118                                    struct rpc_cred *cred)
119 {
120         struct nfs4_setclientid_res clid = {
121                 .clientid = clp->cl_clientid,
122                 .confirm = clp->cl_confirm,
123         };
124         struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
125         unsigned short port;
126         int status;
127
128         port = nn->nfs_callback_tcpport;
129         if (clp->cl_addr.ss_family == AF_INET6)
130                 port = nn->nfs_callback_tcpport6;
131
132         status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
133         if (status != 0)
134                 goto out;
135         clp->cl_clientid = clid.clientid;
136         clp->cl_confirm = clid.confirm;
137
138         status = nfs40_walk_client_list(clp, result, cred);
139         if (status == 0) {
140                 /* Sustain the lease, even if it's empty.  If the clientid4
141                  * goes stale it's of no use for trunking discovery. */
142                 nfs4_schedule_state_renewal(*result);
143         }
144 out:
145         return status;
146 }
147
148 struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
149 {
150         struct rpc_cred *cred = NULL;
151
152         if (clp->cl_machine_cred != NULL)
153                 cred = get_rpccred(clp->cl_machine_cred);
154         return cred;
155 }
156
157 static void nfs4_root_machine_cred(struct nfs_client *clp)
158 {
159         struct rpc_cred *cred, *new;
160
161         new = rpc_lookup_machine_cred(NULL);
162         spin_lock(&clp->cl_lock);
163         cred = clp->cl_machine_cred;
164         clp->cl_machine_cred = new;
165         spin_unlock(&clp->cl_lock);
166         if (cred != NULL)
167                 put_rpccred(cred);
168 }
169
170 static struct rpc_cred *
171 nfs4_get_renew_cred_server_locked(struct nfs_server *server)
172 {
173         struct rpc_cred *cred = NULL;
174         struct nfs4_state_owner *sp;
175         struct rb_node *pos;
176
177         for (pos = rb_first(&server->state_owners);
178              pos != NULL;
179              pos = rb_next(pos)) {
180                 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
181                 if (list_empty(&sp->so_states))
182                         continue;
183                 cred = get_rpccred(sp->so_cred);
184                 break;
185         }
186         return cred;
187 }
188
189 /**
190  * nfs4_get_renew_cred_locked - Acquire credential for a renew operation
191  * @clp: client state handle
192  *
193  * Returns an rpc_cred with reference count bumped, or NULL.
194  * Caller must hold clp->cl_lock.
195  */
196 struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp)
197 {
198         struct rpc_cred *cred = NULL;
199         struct nfs_server *server;
200
201         /* Use machine credentials if available */
202         cred = nfs4_get_machine_cred_locked(clp);
203         if (cred != NULL)
204                 goto out;
205
206         rcu_read_lock();
207         list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
208                 cred = nfs4_get_renew_cred_server_locked(server);
209                 if (cred != NULL)
210                         break;
211         }
212         rcu_read_unlock();
213
214 out:
215         return cred;
216 }
217
218 #if defined(CONFIG_NFS_V4_1)
219
220 static int nfs41_setup_state_renewal(struct nfs_client *clp)
221 {
222         int status;
223         struct nfs_fsinfo fsinfo;
224
225         if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
226                 nfs4_schedule_state_renewal(clp);
227                 return 0;
228         }
229
230         status = nfs4_proc_get_lease_time(clp, &fsinfo);
231         if (status == 0) {
232                 /* Update lease time and schedule renewal */
233                 spin_lock(&clp->cl_lock);
234                 clp->cl_lease_time = fsinfo.lease_time * HZ;
235                 clp->cl_last_renewal = jiffies;
236                 spin_unlock(&clp->cl_lock);
237
238                 nfs4_schedule_state_renewal(clp);
239         }
240
241         return status;
242 }
243
244 static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
245 {
246         if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
247                 spin_lock(&tbl->slot_tbl_lock);
248                 nfs41_wake_slot_table(tbl);
249                 spin_unlock(&tbl->slot_tbl_lock);
250         }
251 }
252
253 static void nfs4_end_drain_session(struct nfs_client *clp)
254 {
255         struct nfs4_session *ses = clp->cl_session;
256
257         if (ses != NULL) {
258                 nfs4_end_drain_slot_table(&ses->bc_slot_table);
259                 nfs4_end_drain_slot_table(&ses->fc_slot_table);
260         }
261 }
262
263 /*
264  * Signal state manager thread if session fore channel is drained
265  */
266 void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl)
267 {
268         if (nfs4_slot_tbl_draining(tbl))
269                 complete(&tbl->complete);
270 }
271
272 static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl)
273 {
274         set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
275         spin_lock(&tbl->slot_tbl_lock);
276         if (tbl->highest_used_slotid != NFS4_NO_SLOT) {
277                 INIT_COMPLETION(tbl->complete);
278                 spin_unlock(&tbl->slot_tbl_lock);
279                 return wait_for_completion_interruptible(&tbl->complete);
280         }
281         spin_unlock(&tbl->slot_tbl_lock);
282         return 0;
283 }
284
285 static int nfs4_begin_drain_session(struct nfs_client *clp)
286 {
287         struct nfs4_session *ses = clp->cl_session;
288         int ret = 0;
289
290         /* back channel */
291         ret = nfs4_drain_slot_tbl(&ses->bc_slot_table);
292         if (ret)
293                 return ret;
294         /* fore channel */
295         return nfs4_drain_slot_tbl(&ses->fc_slot_table);
296 }
297
298 static void nfs41_finish_session_reset(struct nfs_client *clp)
299 {
300         clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
301         clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
302         /* create_session negotiated new slot table */
303         clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
304         nfs41_setup_state_renewal(clp);
305 }
306
307 int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
308 {
309         int status;
310
311         if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
312                 goto do_confirm;
313         nfs4_begin_drain_session(clp);
314         status = nfs4_proc_exchange_id(clp, cred);
315         if (status != 0)
316                 goto out;
317         set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
318 do_confirm:
319         status = nfs4_proc_create_session(clp, cred);
320         if (status != 0)
321                 goto out;
322         nfs41_finish_session_reset(clp);
323         nfs_mark_client_ready(clp, NFS_CS_READY);
324 out:
325         return status;
326 }
327
328 /**
329  * nfs41_discover_server_trunking - Detect server IP address trunking (mv1)
330  *
331  * @clp: nfs_client under test
332  * @result: OUT: found nfs_client, or clp
333  * @cred: credential to use for trunking test
334  *
335  * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status.
336  * If NFS4_OK is returned, an nfs_client pointer is planted in
337  * "result".
338  *
339  * Note: The returned client may not yet be marked ready.
340  */
341 int nfs41_discover_server_trunking(struct nfs_client *clp,
342                                    struct nfs_client **result,
343                                    struct rpc_cred *cred)
344 {
345         int status;
346
347         status = nfs4_proc_exchange_id(clp, cred);
348         if (status != NFS4_OK)
349                 return status;
350         set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
351
352         return nfs41_walk_client_list(clp, result, cred);
353 }
354
355 #endif /* CONFIG_NFS_V4_1 */
356
357 /**
358  * nfs4_get_clid_cred - Acquire credential for a setclientid operation
359  * @clp: client state handle
360  *
361  * Returns an rpc_cred with reference count bumped, or NULL.
362  */
363 struct rpc_cred *nfs4_get_clid_cred(struct nfs_client *clp)
364 {
365         struct rpc_cred *cred;
366
367         spin_lock(&clp->cl_lock);
368         cred = nfs4_get_machine_cred_locked(clp);
369         spin_unlock(&clp->cl_lock);
370         return cred;
371 }
372
373 static struct nfs4_state_owner *
374 nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
375 {
376         struct rb_node **p = &server->state_owners.rb_node,
377                        *parent = NULL;
378         struct nfs4_state_owner *sp;
379
380         while (*p != NULL) {
381                 parent = *p;
382                 sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
383
384                 if (cred < sp->so_cred)
385                         p = &parent->rb_left;
386                 else if (cred > sp->so_cred)
387                         p = &parent->rb_right;
388                 else {
389                         if (!list_empty(&sp->so_lru))
390                                 list_del_init(&sp->so_lru);
391                         atomic_inc(&sp->so_count);
392                         return sp;
393                 }
394         }
395         return NULL;
396 }
397
398 static struct nfs4_state_owner *
399 nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
400 {
401         struct nfs_server *server = new->so_server;
402         struct rb_node **p = &server->state_owners.rb_node,
403                        *parent = NULL;
404         struct nfs4_state_owner *sp;
405         int err;
406
407         while (*p != NULL) {
408                 parent = *p;
409                 sp = rb_entry(parent, struct nfs4_state_owner, so_server_node);
410
411                 if (new->so_cred < sp->so_cred)
412                         p = &parent->rb_left;
413                 else if (new->so_cred > sp->so_cred)
414                         p = &parent->rb_right;
415                 else {
416                         if (!list_empty(&sp->so_lru))
417                                 list_del_init(&sp->so_lru);
418                         atomic_inc(&sp->so_count);
419                         return sp;
420                 }
421         }
422         err = ida_get_new(&server->openowner_id, &new->so_seqid.owner_id);
423         if (err)
424                 return ERR_PTR(err);
425         rb_link_node(&new->so_server_node, parent, p);
426         rb_insert_color(&new->so_server_node, &server->state_owners);
427         return new;
428 }
429
430 static void
431 nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp)
432 {
433         struct nfs_server *server = sp->so_server;
434
435         if (!RB_EMPTY_NODE(&sp->so_server_node))
436                 rb_erase(&sp->so_server_node, &server->state_owners);
437         ida_remove(&server->openowner_id, sp->so_seqid.owner_id);
438 }
439
440 static void
441 nfs4_init_seqid_counter(struct nfs_seqid_counter *sc)
442 {
443         sc->create_time = ktime_get();
444         sc->flags = 0;
445         sc->counter = 0;
446         spin_lock_init(&sc->lock);
447         INIT_LIST_HEAD(&sc->list);
448         rpc_init_wait_queue(&sc->wait, "Seqid_waitqueue");
449 }
450
451 static void
452 nfs4_destroy_seqid_counter(struct nfs_seqid_counter *sc)
453 {
454         rpc_destroy_wait_queue(&sc->wait);
455 }
456
457 /*
458  * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
459  * create a new state_owner.
460  *
461  */
462 static struct nfs4_state_owner *
463 nfs4_alloc_state_owner(struct nfs_server *server,
464                 struct rpc_cred *cred,
465                 gfp_t gfp_flags)
466 {
467         struct nfs4_state_owner *sp;
468
469         sp = kzalloc(sizeof(*sp), gfp_flags);
470         if (!sp)
471                 return NULL;
472         sp->so_server = server;
473         sp->so_cred = get_rpccred(cred);
474         spin_lock_init(&sp->so_lock);
475         INIT_LIST_HEAD(&sp->so_states);
476         nfs4_init_seqid_counter(&sp->so_seqid);
477         atomic_set(&sp->so_count, 1);
478         INIT_LIST_HEAD(&sp->so_lru);
479         seqcount_init(&sp->so_reclaim_seqcount);
480         mutex_init(&sp->so_delegreturn_mutex);
481         return sp;
482 }
483
484 static void
485 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
486 {
487         struct rb_node *rb_node = &sp->so_server_node;
488
489         if (!RB_EMPTY_NODE(rb_node)) {
490                 struct nfs_server *server = sp->so_server;
491                 struct nfs_client *clp = server->nfs_client;
492
493                 spin_lock(&clp->cl_lock);
494                 if (!RB_EMPTY_NODE(rb_node)) {
495                         rb_erase(rb_node, &server->state_owners);
496                         RB_CLEAR_NODE(rb_node);
497                 }
498                 spin_unlock(&clp->cl_lock);
499         }
500 }
501
502 static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
503 {
504         nfs4_destroy_seqid_counter(&sp->so_seqid);
505         put_rpccred(sp->so_cred);
506         kfree(sp);
507 }
508
509 static void nfs4_gc_state_owners(struct nfs_server *server)
510 {
511         struct nfs_client *clp = server->nfs_client;
512         struct nfs4_state_owner *sp, *tmp;
513         unsigned long time_min, time_max;
514         LIST_HEAD(doomed);
515
516         spin_lock(&clp->cl_lock);
517         time_max = jiffies;
518         time_min = (long)time_max - (long)clp->cl_lease_time;
519         list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
520                 /* NB: LRU is sorted so that oldest is at the head */
521                 if (time_in_range(sp->so_expires, time_min, time_max))
522                         break;
523                 list_move(&sp->so_lru, &doomed);
524                 nfs4_remove_state_owner_locked(sp);
525         }
526         spin_unlock(&clp->cl_lock);
527
528         list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
529                 list_del(&sp->so_lru);
530                 nfs4_free_state_owner(sp);
531         }
532 }
533
534 /**
535  * nfs4_get_state_owner - Look up a state owner given a credential
536  * @server: nfs_server to search
537  * @cred: RPC credential to match
538  *
539  * Returns a pointer to an instantiated nfs4_state_owner struct, or NULL.
540  */
541 struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
542                                               struct rpc_cred *cred,
543                                               gfp_t gfp_flags)
544 {
545         struct nfs_client *clp = server->nfs_client;
546         struct nfs4_state_owner *sp, *new;
547
548         spin_lock(&clp->cl_lock);
549         sp = nfs4_find_state_owner_locked(server, cred);
550         spin_unlock(&clp->cl_lock);
551         if (sp != NULL)
552                 goto out;
553         new = nfs4_alloc_state_owner(server, cred, gfp_flags);
554         if (new == NULL)
555                 goto out;
556         do {
557                 if (ida_pre_get(&server->openowner_id, gfp_flags) == 0)
558                         break;
559                 spin_lock(&clp->cl_lock);
560                 sp = nfs4_insert_state_owner_locked(new);
561                 spin_unlock(&clp->cl_lock);
562         } while (sp == ERR_PTR(-EAGAIN));
563         if (sp != new)
564                 nfs4_free_state_owner(new);
565 out:
566         nfs4_gc_state_owners(server);
567         return sp;
568 }
569
570 /**
571  * nfs4_put_state_owner - Release a nfs4_state_owner
572  * @sp: state owner data to release
573  *
574  * Note that we keep released state owners on an LRU
575  * list.
576  * This caches valid state owners so that they can be
577  * reused, to avoid the OPEN_CONFIRM on minor version 0.
578  * It also pins the uniquifier of dropped state owners for
579  * a while, to ensure that those state owner names are
580  * never reused.
581  */
582 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
583 {
584         struct nfs_server *server = sp->so_server;
585         struct nfs_client *clp = server->nfs_client;
586
587         if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
588                 return;
589
590         sp->so_expires = jiffies;
591         list_add_tail(&sp->so_lru, &server->state_owners_lru);
592         spin_unlock(&clp->cl_lock);
593 }
594
595 /**
596  * nfs4_purge_state_owners - Release all cached state owners
597  * @server: nfs_server with cached state owners to release
598  *
599  * Called at umount time.  Remaining state owners will be on
600  * the LRU with ref count of zero.
601  */
602 void nfs4_purge_state_owners(struct nfs_server *server)
603 {
604         struct nfs_client *clp = server->nfs_client;
605         struct nfs4_state_owner *sp, *tmp;
606         LIST_HEAD(doomed);
607
608         spin_lock(&clp->cl_lock);
609         list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
610                 list_move(&sp->so_lru, &doomed);
611                 nfs4_remove_state_owner_locked(sp);
612         }
613         spin_unlock(&clp->cl_lock);
614
615         list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
616                 list_del(&sp->so_lru);
617                 nfs4_free_state_owner(sp);
618         }
619 }
620
621 static struct nfs4_state *
622 nfs4_alloc_open_state(void)
623 {
624         struct nfs4_state *state;
625
626         state = kzalloc(sizeof(*state), GFP_NOFS);
627         if (!state)
628                 return NULL;
629         atomic_set(&state->count, 1);
630         INIT_LIST_HEAD(&state->lock_states);
631         spin_lock_init(&state->state_lock);
632         seqlock_init(&state->seqlock);
633         return state;
634 }
635
636 void
637 nfs4_state_set_mode_locked(struct nfs4_state *state, fmode_t fmode)
638 {
639         if (state->state == fmode)
640                 return;
641         /* NB! List reordering - see the reclaim code for why.  */
642         if ((fmode & FMODE_WRITE) != (state->state & FMODE_WRITE)) {
643                 if (fmode & FMODE_WRITE)
644                         list_move(&state->open_states, &state->owner->so_states);
645                 else
646                         list_move_tail(&state->open_states, &state->owner->so_states);
647         }
648         state->state = fmode;
649 }
650
651 static struct nfs4_state *
652 __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
653 {
654         struct nfs_inode *nfsi = NFS_I(inode);
655         struct nfs4_state *state;
656
657         list_for_each_entry(state, &nfsi->open_states, inode_states) {
658                 if (state->owner != owner)
659                         continue;
660                 if (!nfs4_valid_open_stateid(state))
661                         continue;
662                 if (atomic_inc_not_zero(&state->count))
663                         return state;
664         }
665         return NULL;
666 }
667
/* Free an nfs4_state; the structure is simply handed to kfree(). */
static void nfs4_free_open_state(struct nfs4_state *state)
{
        kfree(state);
}
673
674 struct nfs4_state *
675 nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
676 {
677         struct nfs4_state *state, *new;
678         struct nfs_inode *nfsi = NFS_I(inode);
679
680         spin_lock(&inode->i_lock);
681         state = __nfs4_find_state_byowner(inode, owner);
682         spin_unlock(&inode->i_lock);
683         if (state)
684                 goto out;
685         new = nfs4_alloc_open_state();
686         spin_lock(&owner->so_lock);
687         spin_lock(&inode->i_lock);
688         state = __nfs4_find_state_byowner(inode, owner);
689         if (state == NULL && new != NULL) {
690                 state = new;
691                 state->owner = owner;
692                 atomic_inc(&owner->so_count);
693                 list_add(&state->inode_states, &nfsi->open_states);
694                 ihold(inode);
695                 state->inode = inode;
696                 spin_unlock(&inode->i_lock);
697                 /* Note: The reclaim code dictates that we add stateless
698                  * and read-only stateids to the end of the list */
699                 list_add_tail(&state->open_states, &owner->so_states);
700                 spin_unlock(&owner->so_lock);
701         } else {
702                 spin_unlock(&inode->i_lock);
703                 spin_unlock(&owner->so_lock);
704                 if (new)
705                         nfs4_free_open_state(new);
706         }
707 out:
708         return state;
709 }
710
711 void nfs4_put_open_state(struct nfs4_state *state)
712 {
713         struct inode *inode = state->inode;
714         struct nfs4_state_owner *owner = state->owner;
715
716         if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
717                 return;
718         spin_lock(&inode->i_lock);
719         list_del(&state->inode_states);
720         list_del(&state->open_states);
721         spin_unlock(&inode->i_lock);
722         spin_unlock(&owner->so_lock);
723         iput(inode);
724         nfs4_free_open_state(state);
725         nfs4_put_state_owner(owner);
726 }
727
728 /*
729  * Close the current file.
730  */
731 static void __nfs4_close(struct nfs4_state *state,
732                 fmode_t fmode, gfp_t gfp_mask, int wait)
733 {
734         struct nfs4_state_owner *owner = state->owner;
735         int call_close = 0;
736         fmode_t newstate;
737
738         atomic_inc(&owner->so_count);
739         /* Protect against nfs4_find_state() */
740         spin_lock(&owner->so_lock);
741         switch (fmode & (FMODE_READ | FMODE_WRITE)) {
742                 case FMODE_READ:
743                         state->n_rdonly--;
744                         break;
745                 case FMODE_WRITE:
746                         state->n_wronly--;
747                         break;
748                 case FMODE_READ|FMODE_WRITE:
749                         state->n_rdwr--;
750         }
751         newstate = FMODE_READ|FMODE_WRITE;
752         if (state->n_rdwr == 0) {
753                 if (state->n_rdonly == 0) {
754                         newstate &= ~FMODE_READ;
755                         call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
756                         call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
757                 }
758                 if (state->n_wronly == 0) {
759                         newstate &= ~FMODE_WRITE;
760                         call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
761                         call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
762                 }
763                 if (newstate == 0)
764                         clear_bit(NFS_DELEGATED_STATE, &state->flags);
765         }
766         nfs4_state_set_mode_locked(state, newstate);
767         spin_unlock(&owner->so_lock);
768
769         if (!call_close) {
770                 nfs4_put_open_state(state);
771                 nfs4_put_state_owner(owner);
772         } else
773                 nfs4_do_close(state, gfp_mask, wait);
774 }
775
776 void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
777 {
778         __nfs4_close(state, fmode, GFP_NOFS, 0);
779 }
780
781 void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
782 {
783         __nfs4_close(state, fmode, GFP_KERNEL, 1);
784 }
785
786 /*
787  * Search the state->lock_states for an existing lock_owner
788  * that is compatible with current->files
789  */
790 static struct nfs4_lock_state *
791 __nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
792 {
793         struct nfs4_lock_state *pos;
794         list_for_each_entry(pos, &state->lock_states, ls_locks) {
795                 if (type != NFS4_ANY_LOCK_TYPE && pos->ls_owner.lo_type != type)
796                         continue;
797                 switch (pos->ls_owner.lo_type) {
798                 case NFS4_POSIX_LOCK_TYPE:
799                         if (pos->ls_owner.lo_u.posix_owner != fl_owner)
800                                 continue;
801                         break;
802                 case NFS4_FLOCK_LOCK_TYPE:
803                         if (pos->ls_owner.lo_u.flock_owner != fl_pid)
804                                 continue;
805                 }
806                 atomic_inc(&pos->ls_count);
807                 return pos;
808         }
809         return NULL;
810 }
811
812 /*
813  * Return a compatible lock_state. If no initialized lock_state structure
814  * exists, return an uninitialized one.
815  *
816  */
817 static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner, pid_t fl_pid, unsigned int type)
818 {
819         struct nfs4_lock_state *lsp;
820         struct nfs_server *server = state->owner->so_server;
821
822         lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
823         if (lsp == NULL)
824                 return NULL;
825         nfs4_init_seqid_counter(&lsp->ls_seqid);
826         atomic_set(&lsp->ls_count, 1);
827         lsp->ls_state = state;
828         lsp->ls_owner.lo_type = type;
829         switch (lsp->ls_owner.lo_type) {
830         case NFS4_FLOCK_LOCK_TYPE:
831                 lsp->ls_owner.lo_u.flock_owner = fl_pid;
832                 break;
833         case NFS4_POSIX_LOCK_TYPE:
834                 lsp->ls_owner.lo_u.posix_owner = fl_owner;
835                 break;
836         default:
837                 goto out_free;
838         }
839         lsp->ls_seqid.owner_id = ida_simple_get(&server->lockowner_id, 0, 0, GFP_NOFS);
840         if (lsp->ls_seqid.owner_id < 0)
841                 goto out_free;
842         INIT_LIST_HEAD(&lsp->ls_locks);
843         return lsp;
844 out_free:
845         kfree(lsp);
846         return NULL;
847 }
848
849 void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_state *lsp)
850 {
851         ida_simple_remove(&server->lockowner_id, lsp->ls_seqid.owner_id);
852         nfs4_destroy_seqid_counter(&lsp->ls_seqid);
853         kfree(lsp);
854 }
855
856 /*
857  * Return a compatible lock_state. If no initialized lock_state structure
858  * exists, return an uninitialized one.
859  *
860  */
861 static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t owner, pid_t pid, unsigned int type)
862 {
863         struct nfs4_lock_state *lsp, *new = NULL;
864         
865         for(;;) {
866                 spin_lock(&state->state_lock);
867                 lsp = __nfs4_find_lock_state(state, owner, pid, type);
868                 if (lsp != NULL)
869                         break;
870                 if (new != NULL) {
871                         list_add(&new->ls_locks, &state->lock_states);
872                         set_bit(LK_STATE_IN_USE, &state->flags);
873                         lsp = new;
874                         new = NULL;
875                         break;
876                 }
877                 spin_unlock(&state->state_lock);
878                 new = nfs4_alloc_lock_state(state, owner, pid, type);
879                 if (new == NULL)
880                         return NULL;
881         }
882         spin_unlock(&state->state_lock);
883         if (new != NULL)
884                 nfs4_free_lock_state(state->owner->so_server, new);
885         return lsp;
886 }
887
888 /*
889  * Release reference to lock_state, and free it if we see that
890  * it is no longer in use
891  */
892 void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
893 {
894         struct nfs_server *server;
895         struct nfs4_state *state;
896
897         if (lsp == NULL)
898                 return;
899         state = lsp->ls_state;
900         if (!atomic_dec_and_lock(&lsp->ls_count, &state->state_lock))
901                 return;
902         list_del(&lsp->ls_locks);
903         if (list_empty(&state->lock_states))
904                 clear_bit(LK_STATE_IN_USE, &state->flags);
905         spin_unlock(&state->state_lock);
906         server = state->owner->so_server;
907         if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
908                 struct nfs_client *clp = server->nfs_client;
909
910                 clp->cl_mvops->free_lock_state(server, lsp);
911         } else
912                 nfs4_free_lock_state(server, lsp);
913 }
914
915 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
916 {
917         struct nfs4_lock_state *lsp = src->fl_u.nfs4_fl.owner;
918
919         dst->fl_u.nfs4_fl.owner = lsp;
920         atomic_inc(&lsp->ls_count);
921 }
922
923 static void nfs4_fl_release_lock(struct file_lock *fl)
924 {
925         nfs4_put_lock_state(fl->fl_u.nfs4_fl.owner);
926 }
927
928 static const struct file_lock_operations nfs4_fl_lock_ops = {
929         .fl_copy_lock = nfs4_fl_copy_lock,
930         .fl_release_private = nfs4_fl_release_lock,
931 };
932
933 int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
934 {
935         struct nfs4_lock_state *lsp;
936
937         if (fl->fl_ops != NULL)
938                 return 0;
939         if (fl->fl_flags & FL_POSIX)
940                 lsp = nfs4_get_lock_state(state, fl->fl_owner, 0, NFS4_POSIX_LOCK_TYPE);
941         else if (fl->fl_flags & FL_FLOCK)
942                 lsp = nfs4_get_lock_state(state, NULL, fl->fl_pid,
943                                 NFS4_FLOCK_LOCK_TYPE);
944         else
945                 return -EINVAL;
946         if (lsp == NULL)
947                 return -ENOMEM;
948         fl->fl_u.nfs4_fl.owner = lsp;
949         fl->fl_ops = &nfs4_fl_lock_ops;
950         return 0;
951 }
952
953 static int nfs4_copy_lock_stateid(nfs4_stateid *dst,
954                 struct nfs4_state *state,
955                 const struct nfs_lockowner *lockowner)
956 {
957         struct nfs4_lock_state *lsp;
958         fl_owner_t fl_owner;
959         pid_t fl_pid;
960         int ret = -ENOENT;
961
962
963         if (lockowner == NULL)
964                 goto out;
965
966         if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
967                 goto out;
968
969         fl_owner = lockowner->l_owner;
970         fl_pid = lockowner->l_pid;
971         spin_lock(&state->state_lock);
972         lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
973         if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
974                 nfs4_stateid_copy(dst, &lsp->ls_stateid);
975                 ret = 0;
976                 smp_rmb();
977                 if (!list_empty(&lsp->ls_seqid.list))
978                         ret = -EWOULDBLOCK;
979         }
980         spin_unlock(&state->state_lock);
981         nfs4_put_lock_state(lsp);
982 out:
983         return ret;
984 }
985
986 static int nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
987 {
988         const nfs4_stateid *src;
989         int ret;
990         int seq;
991
992         do {
993                 src = &zero_stateid;
994                 seq = read_seqbegin(&state->seqlock);
995                 if (test_bit(NFS_OPEN_STATE, &state->flags))
996                         src = &state->open_stateid;
997                 nfs4_stateid_copy(dst, src);
998                 ret = 0;
999                 smp_rmb();
1000                 if (!list_empty(&state->owner->so_seqid.list))
1001                         ret = -EWOULDBLOCK;
1002         } while (read_seqretry(&state->seqlock, seq));
1003         return ret;
1004 }
1005
1006 /*
1007  * Byte-range lock aware utility to initialize the stateid of read/write
1008  * requests.
1009  */
1010 int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
1011                 fmode_t fmode, const struct nfs_lockowner *lockowner)
1012 {
1013         int ret = 0;
1014         if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
1015                 goto out;
1016         ret = nfs4_copy_lock_stateid(dst, state, lockowner);
1017         if (ret != -ENOENT)
1018                 goto out;
1019         ret = nfs4_copy_open_stateid(dst, state);
1020 out:
1021         if (nfs_server_capable(state->inode, NFS_CAP_STATEID_NFSV41))
1022                 dst->seqid = 0;
1023         return ret;
1024 }
1025
1026 struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
1027 {
1028         struct nfs_seqid *new;
1029
1030         new = kmalloc(sizeof(*new), gfp_mask);
1031         if (new != NULL) {
1032                 new->sequence = counter;
1033                 INIT_LIST_HEAD(&new->list);
1034                 new->task = NULL;
1035         }
1036         return new;
1037 }
1038
1039 void nfs_release_seqid(struct nfs_seqid *seqid)
1040 {
1041         struct nfs_seqid_counter *sequence;
1042
1043         if (list_empty(&seqid->list))
1044                 return;
1045         sequence = seqid->sequence;
1046         spin_lock(&sequence->lock);
1047         list_del_init(&seqid->list);
1048         if (!list_empty(&sequence->list)) {
1049                 struct nfs_seqid *next;
1050
1051                 next = list_first_entry(&sequence->list,
1052                                 struct nfs_seqid, list);
1053                 rpc_wake_up_queued_task(&sequence->wait, next->task);
1054         }
1055         spin_unlock(&sequence->lock);
1056 }
1057
/*
 * Dequeue @seqid from its sequence (if queued) and free it.
 */
void nfs_free_seqid(struct nfs_seqid *seqid)
{
	/* Must come off the wait list before the memory goes away. */
	nfs_release_seqid(seqid);

	kfree(seqid);
}
1063
1064 /*
1065  * Increment the seqid if the OPEN/OPEN_DOWNGRADE/CLOSE succeeded, or
1066  * failed with a seqid incrementing error -
1067  * see comments nfs_fs.h:seqid_mutating_error()
1068  */
1069 static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
1070 {
1071         switch (status) {
1072                 case 0:
1073                         break;
1074                 case -NFS4ERR_BAD_SEQID:
1075                         if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
1076                                 return;
1077                         pr_warn_ratelimited("NFS: v4 server returned a bad"
1078                                         " sequence-id error on an"
1079                                         " unconfirmed sequence %p!\n",
1080                                         seqid->sequence);
1081                 case -NFS4ERR_STALE_CLIENTID:
1082                 case -NFS4ERR_STALE_STATEID:
1083                 case -NFS4ERR_BAD_STATEID:
1084                 case -NFS4ERR_BADXDR:
1085                 case -NFS4ERR_RESOURCE:
1086                 case -NFS4ERR_NOFILEHANDLE:
1087                         /* Non-seqid mutating errors */
1088                         return;
1089         };
1090         /*
1091          * Note: no locking needed as we are guaranteed to be first
1092          * on the sequence list
1093          */
1094         seqid->sequence->counter++;
1095 }
1096
1097 void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
1098 {
1099         struct nfs4_state_owner *sp = container_of(seqid->sequence,
1100                                         struct nfs4_state_owner, so_seqid);
1101         struct nfs_server *server = sp->so_server;
1102
1103         if (status == -NFS4ERR_BAD_SEQID)
1104                 nfs4_drop_state_owner(sp);
1105         if (!nfs4_has_session(server->nfs_client))
1106                 nfs_increment_seqid(status, seqid);
1107 }
1108
/*
 * Lock-owner counterpart of nfs_increment_open_seqid(): advance the
 * seqid after a LOCK/LOCKU that succeeded or failed with a seqid
 * incrementing error - see comments nfs_fs.h:seqid_mutating_error()
 */
void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
{
	/* No lock-specific handling; the common helper decides. */
	nfs_increment_seqid(status, seqid);
}
1118
1119 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
1120 {
1121         struct nfs_seqid_counter *sequence = seqid->sequence;
1122         int status = 0;
1123
1124         spin_lock(&sequence->lock);
1125         seqid->task = task;
1126         if (list_empty(&seqid->list))
1127                 list_add_tail(&seqid->list, &sequence->list);
1128         if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
1129                 goto unlock;
1130         rpc_sleep_on(&sequence->wait, task, NULL);
1131         status = -EAGAIN;
1132 unlock:
1133         spin_unlock(&sequence->lock);
1134         return status;
1135 }
1136
/* Thread function handed to kthread_run() by nfs4_schedule_state_manager(). */
static int nfs4_run_state_manager(void *data);
1138
1139 static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
1140 {
1141         smp_mb__before_clear_bit();
1142         clear_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
1143         smp_mb__after_clear_bit();
1144         wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
1145         rpc_wake_up(&clp->cl_rpcwaitq);
1146 }
1147
1148 /*
1149  * Schedule the nfs_client asynchronous state management routine
1150  */
1151 void nfs4_schedule_state_manager(struct nfs_client *clp)
1152 {
1153         struct task_struct *task;
1154         char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
1155
1156         if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1157                 return;
1158         __module_get(THIS_MODULE);
1159         atomic_inc(&clp->cl_count);
1160
1161         /* The rcu_read_lock() is not strictly necessary, as the state
1162          * manager is the only thread that ever changes the rpc_xprt
1163          * after it's initialized.  At this point, we're single threaded. */
1164         rcu_read_lock();
1165         snprintf(buf, sizeof(buf), "%s-manager",
1166                         rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
1167         rcu_read_unlock();
1168         task = kthread_run(nfs4_run_state_manager, clp, "%s", buf);
1169         if (IS_ERR(task)) {
1170                 printk(KERN_ERR "%s: kthread_run: %ld\n",
1171                         __func__, PTR_ERR(task));
1172                 nfs4_clear_state_manager_bit(clp);
1173                 nfs_put_client(clp);
1174                 module_put(THIS_MODULE);
1175         }
1176 }
1177
1178 /*
1179  * Schedule a lease recovery attempt
1180  */
1181 void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1182 {
1183         if (!clp)
1184                 return;
1185         if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1186                 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1187         dprintk("%s: scheduling lease recovery for server %s\n", __func__,
1188                         clp->cl_hostname);
1189         nfs4_schedule_state_manager(clp);
1190 }
1191 EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery);
1192
1193 int nfs4_wait_clnt_recover(struct nfs_client *clp)
1194 {
1195         int res;
1196
1197         might_sleep();
1198
1199         res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
1200                         nfs_wait_bit_killable, TASK_KILLABLE);
1201         if (res)
1202                 return res;
1203
1204         if (clp->cl_cons_state < 0)
1205                 return clp->cl_cons_state;
1206         return 0;
1207 }
1208
1209 int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1210 {
1211         unsigned int loop;
1212         int ret;
1213
1214         for (loop = NFS4_MAX_LOOP_ON_RECOVER; loop != 0; loop--) {
1215                 ret = nfs4_wait_clnt_recover(clp);
1216                 if (ret != 0)
1217                         break;
1218                 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1219                     !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1220                         break;
1221                 nfs4_schedule_state_manager(clp);
1222                 ret = -EIO;
1223         }
1224         return ret;
1225 }
1226
1227 /*
1228  * nfs40_handle_cb_pathdown - return all delegations after NFS4ERR_CB_PATH_DOWN
1229  * @clp: client to process
1230  *
1231  * Set the NFS4CLNT_LEASE_EXPIRED state in order to force a
1232  * resend of the SETCLIENTID and hence re-establish the
1233  * callback channel. Then return all existing delegations.
1234  */
1235 static void nfs40_handle_cb_pathdown(struct nfs_client *clp)
1236 {
1237         set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1238         nfs_expire_all_delegations(clp);
1239         dprintk("%s: handling CB_PATHDOWN recovery for server %s\n", __func__,
1240                         clp->cl_hostname);
1241 }
1242
/*
 * Run the CB_PATH_DOWN handling for @clp, then schedule the state
 * manager to carry out the recovery.
 */
void nfs4_schedule_path_down_recovery(struct nfs_client *clp)
{
	/* Mark the lease expired and expire delegations first... */
	nfs40_handle_cb_pathdown(clp);
	/* ...then let the state manager act on it. */
	nfs4_schedule_state_manager(clp);
}
1248
1249 static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1250 {
1251
1252         set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1253         /* Don't recover state that expired before the reboot */
1254         if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) {
1255                 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1256                 return 0;
1257         }
1258         set_bit(NFS_OWNER_RECLAIM_REBOOT, &state->owner->so_flags);
1259         set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1260         return 1;
1261 }
1262
1263 static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1264 {
1265         set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1266         clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
1267         set_bit(NFS_OWNER_RECLAIM_NOGRACE, &state->owner->so_flags);
1268         set_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
1269         return 1;
1270 }
1271
1272 int nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1273 {
1274         struct nfs_client *clp = server->nfs_client;
1275
1276         if (!nfs4_valid_open_stateid(state))
1277                 return -EBADF;
1278         nfs4_state_mark_reclaim_nograce(clp, state);
1279         dprintk("%s: scheduling stateid recovery for server %s\n", __func__,
1280                         clp->cl_hostname);
1281         nfs4_schedule_state_manager(clp);
1282         return 0;
1283 }
1284 EXPORT_SYMBOL_GPL(nfs4_schedule_stateid_recovery);
1285
1286 void nfs_inode_find_state_and_recover(struct inode *inode,
1287                 const nfs4_stateid *stateid)
1288 {
1289         struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
1290         struct nfs_inode *nfsi = NFS_I(inode);
1291         struct nfs_open_context *ctx;
1292         struct nfs4_state *state;
1293         bool found = false;
1294
1295         spin_lock(&inode->i_lock);
1296         list_for_each_entry(ctx, &nfsi->open_files, list) {
1297                 state = ctx->state;
1298                 if (state == NULL)
1299                         continue;
1300                 if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
1301                         continue;
1302                 if (!nfs4_stateid_match(&state->stateid, stateid))
1303                         continue;
1304                 nfs4_state_mark_reclaim_nograce(clp, state);
1305                 found = true;
1306         }
1307         spin_unlock(&inode->i_lock);
1308         if (found)
1309                 nfs4_schedule_state_manager(clp);
1310 }
1311
1312 static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
1313 {
1314         struct inode *inode = state->inode;
1315         struct nfs_inode *nfsi = NFS_I(inode);
1316         struct nfs_open_context *ctx;
1317
1318         spin_lock(&inode->i_lock);
1319         list_for_each_entry(ctx, &nfsi->open_files, list) {
1320                 if (ctx->state != state)
1321                         continue;
1322                 set_bit(NFS_CONTEXT_BAD, &ctx->flags);
1323         }
1324         spin_unlock(&inode->i_lock);
1325 }
1326
1327 static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
1328 {
1329         set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
1330         nfs4_state_mark_open_context_bad(state);
1331 }
1332
1333
/*
 * Re-establish, through ops->recover_lock(), every POSIX or flock lock on
 * state->inode whose file's open context refers to @state.
 *
 * The inode's rwsem is held for writing across the whole walk.
 * inode->i_lock protects the i_flock list; it is dropped around each
 * recover_lock() call and retaken before advancing to fl->fl_next.
 *
 * Returns 0 when the walk completes (per-lock failures in the second
 * group of cases below are zeroed), or the negative status from
 * recover_lock() for the errors that abort the walk.
 */
1334 static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1335 {
1336         struct inode *inode = state->inode;
1337         struct nfs_inode *nfsi = NFS_I(inode);
1338         struct file_lock *fl;
1339         int status = 0;
1340
1341         if (inode->i_flock == NULL)
1342                 return 0;
1343
1344         /* Guard against delegation returns and new lock/unlock calls */
1345         down_write(&nfsi->rwsem);
1346         /* Protect inode->i_flock using inode->i_lock */
1347         spin_lock(&inode->i_lock);
1348         for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
1349                 if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
1350                         continue;
1351                 if (nfs_file_open_context(fl->fl_file)->state != state)
1352                         continue;
                /* The spinlock is not held across the recover_lock() call */
1353                 spin_unlock(&inode->i_lock);
1354                 status = ops->recover_lock(state, fl);
1355                 switch (status) {
1356                         case 0:
1357                                 break;
                        /* Errors that abort the walk and are returned as-is */
1358                         case -ESTALE:
1359                         case -NFS4ERR_ADMIN_REVOKED:
1360                         case -NFS4ERR_STALE_STATEID:
1361                         case -NFS4ERR_BAD_STATEID:
1362                         case -NFS4ERR_EXPIRED:
1363                         case -NFS4ERR_NO_GRACE:
1364                         case -NFS4ERR_STALE_CLIENTID:
1365                         case -NFS4ERR_BADSESSION:
1366                         case -NFS4ERR_BADSLOT:
1367                         case -NFS4ERR_BAD_HIGH_SLOT:
1368                         case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1369                                 goto out;
1370                         default:
1371                                 printk(KERN_ERR "NFS: %s: unhandled error %d. "
1372                                         "Zeroing state\n", __func__, status);
                                /* fall through */
                        /* This lock is given up on; continue with the next one */
1373                         case -ENOMEM:
1374                         case -NFS4ERR_DENIED:
1375                         case -NFS4ERR_RECLAIM_BAD:
1376                         case -NFS4ERR_RECLAIM_CONFLICT:
1377                                 /* kill_proc(fl->fl_pid, SIGLOST, 1); */
1378                                 status = 0;
1379                 }
1380                 spin_lock(&inode->i_lock);
1381         }
1382         spin_unlock(&inode->i_lock);
1383 out:
        /* Reached via "goto out" with i_lock already released */
1384         up_write(&nfsi->rwsem);
1385         return status;
1386 }
1387
/*
 * Recover the open state (and then the locks) of every state on
 * sp->so_states that has ops->state_flag_bit set. The bit is cleared as
 * it is tested, so each state is attempted once per pass.
 *
 * sp->so_lock is dropped around the recovery calls, so after handling a
 * state the list walk restarts from the head. The whole operation runs
 * inside a write section of sp->so_reclaim_seqcount.
 *
 * Returns 0 once no flagged state remains, or the negative status for
 * the errors that jump to out_err.
 */
1388 static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
1389 {
1390         struct nfs4_state *state;
1391         struct nfs4_lock_state *lock;
1392         int status = 0;
1393
1394         /* Note: we rely on the sp->so_states list being ordered
1395          * so that we always reclaim open(O_RDWR) and/or open(O_WRONLY)
1396          * states first.
1397          * This is needed to ensure that the server won't give us any
1398          * read delegations that we have to return if, say, we are
1399          * recovering after a network partition or a reboot from a
1400          * server that doesn't support a grace period.
1401          */
1402         spin_lock(&sp->so_lock);
1403         write_seqcount_begin(&sp->so_reclaim_seqcount);
1404 restart:
1405         list_for_each_entry(state, &sp->so_states, open_states) {
1406                 if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
1407                         continue;
1408                 if (!nfs4_valid_open_stateid(state))
1409                         continue;
1410                 if (state->state == 0)
1411                         continue;
                /* Take a reference on the state before so_lock is dropped */
1412                 atomic_inc(&state->count);
1413                 spin_unlock(&sp->so_lock);
1414                 status = ops->recover_open(sp, state);
1415                 if (status >= 0) {
1416                         status = nfs4_reclaim_locks(state, ops);
1417                         if (status >= 0) {
                                /* Warn about lock states not marked initialized */
1418                                 spin_lock(&state->state_lock);
1419                                 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1420                                         if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
1421                                                 pr_warn_ratelimited("NFS: "
1422                                                         "%s: Lock reclaim "
1423                                                         "failed!\n", __func__);
1424                                 }
1425                                 spin_unlock(&state->state_lock);
1426                                 nfs4_put_open_state(state);
1427                                 spin_lock(&sp->so_lock);
1428                                 goto restart;
1429                         }
1430                 }
                /* Only reached with a negative status */
1431                 switch (status) {
1432                         default:
1433                                 printk(KERN_ERR "NFS: %s: unhandled error %d. "
1434                                         "Zeroing state\n", __func__, status);
                                /* fall through */
1435                         case -ENOENT:
1436                         case -ENOMEM:
1437                         case -ESTALE:
1438                                 /*
1439                                  * Open state on this file cannot be recovered
1440                                  * All we can do is revert to using the zero stateid.
1441                                  */
1442                                 nfs4_state_mark_recovery_failed(state, status);
1443                                 break;
1444                         case -EAGAIN:
1445                                 ssleep(1);
                                /* fall through */
1446                         case -NFS4ERR_ADMIN_REVOKED:
1447                         case -NFS4ERR_STALE_STATEID:
1448                         case -NFS4ERR_BAD_STATEID:
1449                         case -NFS4ERR_RECLAIM_BAD:
1450                         case -NFS4ERR_RECLAIM_CONFLICT:
1451                                 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1452                                 break;
1453                         case -NFS4ERR_EXPIRED:
1454                         case -NFS4ERR_NO_GRACE:
1455                                 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
                                /* fall through */
1456                         case -NFS4ERR_STALE_CLIENTID:
1457                         case -NFS4ERR_BADSESSION:
1458                         case -NFS4ERR_BADSLOT:
1459                         case -NFS4ERR_BAD_HIGH_SLOT:
1460                         case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1461                                 goto out_err;
1462                 }
                /* Error handled for this state: release it and rescan the list */
1463                 nfs4_put_open_state(state);
1464                 spin_lock(&sp->so_lock);
1465                 goto restart;
1466         }
1467         write_seqcount_end(&sp->so_reclaim_seqcount);
1468         spin_unlock(&sp->so_lock);
1469         return 0;
1470 out_err:
        /* so_lock is not held here; retake it to close the seqcount section */
1471         nfs4_put_open_state(state);
1472         spin_lock(&sp->so_lock);
1473         write_seqcount_end(&sp->so_reclaim_seqcount);
1474         spin_unlock(&sp->so_lock);
1475         return status;
1476 }
1477
1478 static void nfs4_clear_open_state(struct nfs4_state *state)
1479 {
1480         struct nfs4_lock_state *lock;
1481
1482         clear_bit(NFS_DELEGATED_STATE, &state->flags);
1483         clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1484         clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1485         clear_bit(NFS_O_RDWR_STATE, &state->flags);
1486         spin_lock(&state->state_lock);
1487         list_for_each_entry(lock, &state->lock_states, ls_locks) {
1488                 lock->ls_seqid.flags = 0;
1489                 clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags);
1490         }
1491         spin_unlock(&state->state_lock);
1492 }
1493
1494 static void nfs4_reset_seqids(struct nfs_server *server,
1495         int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1496 {
1497         struct nfs_client *clp = server->nfs_client;
1498         struct nfs4_state_owner *sp;
1499         struct rb_node *pos;
1500         struct nfs4_state *state;
1501
1502         spin_lock(&clp->cl_lock);
1503         for (pos = rb_first(&server->state_owners);
1504              pos != NULL;
1505              pos = rb_next(pos)) {
1506                 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1507                 sp->so_seqid.flags = 0;
1508                 spin_lock(&sp->so_lock);
1509                 list_for_each_entry(state, &sp->so_states, open_states) {
1510                         if (mark_reclaim(clp, state))
1511                                 nfs4_clear_open_state(state);
1512                 }
1513                 spin_unlock(&sp->so_lock);
1514         }
1515         spin_unlock(&clp->cl_lock);
1516 }
1517
1518 static void nfs4_state_mark_reclaim_helper(struct nfs_client *clp,
1519         int (*mark_reclaim)(struct nfs_client *clp, struct nfs4_state *state))
1520 {
1521         struct nfs_server *server;
1522
1523         rcu_read_lock();
1524         list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1525                 nfs4_reset_seqids(server, mark_reclaim);
1526         rcu_read_unlock();
1527 }
1528
1529 static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
1530 {
1531         /* Mark all delegations for reclaim */
1532         nfs_delegation_mark_reclaim(clp);
1533         nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
1534 }
1535
1536 static void nfs4_reclaim_complete(struct nfs_client *clp,
1537                                  const struct nfs4_state_recovery_ops *ops,
1538                                  struct rpc_cred *cred)
1539 {
1540         /* Notify the server we're done reclaiming our state */
1541         if (ops->reclaim_complete)
1542                 (void)ops->reclaim_complete(clp, cred);
1543 }
1544
1545 static void nfs4_clear_reclaim_server(struct nfs_server *server)
1546 {
1547         struct nfs_client *clp = server->nfs_client;
1548         struct nfs4_state_owner *sp;
1549         struct rb_node *pos;
1550         struct nfs4_state *state;
1551
1552         spin_lock(&clp->cl_lock);
1553         for (pos = rb_first(&server->state_owners);
1554              pos != NULL;
1555              pos = rb_next(pos)) {
1556                 sp = rb_entry(pos, struct nfs4_state_owner, so_server_node);
1557                 spin_lock(&sp->so_lock);
1558                 list_for_each_entry(state, &sp->so_states, open_states) {
1559                         if (!test_and_clear_bit(NFS_STATE_RECLAIM_REBOOT,
1560                                                 &state->flags))
1561                                 continue;
1562                         nfs4_state_mark_reclaim_nograce(clp, state);
1563                 }
1564                 spin_unlock(&sp->so_lock);
1565         }
1566         spin_unlock(&clp->cl_lock);
1567 }
1568
1569 static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp)
1570 {
1571         struct nfs_server *server;
1572
1573         if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
1574                 return 0;
1575
1576         rcu_read_lock();
1577         list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link)
1578                 nfs4_clear_reclaim_server(server);
1579         rcu_read_unlock();
1580
1581         nfs_delegation_reap_unclaimed(clp);
1582         return 1;
1583 }
1584
1585 static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1586 {
1587         const struct nfs4_state_recovery_ops *ops;
1588         struct rpc_cred *cred;
1589
1590         if (!nfs4_state_clear_reclaim_reboot(clp))
1591                 return;
1592         ops = clp->cl_mvops->reboot_recovery_ops;
1593         cred = nfs4_get_clid_cred(clp);
1594         nfs4_reclaim_complete(clp, ops, cred);
1595         put_rpccred(cred);
1596 }
1597
/*
 * Discard all delegations of @clp: mark every one for reclaim, then
 * immediately reap them as unclaimed.
 */
static void nfs_delegation_clear_all(struct nfs_client *clp)
{
	nfs_delegation_mark_reclaim(clp);

	nfs_delegation_reap_unclaimed(clp);
}
1603
1604 static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
1605 {
1606         nfs_delegation_clear_all(clp);
1607         nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce);
1608 }
1609
1610 static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
1611 {
1612         switch (error) {
1613                 case 0:
1614                         break;
1615                 case -NFS4ERR_CB_PATH_DOWN:
1616                         nfs40_handle_cb_pathdown(clp);
1617                         break;
1618                 case -NFS4ERR_NO_GRACE:
1619                         nfs4_state_end_reclaim_reboot(clp);
1620                         break;
1621                 case -NFS4ERR_STALE_CLIENTID:
1622                 case -NFS4ERR_LEASE_MOVED:
1623                         set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1624                         nfs4_state_clear_reclaim_reboot(clp);
1625                         nfs4_state_start_reclaim_reboot(clp);
1626                         break;
1627                 case -NFS4ERR_EXPIRED:
1628                         set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1629                         nfs4_state_start_reclaim_nograce(clp);
1630                         break;
1631                 case -NFS4ERR_BADSESSION:
1632                 case -NFS4ERR_BADSLOT:
1633                 case -NFS4ERR_BAD_HIGH_SLOT:
1634                 case -NFS4ERR_DEADSESSION:
1635                 case -NFS4ERR_SEQ_FALSE_RETRY:
1636                 case -NFS4ERR_SEQ_MISORDERED:
1637                         set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1638                         /* Zero session reset errors */
1639                         break;
1640                 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1641                         set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1642                         break;
1643                 default:
1644                         dprintk("%s: failed to handle error %d for server %s\n",
1645                                         __func__, error, clp->cl_hostname);
1646                         return error;
1647         }
1648         dprintk("%s: handled error %d for server %s\n", __func__, error,
1649                         clp->cl_hostname);
1650         return 0;
1651 }
1652
/*
 * Run state reclaim for every state owner of @clp that has
 * ops->owner_flag_bit set, across all of the client's servers.
 *
 * The owner flag is cleared when an owner is picked up. Both cl_lock and
 * the RCU read lock are dropped before nfs4_reclaim_open_state() is
 * called, so after each owner the whole scan restarts from the first
 * server.
 *
 * If reclaiming an owner fails, its flag is set again and the result of
 * nfs4_recovery_handle_error() is returned. Otherwise the status of the
 * last reclaim (0 if none was needed) is returned.
 */
1653 static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops)
1654 {
1655         struct nfs4_state_owner *sp;
1656         struct nfs_server *server;
1657         struct rb_node *pos;
1658         int status = 0;
1659
1660 restart:
1661         rcu_read_lock();
1662         list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1663                 nfs4_purge_state_owners(server);
1664                 spin_lock(&clp->cl_lock);
1665                 for (pos = rb_first(&server->state_owners);
1666                      pos != NULL;
1667                      pos = rb_next(pos)) {
1668                         sp = rb_entry(pos,
1669                                 struct nfs4_state_owner, so_server_node);
1670                         if (!test_and_clear_bit(ops->owner_flag_bit,
1671                                                         &sp->so_flags))
1672                                 continue;
                        /* Take a reference on the owner before the locks are dropped */
1673                         atomic_inc(&sp->so_count);
1674                         spin_unlock(&clp->cl_lock);
1675                         rcu_read_unlock();
1676
1677                         status = nfs4_reclaim_open_state(sp, ops);
1678                         if (status < 0) {
                                /* Re-flag the owner so that it is picked up again later */
1679                                 set_bit(ops->owner_flag_bit, &sp->so_flags);
1680                                 nfs4_put_state_owner(sp);
1681                                 return nfs4_recovery_handle_error(clp, status);
1682                         }
1683
1684                         nfs4_put_state_owner(sp);
                        /* Locks were dropped above: start the scan over */
1685                         goto restart;
1686                 }
1687                 spin_unlock(&clp->cl_lock);
1688         }
1689         rcu_read_unlock();
1690         return status;
1691 }
1692
1693 static int nfs4_check_lease(struct nfs_client *clp)
1694 {
1695         struct rpc_cred *cred;
1696         const struct nfs4_state_maintenance_ops *ops =
1697                 clp->cl_mvops->state_renewal_ops;
1698         int status;
1699
1700         /* Is the client already known to have an expired lease? */
1701         if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
1702                 return 0;
1703         spin_lock(&clp->cl_lock);
1704         cred = ops->get_state_renewal_cred_locked(clp);
1705         spin_unlock(&clp->cl_lock);
1706         if (cred == NULL) {
1707                 cred = nfs4_get_clid_cred(clp);
1708                 status = -ENOKEY;
1709                 if (cred == NULL)
1710                         goto out;
1711         }
1712         status = ops->renew_lease(clp, cred);
1713         put_rpccred(cred);
1714         if (status == -ETIMEDOUT) {
1715                 set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1716                 return 0;
1717         }
1718 out:
1719         return nfs4_recovery_handle_error(clp, status);
1720 }
1721
1722 /* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors
1723  * and for recoverable errors on EXCHANGE_ID for v4.1
1724  */
1725 static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1726 {
1727         switch (status) {
1728         case -NFS4ERR_SEQ_MISORDERED:
1729                 if (test_and_set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state))
1730                         return -ESERVERFAULT;
1731                 /* Lease confirmation error: retry after purging the lease */
1732                 ssleep(1);
1733                 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1734                 break;
1735         case -NFS4ERR_STALE_CLIENTID:
1736                 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1737                 nfs4_state_clear_reclaim_reboot(clp);
1738                 nfs4_state_start_reclaim_reboot(clp);
1739                 break;
1740         case -NFS4ERR_CLID_INUSE:
1741                 pr_err("NFS: Server %s reports our clientid is in use\n",
1742                         clp->cl_hostname);
1743                 nfs_mark_client_ready(clp, -EPERM);
1744                 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1745                 return -EPERM;
1746         case -EACCES:
1747         case -NFS4ERR_DELAY:
1748         case -ETIMEDOUT:
1749         case -EAGAIN:
1750                 ssleep(1);
1751                 break;
1752
1753         case -NFS4ERR_MINOR_VERS_MISMATCH:
1754                 if (clp->cl_cons_state == NFS_CS_SESSION_INITING)
1755                         nfs_mark_client_ready(clp, -EPROTONOSUPPORT);
1756                 dprintk("%s: exit with error %d for server %s\n",
1757                                 __func__, -EPROTONOSUPPORT, clp->cl_hostname);
1758                 return -EPROTONOSUPPORT;
1759         case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1760                                  * in nfs4_exchange_id */
1761         default:
1762                 dprintk("%s: exit with error %d for server %s\n", __func__,
1763                                 status, clp->cl_hostname);
1764                 return status;
1765         }
1766         set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1767         dprintk("%s: handled error %d for server %s\n", __func__, status,
1768                         clp->cl_hostname);
1769         return 0;
1770 }
1771
1772 static int nfs4_establish_lease(struct nfs_client *clp)
1773 {
1774         struct rpc_cred *cred;
1775         const struct nfs4_state_recovery_ops *ops =
1776                 clp->cl_mvops->reboot_recovery_ops;
1777         int status;
1778
1779         cred = nfs4_get_clid_cred(clp);
1780         if (cred == NULL)
1781                 return -ENOENT;
1782         status = ops->establish_clid(clp, cred);
1783         put_rpccred(cred);
1784         if (status != 0)
1785                 return status;
1786         pnfs_destroy_all_layouts(clp);
1787         return 0;
1788 }
1789
1790 /*
1791  * Returns zero or a negative errno.  NFS4ERR values are converted
1792  * to local errno values.
1793  */
1794 static int nfs4_reclaim_lease(struct nfs_client *clp)
1795 {
1796         int status;
1797
1798         status = nfs4_establish_lease(clp);
1799         if (status < 0)
1800                 return nfs4_handle_reclaim_lease_error(clp, status);
1801         if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state))
1802                 nfs4_state_start_reclaim_nograce(clp);
1803         if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
1804                 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1805         clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1806         clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1807         return 0;
1808 }
1809
1810 static int nfs4_purge_lease(struct nfs_client *clp)
1811 {
1812         int status;
1813
1814         status = nfs4_establish_lease(clp);
1815         if (status < 0)
1816                 return nfs4_handle_reclaim_lease_error(clp, status);
1817         clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1818         set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1819         nfs4_state_start_reclaim_nograce(clp);
1820         return 0;
1821 }
1822
1823 /**
1824  * nfs4_discover_server_trunking - Detect server IP address trunking
1825  *
1826  * @clp: nfs_client under test
1827  * @result: OUT: found nfs_client, or clp
1828  *
1829  * Returns zero or a negative errno.  If zero is returned,
1830  * an nfs_client pointer is planted in "result".
1831  *
1832  * Note: since we are invoked in process context, and
1833  * not from inside the state manager, we cannot use
1834  * nfs4_handle_reclaim_lease_error().
1835  */
1836 int nfs4_discover_server_trunking(struct nfs_client *clp,
1837                                   struct nfs_client **result)
1838 {
1839         const struct nfs4_state_recovery_ops *ops =
1840                                 clp->cl_mvops->reboot_recovery_ops;
1841         struct rpc_clnt *clnt;
1842         struct rpc_cred *cred;
1843         int i, status;
1844
1845         dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
1846
1847         clnt = clp->cl_rpcclient;
1848         i = 0;
1849
1850         mutex_lock(&nfs_clid_init_mutex);
1851 again:
1852         status  = -ENOENT;
1853         cred = nfs4_get_clid_cred(clp);
1854         if (cred == NULL)
1855                 goto out_unlock;
1856
1857         status = ops->detect_trunking(clp, result, cred);
1858         put_rpccred(cred);
1859         switch (status) {
1860         case 0:
1861                 break;
1862         case -NFS4ERR_DELAY:
1863         case -ETIMEDOUT:
1864         case -EAGAIN:
1865                 ssleep(1);
1866         case -NFS4ERR_STALE_CLIENTID:
1867                 dprintk("NFS: %s after status %d, retrying\n",
1868                         __func__, status);
1869                 goto again;
1870         case -EACCES:
1871                 if (i++ == 0) {
1872                         nfs4_root_machine_cred(clp);
1873                         goto again;
1874                 }
1875                 if (i > 2)
1876                         break;
1877         case -NFS4ERR_CLID_INUSE:
1878         case -NFS4ERR_WRONGSEC:
1879                 clnt = rpc_clone_client_set_auth(clnt, RPC_AUTH_UNIX);
1880                 if (IS_ERR(clnt)) {
1881                         status = PTR_ERR(clnt);
1882                         break;
1883                 }
1884                 /* Note: this is safe because we haven't yet marked the
1885                  * client as ready, so we are the only user of
1886                  * clp->cl_rpcclient
1887                  */
1888                 clnt = xchg(&clp->cl_rpcclient, clnt);
1889                 rpc_shutdown_client(clnt);
1890                 clnt = clp->cl_rpcclient;
1891                 goto again;
1892
1893         case -NFS4ERR_MINOR_VERS_MISMATCH:
1894                 status = -EPROTONOSUPPORT;
1895                 break;
1896
1897         case -EKEYEXPIRED:
1898         case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1899                                  * in nfs4_exchange_id */
1900                 status = -EKEYEXPIRED;
1901                 break;
1902         default:
1903                 pr_warn("NFS: %s unhandled error %d. Exiting with error EIO\n",
1904                                 __func__, status);
1905                 status = -EIO;
1906         }
1907
1908 out_unlock:
1909         mutex_unlock(&nfs_clid_init_mutex);
1910         dprintk("NFS: %s: status = %d\n", __func__, status);
1911         return status;
1912 }
1913
1914 #ifdef CONFIG_NFS_V4_1
1915 void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
1916 {
1917         struct nfs_client *clp = session->clp;
1918
1919         switch (err) {
1920         default:
1921                 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
1922                 break;
1923         case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1924                 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
1925         }
1926         nfs4_schedule_lease_recovery(clp);
1927 }
1928 EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1929
1930 static void nfs41_ping_server(struct nfs_client *clp)
1931 {
1932         /* Use CHECK_LEASE to ping the server with a SEQUENCE */
1933         set_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1934         nfs4_schedule_state_manager(clp);
1935 }
1936
/*
 * Called on a target slot id update; reacts by pinging the server via
 * nfs41_ping_server().
 */
void nfs41_server_notify_target_slotid_update(struct nfs_client *clp)
{
	nfs41_ping_server(clp);
}
1941
/*
 * Called on a highest slot id update; reacts by pinging the server via
 * nfs41_ping_server().
 */
void nfs41_server_notify_highest_slotid_update(struct nfs_client *clp)
{
	nfs41_ping_server(clp);
}
1946
1947 static void nfs4_reset_all_state(struct nfs_client *clp)
1948 {
1949         if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1950                 set_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
1951                 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1952                 nfs4_state_start_reclaim_nograce(clp);
1953                 dprintk("%s: scheduling reset of all state for server %s!\n",
1954                                 __func__, clp->cl_hostname);
1955                 nfs4_schedule_state_manager(clp);
1956         }
1957 }
1958
1959 static void nfs41_handle_server_reboot(struct nfs_client *clp)
1960 {
1961         if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1962                 nfs4_state_start_reclaim_reboot(clp);
1963                 dprintk("%s: server %s rebooted!\n", __func__,
1964                                 clp->cl_hostname);
1965                 nfs4_schedule_state_manager(clp);
1966         }
1967 }
1968
1969 static void nfs41_handle_state_revoked(struct nfs_client *clp)
1970 {
1971         nfs4_reset_all_state(clp);
1972         dprintk("%s: state revoked on server %s\n", __func__, clp->cl_hostname);
1973 }
1974
1975 static void nfs41_handle_recallable_state_revoked(struct nfs_client *clp)
1976 {
1977         /* This will need to handle layouts too */
1978         nfs_expire_all_delegations(clp);
1979         dprintk("%s: Recallable state revoked on server %s!\n", __func__,
1980                         clp->cl_hostname);
1981 }
1982
1983 static void nfs41_handle_backchannel_fault(struct nfs_client *clp)
1984 {
1985         nfs_expire_all_delegations(clp);
1986         if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1987                 nfs4_schedule_state_manager(clp);
1988         dprintk("%s: server %s declared a backchannel fault\n", __func__,
1989                         clp->cl_hostname);
1990 }
1991
1992 static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1993 {
1994         if (test_and_set_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
1995                 &clp->cl_state) == 0)
1996                 nfs4_schedule_state_manager(clp);
1997 }
1998
1999 void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
2000 {
2001         if (!flags)
2002                 return;
2003
2004         dprintk("%s: \"%s\" (client ID %llx) flags=0x%08x\n",
2005                 __func__, clp->cl_hostname, clp->cl_clientid, flags);
2006
2007         if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
2008                 nfs41_handle_server_reboot(clp);
2009         if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
2010                             SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
2011                             SEQ4_STATUS_ADMIN_STATE_REVOKED |
2012                             SEQ4_STATUS_LEASE_MOVED))
2013                 nfs41_handle_state_revoked(clp);
2014         if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
2015                 nfs41_handle_recallable_state_revoked(clp);
2016         if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT)
2017                 nfs41_handle_backchannel_fault(clp);
2018         else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
2019                                 SEQ4_STATUS_CB_PATH_DOWN_SESSION))
2020                 nfs41_handle_cb_path_down(clp);
2021 }
2022
2023 static int nfs4_reset_session(struct nfs_client *clp)
2024 {
2025         struct rpc_cred *cred;
2026         int status;
2027
2028         if (!nfs4_has_session(clp))
2029                 return 0;
2030         nfs4_begin_drain_session(clp);
2031         cred = nfs4_get_clid_cred(clp);
2032         status = nfs4_proc_destroy_session(clp->cl_session, cred);
2033         switch (status) {
2034         case 0:
2035         case -NFS4ERR_BADSESSION:
2036         case -NFS4ERR_DEADSESSION:
2037                 break;
2038         case -NFS4ERR_BACK_CHAN_BUSY:
2039         case -NFS4ERR_DELAY:
2040                 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2041                 status = 0;
2042                 ssleep(1);
2043                 goto out;
2044         default:
2045                 status = nfs4_recovery_handle_error(clp, status);
2046                 goto out;
2047         }
2048
2049         memset(clp->cl_session->sess_id.data, 0, NFS4_MAX_SESSIONID_LEN);
2050         status = nfs4_proc_create_session(clp, cred);
2051         if (status) {
2052                 dprintk("%s: session reset failed with status %d for server %s!\n",
2053                         __func__, status, clp->cl_hostname);
2054                 status = nfs4_handle_reclaim_lease_error(clp, status);
2055                 goto out;
2056         }
2057         nfs41_finish_session_reset(clp);
2058         dprintk("%s: session reset was successful for server %s!\n",
2059                         __func__, clp->cl_hostname);
2060 out:
2061         if (cred)
2062                 put_rpccred(cred);
2063         return status;
2064 }
2065
2066 static int nfs4_bind_conn_to_session(struct nfs_client *clp)
2067 {
2068         struct rpc_cred *cred;
2069         int ret;
2070
2071         if (!nfs4_has_session(clp))
2072                 return 0;
2073         nfs4_begin_drain_session(clp);
2074         cred = nfs4_get_clid_cred(clp);
2075         ret = nfs4_proc_bind_conn_to_session(clp, cred);
2076         if (cred)
2077                 put_rpccred(cred);
2078         clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2079         switch (ret) {
2080         case 0:
2081                 dprintk("%s: bind_conn_to_session was successful for server %s!\n",
2082                         __func__, clp->cl_hostname);
2083                 break;
2084         case -NFS4ERR_DELAY:
2085                 ssleep(1);
2086                 set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
2087                 break;
2088         default:
2089                 return nfs4_recovery_handle_error(clp, ret);
2090         }
2091         return 0;
2092 }
2093 #else /* CONFIG_NFS_V4_1 */
/* Stub used when CONFIG_NFS_V4_1 is not set: always reports success. */
static int nfs4_reset_session(struct nfs_client *clp)
{
	return 0;
}
/* Stub used when CONFIG_NFS_V4_1 is not set: does nothing. */
static void nfs4_end_drain_session(struct nfs_client *clp)
{
}
2096
/* Stub used when CONFIG_NFS_V4_1 is not set: always reports success. */
static int nfs4_bind_conn_to_session(struct nfs_client *clp)
{
	return 0;
}
2101 #endif /* CONFIG_NFS_V4_1 */
2102
2103 static void nfs4_state_manager(struct nfs_client *clp)
2104 {
2105         int status = 0;
2106         const char *section = "", *section_sep = "";
2107
2108         /* Ensure exclusive access to NFSv4 state */
2109         do {
2110                 if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
2111                         section = "purge state";
2112                         status = nfs4_purge_lease(clp);
2113                         if (status < 0)
2114                                 goto out_error;
2115                         continue;
2116                 }
2117
2118                 if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
2119                         section = "lease expired";
2120                         /* We're going to have to re-establish a clientid */
2121                         status = nfs4_reclaim_lease(clp);
2122                         if (status < 0)
2123                                 goto out_error;
2124                         continue;
2125                 }
2126
2127                 /* Initialize or reset the session */
2128                 if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) {
2129                         section = "reset session";
2130                         status = nfs4_reset_session(clp);
2131                         if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
2132                                 continue;
2133                         if (status < 0)
2134                                 goto out_error;
2135                 }
2136
2137                 /* Send BIND_CONN_TO_SESSION */
2138                 if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
2139                                 &clp->cl_state)) {
2140                         section = "bind conn to session";
2141                         status = nfs4_bind_conn_to_session(clp);
2142                         if (status < 0)
2143                                 goto out_error;
2144                         continue;
2145                 }
2146
2147                 if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
2148                         section = "check lease";
2149                         status = nfs4_check_lease(clp);
2150                         if (status < 0)
2151                                 goto out_error;
2152                         continue;
2153                 }
2154
2155                 /* First recover reboot state... */
2156                 if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
2157                         section = "reclaim reboot";
2158                         status = nfs4_do_reclaim(clp,
2159                                 clp->cl_mvops->reboot_recovery_ops);
2160                         if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
2161                             test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
2162                                 continue;
2163                         nfs4_state_end_reclaim_reboot(clp);
2164                         if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
2165                                 continue;
2166                         if (status < 0)
2167                                 goto out_error;
2168                 }
2169
2170                 /* Now recover expired state... */
2171                 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
2172                         section = "reclaim nograce";
2173                         status = nfs4_do_reclaim(clp,
2174                                 clp->cl_mvops->nograce_recovery_ops);
2175                         if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
2176                             test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
2177                             test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
2178                                 continue;
2179                         if (status < 0)
2180                                 goto out_error;
2181                 }
2182
2183                 nfs4_end_drain_session(clp);
2184                 if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
2185                         nfs_client_return_marked_delegations(clp);
2186                         continue;
2187                 }
2188
2189                 nfs4_clear_state_manager_bit(clp);
2190                 /* Did we race with an attempt to give us more work? */
2191                 if (clp->cl_state == 0)
2192                         break;
2193                 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
2194                         break;
2195         } while (atomic_read(&clp->cl_count) > 1);
2196         return;
2197 out_error:
2198         if (strlen(section))
2199                 section_sep = ": ";
2200         pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
2201                         " with error %d\n", section_sep, section,
2202                         clp->cl_hostname, -status);
2203         ssleep(1);
2204         nfs4_end_drain_session(clp);
2205         nfs4_clear_state_manager_bit(clp);
2206 }
2207
2208 static int nfs4_run_state_manager(void *ptr)
2209 {
2210         struct nfs_client *clp = ptr;
2211
2212         allow_signal(SIGKILL);
2213         nfs4_state_manager(clp);
2214         nfs_put_client(clp);
2215         module_put_and_exit(0);
2216         return 0;
2217 }
2218
2219 /*
2220  * Local variables:
2221  *  c-basic-offset: 8
2222  * End:
2223  */