Merge branch 'fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux...
[linux-2.6-block.git] / fs / afs / server.c
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16
/* Seconds an unused server record is kept before it may be garbage collected */
static unsigned int afs_server_gc_delay = 10;
/* Seconds between rechecks of a server's address list with the VLDB */
static unsigned int afs_server_update_delay = 30;
19
20 static void afs_inc_servers_outstanding(struct afs_net *net)
21 {
22         atomic_inc(&net->servers_outstanding);
23 }
24
25 static void afs_dec_servers_outstanding(struct afs_net *net)
26 {
27         if (atomic_dec_and_test(&net->servers_outstanding))
28                 wake_up_var(&net->servers_outstanding);
29 }
30
31 /*
32  * Find a server by one of its addresses.
33  */
34 struct afs_server *afs_find_server(struct afs_net *net,
35                                    const struct sockaddr_rxrpc *srx)
36 {
37         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38         const struct afs_addr_list *alist;
39         struct afs_server *server = NULL;
40         unsigned int i;
41         bool ipv6 = true;
42         int seq = 0, diff;
43
44         if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45             srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46             srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47                 ipv6 = false;
48
49         rcu_read_lock();
50
51         do {
52                 if (server)
53                         afs_put_server(net, server);
54                 server = NULL;
55                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56
57                 if (ipv6) {
58                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59                                 alist = rcu_dereference(server->addresses);
60                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61                                         b = &alist->addrs[i].transport.sin6;
62                                         diff = ((u16 __force)a->sin6_port -
63                                                 (u16 __force)b->sin6_port);
64                                         if (diff == 0)
65                                                 diff = memcmp(&a->sin6_addr,
66                                                               &b->sin6_addr,
67                                                               sizeof(struct in6_addr));
68                                         if (diff == 0)
69                                                 goto found;
70                                         if (diff < 0) {
71                                                 // TODO: Sort the list
72                                                 //if (i == alist->nr_ipv4)
73                                                 //      goto not_found;
74                                                 break;
75                                         }
76                                 }
77                         }
78                 } else {
79                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
80                                 alist = rcu_dereference(server->addresses);
81                                 for (i = 0; i < alist->nr_ipv4; i++) {
82                                         b = &alist->addrs[i].transport.sin6;
83                                         diff = ((u16 __force)a->sin6_port -
84                                                 (u16 __force)b->sin6_port);
85                                         if (diff == 0)
86                                                 diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
87                                                         (u32 __force)b->sin6_addr.s6_addr32[3]);
88                                         if (diff == 0)
89                                                 goto found;
90                                         if (diff < 0) {
91                                                 // TODO: Sort the list
92                                                 //if (i == 0)
93                                                 //      goto not_found;
94                                                 break;
95                                         }
96                                 }
97                         }
98                 }
99
100         //not_found:
101                 server = NULL;
102         found:
103                 if (server && !atomic_inc_not_zero(&server->usage))
104                         server = NULL;
105
106         } while (need_seqretry(&net->fs_addr_lock, seq));
107
108         done_seqretry(&net->fs_addr_lock, seq);
109
110         rcu_read_unlock();
111         return server;
112 }
113
114 /*
115  * Look up a server by its UUID
116  */
117 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
118 {
119         struct afs_server *server = NULL;
120         struct rb_node *p;
121         int diff, seq = 0;
122
123         _enter("%pU", uuid);
124
125         do {
126                 /* Unfortunately, rbtree walking doesn't give reliable results
127                  * under just the RCU read lock, so we have to check for
128                  * changes.
129                  */
130                 if (server)
131                         afs_put_server(net, server);
132                 server = NULL;
133
134                 read_seqbegin_or_lock(&net->fs_lock, &seq);
135
136                 p = net->fs_servers.rb_node;
137                 while (p) {
138                         server = rb_entry(p, struct afs_server, uuid_rb);
139
140                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
141                         if (diff < 0) {
142                                 p = p->rb_left;
143                         } else if (diff > 0) {
144                                 p = p->rb_right;
145                         } else {
146                                 afs_get_server(server);
147                                 break;
148                         }
149
150                         server = NULL;
151                 }
152         } while (need_seqretry(&net->fs_lock, seq));
153
154         done_seqretry(&net->fs_lock, seq);
155
156         _leave(" = %p", server);
157         return server;
158 }
159
160 /*
161  * Install a server record in the namespace tree
162  */
163 static struct afs_server *afs_install_server(struct afs_net *net,
164                                              struct afs_server *candidate)
165 {
166         const struct afs_addr_list *alist;
167         struct afs_server *server;
168         struct rb_node **pp, *p;
169         int ret = -EEXIST, diff;
170
171         _enter("%p", candidate);
172
173         write_seqlock(&net->fs_lock);
174
175         /* Firstly install the server in the UUID lookup tree */
176         pp = &net->fs_servers.rb_node;
177         p = NULL;
178         while (*pp) {
179                 p = *pp;
180                 _debug("- consider %p", p);
181                 server = rb_entry(p, struct afs_server, uuid_rb);
182                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
183                 if (diff < 0)
184                         pp = &(*pp)->rb_left;
185                 else if (diff > 0)
186                         pp = &(*pp)->rb_right;
187                 else
188                         goto exists;
189         }
190
191         server = candidate;
192         rb_link_node(&server->uuid_rb, p, pp);
193         rb_insert_color(&server->uuid_rb, &net->fs_servers);
194         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
195
196         write_seqlock(&net->fs_addr_lock);
197         alist = rcu_dereference_protected(server->addresses,
198                                           lockdep_is_held(&net->fs_addr_lock.lock));
199
200         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
201          * it in the IPv4 and/or IPv6 reverse-map lists.
202          *
203          * TODO: For speed we want to use something other than a flat list
204          * here; even sorting the list in terms of lowest address would help a
205          * bit, but anything we might want to do gets messy and memory
206          * intensive.
207          */
208         if (alist->nr_ipv4 > 0)
209                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
210         if (alist->nr_addrs > alist->nr_ipv4)
211                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
212
213         write_sequnlock(&net->fs_addr_lock);
214         ret = 0;
215
216 exists:
217         afs_get_server(server);
218         write_sequnlock(&net->fs_lock);
219         return server;
220 }
221
222 /*
223  * allocate a new server record
224  */
225 static struct afs_server *afs_alloc_server(struct afs_net *net,
226                                            const uuid_t *uuid,
227                                            struct afs_addr_list *alist)
228 {
229         struct afs_server *server;
230
231         _enter("");
232
233         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
234         if (!server)
235                 goto enomem;
236
237         atomic_set(&server->usage, 1);
238         RCU_INIT_POINTER(server->addresses, alist);
239         server->addr_version = alist->version;
240         server->uuid = *uuid;
241         server->flags = (1UL << AFS_SERVER_FL_NEW);
242         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
243         rwlock_init(&server->fs_lock);
244         INIT_LIST_HEAD(&server->cb_interests);
245         rwlock_init(&server->cb_break_lock);
246
247         afs_inc_servers_outstanding(net);
248         _leave(" = %p", server);
249         return server;
250
251 enomem:
252         _leave(" = NULL [nomem]");
253         return NULL;
254 }
255
/*
 * Look up an address record for a server.
 *
 * Iterates over the cell's VL server addresses, asking each in turn for the
 * address list of the fileserver identified by @uuid.  Returns the address
 * list from the first query that completes without error, or an ERR_PTR
 * carrying whatever afs_end_cursor() reports.
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		/* Pick the RPC according to the yfs bit for this address */
		if (test_bit(ac.index, &ac.alist->yfs))
			alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
		else
			alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

		switch (ac.error) {
		case 0:
			afs_end_cursor(&ac);
			return alist;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* Move on to the next address */
			continue;

		case -ECONNABORTED:
			ac.error = afs_abort_to_error(ac.abort_code);
			break;

		case -ENOMEM:
		case -ENONET:
			break;

		default:
			ac.error = -EIO;
			break;
		}

		/* Anything that gets here ends the iteration */
		break;
	}

	return ERR_PTR(afs_end_cursor(&ac));
}
298
299 /*
300  * Get or create a fileserver record.
301  */
302 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
303                                      const uuid_t *uuid)
304 {
305         struct afs_addr_list *alist;
306         struct afs_server *server, *candidate;
307
308         _enter("%p,%pU", cell->net, uuid);
309
310         server = afs_find_server_by_uuid(cell->net, uuid);
311         if (server)
312                 return server;
313
314         alist = afs_vl_lookup_addrs(cell, key, uuid);
315         if (IS_ERR(alist))
316                 return ERR_CAST(alist);
317
318         candidate = afs_alloc_server(cell->net, uuid, alist);
319         if (!candidate) {
320                 afs_put_addrlist(alist);
321                 return ERR_PTR(-ENOMEM);
322         }
323
324         server = afs_install_server(cell->net, candidate);
325         if (server != candidate) {
326                 afs_put_addrlist(alist);
327                 kfree(candidate);
328         }
329
330         _leave(" = %p{%d}", server, atomic_read(&server->usage));
331         return server;
332 }
333
334 /*
335  * Set the server timer to fire after a given delay, assuming it's not already
336  * set for an earlier time.
337  */
338 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
339 {
340         if (net->live) {
341                 afs_inc_servers_outstanding(net);
342                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
343                         afs_dec_servers_outstanding(net);
344         }
345 }
346
347 /*
348  * Server management timer.  We have an increment on fs_outstanding that we
349  * need to pass along to the work item.
350  */
351 void afs_servers_timer(struct timer_list *timer)
352 {
353         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
354
355         _enter("");
356         if (!queue_work(afs_wq, &net->fs_manager))
357                 afs_dec_servers_outstanding(net);
358 }
359
360 /*
361  * Release a reference on a server record.
362  */
363 void afs_put_server(struct afs_net *net, struct afs_server *server)
364 {
365         unsigned int usage;
366
367         if (!server)
368                 return;
369
370         server->put_time = ktime_get_real_seconds();
371
372         usage = atomic_dec_return(&server->usage);
373
374         _enter("{%u}", usage);
375
376         if (likely(usage > 0))
377                 return;
378
379         afs_set_server_timer(net, afs_server_gc_delay);
380 }
381
382 static void afs_server_rcu(struct rcu_head *rcu)
383 {
384         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
385
386         afs_put_addrlist(rcu_access_pointer(server->addresses));
387         kfree(server);
388 }
389
390 /*
391  * destroy a dead server
392  */
393 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
394 {
395         struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
396         struct afs_addr_cursor ac = {
397                 .alist  = alist,
398                 .addr   = &alist->addrs[0],
399                 .start  = alist->index,
400                 .index  = alist->index,
401                 .error  = 0,
402         };
403         _enter("%p", server);
404
405         afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
406         call_rcu(&server->rcu, afs_server_rcu);
407         afs_dec_servers_outstanding(net);
408 }
409
410 /*
411  * Garbage collect any expired servers.
412  */
413 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
414 {
415         struct afs_server *server;
416         bool deleted;
417         int usage;
418
419         while ((server = gc_list)) {
420                 gc_list = server->gc_next;
421
422                 write_seqlock(&net->fs_lock);
423                 usage = 1;
424                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
425                 if (deleted) {
426                         rb_erase(&server->uuid_rb, &net->fs_servers);
427                         hlist_del_rcu(&server->proc_link);
428                 }
429                 write_sequnlock(&net->fs_lock);
430
431                 if (deleted) {
432                         write_seqlock(&net->fs_addr_lock);
433                         if (!hlist_unhashed(&server->addr4_link))
434                                 hlist_del_rcu(&server->addr4_link);
435                         if (!hlist_unhashed(&server->addr6_link))
436                                 hlist_del_rcu(&server->addr6_link);
437                         write_sequnlock(&net->fs_addr_lock);
438                         afs_destroy_server(net, server);
439                 }
440         }
441 }
442
443 /*
444  * Manage the records of servers known to be within a network namespace.  This
445  * includes garbage collecting unused servers.
446  *
447  * Note also that we were given an increment on net->servers_outstanding by
448  * whoever queued us that we need to deal with before returning.
449  */
450 void afs_manage_servers(struct work_struct *work)
451 {
452         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
453         struct afs_server *gc_list = NULL;
454         struct rb_node *cursor;
455         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
456         bool purging = !net->live;
457
458         _enter("");
459
460         /* Trawl the server list looking for servers that have expired from
461          * lack of use.
462          */
463         read_seqlock_excl(&net->fs_lock);
464
465         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
466                 struct afs_server *server =
467                         rb_entry(cursor, struct afs_server, uuid_rb);
468                 int usage = atomic_read(&server->usage);
469
470                 _debug("manage %pU %u", &server->uuid, usage);
471
472                 ASSERTCMP(usage, >=, 1);
473                 ASSERTIFCMP(purging, usage, ==, 1);
474
475                 if (usage == 1) {
476                         time64_t expire_at = server->put_time;
477
478                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
479                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
480                                 expire_at += afs_server_gc_delay;
481                         if (purging || expire_at <= now) {
482                                 server->gc_next = gc_list;
483                                 gc_list = server;
484                         } else if (expire_at < next_manage) {
485                                 next_manage = expire_at;
486                         }
487                 }
488         }
489
490         read_sequnlock_excl(&net->fs_lock);
491
492         /* Update the timer on the way out.  We have to pass an increment on
493          * servers_outstanding in the namespace that we are in to the timer or
494          * the work scheduler.
495          */
496         if (!purging && next_manage < TIME64_MAX) {
497                 now = ktime_get_real_seconds();
498
499                 if (next_manage - now <= 0) {
500                         if (queue_work(afs_wq, &net->fs_manager))
501                                 afs_inc_servers_outstanding(net);
502                 } else {
503                         afs_set_server_timer(net, next_manage - now);
504                 }
505         }
506
507         afs_gc_servers(net, gc_list);
508
509         afs_dec_servers_outstanding(net);
510         _leave(" [%d]", atomic_read(&net->servers_outstanding));
511 }
512
513 static void afs_queue_server_manager(struct afs_net *net)
514 {
515         afs_inc_servers_outstanding(net);
516         if (!queue_work(afs_wq, &net->fs_manager))
517                 afs_dec_servers_outstanding(net);
518 }
519
520 /*
521  * Purge list of servers.
522  */
523 void afs_purge_servers(struct afs_net *net)
524 {
525         _enter("");
526
527         if (del_timer_sync(&net->fs_timer))
528                 atomic_dec(&net->servers_outstanding);
529
530         afs_queue_server_manager(net);
531
532         _debug("wait");
533         wait_var_event(&net->servers_outstanding,
534                        !atomic_read(&net->servers_outstanding));
535         _leave("");
536 }
537
/*
 * Probe a fileserver to find its capabilities.
 *
 * Resets the cursor's address iteration state, then tries each address in
 * turn.  Returns true, with AFS_SERVER_FL_PROBED set on the server, as soon
 * as one request completes without error; returns false otherwise, with the
 * failure left in fc->ac.error.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;

	_enter("");

	ac->addr = NULL;
	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);

		switch (ac->error) {
		case 0:
			afs_end_cursor(ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* Move on to the next address */
			continue;

		case -ECONNABORTED:
			ac->error = afs_abort_to_error(ac->abort_code);
			break;

		case -ENOMEM:
		case -ENONET:
			break;

		default:
			ac->error = -EIO;
			break;
		}

		/* Anything that gets here ends the iteration */
		break;
	}

	afs_end_cursor(ac);
	return false;
}
583
584 /*
585  * If we haven't already, try probing the fileserver to get its capabilities.
586  * We try not to instigate parallel probes, but it's possible that the parallel
587  * probes will fail due to authentication failure when ours would succeed.
588  *
589  * TODO: Try sending an anonymous probe if an authenticated probe fails.
590  */
591 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
592 {
593         bool success;
594         int ret, retries = 0;
595
596         _enter("");
597
598 retry:
599         if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
600                 _leave(" = t");
601                 return true;
602         }
603
604         if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
605                 success = afs_do_probe_fileserver(fc);
606                 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
607                 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
608                 _leave(" = t");
609                 return success;
610         }
611
612         _debug("wait");
613         ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
614                           TASK_INTERRUPTIBLE);
615         if (ret == -ERESTARTSYS) {
616                 fc->ac.error = ret;
617                 _leave(" = f [%d]", ret);
618                 return false;
619         }
620
621         retries++;
622         if (retries == 4) {
623                 fc->ac.error = -ESTALE;
624                 _leave(" = f [stale]");
625                 return false;
626         }
627         _debug("retry");
628         goto retry;
629 }
630
631 /*
632  * Get an update for a server's address list.
633  */
634 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
635 {
636         struct afs_addr_list *alist, *discard;
637
638         _enter("");
639
640         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
641                                     &server->uuid);
642         if (IS_ERR(alist)) {
643                 fc->ac.error = PTR_ERR(alist);
644                 _leave(" = f [%d]", fc->ac.error);
645                 return false;
646         }
647
648         discard = alist;
649         if (server->addr_version != alist->version) {
650                 write_lock(&server->fs_lock);
651                 discard = rcu_dereference_protected(server->addresses,
652                                                     lockdep_is_held(&server->fs_lock));
653                 rcu_assign_pointer(server->addresses, alist);
654                 server->addr_version = alist->version;
655                 write_unlock(&server->fs_lock);
656         }
657
658         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
659         afs_put_addrlist(discard);
660         _leave(" = t");
661         return true;
662 }
663
664 /*
665  * See if a server's address list needs updating.
666  */
667 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
668 {
669         time64_t now = ktime_get_real_seconds();
670         long diff;
671         bool success;
672         int ret, retries = 0;
673
674         _enter("");
675
676         ASSERT(server);
677
678 retry:
679         diff = READ_ONCE(server->update_at) - now;
680         if (diff > 0) {
681                 _leave(" = t [not now %ld]", diff);
682                 return true;
683         }
684
685         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
686                 success = afs_update_server_record(fc, server);
687                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
688                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
689                 _leave(" = %d", success);
690                 return success;
691         }
692
693         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
694                           TASK_INTERRUPTIBLE);
695         if (ret == -ERESTARTSYS) {
696                 fc->ac.error = ret;
697                 _leave(" = f [intr]");
698                 return false;
699         }
700
701         retries++;
702         if (retries == 4) {
703                 _leave(" = f [stale]");
704                 ret = -ESTALE;
705                 return false;
706         }
707         goto retry;
708 }