Merge tag 'mips_4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/jhogan/mips
[linux-2.6-block.git] / fs / afs / server.c
1 /* AFS server record management
2  *
3  * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
4  * Written by David Howells (dhowells@redhat.com)
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  */
11
12 #include <linux/sched.h>
13 #include <linux/slab.h>
14 #include "afs_fs.h"
15 #include "internal.h"
16
/* Seconds an unused server record is kept before it may be garbage collected */
static unsigned int afs_server_gc_delay = 10;

/* Seconds between rechecks of a server's address list against the VLDB */
static unsigned int afs_server_update_delay = 30;
19
20 static void afs_inc_servers_outstanding(struct afs_net *net)
21 {
22         atomic_inc(&net->servers_outstanding);
23 }
24
25 static void afs_dec_servers_outstanding(struct afs_net *net)
26 {
27         if (atomic_dec_and_test(&net->servers_outstanding))
28                 wake_up_var(&net->servers_outstanding);
29 }
30
31 /*
32  * Find a server by one of its addresses.
33  */
34 struct afs_server *afs_find_server(struct afs_net *net,
35                                    const struct sockaddr_rxrpc *srx)
36 {
37         const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
38         const struct afs_addr_list *alist;
39         struct afs_server *server = NULL;
40         unsigned int i;
41         bool ipv6 = true;
42         int seq = 0, diff;
43
44         if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
45             srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
46             srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
47                 ipv6 = false;
48
49         rcu_read_lock();
50
51         do {
52                 if (server)
53                         afs_put_server(net, server);
54                 server = NULL;
55                 read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
56
57                 if (ipv6) {
58                         hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
59                                 alist = rcu_dereference(server->addresses);
60                                 for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
61                                         b = &alist->addrs[i].transport.sin6;
62                                         diff = (u16)a->sin6_port - (u16)b->sin6_port;
63                                         if (diff == 0)
64                                                 diff = memcmp(&a->sin6_addr,
65                                                               &b->sin6_addr,
66                                                               sizeof(struct in6_addr));
67                                         if (diff == 0)
68                                                 goto found;
69                                         if (diff < 0) {
70                                                 // TODO: Sort the list
71                                                 //if (i == alist->nr_ipv4)
72                                                 //      goto not_found;
73                                                 break;
74                                         }
75                                 }
76                         }
77                 } else {
78                         hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
79                                 alist = rcu_dereference(server->addresses);
80                                 for (i = 0; i < alist->nr_ipv4; i++) {
81                                         b = &alist->addrs[i].transport.sin6;
82                                         diff = (u16)a->sin6_port - (u16)b->sin6_port;
83                                         if (diff == 0)
84                                                 diff = ((u32)a->sin6_addr.s6_addr32[3] -
85                                                         (u32)b->sin6_addr.s6_addr32[3]);
86                                         if (diff == 0)
87                                                 goto found;
88                                         if (diff < 0) {
89                                                 // TODO: Sort the list
90                                                 //if (i == 0)
91                                                 //      goto not_found;
92                                                 break;
93                                         }
94                                 }
95                         }
96                 }
97
98         //not_found:
99                 server = NULL;
100         found:
101                 if (server && !atomic_inc_not_zero(&server->usage))
102                         server = NULL;
103
104         } while (need_seqretry(&net->fs_addr_lock, seq));
105
106         done_seqretry(&net->fs_addr_lock, seq);
107
108         rcu_read_unlock();
109         return server;
110 }
111
112 /*
113  * Look up a server by its UUID
114  */
115 struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
116 {
117         struct afs_server *server = NULL;
118         struct rb_node *p;
119         int diff, seq = 0;
120
121         _enter("%pU", uuid);
122
123         do {
124                 /* Unfortunately, rbtree walking doesn't give reliable results
125                  * under just the RCU read lock, so we have to check for
126                  * changes.
127                  */
128                 if (server)
129                         afs_put_server(net, server);
130                 server = NULL;
131
132                 read_seqbegin_or_lock(&net->fs_lock, &seq);
133
134                 p = net->fs_servers.rb_node;
135                 while (p) {
136                         server = rb_entry(p, struct afs_server, uuid_rb);
137
138                         diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
139                         if (diff < 0) {
140                                 p = p->rb_left;
141                         } else if (diff > 0) {
142                                 p = p->rb_right;
143                         } else {
144                                 afs_get_server(server);
145                                 break;
146                         }
147
148                         server = NULL;
149                 }
150         } while (need_seqretry(&net->fs_lock, seq));
151
152         done_seqretry(&net->fs_lock, seq);
153
154         _leave(" = %p", server);
155         return server;
156 }
157
158 /*
159  * Install a server record in the namespace tree
160  */
161 static struct afs_server *afs_install_server(struct afs_net *net,
162                                              struct afs_server *candidate)
163 {
164         const struct afs_addr_list *alist;
165         struct afs_server *server;
166         struct rb_node **pp, *p;
167         int ret = -EEXIST, diff;
168
169         _enter("%p", candidate);
170
171         write_seqlock(&net->fs_lock);
172
173         /* Firstly install the server in the UUID lookup tree */
174         pp = &net->fs_servers.rb_node;
175         p = NULL;
176         while (*pp) {
177                 p = *pp;
178                 _debug("- consider %p", p);
179                 server = rb_entry(p, struct afs_server, uuid_rb);
180                 diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
181                 if (diff < 0)
182                         pp = &(*pp)->rb_left;
183                 else if (diff > 0)
184                         pp = &(*pp)->rb_right;
185                 else
186                         goto exists;
187         }
188
189         server = candidate;
190         rb_link_node(&server->uuid_rb, p, pp);
191         rb_insert_color(&server->uuid_rb, &net->fs_servers);
192         hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
193
194         write_seqlock(&net->fs_addr_lock);
195         alist = rcu_dereference_protected(server->addresses,
196                                           lockdep_is_held(&net->fs_addr_lock.lock));
197
198         /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
199          * it in the IPv4 and/or IPv6 reverse-map lists.
200          *
201          * TODO: For speed we want to use something other than a flat list
202          * here; even sorting the list in terms of lowest address would help a
203          * bit, but anything we might want to do gets messy and memory
204          * intensive.
205          */
206         if (alist->nr_ipv4 > 0)
207                 hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
208         if (alist->nr_addrs > alist->nr_ipv4)
209                 hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
210
211         write_sequnlock(&net->fs_addr_lock);
212         ret = 0;
213
214 exists:
215         afs_get_server(server);
216         write_sequnlock(&net->fs_lock);
217         return server;
218 }
219
220 /*
221  * allocate a new server record
222  */
223 static struct afs_server *afs_alloc_server(struct afs_net *net,
224                                            const uuid_t *uuid,
225                                            struct afs_addr_list *alist)
226 {
227         struct afs_server *server;
228
229         _enter("");
230
231         server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
232         if (!server)
233                 goto enomem;
234
235         atomic_set(&server->usage, 1);
236         RCU_INIT_POINTER(server->addresses, alist);
237         server->addr_version = alist->version;
238         server->uuid = *uuid;
239         server->flags = (1UL << AFS_SERVER_FL_NEW);
240         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
241         rwlock_init(&server->fs_lock);
242         INIT_LIST_HEAD(&server->cb_interests);
243         rwlock_init(&server->cb_break_lock);
244
245         afs_inc_servers_outstanding(net);
246         _leave(" = %p", server);
247         return server;
248
249 enomem:
250         _leave(" = NULL [nomem]");
251         return NULL;
252 }
253
/*
 * Look up an address record for a server.
 *
 * Each address produced by the cell's VL cursor is asked in turn for the
 * addresses of the fileserver identified by @uuid, using the YFS endpoint
 * query where the address is marked as YFS-capable and the AFS query
 * otherwise.  Unreachable/refusing addresses are skipped; any other failure
 * ends the lookup.
 *
 * Returns the address list from the first successful query, or an error
 * pointer built from the result of afs_end_cursor().
 */
static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
						 struct key *key, const uuid_t *uuid)
{
	struct afs_addr_cursor ac;
	struct afs_addr_list *alist;
	int ret;

	ret = afs_set_vl_cursor(&ac, cell);
	if (ret < 0)
		return ERR_PTR(ret);

	while (afs_iterate_addresses(&ac)) {
		alist = test_bit(ac.index, &ac.alist->yfs) ?
			afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid) :
			afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

		switch (ac.error) {
		case 0:
			afs_end_cursor(&ac);
			return alist;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			/* Couldn't reach this one; try the next address. */
			continue;

		case -ECONNABORTED:
			ac.error = afs_abort_to_error(ac.abort_code);
			break;

		case -ENOMEM:
		case -ENONET:
			break;

		default:
			ac.error = -EIO;
			break;
		}

		/* Fatal error: give up on the remaining addresses. */
		break;
	}

	return ERR_PTR(afs_end_cursor(&ac));
}
296
297 /*
298  * Get or create a fileserver record.
299  */
300 struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
301                                      const uuid_t *uuid)
302 {
303         struct afs_addr_list *alist;
304         struct afs_server *server, *candidate;
305
306         _enter("%p,%pU", cell->net, uuid);
307
308         server = afs_find_server_by_uuid(cell->net, uuid);
309         if (server)
310                 return server;
311
312         alist = afs_vl_lookup_addrs(cell, key, uuid);
313         if (IS_ERR(alist))
314                 return ERR_CAST(alist);
315
316         candidate = afs_alloc_server(cell->net, uuid, alist);
317         if (!candidate) {
318                 afs_put_addrlist(alist);
319                 return ERR_PTR(-ENOMEM);
320         }
321
322         server = afs_install_server(cell->net, candidate);
323         if (server != candidate) {
324                 afs_put_addrlist(alist);
325                 kfree(candidate);
326         }
327
328         _leave(" = %p{%d}", server, atomic_read(&server->usage));
329         return server;
330 }
331
332 /*
333  * Set the server timer to fire after a given delay, assuming it's not already
334  * set for an earlier time.
335  */
336 static void afs_set_server_timer(struct afs_net *net, time64_t delay)
337 {
338         if (net->live) {
339                 afs_inc_servers_outstanding(net);
340                 if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
341                         afs_dec_servers_outstanding(net);
342         }
343 }
344
345 /*
346  * Server management timer.  We have an increment on fs_outstanding that we
347  * need to pass along to the work item.
348  */
349 void afs_servers_timer(struct timer_list *timer)
350 {
351         struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
352
353         _enter("");
354         if (!queue_work(afs_wq, &net->fs_manager))
355                 afs_dec_servers_outstanding(net);
356 }
357
358 /*
359  * Release a reference on a server record.
360  */
361 void afs_put_server(struct afs_net *net, struct afs_server *server)
362 {
363         unsigned int usage;
364
365         if (!server)
366                 return;
367
368         server->put_time = ktime_get_real_seconds();
369
370         usage = atomic_dec_return(&server->usage);
371
372         _enter("{%u}", usage);
373
374         if (likely(usage > 0))
375                 return;
376
377         afs_set_server_timer(net, afs_server_gc_delay);
378 }
379
380 static void afs_server_rcu(struct rcu_head *rcu)
381 {
382         struct afs_server *server = container_of(rcu, struct afs_server, rcu);
383
384         afs_put_addrlist(server->addresses);
385         kfree(server);
386 }
387
388 /*
389  * destroy a dead server
390  */
391 static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
392 {
393         struct afs_addr_list *alist = server->addresses;
394         struct afs_addr_cursor ac = {
395                 .alist  = alist,
396                 .addr   = &alist->addrs[0],
397                 .start  = alist->index,
398                 .index  = alist->index,
399                 .error  = 0,
400         };
401         _enter("%p", server);
402
403         afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
404         call_rcu(&server->rcu, afs_server_rcu);
405         afs_dec_servers_outstanding(net);
406 }
407
408 /*
409  * Garbage collect any expired servers.
410  */
411 static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
412 {
413         struct afs_server *server;
414         bool deleted;
415         int usage;
416
417         while ((server = gc_list)) {
418                 gc_list = server->gc_next;
419
420                 write_seqlock(&net->fs_lock);
421                 usage = 1;
422                 deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
423                 if (deleted) {
424                         rb_erase(&server->uuid_rb, &net->fs_servers);
425                         hlist_del_rcu(&server->proc_link);
426                 }
427                 write_sequnlock(&net->fs_lock);
428
429                 if (deleted)
430                         afs_destroy_server(net, server);
431         }
432 }
433
434 /*
435  * Manage the records of servers known to be within a network namespace.  This
436  * includes garbage collecting unused servers.
437  *
438  * Note also that we were given an increment on net->servers_outstanding by
439  * whoever queued us that we need to deal with before returning.
440  */
441 void afs_manage_servers(struct work_struct *work)
442 {
443         struct afs_net *net = container_of(work, struct afs_net, fs_manager);
444         struct afs_server *gc_list = NULL;
445         struct rb_node *cursor;
446         time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
447         bool purging = !net->live;
448
449         _enter("");
450
451         /* Trawl the server list looking for servers that have expired from
452          * lack of use.
453          */
454         read_seqlock_excl(&net->fs_lock);
455
456         for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
457                 struct afs_server *server =
458                         rb_entry(cursor, struct afs_server, uuid_rb);
459                 int usage = atomic_read(&server->usage);
460
461                 _debug("manage %pU %u", &server->uuid, usage);
462
463                 ASSERTCMP(usage, >=, 1);
464                 ASSERTIFCMP(purging, usage, ==, 1);
465
466                 if (usage == 1) {
467                         time64_t expire_at = server->put_time;
468
469                         if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
470                             !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
471                                 expire_at += afs_server_gc_delay;
472                         if (purging || expire_at <= now) {
473                                 server->gc_next = gc_list;
474                                 gc_list = server;
475                         } else if (expire_at < next_manage) {
476                                 next_manage = expire_at;
477                         }
478                 }
479         }
480
481         read_sequnlock_excl(&net->fs_lock);
482
483         /* Update the timer on the way out.  We have to pass an increment on
484          * servers_outstanding in the namespace that we are in to the timer or
485          * the work scheduler.
486          */
487         if (!purging && next_manage < TIME64_MAX) {
488                 now = ktime_get_real_seconds();
489
490                 if (next_manage - now <= 0) {
491                         if (queue_work(afs_wq, &net->fs_manager))
492                                 afs_inc_servers_outstanding(net);
493                 } else {
494                         afs_set_server_timer(net, next_manage - now);
495                 }
496         }
497
498         afs_gc_servers(net, gc_list);
499
500         afs_dec_servers_outstanding(net);
501         _leave(" [%d]", atomic_read(&net->servers_outstanding));
502 }
503
504 static void afs_queue_server_manager(struct afs_net *net)
505 {
506         afs_inc_servers_outstanding(net);
507         if (!queue_work(afs_wq, &net->fs_manager))
508                 afs_dec_servers_outstanding(net);
509 }
510
511 /*
512  * Purge list of servers.
513  */
514 void afs_purge_servers(struct afs_net *net)
515 {
516         _enter("");
517
518         if (del_timer_sync(&net->fs_timer))
519                 atomic_dec(&net->servers_outstanding);
520
521         afs_queue_server_manager(net);
522
523         _debug("wait");
524         wait_var_event(&net->servers_outstanding,
525                        !atomic_read(&net->servers_outstanding));
526         _leave("");
527 }
528
/*
 * Probe a fileserver to find its capabilities.
 *
 * The address cursor in @fc is reset to start from the address list's current
 * index and each address is then tried in turn.  Addresses that are
 * unreachable, refuse the connection or time out are skipped; any other
 * failure ends the probe.
 *
 * Returns true (and marks the server as probed) on the first success, false
 * otherwise.
 *
 * TODO: Try service upgrade.
 */
static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
{
	struct afs_addr_cursor *ac = &fc->ac;

	_enter("");

	ac->addr = NULL;
	ac->start = READ_ONCE(ac->alist->index);
	ac->index = ac->start;
	ac->error = 0;
	ac->begun = false;

	while (afs_iterate_addresses(ac)) {
		afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
					ac, fc->key);
		switch (ac->error) {
		case 0:
			afs_end_cursor(ac);
			set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
			return true;

		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
		case -ETIMEDOUT:
		case -ETIME:
			/* No answer from this one; try the next address. */
			continue;

		case -ECONNABORTED:
			ac->error = afs_abort_to_error(ac->abort_code);
			break;

		case -ENOMEM:
		case -ENONET:
			break;

		default:
			ac->error = -EIO;
			break;
		}

		/* Fatal error: give up on the remaining addresses. */
		break;
	}

	afs_end_cursor(ac);
	return false;
}
574
575 /*
576  * If we haven't already, try probing the fileserver to get its capabilities.
577  * We try not to instigate parallel probes, but it's possible that the parallel
578  * probes will fail due to authentication failure when ours would succeed.
579  *
580  * TODO: Try sending an anonymous probe if an authenticated probe fails.
581  */
582 bool afs_probe_fileserver(struct afs_fs_cursor *fc)
583 {
584         bool success;
585         int ret, retries = 0;
586
587         _enter("");
588
589 retry:
590         if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
591                 _leave(" = t");
592                 return true;
593         }
594
595         if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
596                 success = afs_do_probe_fileserver(fc);
597                 clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
598                 wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
599                 _leave(" = t");
600                 return success;
601         }
602
603         _debug("wait");
604         ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
605                           TASK_INTERRUPTIBLE);
606         if (ret == -ERESTARTSYS) {
607                 fc->ac.error = ret;
608                 _leave(" = f [%d]", ret);
609                 return false;
610         }
611
612         retries++;
613         if (retries == 4) {
614                 fc->ac.error = -ESTALE;
615                 _leave(" = f [stale]");
616                 return false;
617         }
618         _debug("retry");
619         goto retry;
620 }
621
622 /*
623  * Get an update for a server's address list.
624  */
625 static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
626 {
627         struct afs_addr_list *alist, *discard;
628
629         _enter("");
630
631         alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
632                                     &server->uuid);
633         if (IS_ERR(alist)) {
634                 fc->ac.error = PTR_ERR(alist);
635                 _leave(" = f [%d]", fc->ac.error);
636                 return false;
637         }
638
639         discard = alist;
640         if (server->addr_version != alist->version) {
641                 write_lock(&server->fs_lock);
642                 discard = rcu_dereference_protected(server->addresses,
643                                                     lockdep_is_held(&server->fs_lock));
644                 rcu_assign_pointer(server->addresses, alist);
645                 server->addr_version = alist->version;
646                 write_unlock(&server->fs_lock);
647         }
648
649         server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
650         afs_put_addrlist(discard);
651         _leave(" = t");
652         return true;
653 }
654
655 /*
656  * See if a server's address list needs updating.
657  */
658 bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
659 {
660         time64_t now = ktime_get_real_seconds();
661         long diff;
662         bool success;
663         int ret, retries = 0;
664
665         _enter("");
666
667         ASSERT(server);
668
669 retry:
670         diff = READ_ONCE(server->update_at) - now;
671         if (diff > 0) {
672                 _leave(" = t [not now %ld]", diff);
673                 return true;
674         }
675
676         if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
677                 success = afs_update_server_record(fc, server);
678                 clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
679                 wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
680                 _leave(" = %d", success);
681                 return success;
682         }
683
684         ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
685                           TASK_INTERRUPTIBLE);
686         if (ret == -ERESTARTSYS) {
687                 fc->ac.error = ret;
688                 _leave(" = f [intr]");
689                 return false;
690         }
691
692         retries++;
693         if (retries == 4) {
694                 _leave(" = f [stale]");
695                 ret = -ESTALE;
696                 return false;
697         }
698         goto retry;
699 }