Merge tag 'for-6.12/block-20240925' of git://git.kernel.dk/linux
[linux-2.6-block.git] / fs / afs / volume.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
ec26815a 2/* AFS volume management
1da177e4 3 *
08e0e7c8 4 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
1da177e4 5 * Written by David Howells (dhowells@redhat.com)
1da177e4
LT
6 */
7
8#include <linux/kernel.h>
1da177e4 9#include <linux/slab.h>
1da177e4
LT
10#include "internal.h"
11
102d8410 12static unsigned __read_mostly afs_volume_record_life = 60 * 60;
d2ddc776 13
445f9b69
DH
14static void afs_destroy_volume(struct work_struct *work);
15
20325960
DH
16/*
17 * Insert a volume into a cell. If there's an existing volume record, that is
18 * returned instead with a ref held.
19 */
20static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell,
21 struct afs_volume *volume)
22{
23 struct afs_volume *p;
24 struct rb_node *parent = NULL, **pp;
25
26 write_seqlock(&cell->volume_lock);
27
28 pp = &cell->volumes.rb_node;
29 while (*pp) {
30 parent = *pp;
31 p = rb_entry(parent, struct afs_volume, cell_node);
32 if (p->vid < volume->vid) {
33 pp = &(*pp)->rb_left;
34 } else if (p->vid > volume->vid) {
35 pp = &(*pp)->rb_right;
36 } else {
9a6b294a
DH
37 if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) {
38 volume = p;
39 goto found;
40 }
41
42 set_bit(AFS_VOLUME_RM_TREE, &volume->flags);
43 rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes);
20325960
DH
44 }
45 }
46
47 rb_link_node_rcu(&volume->cell_node, parent, pp);
48 rb_insert_color(&volume->cell_node, &cell->volumes);
49 hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes);
50
51found:
52 write_sequnlock(&cell->volume_lock);
53 return volume;
54
55}
56
57static void afs_remove_volume_from_cell(struct afs_volume *volume)
58{
59 struct afs_cell *cell = volume->cell;
60
61 if (!hlist_unhashed(&volume->proc_link)) {
c56f9ec8 62 trace_afs_volume(volume->vid, refcount_read(&cell->ref),
20325960
DH
63 afs_volume_trace_remove);
64 write_seqlock(&cell->volume_lock);
65 hlist_del_rcu(&volume->proc_link);
9a6b294a
DH
66 if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags))
67 rb_erase(&volume->cell_node, &cell->volumes);
20325960
DH
68 write_sequnlock(&cell->volume_lock);
69 }
70}
71
1da177e4 72/*
d2ddc776
DH
73 * Allocate a volume record and load it up from a vldb record.
74 */
13fcc683 75static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params,
d2ddc776 76 struct afs_vldb_entry *vldb,
ca0e79a4 77 struct afs_server_list **_slist)
d2ddc776
DH
78{
79 struct afs_server_list *slist;
d2ddc776 80 struct afs_volume *volume;
ca0e79a4 81 int ret = -ENOMEM, i;
d2ddc776
DH
82
83 volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL);
84 if (!volume)
85 goto error_0;
86
87 volume->vid = vldb->vid[params->type];
88 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
dca54a7b 89 volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol);
d2ddc776
DH
90 volume->type = params->type;
91 volume->type_force = params->force;
92 volume->name_len = vldb->name_len;
16069e13
DH
93 volume->creation_time = TIME64_MIN;
94 volume->update_time = TIME64_MIN;
d2ddc776 95
c56f9ec8 96 refcount_set(&volume->ref, 1);
20325960 97 INIT_HLIST_NODE(&volume->proc_link);
445f9b69 98 INIT_WORK(&volume->destructor, afs_destroy_volume);
d2ddc776 99 rwlock_init(&volume->servers_lock);
16069e13 100 mutex_init(&volume->volsync_lock);
453924de 101 mutex_init(&volume->cb_check_lock);
90fa9b64 102 rwlock_init(&volume->cb_v_break_lock);
453924de
DH
103 INIT_LIST_HEAD(&volume->open_mmaps);
104 init_rwsem(&volume->open_mmaps_lock);
d2ddc776
DH
105 memcpy(volume->name, vldb->name, vldb->name_len + 1);
106
ca0e79a4
DH
107 for (i = 0; i < AFS_MAXTYPES; i++)
108 volume->vids[i] = vldb->vid[i];
109
110 slist = afs_alloc_server_list(volume, params->key, vldb);
d2ddc776
DH
111 if (IS_ERR(slist)) {
112 ret = PTR_ERR(slist);
113 goto error_1;
114 }
115
ca0e79a4 116 *_slist = slist;
8a070a96 117 rcu_assign_pointer(volume->servers, slist);
cca37d45 118 trace_afs_volume(volume->vid, 1, afs_volume_trace_alloc);
d2ddc776
DH
119 return volume;
120
d2ddc776 121error_1:
dca54a7b 122 afs_put_cell(volume->cell, afs_cell_trace_put_vol);
d2ddc776
DH
123 kfree(volume);
124error_0:
125 return ERR_PTR(ret);
126}
127
20325960
DH
128/*
129 * Look up or allocate a volume record.
130 */
131static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params,
ca0e79a4 132 struct afs_vldb_entry *vldb)
20325960 133{
ca0e79a4 134 struct afs_server_list *slist;
20325960
DH
135 struct afs_volume *candidate, *volume;
136
ca0e79a4 137 candidate = afs_alloc_volume(params, vldb, &slist);
20325960
DH
138 if (IS_ERR(candidate))
139 return candidate;
140
141 volume = afs_insert_volume_into_cell(params->cell, candidate);
ca0e79a4
DH
142 if (volume == candidate)
143 afs_attach_volume_to_servers(volume, slist);
144 else
445f9b69 145 afs_put_volume(candidate, afs_volume_trace_put_cell_dup);
20325960
DH
146 return volume;
147}
148
d2ddc776
DH
149/*
150 * Look up a VLDB record for a volume.
151 */
152static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell,
153 struct key *key,
154 const char *volname,
155 size_t volnamesz)
156{
0a5143f2
DH
157 struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ);
158 struct afs_vl_cursor vc;
d2ddc776
DH
159 int ret;
160
0a5143f2
DH
161 if (!afs_begin_vlserver_operation(&vc, cell, key))
162 return ERR_PTR(-ERESTARTSYS);
d2ddc776 163
0a5143f2 164 while (afs_select_vlserver(&vc)) {
0a5143f2 165 vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz);
d2ddc776
DH
166 }
167
0a5143f2
DH
168 ret = afs_end_vlserver_operation(&vc);
169 return ret < 0 ? ERR_PTR(ret) : vldb;
d2ddc776
DH
170}
171
172/*
173 * Look up a volume in the VL server and create a candidate volume record for
174 * it.
175 *
176 * The volume name can be one of the following:
1da177e4
LT
177 * "%[cell:]volume[.]" R/W volume
178 * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0),
179 * or R/W (rwparent=1) volume
180 * "%[cell:]volume.readonly" R/O volume
181 * "#[cell:]volume.readonly" R/O volume
182 * "%[cell:]volume.backup" Backup volume
183 * "#[cell:]volume.backup" Backup volume
184 *
185 * The cell name is optional, and defaults to the current cell.
186 *
187 * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin
188 * Guide
189 * - Rule 1: Explicit type suffix forces access of that type or nothing
190 * (no suffix, then use Rule 2 & 3)
191 * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W
192 * if not available
193 * - Rule 3: If parent volume is R/W, then only mount R/W volume unless
194 * explicitly told otherwise
195 */
13fcc683 196struct afs_volume *afs_create_volume(struct afs_fs_context *params)
1da177e4 197{
d2ddc776
DH
198 struct afs_vldb_entry *vldb;
199 struct afs_volume *volume;
200 unsigned long type_mask = 1UL << params->type;
1da177e4 201
d2ddc776
DH
202 vldb = afs_vl_lookup_vldb(params->cell, params->key,
203 params->volname, params->volnamesz);
204 if (IS_ERR(vldb))
205 return ERR_CAST(vldb);
1da177e4 206
d2ddc776
DH
207 if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) {
208 volume = ERR_PTR(vldb->error);
209 goto error;
210 }
1da177e4 211
d2ddc776
DH
212 /* Make the final decision on the type we want */
213 volume = ERR_PTR(-ENOMEDIUM);
00d3b7a4 214 if (params->force) {
d2ddc776 215 if (!(vldb->flags & type_mask))
1da177e4 216 goto error;
d2ddc776 217 } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) {
00d3b7a4 218 params->type = AFSVL_ROVOL;
d2ddc776 219 } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) {
00d3b7a4 220 params->type = AFSVL_RWVOL;
ec26815a 221 } else {
1da177e4
LT
222 goto error;
223 }
224
ca0e79a4 225 volume = afs_lookup_volume(params, vldb);
1da177e4 226
d2ddc776
DH
227error:
228 kfree(vldb);
229 return volume;
230}
1da177e4 231
d2ddc776
DH
232/*
233 * Destroy a volume record
234 */
445f9b69 235static void afs_destroy_volume(struct work_struct *work)
d2ddc776 236{
445f9b69 237 struct afs_volume *volume = container_of(work, struct afs_volume, destructor);
ca0e79a4
DH
238 struct afs_server_list *slist = rcu_access_pointer(volume->servers);
239
d2ddc776 240 _enter("%p", volume);
1da177e4 241
d2ddc776
DH
242#ifdef CONFIG_AFS_FSCACHE
243 ASSERTCMP(volume->cache, ==, NULL);
244#endif
1da177e4 245
ca0e79a4 246 afs_detach_volume_from_servers(volume, slist);
20325960 247 afs_remove_volume_from_cell(volume);
445f9b69 248 afs_put_serverlist(volume->cell->net, slist);
dca54a7b 249 afs_put_cell(volume->cell, afs_cell_trace_put_vol);
c56f9ec8 250 trace_afs_volume(volume->vid, refcount_read(&volume->ref),
cca37d45 251 afs_volume_trace_free);
20325960 252 kfree_rcu(volume, rcu);
1da177e4 253
d2ddc776
DH
254 _leave(" [destroyed]");
255}
256
9a6b294a
DH
257/*
258 * Try to get a reference on a volume record.
259 */
260bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason)
261{
262 int r;
263
264 if (__refcount_inc_not_zero(&volume->ref, &r)) {
265 trace_afs_volume(volume->vid, r + 1, reason);
266 return true;
267 }
268 return false;
269}
270
d2ddc776 271/*
cca37d45 272 * Get a reference on a volume record.
d2ddc776 273 */
cca37d45
DH
274struct afs_volume *afs_get_volume(struct afs_volume *volume,
275 enum afs_volume_trace reason)
d2ddc776
DH
276{
277 if (volume) {
c56f9ec8
DH
278 int r;
279
280 __refcount_inc(&volume->ref, &r);
281 trace_afs_volume(volume->vid, r + 1, reason);
cca37d45
DH
282 }
283 return volume;
284}
285
d2ddc776 286
cca37d45
DH
287/*
288 * Drop a reference on a volume record.
289 */
445f9b69 290void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason)
cca37d45
DH
291{
292 if (volume) {
293 afs_volid_t vid = volume->vid;
c56f9ec8
DH
294 bool zero;
295 int r;
296
297 zero = __refcount_dec_and_test(&volume->ref, &r);
298 trace_afs_volume(vid, r - 1, reason);
299 if (zero)
445f9b69 300 schedule_work(&volume->destructor);
1da177e4 301 }
d2ddc776 302}
1da177e4 303
d2ddc776
DH
/*
 * Activate a volume.
 *
 * With CONFIG_AFS_FSCACHE enabled, this acquires an fscache volume cookie
 * keyed on "afs,<cell name>,<volume ID in hex>" and stores it in
 * volume->cache.  If the key is reported as busy, an error is logged and the
 * volume carries on with no cache cookie.  Without CONFIG_AFS_FSCACHE this
 * does nothing.
 *
 * Returns 0 on success, -ENOMEM if the key can't be allocated or the error
 * from fscache_acquire_volume() (other than -EBUSY).
 */
int afs_activate_volume(struct afs_volume *volume)
{
#ifdef CONFIG_AFS_FSCACHE
	struct fscache_volume *vcookie;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "afs,%s,%llx",
			 volume->cell->name, volume->vid);
	if (!name)
		return -ENOMEM;

	vcookie = fscache_acquire_volume(name, NULL, NULL, 0);
	if (!IS_ERR(vcookie)) {
		volume->cache = vcookie;
	} else if (vcookie == ERR_PTR(-EBUSY)) {
		/* Not fatal: just run this volume uncached. */
		pr_err("AFS: Cache volume key already in use (%s)\n", name);
		volume->cache = NULL;
	} else {
		ret = PTR_ERR(vcookie);
	}

	kfree(name);
	return ret;
#else
	return 0;
#endif
}
1da177e4 332
d2ddc776
DH
333/*
334 * Deactivate a volume.
335 */
336void afs_deactivate_volume(struct afs_volume *volume)
337{
338 _enter("%s", volume->name);
1da177e4 339
d2ddc776 340#ifdef CONFIG_AFS_FSCACHE
523d27cd 341 fscache_relinquish_volume(volume->cache, NULL,
d2ddc776
DH
342 test_bit(AFS_VOLUME_DELETED, &volume->flags));
343 volume->cache = NULL;
344#endif
1da177e4 345
d2ddc776 346 _leave("");
ec26815a 347}
1da177e4 348
1da177e4 349/*
d2ddc776 350 * Query the VL service to update the volume status.
1da177e4 351 */
d2ddc776 352static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
1da177e4 353{
d2ddc776
DH
354 struct afs_server_list *new, *old, *discard;
355 struct afs_vldb_entry *vldb;
6ea38e2a 356 char idbuf[24];
d2ddc776 357 int ret, idsz;
1da177e4 358
d2ddc776 359 _enter("");
1da177e4 360
d2ddc776
DH
361 /* We look up an ID by passing it as a decimal string in the
362 * operation's name parameter.
363 */
6ea38e2a 364 idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
1da177e4 365
d2ddc776
DH
366 vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
367 if (IS_ERR(vldb)) {
368 ret = PTR_ERR(vldb);
369 goto error;
370 }
1da177e4 371
d2ddc776
DH
372 /* See if the volume got renamed. */
373 if (vldb->name_len != volume->name_len ||
374 memcmp(vldb->name, volume->name, vldb->name_len) != 0) {
375 /* TODO: Use RCU'd string. */
376 memcpy(volume->name, vldb->name, AFS_MAXVOLNAME);
377 volume->name_len = vldb->name_len;
378 }
379
380 /* See if the volume's server list got updated. */
ca0e79a4 381 new = afs_alloc_server_list(volume, key, vldb);
d2ddc776
DH
382 if (IS_ERR(new)) {
383 ret = PTR_ERR(new);
384 goto error_vldb;
385 }
1da177e4 386
d2ddc776 387 write_lock(&volume->servers_lock);
1da177e4 388
d2ddc776 389 discard = new;
8a070a96
DH
390 old = rcu_dereference_protected(volume->servers,
391 lockdep_is_held(&volume->servers_lock));
d2ddc776
DH
392 if (afs_annotate_server_list(new, old)) {
393 new->seq = volume->servers_seq + 1;
8a070a96 394 rcu_assign_pointer(volume->servers, new);
d2ddc776
DH
395 smp_wmb();
396 volume->servers_seq++;
397 discard = old;
1da177e4
LT
398 }
399
495f2ae9
DH
400 /* Check more often if replication is ongoing. */
401 if (new->ro_replicating)
402 volume->update_at = ktime_get_real_seconds() + 10 * 60;
403 else
404 volume->update_at = ktime_get_real_seconds() + afs_volume_record_life;
d2ddc776 405 write_unlock(&volume->servers_lock);
1da177e4 406
ca0e79a4
DH
407 if (discard == old)
408 afs_reattach_volume_to_servers(volume, new, old);
d2ddc776 409 afs_put_serverlist(volume->cell->net, discard);
ca0e79a4 410 ret = 0;
d2ddc776
DH
411error_vldb:
412 kfree(vldb);
413error:
414 _leave(" = %d", ret);
415 return ret;
416}
1da177e4 417
d2ddc776
DH
418/*
419 * Make sure the volume record is up to date.
420 */
e49c7b2f 421int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op)
d2ddc776 422{
d2ddc776 423 int ret, retries = 0;
1da177e4 424
d2ddc776
DH
425 _enter("");
426
d2ddc776 427retry:
f6cbb368
DH
428 if (test_bit(AFS_VOLUME_WAIT, &volume->flags))
429 goto wait;
430 if (volume->update_at <= ktime_get_real_seconds() ||
431 test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags))
432 goto update;
433 _leave(" = 0");
434 return 0;
435
436update:
d2ddc776 437 if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) {
f6cbb368 438 clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
e49c7b2f 439 ret = afs_update_volume_status(volume, op->key);
f6cbb368
DH
440 if (ret < 0)
441 set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags);
d2ddc776
DH
442 clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags);
443 clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags);
444 wake_up_bit(&volume->flags, AFS_VOLUME_WAIT);
445 _leave(" = %d", ret);
446 return ret;
447 }
1da177e4 448
f6cbb368 449wait:
d2ddc776
DH
450 if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) {
451 _leave(" = 0 [no wait]");
452 return 0;
453 }
454
c4bfda16 455 ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT,
e49c7b2f
DH
456 (op->flags & AFS_OPERATION_UNINTR) ?
457 TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE);
d2ddc776
DH
458 if (ret == -ERESTARTSYS) {
459 _leave(" = %d", ret);
460 return ret;
461 }
462
463 retries++;
464 if (retries == 4) {
465 _leave(" = -ESTALE");
466 return -ESTALE;
467 }
468 goto retry;
ec26815a 469}