Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
ec26815a | 2 | /* AFS volume management |
1da177e4 | 3 | * |
08e0e7c8 | 4 | * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. |
1da177e4 | 5 | * Written by David Howells (dhowells@redhat.com) |
1da177e4 LT |
6 | */ |
7 | ||
8 | #include <linux/kernel.h> | |
1da177e4 | 9 | #include <linux/slab.h> |
1da177e4 LT |
10 | #include "internal.h" |
11 | ||
102d8410 | 12 | static unsigned __read_mostly afs_volume_record_life = 60 * 60; |
4f67bcf6 | 13 | static atomic_t afs_volume_debug_id; |
d2ddc776 | 14 | |
445f9b69 DH |
15 | static void afs_destroy_volume(struct work_struct *work); |
16 | ||
20325960 DH |
17 | /* |
18 | * Insert a volume into a cell. If there's an existing volume record, that is | |
19 | * returned instead with a ref held. | |
20 | */ | |
21 | static struct afs_volume *afs_insert_volume_into_cell(struct afs_cell *cell, | |
22 | struct afs_volume *volume) | |
23 | { | |
24 | struct afs_volume *p; | |
25 | struct rb_node *parent = NULL, **pp; | |
26 | ||
27 | write_seqlock(&cell->volume_lock); | |
28 | ||
29 | pp = &cell->volumes.rb_node; | |
30 | while (*pp) { | |
31 | parent = *pp; | |
32 | p = rb_entry(parent, struct afs_volume, cell_node); | |
33 | if (p->vid < volume->vid) { | |
34 | pp = &(*pp)->rb_left; | |
35 | } else if (p->vid > volume->vid) { | |
36 | pp = &(*pp)->rb_right; | |
37 | } else { | |
9a6b294a DH |
38 | if (afs_try_get_volume(p, afs_volume_trace_get_cell_insert)) { |
39 | volume = p; | |
40 | goto found; | |
41 | } | |
42 | ||
43 | set_bit(AFS_VOLUME_RM_TREE, &volume->flags); | |
44 | rb_replace_node_rcu(&p->cell_node, &volume->cell_node, &cell->volumes); | |
20325960 DH |
45 | } |
46 | } | |
47 | ||
48 | rb_link_node_rcu(&volume->cell_node, parent, pp); | |
49 | rb_insert_color(&volume->cell_node, &cell->volumes); | |
50 | hlist_add_head_rcu(&volume->proc_link, &cell->proc_volumes); | |
51 | ||
52 | found: | |
53 | write_sequnlock(&cell->volume_lock); | |
54 | return volume; | |
55 | ||
56 | } | |
57 | ||
58 | static void afs_remove_volume_from_cell(struct afs_volume *volume) | |
59 | { | |
60 | struct afs_cell *cell = volume->cell; | |
61 | ||
62 | if (!hlist_unhashed(&volume->proc_link)) { | |
4f67bcf6 | 63 | trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref), |
20325960 DH |
64 | afs_volume_trace_remove); |
65 | write_seqlock(&cell->volume_lock); | |
66 | hlist_del_rcu(&volume->proc_link); | |
9a6b294a DH |
67 | if (!test_and_set_bit(AFS_VOLUME_RM_TREE, &volume->flags)) |
68 | rb_erase(&volume->cell_node, &cell->volumes); | |
20325960 DH |
69 | write_sequnlock(&cell->volume_lock); |
70 | } | |
71 | } | |
72 | ||
1da177e4 | 73 | /* |
d2ddc776 DH |
74 | * Allocate a volume record and load it up from a vldb record. |
75 | */ | |
13fcc683 | 76 | static struct afs_volume *afs_alloc_volume(struct afs_fs_context *params, |
d2ddc776 | 77 | struct afs_vldb_entry *vldb, |
ca0e79a4 | 78 | struct afs_server_list **_slist) |
d2ddc776 DH |
79 | { |
80 | struct afs_server_list *slist; | |
d2ddc776 | 81 | struct afs_volume *volume; |
ca0e79a4 | 82 | int ret = -ENOMEM, i; |
d2ddc776 DH |
83 | |
84 | volume = kzalloc(sizeof(struct afs_volume), GFP_KERNEL); | |
85 | if (!volume) | |
86 | goto error_0; | |
87 | ||
4f67bcf6 | 88 | volume->debug_id = atomic_inc_return(&afs_volume_debug_id); |
d2ddc776 DH |
89 | volume->vid = vldb->vid[params->type]; |
90 | volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; | |
dca54a7b | 91 | volume->cell = afs_get_cell(params->cell, afs_cell_trace_get_vol); |
d2ddc776 DH |
92 | volume->type = params->type; |
93 | volume->type_force = params->force; | |
94 | volume->name_len = vldb->name_len; | |
16069e13 DH |
95 | volume->creation_time = TIME64_MIN; |
96 | volume->update_time = TIME64_MIN; | |
d2ddc776 | 97 | |
c56f9ec8 | 98 | refcount_set(&volume->ref, 1); |
20325960 | 99 | INIT_HLIST_NODE(&volume->proc_link); |
445f9b69 | 100 | INIT_WORK(&volume->destructor, afs_destroy_volume); |
d2ddc776 | 101 | rwlock_init(&volume->servers_lock); |
16069e13 | 102 | mutex_init(&volume->volsync_lock); |
453924de | 103 | mutex_init(&volume->cb_check_lock); |
90fa9b64 | 104 | rwlock_init(&volume->cb_v_break_lock); |
453924de DH |
105 | INIT_LIST_HEAD(&volume->open_mmaps); |
106 | init_rwsem(&volume->open_mmaps_lock); | |
d2ddc776 DH |
107 | memcpy(volume->name, vldb->name, vldb->name_len + 1); |
108 | ||
ca0e79a4 DH |
109 | for (i = 0; i < AFS_MAXTYPES; i++) |
110 | volume->vids[i] = vldb->vid[i]; | |
111 | ||
112 | slist = afs_alloc_server_list(volume, params->key, vldb); | |
d2ddc776 DH |
113 | if (IS_ERR(slist)) { |
114 | ret = PTR_ERR(slist); | |
115 | goto error_1; | |
116 | } | |
117 | ||
ca0e79a4 | 118 | *_slist = slist; |
8a070a96 | 119 | rcu_assign_pointer(volume->servers, slist); |
4f67bcf6 | 120 | trace_afs_volume(volume->debug_id, volume->vid, 1, afs_volume_trace_alloc); |
d2ddc776 DH |
121 | return volume; |
122 | ||
d2ddc776 | 123 | error_1: |
dca54a7b | 124 | afs_put_cell(volume->cell, afs_cell_trace_put_vol); |
d2ddc776 DH |
125 | kfree(volume); |
126 | error_0: | |
127 | return ERR_PTR(ret); | |
128 | } | |
129 | ||
20325960 DH |
130 | /* |
131 | * Look up or allocate a volume record. | |
132 | */ | |
133 | static struct afs_volume *afs_lookup_volume(struct afs_fs_context *params, | |
ca0e79a4 | 134 | struct afs_vldb_entry *vldb) |
20325960 | 135 | { |
ca0e79a4 | 136 | struct afs_server_list *slist; |
20325960 DH |
137 | struct afs_volume *candidate, *volume; |
138 | ||
ca0e79a4 | 139 | candidate = afs_alloc_volume(params, vldb, &slist); |
20325960 DH |
140 | if (IS_ERR(candidate)) |
141 | return candidate; | |
142 | ||
143 | volume = afs_insert_volume_into_cell(params->cell, candidate); | |
ca0e79a4 DH |
144 | if (volume == candidate) |
145 | afs_attach_volume_to_servers(volume, slist); | |
146 | else | |
445f9b69 | 147 | afs_put_volume(candidate, afs_volume_trace_put_cell_dup); |
20325960 DH |
148 | return volume; |
149 | } | |
150 | ||
d2ddc776 DH |
151 | /* |
152 | * Look up a VLDB record for a volume. | |
153 | */ | |
154 | static struct afs_vldb_entry *afs_vl_lookup_vldb(struct afs_cell *cell, | |
155 | struct key *key, | |
156 | const char *volname, | |
157 | size_t volnamesz) | |
158 | { | |
0a5143f2 DH |
159 | struct afs_vldb_entry *vldb = ERR_PTR(-EDESTADDRREQ); |
160 | struct afs_vl_cursor vc; | |
d2ddc776 DH |
161 | int ret; |
162 | ||
0a5143f2 DH |
163 | if (!afs_begin_vlserver_operation(&vc, cell, key)) |
164 | return ERR_PTR(-ERESTARTSYS); | |
d2ddc776 | 165 | |
0a5143f2 | 166 | while (afs_select_vlserver(&vc)) { |
0a5143f2 | 167 | vldb = afs_vl_get_entry_by_name_u(&vc, volname, volnamesz); |
d2ddc776 DH |
168 | } |
169 | ||
0a5143f2 DH |
170 | ret = afs_end_vlserver_operation(&vc); |
171 | return ret < 0 ? ERR_PTR(ret) : vldb; | |
d2ddc776 DH |
172 | } |
173 | ||
174 | /* | |
175 | * Look up a volume in the VL server and create a candidate volume record for | |
176 | * it. | |
177 | * | |
178 | * The volume name can be one of the following: | |
1da177e4 LT |
179 | * "%[cell:]volume[.]" R/W volume |
180 | * "#[cell:]volume[.]" R/O or R/W volume (rwparent=0), | |
181 | * or R/W (rwparent=1) volume | |
182 | * "%[cell:]volume.readonly" R/O volume | |
183 | * "#[cell:]volume.readonly" R/O volume | |
184 | * "%[cell:]volume.backup" Backup volume | |
185 | * "#[cell:]volume.backup" Backup volume | |
186 | * | |
187 | * The cell name is optional, and defaults to the current cell. | |
188 | * | |
189 | * See "The Rules of Mount Point Traversal" in Chapter 5 of the AFS SysAdmin | |
190 | * Guide | |
191 | * - Rule 1: Explicit type suffix forces access of that type or nothing | |
192 | * (no suffix, then use Rule 2 & 3) | |
193 | * - Rule 2: If parent volume is R/O, then mount R/O volume by preference, R/W | |
194 | * if not available | |
195 | * - Rule 3: If parent volume is R/W, then only mount R/W volume unless | |
196 | * explicitly told otherwise | |
197 | */ | |
13fcc683 | 198 | struct afs_volume *afs_create_volume(struct afs_fs_context *params) |
1da177e4 | 199 | { |
d2ddc776 DH |
200 | struct afs_vldb_entry *vldb; |
201 | struct afs_volume *volume; | |
202 | unsigned long type_mask = 1UL << params->type; | |
1da177e4 | 203 | |
d2ddc776 DH |
204 | vldb = afs_vl_lookup_vldb(params->cell, params->key, |
205 | params->volname, params->volnamesz); | |
206 | if (IS_ERR(vldb)) | |
207 | return ERR_CAST(vldb); | |
1da177e4 | 208 | |
d2ddc776 DH |
209 | if (test_bit(AFS_VLDB_QUERY_ERROR, &vldb->flags)) { |
210 | volume = ERR_PTR(vldb->error); | |
211 | goto error; | |
212 | } | |
1da177e4 | 213 | |
d2ddc776 DH |
214 | /* Make the final decision on the type we want */ |
215 | volume = ERR_PTR(-ENOMEDIUM); | |
00d3b7a4 | 216 | if (params->force) { |
d2ddc776 | 217 | if (!(vldb->flags & type_mask)) |
1da177e4 | 218 | goto error; |
d2ddc776 | 219 | } else if (test_bit(AFS_VLDB_HAS_RO, &vldb->flags)) { |
00d3b7a4 | 220 | params->type = AFSVL_ROVOL; |
d2ddc776 | 221 | } else if (test_bit(AFS_VLDB_HAS_RW, &vldb->flags)) { |
00d3b7a4 | 222 | params->type = AFSVL_RWVOL; |
ec26815a | 223 | } else { |
1da177e4 LT |
224 | goto error; |
225 | } | |
226 | ||
ca0e79a4 | 227 | volume = afs_lookup_volume(params, vldb); |
1da177e4 | 228 | |
d2ddc776 DH |
229 | error: |
230 | kfree(vldb); | |
231 | return volume; | |
232 | } | |
1da177e4 | 233 | |
d2ddc776 DH |
234 | /* |
235 | * Destroy a volume record | |
236 | */ | |
445f9b69 | 237 | static void afs_destroy_volume(struct work_struct *work) |
d2ddc776 | 238 | { |
445f9b69 | 239 | struct afs_volume *volume = container_of(work, struct afs_volume, destructor); |
ca0e79a4 DH |
240 | struct afs_server_list *slist = rcu_access_pointer(volume->servers); |
241 | ||
d2ddc776 | 242 | _enter("%p", volume); |
1da177e4 | 243 | |
d2ddc776 DH |
244 | #ifdef CONFIG_AFS_FSCACHE |
245 | ASSERTCMP(volume->cache, ==, NULL); | |
246 | #endif | |
1da177e4 | 247 | |
ca0e79a4 | 248 | afs_detach_volume_from_servers(volume, slist); |
20325960 | 249 | afs_remove_volume_from_cell(volume); |
445f9b69 | 250 | afs_put_serverlist(volume->cell->net, slist); |
dca54a7b | 251 | afs_put_cell(volume->cell, afs_cell_trace_put_vol); |
4f67bcf6 | 252 | trace_afs_volume(volume->debug_id, volume->vid, refcount_read(&volume->ref), |
cca37d45 | 253 | afs_volume_trace_free); |
20325960 | 254 | kfree_rcu(volume, rcu); |
1da177e4 | 255 | |
d2ddc776 DH |
256 | _leave(" [destroyed]"); |
257 | } | |
258 | ||
9a6b294a DH |
259 | /* |
260 | * Try to get a reference on a volume record. | |
261 | */ | |
262 | bool afs_try_get_volume(struct afs_volume *volume, enum afs_volume_trace reason) | |
263 | { | |
264 | int r; | |
265 | ||
266 | if (__refcount_inc_not_zero(&volume->ref, &r)) { | |
4f67bcf6 | 267 | trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason); |
9a6b294a DH |
268 | return true; |
269 | } | |
270 | return false; | |
271 | } | |
272 | ||
d2ddc776 | 273 | /* |
cca37d45 | 274 | * Get a reference on a volume record. |
d2ddc776 | 275 | */ |
cca37d45 DH |
276 | struct afs_volume *afs_get_volume(struct afs_volume *volume, |
277 | enum afs_volume_trace reason) | |
d2ddc776 DH |
278 | { |
279 | if (volume) { | |
c56f9ec8 DH |
280 | int r; |
281 | ||
282 | __refcount_inc(&volume->ref, &r); | |
4f67bcf6 | 283 | trace_afs_volume(volume->debug_id, volume->vid, r + 1, reason); |
cca37d45 DH |
284 | } |
285 | return volume; | |
286 | } | |
287 | ||
d2ddc776 | 288 | |
cca37d45 DH |
289 | /* |
290 | * Drop a reference on a volume record. | |
291 | */ | |
445f9b69 | 292 | void afs_put_volume(struct afs_volume *volume, enum afs_volume_trace reason) |
cca37d45 DH |
293 | { |
294 | if (volume) { | |
4f67bcf6 | 295 | unsigned int debug_id = volume->debug_id; |
cca37d45 | 296 | afs_volid_t vid = volume->vid; |
c56f9ec8 DH |
297 | bool zero; |
298 | int r; | |
299 | ||
300 | zero = __refcount_dec_and_test(&volume->ref, &r); | |
4f67bcf6 | 301 | trace_afs_volume(debug_id, vid, r - 1, reason); |
c56f9ec8 | 302 | if (zero) |
445f9b69 | 303 | schedule_work(&volume->destructor); |
1da177e4 | 304 | } |
d2ddc776 | 305 | } |
1da177e4 | 306 | |
/*
 * Activate a volume.
 *
 * With CONFIG_AFS_FSCACHE enabled, a cache volume cookie keyed on
 * "afs,<cell name>,<volume ID in hex>" is acquired and stored in
 * volume->cache.  If the key is already in use (-EBUSY), an error is logged
 * and the volume carries on with no cache.  Returns 0 on success, -ENOMEM if
 * the key can't be allocated, or any other error from the acquisition.
 */
int afs_activate_volume(struct afs_volume *volume)
{
#ifdef CONFIG_AFS_FSCACHE
	struct fscache_volume *cookie;
	char *key;
	int ret = 0;

	key = kasprintf(GFP_KERNEL, "afs,%s,%llx",
			volume->cell->name, volume->vid);
	if (!key)
		return -ENOMEM;

	cookie = fscache_acquire_volume(key, NULL, NULL, 0);
	if (!IS_ERR(cookie)) {
		volume->cache = cookie;
	} else if (cookie == ERR_PTR(-EBUSY)) {
		pr_err("AFS: Cache volume key already in use (%s)\n", key);
		volume->cache = NULL;
	} else {
		ret = PTR_ERR(cookie);
	}

	kfree(key);
	return ret;
#else
	return 0;
#endif
}
1da177e4 | 335 | |
d2ddc776 DH |
336 | /* |
337 | * Deactivate a volume. | |
338 | */ | |
339 | void afs_deactivate_volume(struct afs_volume *volume) | |
340 | { | |
341 | _enter("%s", volume->name); | |
1da177e4 | 342 | |
d2ddc776 | 343 | #ifdef CONFIG_AFS_FSCACHE |
523d27cd | 344 | fscache_relinquish_volume(volume->cache, NULL, |
d2ddc776 DH |
345 | test_bit(AFS_VOLUME_DELETED, &volume->flags)); |
346 | volume->cache = NULL; | |
347 | #endif | |
1da177e4 | 348 | |
d2ddc776 | 349 | _leave(""); |
ec26815a | 350 | } |
1da177e4 | 351 | |
1da177e4 | 352 | /* |
d2ddc776 | 353 | * Query the VL service to update the volume status. |
1da177e4 | 354 | */ |
d2ddc776 | 355 | static int afs_update_volume_status(struct afs_volume *volume, struct key *key) |
1da177e4 | 356 | { |
d2ddc776 DH |
357 | struct afs_server_list *new, *old, *discard; |
358 | struct afs_vldb_entry *vldb; | |
6ea38e2a | 359 | char idbuf[24]; |
d2ddc776 | 360 | int ret, idsz; |
1da177e4 | 361 | |
d2ddc776 | 362 | _enter(""); |
1da177e4 | 363 | |
d2ddc776 DH |
364 | /* We look up an ID by passing it as a decimal string in the |
365 | * operation's name parameter. | |
366 | */ | |
6ea38e2a | 367 | idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid); |
1da177e4 | 368 | |
d2ddc776 DH |
369 | vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz); |
370 | if (IS_ERR(vldb)) { | |
371 | ret = PTR_ERR(vldb); | |
372 | goto error; | |
373 | } | |
1da177e4 | 374 | |
d2ddc776 DH |
375 | /* See if the volume got renamed. */ |
376 | if (vldb->name_len != volume->name_len || | |
377 | memcmp(vldb->name, volume->name, vldb->name_len) != 0) { | |
378 | /* TODO: Use RCU'd string. */ | |
379 | memcpy(volume->name, vldb->name, AFS_MAXVOLNAME); | |
380 | volume->name_len = vldb->name_len; | |
381 | } | |
382 | ||
383 | /* See if the volume's server list got updated. */ | |
ca0e79a4 | 384 | new = afs_alloc_server_list(volume, key, vldb); |
d2ddc776 DH |
385 | if (IS_ERR(new)) { |
386 | ret = PTR_ERR(new); | |
387 | goto error_vldb; | |
388 | } | |
1da177e4 | 389 | |
d2ddc776 | 390 | write_lock(&volume->servers_lock); |
1da177e4 | 391 | |
d2ddc776 | 392 | discard = new; |
8a070a96 DH |
393 | old = rcu_dereference_protected(volume->servers, |
394 | lockdep_is_held(&volume->servers_lock)); | |
d2ddc776 DH |
395 | if (afs_annotate_server_list(new, old)) { |
396 | new->seq = volume->servers_seq + 1; | |
8a070a96 | 397 | rcu_assign_pointer(volume->servers, new); |
d2ddc776 DH |
398 | smp_wmb(); |
399 | volume->servers_seq++; | |
400 | discard = old; | |
1da177e4 LT |
401 | } |
402 | ||
495f2ae9 DH |
403 | /* Check more often if replication is ongoing. */ |
404 | if (new->ro_replicating) | |
405 | volume->update_at = ktime_get_real_seconds() + 10 * 60; | |
406 | else | |
407 | volume->update_at = ktime_get_real_seconds() + afs_volume_record_life; | |
d2ddc776 | 408 | write_unlock(&volume->servers_lock); |
1da177e4 | 409 | |
ca0e79a4 DH |
410 | if (discard == old) |
411 | afs_reattach_volume_to_servers(volume, new, old); | |
d2ddc776 | 412 | afs_put_serverlist(volume->cell->net, discard); |
ca0e79a4 | 413 | ret = 0; |
d2ddc776 DH |
414 | error_vldb: |
415 | kfree(vldb); | |
416 | error: | |
417 | _leave(" = %d", ret); | |
418 | return ret; | |
419 | } | |
1da177e4 | 420 | |
d2ddc776 DH |
421 | /* |
422 | * Make sure the volume record is up to date. | |
423 | */ | |
e49c7b2f | 424 | int afs_check_volume_status(struct afs_volume *volume, struct afs_operation *op) |
d2ddc776 | 425 | { |
d2ddc776 | 426 | int ret, retries = 0; |
1da177e4 | 427 | |
d2ddc776 DH |
428 | _enter(""); |
429 | ||
d2ddc776 | 430 | retry: |
f6cbb368 DH |
431 | if (test_bit(AFS_VOLUME_WAIT, &volume->flags)) |
432 | goto wait; | |
433 | if (volume->update_at <= ktime_get_real_seconds() || | |
434 | test_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags)) | |
435 | goto update; | |
436 | _leave(" = 0"); | |
437 | return 0; | |
438 | ||
439 | update: | |
d2ddc776 | 440 | if (!test_and_set_bit_lock(AFS_VOLUME_UPDATING, &volume->flags)) { |
f6cbb368 | 441 | clear_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); |
e49c7b2f | 442 | ret = afs_update_volume_status(volume, op->key); |
f6cbb368 DH |
443 | if (ret < 0) |
444 | set_bit(AFS_VOLUME_NEEDS_UPDATE, &volume->flags); | |
d2ddc776 DH |
445 | clear_bit_unlock(AFS_VOLUME_WAIT, &volume->flags); |
446 | clear_bit_unlock(AFS_VOLUME_UPDATING, &volume->flags); | |
447 | wake_up_bit(&volume->flags, AFS_VOLUME_WAIT); | |
448 | _leave(" = %d", ret); | |
449 | return ret; | |
450 | } | |
1da177e4 | 451 | |
f6cbb368 | 452 | wait: |
d2ddc776 DH |
453 | if (!test_bit(AFS_VOLUME_WAIT, &volume->flags)) { |
454 | _leave(" = 0 [no wait]"); | |
455 | return 0; | |
456 | } | |
457 | ||
c4bfda16 | 458 | ret = wait_on_bit(&volume->flags, AFS_VOLUME_WAIT, |
e49c7b2f DH |
459 | (op->flags & AFS_OPERATION_UNINTR) ? |
460 | TASK_UNINTERRUPTIBLE : TASK_INTERRUPTIBLE); | |
d2ddc776 DH |
461 | if (ret == -ERESTARTSYS) { |
462 | _leave(" = %d", ret); | |
463 | return ret; | |
464 | } | |
465 | ||
466 | retries++; | |
467 | if (retries == 4) { | |
468 | _leave(" = -ESTALE"); | |
469 | return -ESTALE; | |
470 | } | |
471 | goto retry; | |
ec26815a | 472 | } |