Commit | Line | Data |
---|---|---|
2522fe45 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
e7fd4179 DT |
2 | /****************************************************************************** |
3 | ******************************************************************************* | |
4 | ** | |
5 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | |
7 | ** | |
e7fd4179 DT |
8 | ** |
9 | ******************************************************************************* | |
10 | ******************************************************************************/ | |
11 | ||
12 | #include "dlm_internal.h" | |
13 | #include "lockspace.h" | |
14 | #include "member.h" | |
15 | #include "lowcomms.h" | |
16 | #include "rcom.h" | |
17 | #include "config.h" | |
18 | #include "memory.h" | |
19 | #include "recover.h" | |
20 | #include "util.h" | |
21 | #include "lock.h" | |
22 | #include "dir.h" | |
23 | ||
e7fd4179 DT |
24 | /* |
25 | * We use the upper 16 bits of the hash value to select the directory node. | |
26 | * Low bits are used for distribution of rsb's among hash buckets on each node. | |
27 | * | |
28 | * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of | |
29 | * num_nodes to the hash value. This value in the desired range is used as an | |
30 | * offset into the sorted list of nodeid's to give the particular nodeid. | |
31 | */ | |
32 | ||
33 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) | |
34 | { | |
c04fecb4 | 35 | uint32_t node; |
e7fd4179 | 36 | |
c04fecb4 DT |
37 | if (ls->ls_num_nodes == 1) |
38 | return dlm_our_nodeid(); | |
39 | else { | |
e7fd4179 | 40 | node = (hash >> 16) % ls->ls_total_weight; |
c04fecb4 | 41 | return ls->ls_node_array[node]; |
e7fd4179 | 42 | } |
e7fd4179 DT |
43 | } |
44 | ||
45 | int dlm_dir_nodeid(struct dlm_rsb *r) | |
46 | { | |
c04fecb4 | 47 | return r->res_dir_nodeid; |
e7fd4179 DT |
48 | } |
49 | ||
c04fecb4 | 50 | void dlm_recover_dir_nodeid(struct dlm_ls *ls) |
e7fd4179 | 51 | { |
c04fecb4 | 52 | struct dlm_rsb *r; |
e7fd4179 | 53 | |
c04fecb4 DT |
54 | down_read(&ls->ls_root_sem); |
55 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | |
56 | r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); | |
e7fd4179 | 57 | } |
c04fecb4 | 58 | up_read(&ls->ls_root_sem); |
e7fd4179 DT |
59 | } |
60 | ||
61 | int dlm_recover_directory(struct dlm_ls *ls) | |
62 | { | |
63 | struct dlm_member *memb; | |
e7fd4179 | 64 | char *b, *last_name = NULL; |
c04fecb4 | 65 | int error = -ENOMEM, last_len, nodeid, result; |
e7fd4179 | 66 | uint16_t namelen; |
c04fecb4 | 67 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
e7fd4179 | 68 | |
075f0177 | 69 | log_rinfo(ls, "dlm_recover_directory"); |
e7fd4179 DT |
70 | |
71 | if (dlm_no_directory(ls)) | |
72 | goto out_status; | |
73 | ||
573c24c4 | 74 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); |
e7fd4179 DT |
75 | if (!last_name) |
76 | goto out; | |
77 | ||
78 | list_for_each_entry(memb, &ls->ls_nodes, list) { | |
c04fecb4 DT |
79 | if (memb->nodeid == dlm_our_nodeid()) |
80 | continue; | |
81 | ||
e7fd4179 DT |
82 | memset(last_name, 0, DLM_RESNAME_MAXLEN); |
83 | last_len = 0; | |
84 | ||
85 | for (;;) { | |
cd9df1aa | 86 | int left; |
e10249b1 | 87 | if (dlm_recovery_stopped(ls)) { |
aee742c9 | 88 | error = -EINTR; |
e7fd4179 | 89 | goto out_free; |
aee742c9 | 90 | } |
e7fd4179 DT |
91 | |
92 | error = dlm_rcom_names(ls, memb->nodeid, | |
93 | last_name, last_len); | |
94 | if (error) | |
95 | goto out_free; | |
96 | ||
c04fecb4 | 97 | cond_resched(); |
e7fd4179 DT |
98 | |
99 | /* | |
100 | * pick namelen/name pairs out of received buffer | |
101 | */ | |
102 | ||
4007685c | 103 | b = ls->ls_recover_buf->rc_buf; |
3428785a | 104 | left = le16_to_cpu(ls->ls_recover_buf->rc_header.h_length); |
cd9df1aa | 105 | left -= sizeof(struct dlm_rcom); |
e7fd4179 DT |
106 | |
107 | for (;;) { | |
cd9df1aa AV |
108 | __be16 v; |
109 | ||
110 | error = -EINVAL; | |
111 | if (left < sizeof(__be16)) | |
112 | goto out_free; | |
113 | ||
114 | memcpy(&v, b, sizeof(__be16)); | |
115 | namelen = be16_to_cpu(v); | |
116 | b += sizeof(__be16); | |
117 | left -= sizeof(__be16); | |
e7fd4179 DT |
118 | |
119 | /* namelen of 0xFFFFF marks end of names for | |
120 | this node; namelen of 0 marks end of the | |
121 | buffer */ | |
122 | ||
123 | if (namelen == 0xFFFF) | |
124 | goto done; | |
125 | if (!namelen) | |
126 | break; | |
127 | ||
cd9df1aa AV |
128 | if (namelen > left) |
129 | goto out_free; | |
130 | ||
131 | if (namelen > DLM_RESNAME_MAXLEN) | |
132 | goto out_free; | |
133 | ||
c04fecb4 DT |
134 | error = dlm_master_lookup(ls, memb->nodeid, |
135 | b, namelen, | |
136 | DLM_LU_RECOVER_DIR, | |
137 | &nodeid, &result); | |
138 | if (error) { | |
139 | log_error(ls, "recover_dir lookup %d", | |
140 | error); | |
e7fd4179 | 141 | goto out_free; |
c04fecb4 DT |
142 | } |
143 | ||
144 | /* The name was found in rsbtbl, but the | |
145 | * master nodeid is different from | |
146 | * memb->nodeid which says it is the master. | |
147 | * This should not happen. */ | |
148 | ||
149 | if (result == DLM_LU_MATCH && | |
150 | nodeid != memb->nodeid) { | |
151 | count_bad++; | |
152 | log_error(ls, "recover_dir lookup %d " | |
153 | "nodeid %d memb %d bad %u", | |
154 | result, nodeid, memb->nodeid, | |
155 | count_bad); | |
156 | print_hex_dump_bytes("dlm_recover_dir ", | |
157 | DUMP_PREFIX_NONE, | |
158 | b, namelen); | |
159 | } | |
160 | ||
161 | /* The name was found in rsbtbl, and the | |
162 | * master nodeid matches memb->nodeid. */ | |
163 | ||
164 | if (result == DLM_LU_MATCH && | |
165 | nodeid == memb->nodeid) { | |
166 | count_match++; | |
167 | } | |
168 | ||
169 | /* The name was not found in rsbtbl and was | |
170 | * added with memb->nodeid as the master. */ | |
171 | ||
172 | if (result == DLM_LU_ADD) { | |
173 | count_add++; | |
174 | } | |
e7fd4179 | 175 | |
e7fd4179 | 176 | last_len = namelen; |
e7fd4179 DT |
177 | memcpy(last_name, b, namelen); |
178 | b += namelen; | |
cd9df1aa | 179 | left -= namelen; |
e7fd4179 DT |
180 | count++; |
181 | } | |
182 | } | |
c04fecb4 | 183 | done: |
e7fd4179 DT |
184 | ; |
185 | } | |
186 | ||
187 | out_status: | |
188 | error = 0; | |
c04fecb4 DT |
189 | dlm_set_recover_status(ls, DLM_RS_DIR); |
190 | ||
075f0177 | 191 | log_rinfo(ls, "dlm_recover_directory %u in %u new", |
c04fecb4 | 192 | count, count_add); |
e7fd4179 DT |
193 | out_free: |
194 | kfree(last_name); | |
195 | out: | |
e7fd4179 DT |
196 | return error; |
197 | } | |
198 | ||
85f0379a DT |
199 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
200 | { | |
201 | struct dlm_rsb *r; | |
7210cb7a DT |
202 | uint32_t hash, bucket; |
203 | int rv; | |
204 | ||
205 | hash = jhash(name, len, 0); | |
206 | bucket = hash & (ls->ls_rsbtbl_size - 1); | |
207 | ||
208 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | |
c04fecb4 | 209 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); |
7210cb7a DT |
210 | if (rv) |
211 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, | |
c04fecb4 | 212 | name, len, &r); |
7210cb7a DT |
213 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
214 | ||
215 | if (!rv) | |
216 | return r; | |
85f0379a DT |
217 | |
218 | down_read(&ls->ls_root_sem); | |
219 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | |
220 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { | |
221 | up_read(&ls->ls_root_sem); | |
c04fecb4 | 222 | log_debug(ls, "find_rsb_root revert to root_list %s", |
7210cb7a | 223 | r->res_name); |
85f0379a DT |
224 | return r; |
225 | } | |
226 | } | |
227 | up_read(&ls->ls_root_sem); | |
228 | return NULL; | |
229 | } | |
230 | ||
231 | /* Find the rsb where we left off (or start again), then send rsb names | |
232 | for rsb's we're master of and whose directory node matches the requesting | |
233 | node. inbuf is the rsb name last sent, inlen is the name's length */ | |
e7fd4179 DT |
234 | |
235 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | |
236 | char *outbuf, int outlen, int nodeid) | |
237 | { | |
238 | struct list_head *list; | |
85f0379a DT |
239 | struct dlm_rsb *r; |
240 | int offset = 0, dir_nodeid; | |
cd8e4679 | 241 | __be16 be_namelen; |
e7fd4179 | 242 | |
e7fd4179 | 243 | down_read(&ls->ls_root_sem); |
85f0379a DT |
244 | |
245 | if (inlen > 1) { | |
246 | r = find_rsb_root(ls, inbuf, inlen); | |
247 | if (!r) { | |
248 | inbuf[inlen - 1] = '\0'; | |
249 | log_error(ls, "copy_master_names from %d start %d %s", | |
250 | nodeid, inlen, inbuf); | |
251 | goto out; | |
252 | } | |
253 | list = r->res_root_list.next; | |
254 | } else { | |
e7fd4179 | 255 | list = ls->ls_root_list.next; |
85f0379a | 256 | } |
e7fd4179 DT |
257 | |
258 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { | |
259 | r = list_entry(list, struct dlm_rsb, res_root_list); | |
260 | if (r->res_nodeid) | |
261 | continue; | |
262 | ||
263 | dir_nodeid = dlm_dir_nodeid(r); | |
264 | if (dir_nodeid != nodeid) | |
265 | continue; | |
266 | ||
267 | /* | |
268 | * The block ends when we can't fit the following in the | |
269 | * remaining buffer space: | |
270 | * namelen (uint16_t) + | |
271 | * name (r->res_length) + | |
272 | * end-of-block record 0x0000 (uint16_t) | |
273 | */ | |
274 | ||
275 | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { | |
276 | /* Write end-of-block record */ | |
cd8e4679 HH |
277 | be_namelen = cpu_to_be16(0); |
278 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | |
279 | offset += sizeof(__be16); | |
c04fecb4 | 280 | ls->ls_recover_dir_sent_msg++; |
e7fd4179 DT |
281 | goto out; |
282 | } | |
283 | ||
284 | be_namelen = cpu_to_be16(r->res_length); | |
cd8e4679 HH |
285 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
286 | offset += sizeof(__be16); | |
e7fd4179 DT |
287 | memcpy(outbuf + offset, r->res_name, r->res_length); |
288 | offset += r->res_length; | |
c04fecb4 | 289 | ls->ls_recover_dir_sent_res++; |
e7fd4179 DT |
290 | } |
291 | ||
292 | /* | |
293 | * If we've reached the end of the list (and there's room) write a | |
294 | * terminating record. | |
295 | */ | |
296 | ||
297 | if ((list == &ls->ls_root_list) && | |
298 | (offset + sizeof(uint16_t) <= outlen)) { | |
cd8e4679 HH |
299 | be_namelen = cpu_to_be16(0xFFFF); |
300 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | |
301 | offset += sizeof(__be16); | |
c04fecb4 | 302 | ls->ls_recover_dir_sent_msg++; |
e7fd4179 | 303 | } |
e7fd4179 DT |
304 | out: |
305 | up_read(&ls->ls_root_sem); | |
306 | } | |
307 |