Commit | Line | Data |
---|---|---|
2522fe45 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
e7fd4179 DT |
2 | /****************************************************************************** |
3 | ******************************************************************************* | |
4 | ** | |
5 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | |
6 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. | |
7 | ** | |
e7fd4179 DT |
8 | ** |
9 | ******************************************************************************* | |
10 | ******************************************************************************/ | |
11 | ||
12 | #include "dlm_internal.h" | |
13 | #include "lockspace.h" | |
14 | #include "member.h" | |
15 | #include "lowcomms.h" | |
16 | #include "rcom.h" | |
17 | #include "config.h" | |
18 | #include "memory.h" | |
19 | #include "recover.h" | |
20 | #include "util.h" | |
21 | #include "lock.h" | |
22 | #include "dir.h" | |
23 | ||
e7fd4179 DT |
24 | /* |
25 | * We use the upper 16 bits of the hash value to select the directory node. | |
26 | * Low bits are used for distribution of rsb's among hash buckets on each node. | |
27 | * | |
28 | * To give the exact range wanted (0 to ls_total_weight-1), we apply a modulus | |
29 | * of ls_total_weight to the hash value. This value in the desired range is | |
30 | * used as an index into ls_node_array to give the particular nodeid. | |
31 | */ | |
32 | ||
33 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) | |
34 | { | |
c04fecb4 | 35 | uint32_t node; |
e7fd4179 | 36 | |
c04fecb4 DT |
37 | if (ls->ls_num_nodes == 1) |
38 | return dlm_our_nodeid(); | |
39 | else { | |
e7fd4179 | 40 | node = (hash >> 16) % ls->ls_total_weight; |
c04fecb4 | 41 | return ls->ls_node_array[node]; |
e7fd4179 | 42 | } |
e7fd4179 DT |
43 | } |
44 | ||
45 | int dlm_dir_nodeid(struct dlm_rsb *r) | |
46 | { | |
c04fecb4 | 47 | return r->res_dir_nodeid; |
e7fd4179 DT |
48 | } |
49 | ||
c04fecb4 | 50 | void dlm_recover_dir_nodeid(struct dlm_ls *ls) |
e7fd4179 | 51 | { |
c04fecb4 | 52 | struct dlm_rsb *r; |
e7fd4179 | 53 | |
c04fecb4 DT |
54 | down_read(&ls->ls_root_sem); |
55 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | |
56 | r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); | |
e7fd4179 | 57 | } |
c04fecb4 | 58 | up_read(&ls->ls_root_sem); |
e7fd4179 DT |
59 | } |
60 | ||
61 | int dlm_recover_directory(struct dlm_ls *ls) | |
62 | { | |
63 | struct dlm_member *memb; | |
e7fd4179 | 64 | char *b, *last_name = NULL; |
c04fecb4 | 65 | int error = -ENOMEM, last_len, nodeid, result; |
e7fd4179 | 66 | uint16_t namelen; |
c04fecb4 | 67 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
e7fd4179 | 68 | |
075f0177 | 69 | log_rinfo(ls, "dlm_recover_directory"); |
e7fd4179 DT |
70 | |
71 | if (dlm_no_directory(ls)) | |
72 | goto out_status; | |
73 | ||
573c24c4 | 74 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); |
e7fd4179 DT |
75 | if (!last_name) |
76 | goto out; | |
77 | ||
78 | list_for_each_entry(memb, &ls->ls_nodes, list) { | |
c04fecb4 DT |
79 | if (memb->nodeid == dlm_our_nodeid()) |
80 | continue; | |
81 | ||
e7fd4179 DT |
82 | memset(last_name, 0, DLM_RESNAME_MAXLEN); |
83 | last_len = 0; | |
84 | ||
85 | for (;;) { | |
cd9df1aa | 86 | int left; |
e7fd4179 DT |
87 | error = dlm_recovery_stopped(ls); |
88 | if (error) | |
89 | goto out_free; | |
90 | ||
91 | error = dlm_rcom_names(ls, memb->nodeid, | |
92 | last_name, last_len); | |
93 | if (error) | |
94 | goto out_free; | |
95 | ||
c04fecb4 | 96 | cond_resched(); |
e7fd4179 DT |
97 | |
98 | /* | |
99 | * pick namelen/name pairs out of received buffer | |
100 | */ | |
101 | ||
4007685c | 102 | b = ls->ls_recover_buf->rc_buf; |
cd9df1aa AV |
103 | left = ls->ls_recover_buf->rc_header.h_length; |
104 | left -= sizeof(struct dlm_rcom); | |
e7fd4179 DT |
105 | |
106 | for (;;) { | |
cd9df1aa AV |
107 | __be16 v; |
108 | ||
109 | error = -EINVAL; | |
110 | if (left < sizeof(__be16)) | |
111 | goto out_free; | |
112 | ||
113 | memcpy(&v, b, sizeof(__be16)); | |
114 | namelen = be16_to_cpu(v); | |
115 | b += sizeof(__be16); | |
116 | left -= sizeof(__be16); | |
e7fd4179 DT |
117 | |
118 | /* namelen of 0xFFFFF marks end of names for | |
119 | this node; namelen of 0 marks end of the | |
120 | buffer */ | |
121 | ||
122 | if (namelen == 0xFFFF) | |
123 | goto done; | |
124 | if (!namelen) | |
125 | break; | |
126 | ||
cd9df1aa AV |
127 | if (namelen > left) |
128 | goto out_free; | |
129 | ||
130 | if (namelen > DLM_RESNAME_MAXLEN) | |
131 | goto out_free; | |
132 | ||
c04fecb4 DT |
133 | error = dlm_master_lookup(ls, memb->nodeid, |
134 | b, namelen, | |
135 | DLM_LU_RECOVER_DIR, | |
136 | &nodeid, &result); | |
137 | if (error) { | |
138 | log_error(ls, "recover_dir lookup %d", | |
139 | error); | |
e7fd4179 | 140 | goto out_free; |
c04fecb4 DT |
141 | } |
142 | ||
143 | /* The name was found in rsbtbl, but the | |
144 | * master nodeid is different from | |
145 | * memb->nodeid which says it is the master. | |
146 | * This should not happen. */ | |
147 | ||
148 | if (result == DLM_LU_MATCH && | |
149 | nodeid != memb->nodeid) { | |
150 | count_bad++; | |
151 | log_error(ls, "recover_dir lookup %d " | |
152 | "nodeid %d memb %d bad %u", | |
153 | result, nodeid, memb->nodeid, | |
154 | count_bad); | |
155 | print_hex_dump_bytes("dlm_recover_dir ", | |
156 | DUMP_PREFIX_NONE, | |
157 | b, namelen); | |
158 | } | |
159 | ||
160 | /* The name was found in rsbtbl, and the | |
161 | * master nodeid matches memb->nodeid. */ | |
162 | ||
163 | if (result == DLM_LU_MATCH && | |
164 | nodeid == memb->nodeid) { | |
165 | count_match++; | |
166 | } | |
167 | ||
168 | /* The name was not found in rsbtbl and was | |
169 | * added with memb->nodeid as the master. */ | |
170 | ||
171 | if (result == DLM_LU_ADD) { | |
172 | count_add++; | |
173 | } | |
e7fd4179 | 174 | |
e7fd4179 | 175 | last_len = namelen; |
e7fd4179 DT |
176 | memcpy(last_name, b, namelen); |
177 | b += namelen; | |
cd9df1aa | 178 | left -= namelen; |
e7fd4179 DT |
179 | count++; |
180 | } | |
181 | } | |
c04fecb4 | 182 | done: |
e7fd4179 DT |
183 | ; |
184 | } | |
185 | ||
186 | out_status: | |
187 | error = 0; | |
c04fecb4 DT |
188 | dlm_set_recover_status(ls, DLM_RS_DIR); |
189 | ||
075f0177 | 190 | log_rinfo(ls, "dlm_recover_directory %u in %u new", |
c04fecb4 | 191 | count, count_add); |
e7fd4179 DT |
192 | out_free: |
193 | kfree(last_name); | |
194 | out: | |
e7fd4179 DT |
195 | return error; |
196 | } | |
197 | ||
85f0379a DT |
198 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
199 | { | |
200 | struct dlm_rsb *r; | |
7210cb7a DT |
201 | uint32_t hash, bucket; |
202 | int rv; | |
203 | ||
204 | hash = jhash(name, len, 0); | |
205 | bucket = hash & (ls->ls_rsbtbl_size - 1); | |
206 | ||
207 | spin_lock(&ls->ls_rsbtbl[bucket].lock); | |
c04fecb4 | 208 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); |
7210cb7a DT |
209 | if (rv) |
210 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, | |
c04fecb4 | 211 | name, len, &r); |
7210cb7a DT |
212 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
213 | ||
214 | if (!rv) | |
215 | return r; | |
85f0379a DT |
216 | |
217 | down_read(&ls->ls_root_sem); | |
218 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | |
219 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { | |
220 | up_read(&ls->ls_root_sem); | |
c04fecb4 | 221 | log_debug(ls, "find_rsb_root revert to root_list %s", |
7210cb7a | 222 | r->res_name); |
85f0379a DT |
223 | return r; |
224 | } | |
225 | } | |
226 | up_read(&ls->ls_root_sem); | |
227 | return NULL; | |
228 | } | |
229 | ||
230 | /* Find the rsb where we left off (or start again), then send rsb names | |
231 | for rsb's we're master of and whose directory node matches the requesting | |
232 | node. inbuf is the rsb name last sent, inlen is the name's length */ | |
e7fd4179 DT |
233 | |
234 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, | |
235 | char *outbuf, int outlen, int nodeid) | |
236 | { | |
237 | struct list_head *list; | |
85f0379a DT |
238 | struct dlm_rsb *r; |
239 | int offset = 0, dir_nodeid; | |
cd8e4679 | 240 | __be16 be_namelen; |
e7fd4179 | 241 | |
e7fd4179 | 242 | down_read(&ls->ls_root_sem); |
85f0379a DT |
243 | |
244 | if (inlen > 1) { | |
245 | r = find_rsb_root(ls, inbuf, inlen); | |
246 | if (!r) { | |
247 | inbuf[inlen - 1] = '\0'; | |
248 | log_error(ls, "copy_master_names from %d start %d %s", | |
249 | nodeid, inlen, inbuf); | |
250 | goto out; | |
251 | } | |
252 | list = r->res_root_list.next; | |
253 | } else { | |
e7fd4179 | 254 | list = ls->ls_root_list.next; |
85f0379a | 255 | } |
e7fd4179 DT |
256 | |
257 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { | |
258 | r = list_entry(list, struct dlm_rsb, res_root_list); | |
259 | if (r->res_nodeid) | |
260 | continue; | |
261 | ||
262 | dir_nodeid = dlm_dir_nodeid(r); | |
263 | if (dir_nodeid != nodeid) | |
264 | continue; | |
265 | ||
266 | /* | |
267 | * The block ends when we can't fit the following in the | |
268 | * remaining buffer space: | |
269 | * namelen (uint16_t) + | |
270 | * name (r->res_length) + | |
271 | * end-of-block record 0x0000 (uint16_t) | |
272 | */ | |
273 | ||
274 | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { | |
275 | /* Write end-of-block record */ | |
cd8e4679 HH |
276 | be_namelen = cpu_to_be16(0); |
277 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | |
278 | offset += sizeof(__be16); | |
c04fecb4 | 279 | ls->ls_recover_dir_sent_msg++; |
e7fd4179 DT |
280 | goto out; |
281 | } | |
282 | ||
283 | be_namelen = cpu_to_be16(r->res_length); | |
cd8e4679 HH |
284 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
285 | offset += sizeof(__be16); | |
e7fd4179 DT |
286 | memcpy(outbuf + offset, r->res_name, r->res_length); |
287 | offset += r->res_length; | |
c04fecb4 | 288 | ls->ls_recover_dir_sent_res++; |
e7fd4179 DT |
289 | } |
290 | ||
291 | /* | |
292 | * If we've reached the end of the list (and there's room) write a | |
293 | * terminating record. | |
294 | */ | |
295 | ||
296 | if ((list == &ls->ls_root_list) && | |
297 | (offset + sizeof(uint16_t) <= outlen)) { | |
cd8e4679 HH |
298 | be_namelen = cpu_to_be16(0xFFFF); |
299 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); | |
300 | offset += sizeof(__be16); | |
c04fecb4 | 301 | ls->ls_recover_dir_sent_msg++; |
e7fd4179 | 302 | } |
e7fd4179 DT |
303 | out: |
304 | up_read(&ls->ls_root_sem); | |
305 | } | |
306 |