Commit | Line | Data |
---|---|---|
a4bd217b JG |
1 | /* |
2 | * Copyright (C) 2016 CNEX Labs | |
3 | * Initial: Javier Gonzalez <javier@cnexlabs.com> | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License version | |
7 | * 2 as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, but | |
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | * General Public License for more details. | |
13 | * | |
14 | * pblk-recovery.c - pblk's recovery path | |
15 | */ | |
16 | ||
17 | #include "pblk.h" | |
18 | ||
19 | void pblk_submit_rec(struct work_struct *work) | |
20 | { | |
21 | struct pblk_rec_ctx *recovery = | |
22 | container_of(work, struct pblk_rec_ctx, ws_rec); | |
23 | struct pblk *pblk = recovery->pblk; | |
24 | struct nvm_tgt_dev *dev = pblk->dev; | |
25 | struct nvm_rq *rqd = recovery->rqd; | |
26 | struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd); | |
27 | int max_secs = nvm_max_phys_sects(dev); | |
28 | struct bio *bio; | |
29 | unsigned int nr_rec_secs; | |
30 | unsigned int pgs_read; | |
31 | int ret; | |
32 | ||
33 | nr_rec_secs = bitmap_weight((unsigned long int *)&rqd->ppa_status, | |
34 | max_secs); | |
35 | ||
36 | bio = bio_alloc(GFP_KERNEL, nr_rec_secs); | |
37 | if (!bio) { | |
38 | pr_err("pblk: not able to create recovery bio\n"); | |
39 | return; | |
40 | } | |
41 | ||
42 | bio->bi_iter.bi_sector = 0; | |
43 | bio_set_op_attrs(bio, REQ_OP_WRITE, 0); | |
44 | rqd->bio = bio; | |
45 | rqd->nr_ppas = nr_rec_secs; | |
46 | ||
47 | pgs_read = pblk_rb_read_to_bio_list(&pblk->rwb, bio, &recovery->failed, | |
48 | nr_rec_secs); | |
49 | if (pgs_read != nr_rec_secs) { | |
50 | pr_err("pblk: could not read recovery entries\n"); | |
51 | goto err; | |
52 | } | |
53 | ||
54 | if (pblk_setup_w_rec_rq(pblk, rqd, c_ctx)) { | |
55 | pr_err("pblk: could not setup recovery request\n"); | |
56 | goto err; | |
57 | } | |
58 | ||
59 | #ifdef CONFIG_NVM_DEBUG | |
60 | atomic_long_add(nr_rec_secs, &pblk->recov_writes); | |
61 | #endif | |
62 | ||
63 | ret = pblk_submit_io(pblk, rqd); | |
64 | if (ret) { | |
65 | pr_err("pblk: I/O submission failed: %d\n", ret); | |
66 | goto err; | |
67 | } | |
68 | ||
69 | mempool_free(recovery, pblk->rec_pool); | |
70 | return; | |
71 | ||
72 | err: | |
73 | bio_put(bio); | |
74 | pblk_free_rqd(pblk, rqd, WRITE); | |
75 | } | |
76 | ||
77 | int pblk_recov_setup_rq(struct pblk *pblk, struct pblk_c_ctx *c_ctx, | |
78 | struct pblk_rec_ctx *recovery, u64 *comp_bits, | |
79 | unsigned int comp) | |
80 | { | |
81 | struct nvm_tgt_dev *dev = pblk->dev; | |
82 | int max_secs = nvm_max_phys_sects(dev); | |
83 | struct nvm_rq *rec_rqd; | |
84 | struct pblk_c_ctx *rec_ctx; | |
85 | int nr_entries = c_ctx->nr_valid + c_ctx->nr_padded; | |
86 | ||
87 | rec_rqd = pblk_alloc_rqd(pblk, WRITE); | |
88 | if (IS_ERR(rec_rqd)) { | |
89 | pr_err("pblk: could not create recovery req.\n"); | |
90 | return -ENOMEM; | |
91 | } | |
92 | ||
93 | rec_ctx = nvm_rq_to_pdu(rec_rqd); | |
94 | ||
95 | /* Copy completion bitmap, but exclude the first X completed entries */ | |
96 | bitmap_shift_right((unsigned long int *)&rec_rqd->ppa_status, | |
97 | (unsigned long int *)comp_bits, | |
98 | comp, max_secs); | |
99 | ||
100 | /* Save the context for the entries that need to be re-written and | |
101 | * update current context with the completed entries. | |
102 | */ | |
103 | rec_ctx->sentry = pblk_rb_wrap_pos(&pblk->rwb, c_ctx->sentry + comp); | |
104 | if (comp >= c_ctx->nr_valid) { | |
105 | rec_ctx->nr_valid = 0; | |
106 | rec_ctx->nr_padded = nr_entries - comp; | |
107 | ||
108 | c_ctx->nr_padded = comp - c_ctx->nr_valid; | |
109 | } else { | |
110 | rec_ctx->nr_valid = c_ctx->nr_valid - comp; | |
111 | rec_ctx->nr_padded = c_ctx->nr_padded; | |
112 | ||
113 | c_ctx->nr_valid = comp; | |
114 | c_ctx->nr_padded = 0; | |
115 | } | |
116 | ||
117 | recovery->rqd = rec_rqd; | |
118 | recovery->pblk = pblk; | |
119 | ||
120 | return 0; | |
121 | } | |
122 | ||
dd2a4343 | 123 | __le64 *pblk_recov_get_lba_list(struct pblk *pblk, struct line_emeta *emeta_buf) |
a4bd217b JG |
124 | { |
125 | u32 crc; | |
126 | ||
dd2a4343 JG |
127 | crc = pblk_calc_emeta_crc(pblk, emeta_buf); |
128 | if (le32_to_cpu(emeta_buf->crc) != crc) | |
a4bd217b JG |
129 | return NULL; |
130 | ||
dd2a4343 | 131 | if (le32_to_cpu(emeta_buf->header.identifier) != PBLK_MAGIC) |
a4bd217b JG |
132 | return NULL; |
133 | ||
dd2a4343 | 134 | return emeta_to_lbas(pblk, emeta_buf); |
a4bd217b JG |
135 | } |
136 | ||
137 | static int pblk_recov_l2p_from_emeta(struct pblk *pblk, struct pblk_line *line) | |
138 | { | |
139 | struct nvm_tgt_dev *dev = pblk->dev; | |
140 | struct nvm_geo *geo = &dev->geo; | |
141 | struct pblk_line_meta *lm = &pblk->lm; | |
dd2a4343 JG |
142 | struct pblk_emeta *emeta = line->emeta; |
143 | struct line_emeta *emeta_buf = emeta->buf; | |
a4bd217b JG |
144 | __le64 *lba_list; |
145 | int data_start; | |
146 | int nr_data_lbas, nr_valid_lbas, nr_lbas = 0; | |
147 | int i; | |
148 | ||
dd2a4343 | 149 | lba_list = pblk_recov_get_lba_list(pblk, emeta_buf); |
a4bd217b JG |
150 | if (!lba_list) |
151 | return 1; | |
152 | ||
153 | data_start = pblk_line_smeta_start(pblk, line) + lm->smeta_sec; | |
dd2a4343 JG |
154 | nr_data_lbas = lm->sec_per_line - lm->emeta_sec[0]; |
155 | nr_valid_lbas = le64_to_cpu(emeta_buf->nr_valid_lbas); | |
a4bd217b JG |
156 | |
157 | for (i = data_start; i < nr_data_lbas && nr_lbas < nr_valid_lbas; i++) { | |
158 | struct ppa_addr ppa; | |
159 | int pos; | |
160 | ||
161 | ppa = addr_to_pblk_ppa(pblk, i, line->id); | |
162 | pos = pblk_ppa_to_pos(geo, ppa); | |
163 | ||
164 | /* Do not update bad blocks */ | |
165 | if (test_bit(pos, line->blk_bitmap)) | |
166 | continue; | |
167 | ||
168 | if (le64_to_cpu(lba_list[i]) == ADDR_EMPTY) { | |
169 | spin_lock(&line->lock); | |
170 | if (test_and_set_bit(i, line->invalid_bitmap)) | |
2a79efd8 | 171 | WARN_ONCE(1, "pblk: rec. double invalidate:\n"); |
a4bd217b | 172 | else |
dd2a4343 | 173 | le32_add_cpu(line->vsc, -1); |
a4bd217b JG |
174 | spin_unlock(&line->lock); |
175 | ||
176 | continue; | |
177 | } | |
178 | ||
179 | pblk_update_map(pblk, le64_to_cpu(lba_list[i]), ppa); | |
180 | nr_lbas++; | |
181 | } | |
182 | ||
183 | if (nr_valid_lbas != nr_lbas) | |
184 | pr_err("pblk: line %d - inconsistent lba list(%llu/%d)\n", | |
dd2a4343 | 185 | line->id, emeta_buf->nr_valid_lbas, nr_lbas); |
a4bd217b JG |
186 | |
187 | line->left_msecs = 0; | |
188 | ||
189 | return 0; | |
190 | } | |
191 | ||
192 | static int pblk_calc_sec_in_line(struct pblk *pblk, struct pblk_line *line) | |
193 | { | |
194 | struct nvm_tgt_dev *dev = pblk->dev; | |
195 | struct nvm_geo *geo = &dev->geo; | |
196 | struct pblk_line_meta *lm = &pblk->lm; | |
197 | int nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); | |
198 | ||
dd2a4343 | 199 | return lm->sec_per_line - lm->smeta_sec - lm->emeta_sec[0] - |
a4bd217b JG |
200 | nr_bb * geo->sec_per_blk; |
201 | } | |
202 | ||
203 | struct pblk_recov_alloc { | |
204 | struct ppa_addr *ppa_list; | |
205 | struct pblk_sec_meta *meta_list; | |
206 | struct nvm_rq *rqd; | |
207 | void *data; | |
208 | dma_addr_t dma_ppa_list; | |
209 | dma_addr_t dma_meta_list; | |
210 | }; | |
211 | ||
212 | static int pblk_recov_read_oob(struct pblk *pblk, struct pblk_line *line, | |
213 | struct pblk_recov_alloc p, u64 r_ptr) | |
214 | { | |
215 | struct nvm_tgt_dev *dev = pblk->dev; | |
216 | struct nvm_geo *geo = &dev->geo; | |
217 | struct ppa_addr *ppa_list; | |
218 | struct pblk_sec_meta *meta_list; | |
219 | struct nvm_rq *rqd; | |
220 | struct bio *bio; | |
221 | void *data; | |
222 | dma_addr_t dma_ppa_list, dma_meta_list; | |
223 | u64 r_ptr_int; | |
224 | int left_ppas; | |
225 | int rq_ppas, rq_len; | |
226 | int i, j; | |
227 | int ret = 0; | |
228 | DECLARE_COMPLETION_ONSTACK(wait); | |
229 | ||
230 | ppa_list = p.ppa_list; | |
231 | meta_list = p.meta_list; | |
232 | rqd = p.rqd; | |
233 | data = p.data; | |
234 | dma_ppa_list = p.dma_ppa_list; | |
235 | dma_meta_list = p.dma_meta_list; | |
236 | ||
237 | left_ppas = line->cur_sec - r_ptr; | |
238 | if (!left_ppas) | |
239 | return 0; | |
240 | ||
241 | r_ptr_int = r_ptr; | |
242 | ||
243 | next_read_rq: | |
084ec9ba | 244 | memset(rqd, 0, pblk_g_rq_size); |
a4bd217b JG |
245 | |
246 | rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); | |
247 | if (!rq_ppas) | |
248 | rq_ppas = pblk->min_write_pgs; | |
249 | rq_len = rq_ppas * geo->sec_size; | |
250 | ||
251 | bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); | |
252 | if (IS_ERR(bio)) | |
253 | return PTR_ERR(bio); | |
254 | ||
255 | bio->bi_iter.bi_sector = 0; /* internal bio */ | |
256 | bio_set_op_attrs(bio, REQ_OP_READ, 0); | |
257 | ||
258 | rqd->bio = bio; | |
259 | rqd->opcode = NVM_OP_PREAD; | |
260 | rqd->flags = pblk_set_read_mode(pblk); | |
261 | rqd->meta_list = meta_list; | |
262 | rqd->nr_ppas = rq_ppas; | |
263 | rqd->ppa_list = ppa_list; | |
264 | rqd->dma_ppa_list = dma_ppa_list; | |
265 | rqd->dma_meta_list = dma_meta_list; | |
266 | rqd->end_io = pblk_end_io_sync; | |
267 | rqd->private = &wait; | |
268 | ||
269 | for (i = 0; i < rqd->nr_ppas; ) { | |
270 | struct ppa_addr ppa; | |
271 | int pos; | |
272 | ||
273 | ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); | |
274 | pos = pblk_dev_ppa_to_pos(geo, ppa); | |
275 | ||
276 | while (test_bit(pos, line->blk_bitmap)) { | |
277 | r_ptr_int += pblk->min_write_pgs; | |
278 | ppa = addr_to_gen_ppa(pblk, r_ptr_int, line->id); | |
279 | pos = pblk_dev_ppa_to_pos(geo, ppa); | |
280 | } | |
281 | ||
282 | for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++) | |
283 | rqd->ppa_list[i] = | |
284 | addr_to_gen_ppa(pblk, r_ptr_int, line->id); | |
285 | } | |
286 | ||
287 | /* If read fails, more padding is needed */ | |
288 | ret = pblk_submit_io(pblk, rqd); | |
289 | if (ret) { | |
290 | pr_err("pblk: I/O submission failed: %d\n", ret); | |
291 | return ret; | |
292 | } | |
293 | ||
294 | if (!wait_for_completion_io_timeout(&wait, | |
295 | msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { | |
296 | pr_err("pblk: L2P recovery read timed out\n"); | |
297 | return -EINTR; | |
298 | } | |
299 | ||
300 | reinit_completion(&wait); | |
301 | ||
302 | /* At this point, the read should not fail. If it does, it is a problem | |
303 | * we cannot recover from here. Need FTL log. | |
304 | */ | |
305 | if (rqd->error) { | |
306 | pr_err("pblk: L2P recovery failed (%d)\n", rqd->error); | |
307 | return -EINTR; | |
308 | } | |
309 | ||
310 | for (i = 0; i < rqd->nr_ppas; i++) { | |
311 | u64 lba = le64_to_cpu(meta_list[i].lba); | |
312 | ||
313 | if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) | |
314 | continue; | |
315 | ||
316 | pblk_update_map(pblk, lba, rqd->ppa_list[i]); | |
317 | } | |
318 | ||
319 | left_ppas -= rq_ppas; | |
320 | if (left_ppas > 0) | |
321 | goto next_read_rq; | |
322 | ||
323 | return 0; | |
324 | } | |
325 | ||
326 | static int pblk_recov_pad_oob(struct pblk *pblk, struct pblk_line *line, | |
327 | struct pblk_recov_alloc p, int left_ppas) | |
328 | { | |
329 | struct nvm_tgt_dev *dev = pblk->dev; | |
330 | struct nvm_geo *geo = &dev->geo; | |
331 | struct ppa_addr *ppa_list; | |
332 | struct pblk_sec_meta *meta_list; | |
333 | struct nvm_rq *rqd; | |
334 | struct bio *bio; | |
335 | void *data; | |
336 | dma_addr_t dma_ppa_list, dma_meta_list; | |
dd2a4343 | 337 | __le64 *lba_list = emeta_to_lbas(pblk, line->emeta->buf); |
a4bd217b JG |
338 | u64 w_ptr = line->cur_sec; |
339 | int left_line_ppas = line->left_msecs; | |
340 | int rq_ppas, rq_len; | |
341 | int i, j; | |
342 | int ret = 0; | |
343 | DECLARE_COMPLETION_ONSTACK(wait); | |
344 | ||
345 | ppa_list = p.ppa_list; | |
346 | meta_list = p.meta_list; | |
347 | rqd = p.rqd; | |
348 | data = p.data; | |
349 | dma_ppa_list = p.dma_ppa_list; | |
350 | dma_meta_list = p.dma_meta_list; | |
351 | ||
352 | next_pad_rq: | |
353 | rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); | |
354 | if (!rq_ppas) | |
355 | rq_ppas = pblk->min_write_pgs; | |
356 | rq_len = rq_ppas * geo->sec_size; | |
357 | ||
358 | bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); | |
359 | if (IS_ERR(bio)) | |
360 | return PTR_ERR(bio); | |
361 | ||
362 | bio->bi_iter.bi_sector = 0; /* internal bio */ | |
363 | bio_set_op_attrs(bio, REQ_OP_WRITE, 0); | |
364 | ||
084ec9ba | 365 | memset(rqd, 0, pblk_g_rq_size); |
a4bd217b JG |
366 | |
367 | rqd->bio = bio; | |
368 | rqd->opcode = NVM_OP_PWRITE; | |
369 | rqd->flags = pblk_set_progr_mode(pblk, WRITE); | |
370 | rqd->meta_list = meta_list; | |
371 | rqd->nr_ppas = rq_ppas; | |
372 | rqd->ppa_list = ppa_list; | |
373 | rqd->dma_ppa_list = dma_ppa_list; | |
374 | rqd->dma_meta_list = dma_meta_list; | |
375 | rqd->end_io = pblk_end_io_sync; | |
376 | rqd->private = &wait; | |
377 | ||
378 | for (i = 0; i < rqd->nr_ppas; ) { | |
379 | struct ppa_addr ppa; | |
380 | int pos; | |
381 | ||
382 | w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); | |
383 | ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); | |
384 | pos = pblk_ppa_to_pos(geo, ppa); | |
385 | ||
386 | while (test_bit(pos, line->blk_bitmap)) { | |
387 | w_ptr += pblk->min_write_pgs; | |
388 | ppa = addr_to_pblk_ppa(pblk, w_ptr, line->id); | |
389 | pos = pblk_ppa_to_pos(geo, ppa); | |
390 | } | |
391 | ||
392 | for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) { | |
393 | struct ppa_addr dev_ppa; | |
caa69fa5 | 394 | u64 addr_empty = cpu_to_le64(ADDR_EMPTY); |
a4bd217b JG |
395 | |
396 | dev_ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); | |
397 | ||
398 | pblk_map_invalidate(pblk, dev_ppa); | |
caa69fa5 | 399 | lba_list[w_ptr] = meta_list[i].lba = addr_empty; |
a4bd217b JG |
400 | rqd->ppa_list[i] = dev_ppa; |
401 | } | |
402 | } | |
403 | ||
404 | ret = pblk_submit_io(pblk, rqd); | |
405 | if (ret) { | |
406 | pr_err("pblk: I/O submission failed: %d\n", ret); | |
407 | return ret; | |
408 | } | |
409 | ||
410 | if (!wait_for_completion_io_timeout(&wait, | |
411 | msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { | |
412 | pr_err("pblk: L2P recovery write timed out\n"); | |
413 | } | |
414 | reinit_completion(&wait); | |
415 | ||
416 | left_line_ppas -= rq_ppas; | |
417 | left_ppas -= rq_ppas; | |
418 | if (left_ppas > 0 && left_line_ppas) | |
419 | goto next_pad_rq; | |
420 | ||
421 | return 0; | |
422 | } | |
423 | ||
424 | /* When this function is called, it means that not all upper pages have been | |
425 | * written in a page that contains valid data. In order to recover this data, we | |
426 | * first find the write pointer on the device, then we pad all necessary | |
427 | * sectors, and finally attempt to read the valid data | |
428 | */ | |
429 | static int pblk_recov_scan_all_oob(struct pblk *pblk, struct pblk_line *line, | |
430 | struct pblk_recov_alloc p) | |
431 | { | |
432 | struct nvm_tgt_dev *dev = pblk->dev; | |
433 | struct nvm_geo *geo = &dev->geo; | |
434 | struct ppa_addr *ppa_list; | |
435 | struct pblk_sec_meta *meta_list; | |
436 | struct nvm_rq *rqd; | |
437 | struct bio *bio; | |
438 | void *data; | |
439 | dma_addr_t dma_ppa_list, dma_meta_list; | |
440 | u64 w_ptr = 0, r_ptr; | |
441 | int rq_ppas, rq_len; | |
442 | int i, j; | |
443 | int ret = 0; | |
444 | int rec_round; | |
445 | int left_ppas = pblk_calc_sec_in_line(pblk, line) - line->cur_sec; | |
446 | DECLARE_COMPLETION_ONSTACK(wait); | |
447 | ||
448 | ppa_list = p.ppa_list; | |
449 | meta_list = p.meta_list; | |
450 | rqd = p.rqd; | |
451 | data = p.data; | |
452 | dma_ppa_list = p.dma_ppa_list; | |
453 | dma_meta_list = p.dma_meta_list; | |
454 | ||
455 | /* we could recover up until the line write pointer */ | |
456 | r_ptr = line->cur_sec; | |
457 | rec_round = 0; | |
458 | ||
459 | next_rq: | |
084ec9ba | 460 | memset(rqd, 0, pblk_g_rq_size); |
a4bd217b JG |
461 | |
462 | rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); | |
463 | if (!rq_ppas) | |
464 | rq_ppas = pblk->min_write_pgs; | |
465 | rq_len = rq_ppas * geo->sec_size; | |
466 | ||
467 | bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); | |
468 | if (IS_ERR(bio)) | |
469 | return PTR_ERR(bio); | |
470 | ||
471 | bio->bi_iter.bi_sector = 0; /* internal bio */ | |
472 | bio_set_op_attrs(bio, REQ_OP_READ, 0); | |
473 | ||
474 | rqd->bio = bio; | |
475 | rqd->opcode = NVM_OP_PREAD; | |
476 | rqd->flags = pblk_set_read_mode(pblk); | |
477 | rqd->meta_list = meta_list; | |
478 | rqd->nr_ppas = rq_ppas; | |
479 | rqd->ppa_list = ppa_list; | |
480 | rqd->dma_ppa_list = dma_ppa_list; | |
481 | rqd->dma_meta_list = dma_meta_list; | |
482 | rqd->end_io = pblk_end_io_sync; | |
483 | rqd->private = &wait; | |
484 | ||
485 | for (i = 0; i < rqd->nr_ppas; ) { | |
486 | struct ppa_addr ppa; | |
487 | int pos; | |
488 | ||
489 | w_ptr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); | |
490 | ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); | |
491 | pos = pblk_dev_ppa_to_pos(geo, ppa); | |
492 | ||
493 | while (test_bit(pos, line->blk_bitmap)) { | |
494 | w_ptr += pblk->min_write_pgs; | |
495 | ppa = addr_to_gen_ppa(pblk, w_ptr, line->id); | |
496 | pos = pblk_dev_ppa_to_pos(geo, ppa); | |
497 | } | |
498 | ||
499 | for (j = 0; j < pblk->min_write_pgs; j++, i++, w_ptr++) | |
500 | rqd->ppa_list[i] = | |
501 | addr_to_gen_ppa(pblk, w_ptr, line->id); | |
502 | } | |
503 | ||
504 | ret = pblk_submit_io(pblk, rqd); | |
505 | if (ret) { | |
506 | pr_err("pblk: I/O submission failed: %d\n", ret); | |
507 | return ret; | |
508 | } | |
509 | ||
510 | if (!wait_for_completion_io_timeout(&wait, | |
511 | msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { | |
512 | pr_err("pblk: L2P recovery read timed out\n"); | |
513 | } | |
514 | reinit_completion(&wait); | |
515 | ||
516 | /* This should not happen since the read failed during normal recovery, | |
517 | * but the media works funny sometimes... | |
518 | */ | |
519 | if (!rec_round++ && !rqd->error) { | |
520 | rec_round = 0; | |
521 | for (i = 0; i < rqd->nr_ppas; i++, r_ptr++) { | |
522 | u64 lba = le64_to_cpu(meta_list[i].lba); | |
523 | ||
524 | if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) | |
525 | continue; | |
526 | ||
527 | pblk_update_map(pblk, lba, rqd->ppa_list[i]); | |
528 | } | |
529 | } | |
530 | ||
531 | /* Reached the end of the written line */ | |
532 | if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) { | |
533 | int pad_secs, nr_error_bits, bit; | |
534 | int ret; | |
535 | ||
536 | bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); | |
537 | nr_error_bits = rqd->nr_ppas - bit; | |
538 | ||
539 | /* Roll back failed sectors */ | |
540 | line->cur_sec -= nr_error_bits; | |
541 | line->left_msecs += nr_error_bits; | |
542 | bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); | |
543 | ||
544 | pad_secs = pblk_pad_distance(pblk); | |
545 | if (pad_secs > line->left_msecs) | |
546 | pad_secs = line->left_msecs; | |
547 | ||
548 | ret = pblk_recov_pad_oob(pblk, line, p, pad_secs); | |
549 | if (ret) | |
550 | pr_err("pblk: OOB padding failed (err:%d)\n", ret); | |
551 | ||
552 | ret = pblk_recov_read_oob(pblk, line, p, r_ptr); | |
553 | if (ret) | |
554 | pr_err("pblk: OOB read failed (err:%d)\n", ret); | |
555 | ||
a4bd217b JG |
556 | left_ppas = 0; |
557 | } | |
558 | ||
559 | left_ppas -= rq_ppas; | |
560 | if (left_ppas > 0) | |
561 | goto next_rq; | |
562 | ||
563 | return ret; | |
564 | } | |
565 | ||
566 | static int pblk_recov_scan_oob(struct pblk *pblk, struct pblk_line *line, | |
567 | struct pblk_recov_alloc p, int *done) | |
568 | { | |
569 | struct nvm_tgt_dev *dev = pblk->dev; | |
570 | struct nvm_geo *geo = &dev->geo; | |
571 | struct ppa_addr *ppa_list; | |
572 | struct pblk_sec_meta *meta_list; | |
573 | struct nvm_rq *rqd; | |
574 | struct bio *bio; | |
575 | void *data; | |
576 | dma_addr_t dma_ppa_list, dma_meta_list; | |
577 | u64 paddr; | |
578 | int rq_ppas, rq_len; | |
579 | int i, j; | |
580 | int ret = 0; | |
581 | int left_ppas = pblk_calc_sec_in_line(pblk, line); | |
582 | DECLARE_COMPLETION_ONSTACK(wait); | |
583 | ||
584 | ppa_list = p.ppa_list; | |
585 | meta_list = p.meta_list; | |
586 | rqd = p.rqd; | |
587 | data = p.data; | |
588 | dma_ppa_list = p.dma_ppa_list; | |
589 | dma_meta_list = p.dma_meta_list; | |
590 | ||
591 | *done = 1; | |
592 | ||
593 | next_rq: | |
084ec9ba | 594 | memset(rqd, 0, pblk_g_rq_size); |
a4bd217b JG |
595 | |
596 | rq_ppas = pblk_calc_secs(pblk, left_ppas, 0); | |
597 | if (!rq_ppas) | |
598 | rq_ppas = pblk->min_write_pgs; | |
599 | rq_len = rq_ppas * geo->sec_size; | |
600 | ||
601 | bio = bio_map_kern(dev->q, data, rq_len, GFP_KERNEL); | |
602 | if (IS_ERR(bio)) | |
603 | return PTR_ERR(bio); | |
604 | ||
605 | bio->bi_iter.bi_sector = 0; /* internal bio */ | |
606 | bio_set_op_attrs(bio, REQ_OP_READ, 0); | |
607 | ||
608 | rqd->bio = bio; | |
609 | rqd->opcode = NVM_OP_PREAD; | |
610 | rqd->flags = pblk_set_read_mode(pblk); | |
611 | rqd->meta_list = meta_list; | |
612 | rqd->nr_ppas = rq_ppas; | |
613 | rqd->ppa_list = ppa_list; | |
614 | rqd->dma_ppa_list = dma_ppa_list; | |
615 | rqd->dma_meta_list = dma_meta_list; | |
616 | rqd->end_io = pblk_end_io_sync; | |
617 | rqd->private = &wait; | |
618 | ||
619 | for (i = 0; i < rqd->nr_ppas; ) { | |
620 | struct ppa_addr ppa; | |
621 | int pos; | |
622 | ||
623 | paddr = pblk_alloc_page(pblk, line, pblk->min_write_pgs); | |
624 | ppa = addr_to_gen_ppa(pblk, paddr, line->id); | |
625 | pos = pblk_dev_ppa_to_pos(geo, ppa); | |
626 | ||
627 | while (test_bit(pos, line->blk_bitmap)) { | |
628 | paddr += pblk->min_write_pgs; | |
629 | ppa = addr_to_gen_ppa(pblk, paddr, line->id); | |
630 | pos = pblk_dev_ppa_to_pos(geo, ppa); | |
631 | } | |
632 | ||
633 | for (j = 0; j < pblk->min_write_pgs; j++, i++, paddr++) | |
634 | rqd->ppa_list[i] = | |
635 | addr_to_gen_ppa(pblk, paddr, line->id); | |
636 | } | |
637 | ||
638 | ret = pblk_submit_io(pblk, rqd); | |
639 | if (ret) { | |
640 | pr_err("pblk: I/O submission failed: %d\n", ret); | |
641 | bio_put(bio); | |
642 | return ret; | |
643 | } | |
644 | ||
645 | if (!wait_for_completion_io_timeout(&wait, | |
646 | msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) { | |
647 | pr_err("pblk: L2P recovery read timed out\n"); | |
648 | } | |
649 | reinit_completion(&wait); | |
650 | ||
651 | /* Reached the end of the written line */ | |
652 | if (rqd->error) { | |
653 | int nr_error_bits, bit; | |
654 | ||
655 | bit = find_first_bit((void *)&rqd->ppa_status, rqd->nr_ppas); | |
656 | nr_error_bits = rqd->nr_ppas - bit; | |
657 | ||
658 | /* Roll back failed sectors */ | |
659 | line->cur_sec -= nr_error_bits; | |
660 | line->left_msecs += nr_error_bits; | |
a4bd217b JG |
661 | bitmap_clear(line->map_bitmap, line->cur_sec, nr_error_bits); |
662 | ||
663 | left_ppas = 0; | |
664 | rqd->nr_ppas = bit; | |
665 | ||
666 | if (rqd->error != NVM_RSP_ERR_EMPTYPAGE) | |
667 | *done = 0; | |
668 | } | |
669 | ||
670 | for (i = 0; i < rqd->nr_ppas; i++) { | |
671 | u64 lba = le64_to_cpu(meta_list[i].lba); | |
672 | ||
673 | if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs) | |
674 | continue; | |
675 | ||
676 | pblk_update_map(pblk, lba, rqd->ppa_list[i]); | |
677 | } | |
678 | ||
679 | left_ppas -= rq_ppas; | |
680 | if (left_ppas > 0) | |
681 | goto next_rq; | |
682 | ||
683 | return ret; | |
684 | } | |
685 | ||
686 | /* Scan line for lbas on out of bound area */ | |
687 | static int pblk_recov_l2p_from_oob(struct pblk *pblk, struct pblk_line *line) | |
688 | { | |
689 | struct nvm_tgt_dev *dev = pblk->dev; | |
690 | struct nvm_geo *geo = &dev->geo; | |
691 | struct nvm_rq *rqd; | |
692 | struct ppa_addr *ppa_list; | |
693 | struct pblk_sec_meta *meta_list; | |
694 | struct pblk_recov_alloc p; | |
695 | void *data; | |
696 | dma_addr_t dma_ppa_list, dma_meta_list; | |
697 | int done, ret = 0; | |
698 | ||
699 | rqd = pblk_alloc_rqd(pblk, READ); | |
700 | if (IS_ERR(rqd)) | |
701 | return PTR_ERR(rqd); | |
702 | ||
703 | meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); | |
704 | if (!meta_list) { | |
705 | ret = -ENOMEM; | |
706 | goto free_rqd; | |
707 | } | |
708 | ||
709 | ppa_list = (void *)(meta_list) + pblk_dma_meta_size; | |
710 | dma_ppa_list = dma_meta_list + pblk_dma_meta_size; | |
711 | ||
712 | data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); | |
713 | if (!data) { | |
714 | ret = -ENOMEM; | |
715 | goto free_meta_list; | |
716 | } | |
717 | ||
718 | p.ppa_list = ppa_list; | |
719 | p.meta_list = meta_list; | |
720 | p.rqd = rqd; | |
721 | p.data = data; | |
722 | p.dma_ppa_list = dma_ppa_list; | |
723 | p.dma_meta_list = dma_meta_list; | |
724 | ||
725 | ret = pblk_recov_scan_oob(pblk, line, p, &done); | |
726 | if (ret) { | |
727 | pr_err("pblk: could not recover L2P from OOB\n"); | |
728 | goto out; | |
729 | } | |
730 | ||
731 | if (!done) { | |
732 | ret = pblk_recov_scan_all_oob(pblk, line, p); | |
733 | if (ret) { | |
734 | pr_err("pblk: could not recover L2P from OOB\n"); | |
735 | goto out; | |
736 | } | |
737 | } | |
738 | ||
739 | if (pblk_line_is_full(line)) | |
740 | pblk_line_recov_close(pblk, line); | |
741 | ||
742 | out: | |
743 | kfree(data); | |
744 | free_meta_list: | |
745 | nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); | |
746 | free_rqd: | |
747 | pblk_free_rqd(pblk, rqd, READ); | |
748 | ||
749 | return ret; | |
750 | } | |
751 | ||
752 | /* Insert lines ordered by sequence number (seq_num) on list */ | |
753 | static void pblk_recov_line_add_ordered(struct list_head *head, | |
754 | struct pblk_line *line) | |
755 | { | |
756 | struct pblk_line *t = NULL; | |
757 | ||
758 | list_for_each_entry(t, head, list) | |
759 | if (t->seq_nr > line->seq_nr) | |
760 | break; | |
761 | ||
762 | __list_add(&line->list, t->list.prev, &t->list); | |
763 | } | |
764 | ||
765 | struct pblk_line *pblk_recov_l2p(struct pblk *pblk) | |
766 | { | |
767 | struct nvm_tgt_dev *dev = pblk->dev; | |
768 | struct nvm_geo *geo = &dev->geo; | |
769 | struct pblk_line_meta *lm = &pblk->lm; | |
770 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
771 | struct pblk_line *line, *tline, *data_line = NULL; | |
dd2a4343 JG |
772 | struct pblk_smeta *smeta; |
773 | struct pblk_emeta *emeta; | |
774 | struct line_smeta *smeta_buf; | |
a4bd217b JG |
775 | int found_lines = 0, recovered_lines = 0, open_lines = 0; |
776 | int is_next = 0; | |
777 | int meta_line; | |
778 | int i, valid_uuid = 0; | |
779 | LIST_HEAD(recov_list); | |
780 | ||
781 | /* TODO: Implement FTL snapshot */ | |
782 | ||
783 | /* Scan recovery - takes place when FTL snapshot fails */ | |
784 | spin_lock(&l_mg->free_lock); | |
785 | meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES); | |
786 | set_bit(meta_line, &l_mg->meta_bitmap); | |
dd2a4343 JG |
787 | smeta = l_mg->sline_meta[meta_line]; |
788 | emeta = l_mg->eline_meta[meta_line]; | |
789 | smeta_buf = smeta->buf; | |
a4bd217b JG |
790 | spin_unlock(&l_mg->free_lock); |
791 | ||
792 | /* Order data lines using their sequence number */ | |
793 | for (i = 0; i < l_mg->nr_lines; i++) { | |
794 | u32 crc; | |
795 | ||
796 | line = &pblk->lines[i]; | |
797 | ||
798 | memset(smeta, 0, lm->smeta_len); | |
799 | line->smeta = smeta; | |
dd2a4343 | 800 | line->lun_bitmap = ((void *)(smeta_buf)) + |
a4bd217b JG |
801 | sizeof(struct line_smeta); |
802 | ||
803 | /* Lines that cannot be read are assumed as not written here */ | |
804 | if (pblk_line_read_smeta(pblk, line)) | |
805 | continue; | |
806 | ||
dd2a4343 JG |
807 | crc = pblk_calc_smeta_crc(pblk, smeta_buf); |
808 | if (le32_to_cpu(smeta_buf->crc) != crc) | |
a4bd217b JG |
809 | continue; |
810 | ||
dd2a4343 | 811 | if (le32_to_cpu(smeta_buf->header.identifier) != PBLK_MAGIC) |
a4bd217b JG |
812 | continue; |
813 | ||
dd2a4343 | 814 | if (le16_to_cpu(smeta_buf->header.version) != 1) { |
a4bd217b | 815 | pr_err("pblk: found incompatible line version %u\n", |
dd2a4343 | 816 | smeta_buf->header.version); |
a4bd217b JG |
817 | return ERR_PTR(-EINVAL); |
818 | } | |
819 | ||
820 | /* The first valid instance uuid is used for initialization */ | |
821 | if (!valid_uuid) { | |
dd2a4343 | 822 | memcpy(pblk->instance_uuid, smeta_buf->header.uuid, 16); |
a4bd217b JG |
823 | valid_uuid = 1; |
824 | } | |
825 | ||
dd2a4343 | 826 | if (memcmp(pblk->instance_uuid, smeta_buf->header.uuid, 16)) { |
a4bd217b JG |
827 | pr_debug("pblk: ignore line %u due to uuid mismatch\n", |
828 | i); | |
829 | continue; | |
830 | } | |
831 | ||
832 | /* Update line metadata */ | |
833 | spin_lock(&line->lock); | |
dd2a4343 JG |
834 | line->id = le32_to_cpu(smeta_buf->header.id); |
835 | line->type = le16_to_cpu(smeta_buf->header.type); | |
836 | line->seq_nr = le64_to_cpu(smeta_buf->seq_nr); | |
a4bd217b JG |
837 | spin_unlock(&line->lock); |
838 | ||
839 | /* Update general metadata */ | |
840 | spin_lock(&l_mg->free_lock); | |
841 | if (line->seq_nr >= l_mg->d_seq_nr) | |
842 | l_mg->d_seq_nr = line->seq_nr + 1; | |
843 | l_mg->nr_free_lines--; | |
844 | spin_unlock(&l_mg->free_lock); | |
845 | ||
846 | if (pblk_line_recov_alloc(pblk, line)) | |
847 | goto out; | |
848 | ||
849 | pblk_recov_line_add_ordered(&recov_list, line); | |
850 | found_lines++; | |
851 | pr_debug("pblk: recovering data line %d, seq:%llu\n", | |
dd2a4343 | 852 | line->id, smeta_buf->seq_nr); |
a4bd217b JG |
853 | } |
854 | ||
855 | if (!found_lines) { | |
856 | pblk_setup_uuid(pblk); | |
857 | ||
858 | spin_lock(&l_mg->free_lock); | |
859 | WARN_ON_ONCE(!test_and_clear_bit(meta_line, | |
860 | &l_mg->meta_bitmap)); | |
861 | spin_unlock(&l_mg->free_lock); | |
862 | ||
863 | goto out; | |
864 | } | |
865 | ||
866 | /* Verify closed blocks and recover this portion of L2P table*/ | |
867 | list_for_each_entry_safe(line, tline, &recov_list, list) { | |
868 | int off, nr_bb; | |
869 | ||
870 | recovered_lines++; | |
871 | /* Calculate where emeta starts based on the line bb */ | |
dd2a4343 | 872 | off = lm->sec_per_line - lm->emeta_sec[0]; |
a4bd217b JG |
873 | nr_bb = bitmap_weight(line->blk_bitmap, lm->blk_per_line); |
874 | off -= nr_bb * geo->sec_per_pl; | |
875 | ||
dd2a4343 | 876 | memset(&emeta->buf, 0, lm->emeta_len[0]); |
a4bd217b JG |
877 | line->emeta = emeta; |
878 | line->emeta_ssec = off; | |
879 | ||
dd2a4343 | 880 | if (pblk_line_read_emeta(pblk, line, line->emeta->buf)) { |
a4bd217b JG |
881 | pblk_recov_l2p_from_oob(pblk, line); |
882 | goto next; | |
883 | } | |
884 | ||
885 | if (pblk_recov_l2p_from_emeta(pblk, line)) | |
886 | pblk_recov_l2p_from_oob(pblk, line); | |
887 | ||
888 | next: | |
889 | if (pblk_line_is_full(line)) { | |
890 | struct list_head *move_list; | |
891 | ||
892 | spin_lock(&line->lock); | |
893 | line->state = PBLK_LINESTATE_CLOSED; | |
894 | move_list = pblk_line_gc_list(pblk, line); | |
895 | spin_unlock(&line->lock); | |
896 | ||
897 | spin_lock(&l_mg->gc_lock); | |
898 | list_move_tail(&line->list, move_list); | |
899 | spin_unlock(&l_mg->gc_lock); | |
900 | ||
901 | mempool_free(line->map_bitmap, pblk->line_meta_pool); | |
902 | line->map_bitmap = NULL; | |
903 | line->smeta = NULL; | |
904 | line->emeta = NULL; | |
905 | } else { | |
906 | if (open_lines > 1) | |
907 | pr_err("pblk: failed to recover L2P\n"); | |
908 | ||
909 | open_lines++; | |
910 | line->meta_line = meta_line; | |
911 | data_line = line; | |
912 | } | |
913 | } | |
914 | ||
915 | spin_lock(&l_mg->free_lock); | |
916 | if (!open_lines) { | |
917 | WARN_ON_ONCE(!test_and_clear_bit(meta_line, | |
918 | &l_mg->meta_bitmap)); | |
919 | pblk_line_replace_data(pblk); | |
920 | } else { | |
921 | /* Allocate next line for preparation */ | |
922 | l_mg->data_next = pblk_line_get(pblk); | |
923 | if (l_mg->data_next) { | |
924 | l_mg->data_next->seq_nr = l_mg->d_seq_nr++; | |
925 | l_mg->data_next->type = PBLK_LINETYPE_DATA; | |
926 | is_next = 1; | |
927 | } | |
928 | } | |
929 | spin_unlock(&l_mg->free_lock); | |
930 | ||
931 | if (is_next) { | |
932 | pblk_line_erase(pblk, l_mg->data_next); | |
933 | pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next); | |
934 | } | |
935 | ||
936 | out: | |
937 | if (found_lines != recovered_lines) | |
938 | pr_err("pblk: failed to recover all found lines %d/%d\n", | |
939 | found_lines, recovered_lines); | |
940 | ||
941 | return data_line; | |
942 | } | |
943 | ||
944 | /* | |
945 | * Pad until smeta can be read on current data line | |
946 | */ | |
947 | void pblk_recov_pad(struct pblk *pblk) | |
948 | { | |
949 | struct nvm_tgt_dev *dev = pblk->dev; | |
950 | struct nvm_geo *geo = &dev->geo; | |
951 | struct pblk_line *line; | |
952 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
953 | struct nvm_rq *rqd; | |
954 | struct pblk_recov_alloc p; | |
955 | struct ppa_addr *ppa_list; | |
956 | struct pblk_sec_meta *meta_list; | |
957 | void *data; | |
958 | dma_addr_t dma_ppa_list, dma_meta_list; | |
959 | ||
960 | spin_lock(&l_mg->free_lock); | |
961 | line = l_mg->data_line; | |
962 | spin_unlock(&l_mg->free_lock); | |
963 | ||
964 | rqd = pblk_alloc_rqd(pblk, READ); | |
965 | if (IS_ERR(rqd)) | |
966 | return; | |
967 | ||
968 | meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL, &dma_meta_list); | |
969 | if (!meta_list) | |
970 | goto free_rqd; | |
971 | ||
972 | ppa_list = (void *)(meta_list) + pblk_dma_meta_size; | |
973 | dma_ppa_list = dma_meta_list + pblk_dma_meta_size; | |
974 | ||
975 | data = kcalloc(pblk->max_write_pgs, geo->sec_size, GFP_KERNEL); | |
976 | if (!data) | |
977 | goto free_meta_list; | |
978 | ||
979 | p.ppa_list = ppa_list; | |
980 | p.meta_list = meta_list; | |
981 | p.rqd = rqd; | |
982 | p.data = data; | |
983 | p.dma_ppa_list = dma_ppa_list; | |
984 | p.dma_meta_list = dma_meta_list; | |
985 | ||
986 | if (pblk_recov_pad_oob(pblk, line, p, line->left_msecs)) { | |
987 | pr_err("pblk: Tear down padding failed\n"); | |
988 | goto free_data; | |
989 | } | |
990 | ||
991 | pblk_line_close(pblk, line); | |
992 | ||
993 | free_data: | |
994 | kfree(data); | |
995 | free_meta_list: | |
996 | nvm_dev_dma_free(dev->parent, meta_list, dma_meta_list); | |
997 | free_rqd: | |
998 | pblk_free_rqd(pblk, rqd, READ); | |
999 | } |