/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * pblk-read.c - pblk's read path
 */

#include "pblk.h"

/*
 * There is no guarantee that the value read from cache has not been updated and
 * resides at another location in the cache. We guarantee though that if the
 * value is read from the cache, it belongs to the mapped lba. In order to
 * guarantee that writes and reads are ordered, a flush must be issued.
 */
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
				sector_t lba, struct ppa_addr ppa,
				int bio_iter)
{
#ifdef CONFIG_NVM_DEBUG
	/* Callers must ensure that the ppa points to a cache address */
	BUG_ON(pblk_ppa_empty(ppa));
	BUG_ON(!pblk_addr_in_cache(ppa));
#endif

	return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba,
					pblk_addr_to_cacheline(ppa), bio_iter);
}

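/*
 * Map a multi-sector read through the L2P table. Sectors served from the
 * write buffer (or unmapped ones) are marked in @read_bitmap; the rest are
 * gathered in rqd->ppa_list to be read from the device.
 */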
static void pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
				 unsigned long *read_bitmap)
{
	struct bio *bio = rqd->bio;
	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
	sector_t blba = pblk_get_lba(bio);
	int nr_secs = rqd->nr_ppas;
	int advanced_bio = 0;
	int i, j = 0;

	/* logic error: lba out-of-bounds. Ignore read request */
	if (blba + nr_secs >= pblk->rl.nr_secs) {
		WARN(1, "pblk: read lbas out of bounds\n");
		return;
	}

	pblk_lookup_l2p_seq(pblk, ppas, blba, nr_secs);

	for (i = 0; i < nr_secs; i++) {
		struct ppa_addr p = ppas[i];
		sector_t lba = blba + i;

retry:
		if (pblk_ppa_empty(p)) {
			WARN_ON(test_and_set_bit(i, read_bitmap));
			continue;
		}

		/* Try to read from write buffer. The address is later checked
		 * on the write buffer to prevent retrieving overwritten data.
		 */
		if (pblk_addr_in_cache(p)) {
			if (!pblk_read_from_cache(pblk, bio, lba, p, i)) {
				pblk_lookup_l2p_seq(pblk, &p, lba, 1);
				goto retry;
			}
			WARN_ON(test_and_set_bit(i, read_bitmap));
			advanced_bio = 1;
		} else {
			/* Read non-cached sectors from media */
			rqd->ppa_list[j++] = p;
		}

		if (advanced_bio)
			bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
}

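/* Set the read mode flags and submit the request to the device */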
static int pblk_submit_read_io(struct pblk *pblk, struct nvm_rq *rqd)
{
	int err;

	rqd->flags = pblk_set_read_mode(pblk);

	err = pblk_submit_io(pblk, rqd);
	if (err)
		return NVM_IO_ERR;

	return NVM_IO_OK;
}

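/*
 * Completion path for device reads: log device errors, free the ppa list,
 * complete the original bio when the read was issued on an internal clone,
 * and release the request.
 */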
static void pblk_end_io_read(struct nvm_rq *rqd)
{
	struct pblk *pblk = rqd->private;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);
	struct bio *bio = rqd->bio;

	if (rqd->error)
		pblk_log_read_err(pblk, rqd);
#ifdef CONFIG_NVM_DEBUG
	else
		WARN_ONCE(bio->bi_error, "pblk: corrupted read error\n");
#endif

	if (rqd->nr_ppas > 1)
		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);

	bio_put(bio);
	if (r_ctx->orig_bio) {
#ifdef CONFIG_NVM_DEBUG
		WARN_ONCE(r_ctx->orig_bio->bi_error,
						"pblk: corrupted read bio\n");
#endif
		bio_endio(r_ctx->orig_bio);
		bio_put(r_ctx->orig_bio);
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
	atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
#endif

	pblk_free_rqd(pblk, rqd, READ);
}

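/*
 * The request was only partially satisfied from the write buffer. Read the
 * remaining "holes" from the device into a temporary bio, wait for it to
 * complete and copy the data into the original bio at the matching offsets.
 */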
static int pblk_fill_partial_read_bio(struct pblk *pblk, struct nvm_rq *rqd,
				      unsigned int bio_init_idx,
				      unsigned long *read_bitmap)
{
	struct bio *new_bio, *bio = rqd->bio;
	struct bio_vec src_bv, dst_bv;
	void *ppa_ptr = NULL;
	void *src_p, *dst_p;
	dma_addr_t dma_ppa_list = 0;
	int nr_secs = rqd->nr_ppas;
	int nr_holes = nr_secs - bitmap_weight(read_bitmap, nr_secs);
	int i, ret, hole;
	DECLARE_COMPLETION_ONSTACK(wait);

	new_bio = bio_alloc(GFP_KERNEL, nr_holes);
	if (!new_bio) {
		pr_err("pblk: could not alloc read bio\n");
		return NVM_IO_ERR;
	}

	if (pblk_bio_add_pages(pblk, new_bio, GFP_KERNEL, nr_holes))
		goto err;

	if (nr_holes != new_bio->bi_vcnt) {
		pr_err("pblk: malformed bio\n");
		goto err;
	}

	new_bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(new_bio, REQ_OP_READ, 0);
	new_bio->bi_private = &wait;
	new_bio->bi_end_io = pblk_end_bio_sync;

	rqd->bio = new_bio;
	rqd->nr_ppas = nr_holes;
	rqd->end_io = NULL;

	/* rqd->ppa_addr and rqd->dma_ppa_list share a union; save the list
	 * before switching to a single address for a one-hole read.
	 */
	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
		ppa_ptr = rqd->ppa_list;
		dma_ppa_list = rqd->dma_ppa_list;
		rqd->ppa_addr = rqd->ppa_list[0];
	}

	ret = pblk_submit_read_io(pblk, rqd);
	if (ret) {
		pr_err("pblk: read IO submission failed\n");
		/* the err path frees the pages and drops the bio reference */
		goto err;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: partial read I/O timed out\n");
	}

	if (rqd->error) {
		atomic_long_inc(&pblk->read_failed);
#ifdef CONFIG_NVM_DEBUG
		pblk_print_failed_rqd(pblk, rqd, rqd->error);
#endif
	}

	if (unlikely(nr_secs > 1 && nr_holes == 1)) {
		rqd->ppa_list = ppa_ptr;
		rqd->dma_ppa_list = dma_ppa_list;
	}

	/* Fill the holes in the original bio */
	i = 0;
	hole = find_first_zero_bit(read_bitmap, nr_secs);
	do {
		src_bv = new_bio->bi_io_vec[i++];
		dst_bv = bio->bi_io_vec[bio_init_idx + hole];

		src_p = kmap_atomic(src_bv.bv_page);
		dst_p = kmap_atomic(dst_bv.bv_page);

		memcpy(dst_p + dst_bv.bv_offset,
			src_p + src_bv.bv_offset,
			PBLK_EXPOSED_PAGE_SIZE);

		/* kmap_atomic() mappings are stacked: unmap in reverse order */
		kunmap_atomic(dst_p);
		kunmap_atomic(src_p);

		mempool_free(src_bv.bv_page, pblk->page_pool);

		hole = find_next_zero_bit(read_bitmap, nr_secs, hole + 1);
	} while (hole < nr_secs);

	bio_put(new_bio);

	/* Complete the original bio and associated request */
	rqd->bio = bio;
	rqd->nr_ppas = nr_secs;
	rqd->private = pblk;

	bio_endio(bio);
	pblk_end_io_read(rqd);
	return NVM_IO_OK;

err:
	/* Free the pages allocated for the internal bio and release it; the
	 * pages belong to new_bio, not to the original bio.
	 */
	pblk_bio_free_pages(pblk, new_bio, 0, new_bio->bi_vcnt);
	bio_put(new_bio);

	rqd->bio = bio;
	rqd->private = pblk;
	pblk_end_io_read(rqd);
	return NVM_IO_ERR;
}

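/*
 * Single-sector counterpart of pblk_read_ppalist_rq(): mark the sector in
 * @read_bitmap if it is served from the write buffer (or unmapped), or set
 * up its physical address for a device read.
 */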
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd,
			 unsigned long *read_bitmap)
{
	struct bio *bio = rqd->bio;
	struct ppa_addr ppa;
	sector_t lba = pblk_get_lba(bio);

	/* logic error: lba out-of-bounds. Ignore read request */
	if (lba >= pblk->rl.nr_secs) {
		WARN(1, "pblk: read lba out of bounds\n");
		return;
	}

	pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);

#ifdef CONFIG_NVM_DEBUG
	atomic_long_inc(&pblk->inflight_reads);
#endif

retry:
	if (pblk_ppa_empty(ppa)) {
		WARN_ON(test_and_set_bit(0, read_bitmap));
		return;
	}

	/* Try to read from write buffer. The address is later checked on the
	 * write buffer to prevent retrieving overwritten data.
	 */
	if (pblk_addr_in_cache(ppa)) {
		if (!pblk_read_from_cache(pblk, bio, lba, ppa, 0)) {
			pblk_lookup_l2p_seq(pblk, &ppa, lba, 1);
			goto retry;
		}
		WARN_ON(test_and_set_bit(0, read_bitmap));
	} else {
		rqd->ppa_addr = ppa;
	}
}

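/*
 * Entry point for the read path. Depending on how much of the request the
 * write buffer satisfied, the request is either completed immediately (all
 * sectors cached), submitted to the device as is (no sectors cached), or
 * split into a partial read (mixed).
 */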
int pblk_submit_read(struct pblk *pblk, struct bio *bio)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	unsigned int nr_secs = pblk_get_secs(bio);
	struct nvm_rq *rqd;
	unsigned long read_bitmap; /* Max 64 ppas per request */
	unsigned int bio_init_idx;
	int ret = NVM_IO_ERR;

	if (nr_secs > PBLK_MAX_REQ_ADDRS)
		return NVM_IO_ERR;

	bitmap_zero(&read_bitmap, nr_secs);

	rqd = pblk_alloc_rqd(pblk, READ);
	if (IS_ERR(rqd)) {
		pr_err_ratelimited("pblk: not able to alloc rqd\n");
		return NVM_IO_ERR;
	}

	rqd->opcode = NVM_OP_PREAD;
	rqd->bio = bio;
	rqd->nr_ppas = nr_secs;
	rqd->private = pblk;
	rqd->end_io = pblk_end_io_read;

	/* Save the index for this bio's start. This is needed in case
	 * we need to fill a partial read.
	 */
	bio_init_idx = pblk_get_bi_idx(bio);

	if (nr_secs > 1) {
		rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
						&rqd->dma_ppa_list);
		if (!rqd->ppa_list) {
			pr_err("pblk: not able to allocate ppa list\n");
			goto fail_rqd_free;
		}

		pblk_read_ppalist_rq(pblk, rqd, &read_bitmap);
	} else {
		pblk_read_rq(pblk, rqd, &read_bitmap);
	}

	bio_get(bio);
	if (bitmap_full(&read_bitmap, nr_secs)) {
		bio_endio(bio);
		pblk_end_io_read(rqd);
		return NVM_IO_OK;
	}

	/* All sectors are to be read from the device */
	if (bitmap_empty(&read_bitmap, rqd->nr_ppas)) {
		struct bio *int_bio = NULL;
		struct pblk_r_ctx *r_ctx = nvm_rq_to_pdu(rqd);

		/* Clone read bio to deal with read errors internally */
		int_bio = bio_clone_bioset(bio, GFP_KERNEL, fs_bio_set);
		if (!int_bio) {
			pr_err("pblk: could not clone read bio\n");
			return NVM_IO_ERR;
		}

		rqd->bio = int_bio;
		r_ctx->orig_bio = bio;

		ret = pblk_submit_read_io(pblk, rqd);
		if (ret) {
			pr_err("pblk: read IO submission failed\n");
			if (int_bio)
				bio_put(int_bio);
			return ret;
		}

		return NVM_IO_OK;
	}

	/* The read bio request could be partially filled by the write buffer,
	 * but there are some holes that need to be read from the drive.
	 */
	ret = pblk_fill_partial_read_bio(pblk, rqd, bio_init_idx, &read_bitmap);
	if (ret) {
		pr_err("pblk: failed to perform partial read\n");
		return ret;
	}

	return NVM_IO_OK;

fail_rqd_free:
	pblk_free_rqd(pblk, rqd, READ);
	return ret;
}

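/*
 * Gather the physical addresses for a multi-sector GC read. Sectors whose
 * mapping has moved off @line (updated in cache or remapped) or has been
 * emptied are skipped and their lba_list entries set to ADDR_EMPTY.
 */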
static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
			      struct pblk_line *line, u64 *lba_list,
			      unsigned int nr_secs)
{
	struct ppa_addr ppas[PBLK_MAX_REQ_ADDRS];
	int valid_secs = 0;
	int i;

	pblk_lookup_l2p_rand(pblk, ppas, lba_list, nr_secs);

	for (i = 0; i < nr_secs; i++) {
		if (pblk_addr_in_cache(ppas[i]) || ppas[i].g.blk != line->id ||
						pblk_ppa_empty(ppas[i])) {
			lba_list[i] = ADDR_EMPTY;
			continue;
		}

		rqd->ppa_list[valid_secs++] = ppas[i];
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(valid_secs, &pblk->inflight_reads);
#endif
	return valid_secs;
}

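/* Single-sector counterpart of read_ppalist_rq_gc() */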
static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
		      struct pblk_line *line, sector_t lba)
{
	struct ppa_addr ppa;
	int valid_secs = 0;

	if (lba == ADDR_EMPTY)
		goto out;

	/* logic error: lba out-of-bounds */
	if (lba >= pblk->rl.nr_secs) {
		WARN(1, "pblk: read lba out of bounds\n");
		goto out;
	}

	spin_lock(&pblk->trans_lock);
	ppa = pblk_trans_map_get(pblk, lba);
	spin_unlock(&pblk->trans_lock);

	/* Ignore the sector if its mapping has been updated in the meantime */
	if (pblk_addr_in_cache(ppa) || ppa.g.blk != line->id ||
						pblk_ppa_empty(ppa))
		goto out;

	rqd->ppa_addr = ppa;
	valid_secs = 1;

#ifdef CONFIG_NVM_DEBUG
	atomic_long_inc(&pblk->inflight_reads);
#endif

out:
	return valid_secs;
}

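/*
 * Synchronous read of still-valid sectors on behalf of the garbage collector.
 * On return, *secs_to_gc holds the number of sectors that remained valid on
 * @line and were read into the @data buffer.
 */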
int pblk_submit_read_gc(struct pblk *pblk, u64 *lba_list, void *data,
			unsigned int nr_secs, unsigned int *secs_to_gc,
			struct pblk_line *line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *q = dev->q;
	struct bio *bio;
	struct nvm_rq rqd;
	int ret, data_len;
	DECLARE_COMPLETION_ONSTACK(wait);

	memset(&rqd, 0, sizeof(struct nvm_rq));

	if (nr_secs > 1) {
		rqd.ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&rqd.dma_ppa_list);
		if (!rqd.ppa_list)
			return NVM_IO_ERR;

		*secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, line, lba_list,
								nr_secs);
		if (*secs_to_gc == 1) {
			struct ppa_addr ppa;

			ppa = rqd.ppa_list[0];
			nvm_dev_dma_free(dev->parent, rqd.ppa_list,
							rqd.dma_ppa_list);
			rqd.ppa_addr = ppa;
		}
	} else {
		*secs_to_gc = read_rq_gc(pblk, &rqd, line, lba_list[0]);
	}

	if (!(*secs_to_gc)) {
		/* Nothing valid to collect; free the ppa list if allocated */
		if (nr_secs > 1)
			nvm_dev_dma_free(dev->parent, rqd.ppa_list,
							rqd.dma_ppa_list);
		return NVM_IO_OK;
	}

	data_len = (*secs_to_gc) * geo->sec_size;
	bio = bio_map_kern(q, data, data_len, GFP_KERNEL);
	if (IS_ERR(bio)) {
		pr_err("pblk: could not allocate GC bio (%lu)\n", PTR_ERR(bio));
		goto err_free_dma;
	}

	bio->bi_iter.bi_sector = 0; /* internal bio */
	bio_set_op_attrs(bio, REQ_OP_READ, 0);

	rqd.opcode = NVM_OP_PREAD;
	rqd.end_io = pblk_end_io_sync;
	rqd.private = &wait;
	rqd.nr_ppas = *secs_to_gc;
	rqd.bio = bio;

	ret = pblk_submit_read_io(pblk, &rqd);
	if (ret) {
		bio_endio(bio);
		pr_err("pblk: GC read request failed\n");
		goto err_free_dma;
	}

	if (!wait_for_completion_io_timeout(&wait,
				msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
		pr_err("pblk: GC read I/O timed out\n");
	}

	if (rqd.error) {
		atomic_long_inc(&pblk->read_failed_gc);
#ifdef CONFIG_NVM_DEBUG
		pblk_print_failed_rqd(pblk, &rqd, rqd.error);
#endif
	}

#ifdef CONFIG_NVM_DEBUG
	atomic_long_add(*secs_to_gc, &pblk->sync_reads);
	atomic_long_add(*secs_to_gc, &pblk->recov_gc_reads);
	atomic_long_sub(*secs_to_gc, &pblk->inflight_reads);
#endif

	if (rqd.nr_ppas > 1)
		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
	return NVM_IO_OK;

err_free_dma:
	/* The ppa list exists unless the request was single-sector or was
	 * collapsed to a single valid sector (and freed) above.
	 */
	if (nr_secs > 1 && *secs_to_gc != 1)
		nvm_dev_dma_free(dev->parent, rqd.ppa_list, rqd.dma_ppa_list);
	return NVM_IO_ERR;
}