lightnvm: pblk: add lock assertions on helpers
[linux-2.6-block.git] / drivers / lightnvm / pblk-core.c
1/*
2 * Copyright (C) 2016 CNEX Labs
3 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
4 * Matias Bjorling <matias@cnexlabs.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License version
8 * 2 as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * pblk-core.c - pblk's core functionality
16 *
17 */
18
19#include "pblk.h"
20
21static void pblk_mark_bb(struct pblk *pblk, struct pblk_line *line,
22 struct ppa_addr *ppa)
23{
24 struct nvm_tgt_dev *dev = pblk->dev;
25 struct nvm_geo *geo = &dev->geo;
26 int pos = pblk_dev_ppa_to_pos(geo, *ppa);
27
28 pr_debug("pblk: erase failed: line:%d, pos:%d\n", line->id, pos);
29 atomic_long_inc(&pblk->erase_failed);
30
31 atomic_dec(&line->blk_in_line);
32 if (test_and_set_bit(pos, line->blk_bitmap))
33 pr_err("pblk: attempted to erase bb: line:%d, pos:%d\n",
34 line->id, pos);
35
36 pblk_line_run_ws(pblk, NULL, ppa, pblk_line_mark_bb);
37}
38
39static void __pblk_end_io_erase(struct pblk *pblk, struct nvm_rq *rqd)
40{
41 struct pblk_line *line;
42
43 line = &pblk->lines[pblk_dev_ppa_to_line(rqd->ppa_addr)];
44 atomic_dec(&line->left_seblks);
45
46 if (rqd->error) {
47 struct ppa_addr *ppa;
48
49 ppa = kmalloc(sizeof(struct ppa_addr), GFP_ATOMIC);
50 if (!ppa)
51 return;
52
53 *ppa = rqd->ppa_addr;
54 pblk_mark_bb(pblk, line, ppa);
55 }
56}
57
58/* Erase completion assumes that only one block is erased at a time */
59static void pblk_end_io_erase(struct nvm_rq *rqd)
60{
61 struct pblk *pblk = rqd->private;
62
63 __pblk_end_io_erase(pblk, rqd);
64 mempool_free(rqd, pblk->g_rq_pool);
65}
66
67void __pblk_map_invalidate(struct pblk *pblk, struct pblk_line *line,
68 u64 paddr)
69{
70 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
71 struct list_head *move_list = NULL;
72
73 /* Lines being reclaimed (GC'ed) cannot be invalidated. Before the L2P
74 * table is modified with reclaimed sectors, a check is done to ensure
75 * that newer updates are not overwritten.
76 */
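	/*
	 * Note the second line->lock/gc_lock section further down: the line
	 * state can change once line->lock is dropped, so it is re-checked
	 * under l_mg->gc_lock before the line is moved to a new GC list.
	 */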
77 spin_lock(&line->lock);
78 if (line->state == PBLK_LINESTATE_GC ||
79 line->state == PBLK_LINESTATE_FREE) {
80 spin_unlock(&line->lock);
81 return;
82 }
83
84 if (test_and_set_bit(paddr, line->invalid_bitmap)) {
85 WARN_ONCE(1, "pblk: double invalidate\n");
86 spin_unlock(&line->lock);
87 return;
88 }
89 le32_add_cpu(line->vsc, -1);
90
91 if (line->state == PBLK_LINESTATE_CLOSED)
92 move_list = pblk_line_gc_list(pblk, line);
93 spin_unlock(&line->lock);
94
95 if (move_list) {
96 spin_lock(&l_mg->gc_lock);
97 spin_lock(&line->lock);
98 /* Prevent moving a line that has just been chosen for GC */
99 if (line->state == PBLK_LINESTATE_GC ||
100 line->state == PBLK_LINESTATE_FREE) {
101 spin_unlock(&line->lock);
102 spin_unlock(&l_mg->gc_lock);
103 return;
104 }
105 spin_unlock(&line->lock);
106
107 list_move_tail(&line->list, move_list);
108 spin_unlock(&l_mg->gc_lock);
109 }
110}
111
112void pblk_map_invalidate(struct pblk *pblk, struct ppa_addr ppa)
113{
114 struct pblk_line *line;
115 u64 paddr;
116 int line_id;
117
118#ifdef CONFIG_NVM_DEBUG
119 /* Callers must ensure that the ppa points to a device address */
120 BUG_ON(pblk_addr_in_cache(ppa));
121 BUG_ON(pblk_ppa_empty(ppa));
122#endif
123
124 line_id = pblk_tgt_ppa_to_line(ppa);
125 line = &pblk->lines[line_id];
126 paddr = pblk_dev_ppa_to_line_addr(pblk, ppa);
127
128 __pblk_map_invalidate(pblk, line, paddr);
129}
130
131static void pblk_invalidate_range(struct pblk *pblk, sector_t slba,
132 unsigned int nr_secs)
133{
134 sector_t lba;
135
136 spin_lock(&pblk->trans_lock);
137 for (lba = slba; lba < slba + nr_secs; lba++) {
138 struct ppa_addr ppa;
139
140 ppa = pblk_trans_map_get(pblk, lba);
141
142 if (!pblk_addr_in_cache(ppa) && !pblk_ppa_empty(ppa))
143 pblk_map_invalidate(pblk, ppa);
144
145 pblk_ppa_set_empty(&ppa);
146 pblk_trans_map_set(pblk, lba, ppa);
147 }
148 spin_unlock(&pblk->trans_lock);
149}
150
151struct nvm_rq *pblk_alloc_rqd(struct pblk *pblk, int rw)
152{
153 mempool_t *pool;
154 struct nvm_rq *rqd;
155 int rq_size;
156
157 if (rw == WRITE) {
158 pool = pblk->w_rq_pool;
159 rq_size = pblk_w_rq_size;
160 } else {
161 pool = pblk->g_rq_pool;
162 rq_size = pblk_g_rq_size;
163 }
164
165 rqd = mempool_alloc(pool, GFP_KERNEL);
166 memset(rqd, 0, rq_size);
167
168 return rqd;
169}
170
171void pblk_free_rqd(struct pblk *pblk, struct nvm_rq *rqd, int rw)
172{
173 mempool_t *pool;
174
175 if (rw == WRITE)
176 pool = pblk->w_rq_pool;
177 else
178 pool = pblk->g_rq_pool;
179
180 mempool_free(rqd, pool);
181}
182
183void pblk_bio_free_pages(struct pblk *pblk, struct bio *bio, int off,
184 int nr_pages)
185{
186 struct bio_vec bv;
187 int i;
188
189 WARN_ON(off + nr_pages != bio->bi_vcnt);
190
191 bio_advance(bio, off * PBLK_EXPOSED_PAGE_SIZE);
192 for (i = off; i < nr_pages + off; i++) {
193 bv = bio->bi_io_vec[i];
194 mempool_free(bv.bv_page, pblk->page_pool);
195 }
196}
197
198int pblk_bio_add_pages(struct pblk *pblk, struct bio *bio, gfp_t flags,
199 int nr_pages)
200{
201 struct request_queue *q = pblk->dev->q;
202 struct page *page;
203 int i, ret;
204
205 for (i = 0; i < nr_pages; i++) {
206 page = mempool_alloc(pblk->page_pool, flags);
207 if (!page)
208 goto err;
209
210 ret = bio_add_pc_page(q, bio, page, PBLK_EXPOSED_PAGE_SIZE, 0);
211 if (ret != PBLK_EXPOSED_PAGE_SIZE) {
212 pr_err("pblk: could not add page to bio\n");
213 mempool_free(page, pblk->page_pool);
214 goto err;
215 }
216 }
217
218 return 0;
219err:
220 pblk_bio_free_pages(pblk, bio, 0, i - 1);
221 return -1;
222}
223
224static void pblk_write_kick(struct pblk *pblk)
225{
226 wake_up_process(pblk->writer_ts);
227 mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(1000));
228}
229
230void pblk_write_timer_fn(unsigned long data)
231{
232 struct pblk *pblk = (struct pblk *)data;
233
234 /* kick the write thread every tick to flush outstanding data */
235 pblk_write_kick(pblk);
236}
237
238void pblk_write_should_kick(struct pblk *pblk)
239{
240 unsigned int secs_avail = pblk_rb_read_count(&pblk->rwb);
241
242 if (secs_avail >= pblk->min_write_pgs)
243 pblk_write_kick(pblk);
244}
245
246void pblk_end_bio_sync(struct bio *bio)
247{
248 struct completion *waiting = bio->bi_private;
249
250 complete(waiting);
251}
252
253void pblk_end_io_sync(struct nvm_rq *rqd)
254{
255 struct completion *waiting = rqd->private;
256
257 complete(waiting);
258}
259
260void pblk_flush_writer(struct pblk *pblk)
261{
262 struct bio *bio;
263 int ret;
264 DECLARE_COMPLETION_ONSTACK(wait);
265
266 bio = bio_alloc(GFP_KERNEL, 1);
267 if (!bio)
268 return;
269
270 bio->bi_iter.bi_sector = 0; /* internal bio */
271 bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_OP_FLUSH);
272 bio->bi_private = &wait;
273 bio->bi_end_io = pblk_end_bio_sync;
274
275 ret = pblk_write_to_cache(pblk, bio, 0);
276 if (ret == NVM_IO_OK) {
277 if (!wait_for_completion_io_timeout(&wait,
278 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
279 pr_err("pblk: flush cache timed out\n");
280 }
281 } else if (ret != NVM_IO_DONE) {
282 pr_err("pblk: tear down bio failed\n");
283 }
284
285 if (bio->bi_status)
286 pr_err("pblk: flush sync write failed (%u)\n", bio->bi_status);
287
288 bio_put(bio);
289}
290
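/*
 * Pick the GC list for a line based on its valid sector count (vsc):
 * no valid sectors -> gc_full_list, vsc < mid_thrs -> gc_high_list,
 * vsc < high_thrs -> gc_mid_list, vsc < sec_in_line -> gc_low_list and
 * vsc == sec_in_line -> gc_empty_list. Any other value means the vsc is
 * corrupt. Callers must hold line->lock, as asserted below.
 */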
291struct list_head *pblk_line_gc_list(struct pblk *pblk, struct pblk_line *line)
292{
293 struct pblk_line_meta *lm = &pblk->lm;
294 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
295 struct list_head *move_list = NULL;
296 int vsc = le32_to_cpu(*line->vsc);
297
298 lockdep_assert_held(&line->lock);
299
300 if (!vsc) {
301 if (line->gc_group != PBLK_LINEGC_FULL) {
302 line->gc_group = PBLK_LINEGC_FULL;
303 move_list = &l_mg->gc_full_list;
304 }
305 } else if (vsc < lm->mid_thrs) {
306 if (line->gc_group != PBLK_LINEGC_HIGH) {
307 line->gc_group = PBLK_LINEGC_HIGH;
308 move_list = &l_mg->gc_high_list;
309 }
310 } else if (vsc < lm->high_thrs) {
311 if (line->gc_group != PBLK_LINEGC_MID) {
312 line->gc_group = PBLK_LINEGC_MID;
313 move_list = &l_mg->gc_mid_list;
314 }
315 } else if (vsc < line->sec_in_line) {
316 if (line->gc_group != PBLK_LINEGC_LOW) {
317 line->gc_group = PBLK_LINEGC_LOW;
318 move_list = &l_mg->gc_low_list;
319 }
320 } else if (vsc == line->sec_in_line) {
321 if (line->gc_group != PBLK_LINEGC_EMPTY) {
322 line->gc_group = PBLK_LINEGC_EMPTY;
323 move_list = &l_mg->gc_empty_list;
324 }
325 } else {
326 line->state = PBLK_LINESTATE_CORRUPT;
327 line->gc_group = PBLK_LINEGC_NONE;
328 move_list = &l_mg->corrupt_list;
329 pr_err("pblk: corrupted vsc for line %d, vsc:%d (%d/%d/%d)\n",
330 line->id, vsc,
331 line->sec_in_line,
332 lm->high_thrs, lm->mid_thrs);
333 }
334
335 return move_list;
336}
337
338void pblk_discard(struct pblk *pblk, struct bio *bio)
339{
340 sector_t slba = pblk_get_lba(bio);
341 sector_t nr_secs = pblk_get_secs(bio);
342
343 pblk_invalidate_range(pblk, slba, nr_secs);
344}
345
346struct ppa_addr pblk_get_lba_map(struct pblk *pblk, sector_t lba)
347{
348 struct ppa_addr ppa;
349
350 spin_lock(&pblk->trans_lock);
351 ppa = pblk_trans_map_get(pblk, lba);
352 spin_unlock(&pblk->trans_lock);
353
354 return ppa;
355}
356
357void pblk_log_write_err(struct pblk *pblk, struct nvm_rq *rqd)
358{
359 atomic_long_inc(&pblk->write_failed);
360#ifdef CONFIG_NVM_DEBUG
361 pblk_print_failed_rqd(pblk, rqd, rqd->error);
362#endif
363}
364
365void pblk_log_read_err(struct pblk *pblk, struct nvm_rq *rqd)
366{
367 /* Empty page read is not necessarily an error (e.g., L2P recovery) */
368 if (rqd->error == NVM_RSP_ERR_EMPTYPAGE) {
369 atomic_long_inc(&pblk->read_empty);
370 return;
371 }
372
373 switch (rqd->error) {
374 case NVM_RSP_WARN_HIGHECC:
375 atomic_long_inc(&pblk->read_high_ecc);
376 break;
377 case NVM_RSP_ERR_FAILECC:
378 case NVM_RSP_ERR_FAILCRC:
379 atomic_long_inc(&pblk->read_failed);
380 break;
381 default:
382 pr_err("pblk: unknown read error:%d\n", rqd->error);
383 }
384#ifdef CONFIG_NVM_DEBUG
385 pblk_print_failed_rqd(pblk, rqd, rqd->error);
386#endif
387}
388
389void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write)
390{
391 pblk->sec_per_write = sec_per_write;
392}
393
394int pblk_submit_io(struct pblk *pblk, struct nvm_rq *rqd)
395{
396 struct nvm_tgt_dev *dev = pblk->dev;
397
398#ifdef CONFIG_NVM_DEBUG
399 struct ppa_addr *ppa_list;
400
401 ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
402 if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
403 WARN_ON(1);
404 return -EINVAL;
405 }
406
407 if (rqd->opcode == NVM_OP_PWRITE) {
408 struct pblk_line *line;
409 struct ppa_addr ppa;
410 int i;
411
412 for (i = 0; i < rqd->nr_ppas; i++) {
413 ppa = ppa_list[i];
414 line = &pblk->lines[pblk_dev_ppa_to_line(ppa)];
415
416 spin_lock(&line->lock);
417 if (line->state != PBLK_LINESTATE_OPEN) {
418 pr_err("pblk: bad ppa: line:%d,state:%d\n",
419 line->id, line->state);
420 WARN_ON(1);
421 spin_unlock(&line->lock);
422 return -EINVAL;
423 }
424 spin_unlock(&line->lock);
425 }
426 }
427#endif
428 return nvm_submit_io(dev, rqd);
429}
430
431struct bio *pblk_bio_map_addr(struct pblk *pblk, void *data,
432 unsigned int nr_secs, unsigned int len,
433 gfp_t gfp_mask)
434{
435 struct nvm_tgt_dev *dev = pblk->dev;
436 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
437 void *kaddr = data;
438 struct page *page;
439 struct bio *bio;
440 int i, ret;
441
442 if (l_mg->emeta_alloc_type == PBLK_KMALLOC_META)
443 return bio_map_kern(dev->q, kaddr, len, gfp_mask);
444
445 bio = bio_kmalloc(gfp_mask, nr_secs);
446 if (!bio)
447 return ERR_PTR(-ENOMEM);
448
449 for (i = 0; i < nr_secs; i++) {
450 page = vmalloc_to_page(kaddr);
451 if (!page) {
452 pr_err("pblk: could not map vmalloc bio\n");
453 bio_put(bio);
454 bio = ERR_PTR(-ENOMEM);
455 goto out;
456 }
457
458 ret = bio_add_pc_page(dev->q, bio, page, PAGE_SIZE, 0);
459 if (ret != PAGE_SIZE) {
460 pr_err("pblk: could not add page to bio\n");
461 bio_put(bio);
462 bio = ERR_PTR(-ENOMEM);
463 goto out;
464 }
465
466 kaddr += PAGE_SIZE;
467 }
468out:
469 return bio;
470}
471
472int pblk_calc_secs(struct pblk *pblk, unsigned long secs_avail,
473 unsigned long secs_to_flush)
474{
475 int max = pblk->sec_per_write;
476 int min = pblk->min_write_pgs;
477 int secs_to_sync = 0;
478
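	/*
	 * Round the sync count down to a multiple of min_write_pgs. For
	 * example (illustrative numbers only): with min = 4 and max = 64,
	 * secs_avail = 10 and no flush gives secs_to_sync = 8, while
	 * secs_avail = 3 with a flush pending gives secs_to_sync = 4.
	 */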
479 if (secs_avail >= max)
480 secs_to_sync = max;
481 else if (secs_avail >= min)
482 secs_to_sync = min * (secs_avail / min);
483 else if (secs_to_flush)
484 secs_to_sync = min;
485
486 return secs_to_sync;
487}
488
489void pblk_dealloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
490{
491 u64 addr;
492 int i;
493
494 addr = find_next_zero_bit(line->map_bitmap,
495 pblk->lm.sec_per_line, line->cur_sec);
496 line->cur_sec = addr - nr_secs;
497
498 for (i = 0; i < nr_secs; i++, line->cur_sec--)
499 WARN_ON(!test_and_clear_bit(line->cur_sec, line->map_bitmap));
500}
501
502u64 __pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
503{
504 u64 addr;
505 int i;
506
507 lockdep_assert_held(&line->lock);
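	/*
	 * Callers in this file (pblk_alloc_page() and the emeta path in
	 * pblk_line_submit_emeta_io()) take line->lock before calling, so the
	 * map_bitmap and cur_sec updates below are serialized.
	 */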
508
509 /* logic error: ppa out-of-bounds. Prevent generating a bad address */
510 if (line->cur_sec + nr_secs > pblk->lm.sec_per_line) {
511 WARN(1, "pblk: page allocation out of bounds\n");
512 nr_secs = pblk->lm.sec_per_line - line->cur_sec;
513 }
514
515 line->cur_sec = addr = find_next_zero_bit(line->map_bitmap,
516 pblk->lm.sec_per_line, line->cur_sec);
517 for (i = 0; i < nr_secs; i++, line->cur_sec++)
518 WARN_ON(test_and_set_bit(line->cur_sec, line->map_bitmap));
519
520 return addr;
521}
522
523u64 pblk_alloc_page(struct pblk *pblk, struct pblk_line *line, int nr_secs)
524{
525 u64 addr;
526
527 /* Lock needed in case a write fails and a recovery needs to remap
528 * failed write buffer entries
529 */
530 spin_lock(&line->lock);
531 addr = __pblk_alloc_page(pblk, line, nr_secs);
532 line->left_msecs -= nr_secs;
533 WARN(line->left_msecs < 0, "pblk: page allocation out of bounds\n");
534 spin_unlock(&line->lock);
535
536 return addr;
537}
538
539u64 pblk_lookup_page(struct pblk *pblk, struct pblk_line *line)
540{
541 u64 paddr;
542
543 spin_lock(&line->lock);
544 paddr = find_next_zero_bit(line->map_bitmap,
545 pblk->lm.sec_per_line, line->cur_sec);
546 spin_unlock(&line->lock);
547
548 return paddr;
549}
550
551/*
552 * Submit emeta to one LUN in the raid line at a time to avoid a deadlock when
553 * taking the per LUN semaphore.
554 */
555static int pblk_line_submit_emeta_io(struct pblk *pblk, struct pblk_line *line,
556 void *emeta_buf, u64 paddr, int dir)
557{
558 struct nvm_tgt_dev *dev = pblk->dev;
559 struct nvm_geo *geo = &dev->geo;
560 struct pblk_line_meta *lm = &pblk->lm;
561 void *ppa_list, *meta_list;
562 struct bio *bio;
563 struct nvm_rq rqd;
564 dma_addr_t dma_ppa_list, dma_meta_list;
565 int min = pblk->min_write_pgs;
566 int left_ppas = lm->emeta_sec[0];
567 int id = line->id;
568 int rq_ppas, rq_len;
569 int cmd_op, bio_op;
570 int i, j;
571 int ret;
572 DECLARE_COMPLETION_ONSTACK(wait);
573
574 if (dir == WRITE) {
575 bio_op = REQ_OP_WRITE;
576 cmd_op = NVM_OP_PWRITE;
577 } else if (dir == READ) {
578 bio_op = REQ_OP_READ;
579 cmd_op = NVM_OP_PREAD;
580 } else
581 return -EINVAL;
582
583 meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
584 &dma_meta_list);
585 if (!meta_list)
586 return -ENOMEM;
587
588 ppa_list = meta_list + pblk_dma_meta_size;
589 dma_ppa_list = dma_meta_list + pblk_dma_meta_size;
590
591next_rq:
592 memset(&rqd, 0, sizeof(struct nvm_rq));
593
594 rq_ppas = pblk_calc_secs(pblk, left_ppas, 0);
595 rq_len = rq_ppas * geo->sec_size;
596
597 bio = pblk_bio_map_addr(pblk, emeta_buf, rq_ppas, rq_len, GFP_KERNEL);
598 if (IS_ERR(bio)) {
599 ret = PTR_ERR(bio);
600 goto free_rqd_dma;
601 }
602
603 bio->bi_iter.bi_sector = 0; /* internal bio */
604 bio_set_op_attrs(bio, bio_op, 0);
605
606 rqd.bio = bio;
607 rqd.meta_list = meta_list;
608 rqd.ppa_list = ppa_list;
609 rqd.dma_meta_list = dma_meta_list;
610 rqd.dma_ppa_list = dma_ppa_list;
611 rqd.opcode = cmd_op;
612 rqd.nr_ppas = rq_ppas;
613 rqd.end_io = pblk_end_io_sync;
614 rqd.private = &wait;
615
616 if (dir == WRITE) {
617 struct pblk_sec_meta *meta_list = rqd.meta_list;
618
619 rqd.flags = pblk_set_progr_mode(pblk, WRITE);
620 for (i = 0; i < rqd.nr_ppas; ) {
621 spin_lock(&line->lock);
622 paddr = __pblk_alloc_page(pblk, line, min);
623 spin_unlock(&line->lock);
624 for (j = 0; j < min; j++, i++, paddr++) {
625 meta_list[i].lba = cpu_to_le64(ADDR_EMPTY);
626 rqd.ppa_list[i] =
627 addr_to_gen_ppa(pblk, paddr, id);
628 }
629 }
630 } else {
631 for (i = 0; i < rqd.nr_ppas; ) {
632 struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
633 int pos = pblk_dev_ppa_to_pos(geo, ppa);
634 int read_type = PBLK_READ_RANDOM;
635
636 if (pblk_io_aligned(pblk, rq_ppas))
637 read_type = PBLK_READ_SEQUENTIAL;
638 rqd.flags = pblk_set_read_mode(pblk, read_type);
639
640 while (test_bit(pos, line->blk_bitmap)) {
641 paddr += min;
642 if (pblk_boundary_paddr_checks(pblk, paddr)) {
643 pr_err("pblk: corrupt emeta line:%d\n",
644 line->id);
645 bio_put(bio);
646 ret = -EINTR;
647 goto free_rqd_dma;
648 }
649
650 ppa = addr_to_gen_ppa(pblk, paddr, id);
651 pos = pblk_dev_ppa_to_pos(geo, ppa);
652 }
653
654 if (pblk_boundary_paddr_checks(pblk, paddr + min)) {
655 pr_err("pblk: corrupt emeta line:%d\n",
656 line->id);
657 bio_put(bio);
658 ret = -EINTR;
659 goto free_rqd_dma;
660 }
661
662 for (j = 0; j < min; j++, i++, paddr++)
663 rqd.ppa_list[i] =
664 addr_to_gen_ppa(pblk, paddr, line->id);
665 }
666 }
667
668 ret = pblk_submit_io(pblk, &rqd);
669 if (ret) {
670 pr_err("pblk: emeta I/O submission failed: %d\n", ret);
671 bio_put(bio);
672 goto free_rqd_dma;
673 }
674
675 if (!wait_for_completion_io_timeout(&wait,
676 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
677 pr_err("pblk: emeta I/O timed out\n");
678 }
679 reinit_completion(&wait);
680
681 if (likely(pblk->l_mg.emeta_alloc_type == PBLK_VMALLOC_META))
682 bio_put(bio);
683
684 if (rqd.error) {
685 if (dir == WRITE)
686 pblk_log_write_err(pblk, &rqd);
687 else
688 pblk_log_read_err(pblk, &rqd);
689 }
690
691 emeta_buf += rq_len;
692 left_ppas -= rq_ppas;
693 if (left_ppas)
694 goto next_rq;
695free_rqd_dma:
696 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
697 return ret;
698}
699
700u64 pblk_line_smeta_start(struct pblk *pblk, struct pblk_line *line)
701{
702 struct nvm_tgt_dev *dev = pblk->dev;
703 struct nvm_geo *geo = &dev->geo;
704 struct pblk_line_meta *lm = &pblk->lm;
705 int bit;
706
707 /* This usually only happens on bad lines */
708 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
709 if (bit >= lm->blk_per_line)
710 return -1;
711
712 return bit * geo->sec_per_pl;
713}
714
715static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
716 u64 paddr, int dir)
717{
718 struct nvm_tgt_dev *dev = pblk->dev;
719 struct pblk_line_meta *lm = &pblk->lm;
720 struct bio *bio;
721 struct nvm_rq rqd;
722 __le64 *lba_list = NULL;
723 int i, ret;
724 int cmd_op, bio_op;
725 int flags;
726 DECLARE_COMPLETION_ONSTACK(wait);
727
728 if (dir == WRITE) {
729 bio_op = REQ_OP_WRITE;
730 cmd_op = NVM_OP_PWRITE;
731 flags = pblk_set_progr_mode(pblk, WRITE);
732 lba_list = emeta_to_lbas(pblk, line->emeta->buf);
733 } else if (dir == READ) {
734 bio_op = REQ_OP_READ;
735 cmd_op = NVM_OP_PREAD;
736 flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
737 } else
738 return -EINVAL;
739
740 memset(&rqd, 0, sizeof(struct nvm_rq));
741
742 rqd.meta_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
743 &rqd.dma_meta_list);
744 if (!rqd.meta_list)
745 return -ENOMEM;
746
747 rqd.ppa_list = rqd.meta_list + pblk_dma_meta_size;
748 rqd.dma_ppa_list = rqd.dma_meta_list + pblk_dma_meta_size;
749
750 bio = bio_map_kern(dev->q, line->smeta, lm->smeta_len, GFP_KERNEL);
751 if (IS_ERR(bio)) {
752 ret = PTR_ERR(bio);
753 goto free_ppa_list;
754 }
755
756 bio->bi_iter.bi_sector = 0; /* internal bio */
757 bio_set_op_attrs(bio, bio_op, 0);
758
759 rqd.bio = bio;
760 rqd.opcode = cmd_op;
761 rqd.flags = flags;
762 rqd.nr_ppas = lm->smeta_sec;
763 rqd.end_io = pblk_end_io_sync;
764 rqd.private = &wait;
765
766 for (i = 0; i < lm->smeta_sec; i++, paddr++) {
767 struct pblk_sec_meta *meta_list = rqd.meta_list;
768
769 rqd.ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
770
771 if (dir == WRITE) {
772 u64 addr_empty = cpu_to_le64(ADDR_EMPTY);
773
774 meta_list[i].lba = lba_list[paddr] = addr_empty;
775 }
776 }
777
778 /*
779 * This I/O is sent by the write thread when a line is replaced. Since
780 * the write thread is the only one sending write and erase commands,
781 * there is no need to take the LUN semaphore.
782 */
783 ret = pblk_submit_io(pblk, &rqd);
784 if (ret) {
785 pr_err("pblk: smeta I/O submission failed: %d\n", ret);
786 bio_put(bio);
787 goto free_ppa_list;
788 }
789
790 if (!wait_for_completion_io_timeout(&wait,
791 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
792 pr_err("pblk: smeta I/O timed out\n");
793 }
794
795 if (rqd.error) {
796 if (dir == WRITE)
797 pblk_log_write_err(pblk, &rqd);
798 else
799 pblk_log_read_err(pblk, &rqd);
800 }
801
802free_ppa_list:
803 nvm_dev_dma_free(dev->parent, rqd.meta_list, rqd.dma_meta_list);
804
805 return ret;
806}
807
808int pblk_line_read_smeta(struct pblk *pblk, struct pblk_line *line)
809{
810 u64 bpaddr = pblk_line_smeta_start(pblk, line);
811
812 return pblk_line_submit_smeta_io(pblk, line, bpaddr, READ);
813}
814
815int pblk_line_read_emeta(struct pblk *pblk, struct pblk_line *line,
816 void *emeta_buf)
817{
818 return pblk_line_submit_emeta_io(pblk, line, emeta_buf,
819 line->emeta_ssec, READ);
820}
821
822static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
823 struct ppa_addr ppa)
824{
825 rqd->opcode = NVM_OP_ERASE;
826 rqd->ppa_addr = ppa;
827 rqd->nr_ppas = 1;
828 rqd->flags = pblk_set_progr_mode(pblk, ERASE);
829 rqd->bio = NULL;
830}
831
832static int pblk_blk_erase_sync(struct pblk *pblk, struct ppa_addr ppa)
833{
834 struct nvm_rq rqd;
835 int ret;
836 DECLARE_COMPLETION_ONSTACK(wait);
837
838 memset(&rqd, 0, sizeof(struct nvm_rq));
839
840 pblk_setup_e_rq(pblk, &rqd, ppa);
841
842 rqd.end_io = pblk_end_io_sync;
843 rqd.private = &wait;
844
845 /* The write thread schedules erases so that it minimizes disturbances
846 * with writes. Thus, there is no need to take the LUN semaphore.
847 */
848 ret = pblk_submit_io(pblk, &rqd);
849 if (ret) {
850 struct nvm_tgt_dev *dev = pblk->dev;
851 struct nvm_geo *geo = &dev->geo;
852
853 pr_err("pblk: could not sync erase line:%d,blk:%d\n",
854 pblk_dev_ppa_to_line(ppa),
855 pblk_dev_ppa_to_pos(geo, ppa));
856
857 rqd.error = ret;
858 goto out;
859 }
860
861 if (!wait_for_completion_io_timeout(&wait,
862 msecs_to_jiffies(PBLK_COMMAND_TIMEOUT_MS))) {
863 pr_err("pblk: sync erase timed out\n");
864 }
865
866out:
867 rqd.private = pblk;
868 __pblk_end_io_erase(pblk, &rqd);
869
870 return 0;
871}
872
873int pblk_line_erase(struct pblk *pblk, struct pblk_line *line)
874{
875 struct pblk_line_meta *lm = &pblk->lm;
876 struct ppa_addr ppa;
877 int bit = -1;
878
879 /* Erase only good blocks, one at a time */
880 do {
881 spin_lock(&line->lock);
882 bit = find_next_zero_bit(line->erase_bitmap, lm->blk_per_line,
883 bit + 1);
884 if (bit >= lm->blk_per_line) {
885 spin_unlock(&line->lock);
886 break;
887 }
888
889 ppa = pblk->luns[bit].bppa; /* set ch and lun */
890 ppa.g.blk = line->id;
891
892 atomic_dec(&line->left_eblks);
893 WARN_ON(test_and_set_bit(bit, line->erase_bitmap));
894 spin_unlock(&line->lock);
895
896 if (pblk_blk_erase_sync(pblk, ppa)) {
897 pr_err("pblk: failed to erase line %d\n", line->id);
898 return -ENOMEM;
899 }
900 } while (1);
901
902 return 0;
903}
904
905static void pblk_line_setup_metadata(struct pblk_line *line,
906 struct pblk_line_mgmt *l_mg,
907 struct pblk_line_meta *lm)
908{
909 int meta_line;
910
911retry_meta:
912 meta_line = find_first_zero_bit(&l_mg->meta_bitmap, PBLK_DATA_LINES);
913 if (meta_line == PBLK_DATA_LINES) {
914 spin_unlock(&l_mg->free_lock);
915 io_schedule();
916 spin_lock(&l_mg->free_lock);
917 goto retry_meta;
918 }
919
920 set_bit(meta_line, &l_mg->meta_bitmap);
921 line->meta_line = meta_line;
922
923 line->smeta = l_mg->sline_meta[meta_line];
924 line->emeta = l_mg->eline_meta[meta_line];
925
926 memset(line->smeta, 0, lm->smeta_len);
927 memset(line->emeta->buf, 0, lm->emeta_len[0]);
928
929 line->emeta->mem = 0;
930 atomic_set(&line->emeta->sync, 0);
931}
932
933/* For now lines are always assumed full lines. Thus, smeta former and current
934 * lun bitmaps are omitted.
935 */
936static int pblk_line_init_metadata(struct pblk *pblk, struct pblk_line *line,
937 struct pblk_line *cur)
938{
939 struct nvm_tgt_dev *dev = pblk->dev;
940 struct nvm_geo *geo = &dev->geo;
941 struct pblk_line_meta *lm = &pblk->lm;
942 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
943 struct pblk_emeta *emeta = line->emeta;
944 struct line_emeta *emeta_buf = emeta->buf;
945 struct line_smeta *smeta_buf = (struct line_smeta *)line->smeta;
946 int nr_blk_line;
947
948 /* After erasing the line, new bad blocks might appear and we risk
949 * having an invalid line
950 */
951 nr_blk_line = lm->blk_per_line -
952 bitmap_weight(line->blk_bitmap, lm->blk_per_line);
953 if (nr_blk_line < lm->min_blk_line) {
954 spin_lock(&l_mg->free_lock);
955 spin_lock(&line->lock);
956 line->state = PBLK_LINESTATE_BAD;
957 spin_unlock(&line->lock);
958
959 list_add_tail(&line->list, &l_mg->bad_list);
960 spin_unlock(&l_mg->free_lock);
961
962 pr_debug("pblk: line %d is bad\n", line->id);
963
964 return 0;
965 }
966
967 /* Run-time metadata */
968 line->lun_bitmap = ((void *)(smeta_buf)) + sizeof(struct line_smeta);
969
970 /* Mark LUNs allocated in this line (all for now) */
971 bitmap_set(line->lun_bitmap, 0, lm->lun_bitmap_len);
972
973 smeta_buf->header.identifier = cpu_to_le32(PBLK_MAGIC);
974 memcpy(smeta_buf->header.uuid, pblk->instance_uuid, 16);
975 smeta_buf->header.id = cpu_to_le32(line->id);
976 smeta_buf->header.type = cpu_to_le16(line->type);
977 smeta_buf->header.version = cpu_to_le16(1);
978
979 /* Start metadata */
980 smeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
981 smeta_buf->window_wr_lun = cpu_to_le32(geo->nr_luns);
982
983 /* Fill metadata among lines */
984 if (cur) {
985 memcpy(line->lun_bitmap, cur->lun_bitmap, lm->lun_bitmap_len);
986 smeta_buf->prev_id = cpu_to_le32(cur->id);
987 cur->emeta->buf->next_id = cpu_to_le32(line->id);
988 } else {
989 smeta_buf->prev_id = cpu_to_le32(PBLK_LINE_EMPTY);
990 }
991
992 /* All smeta must be set at this point */
993 smeta_buf->header.crc = cpu_to_le32(
994 pblk_calc_meta_header_crc(pblk, &smeta_buf->header));
995 smeta_buf->crc = cpu_to_le32(pblk_calc_smeta_crc(pblk, smeta_buf));
996
997 /* End metadata */
998 memcpy(&emeta_buf->header, &smeta_buf->header,
999 sizeof(struct line_header));
1000 emeta_buf->seq_nr = cpu_to_le64(line->seq_nr);
1001 emeta_buf->nr_lbas = cpu_to_le64(line->sec_in_line);
1002 emeta_buf->nr_valid_lbas = cpu_to_le64(0);
1003 emeta_buf->next_id = cpu_to_le32(PBLK_LINE_EMPTY);
1004 emeta_buf->crc = cpu_to_le32(0);
1005 emeta_buf->prev_id = smeta_buf->prev_id;
1006
1007 return 1;
1008}
1009
1010/* For now lines are always assumed full lines. Thus, smeta former and current
1011 * lun bitmaps are omitted.
1012 */
1013static int pblk_line_init_bb(struct pblk *pblk, struct pblk_line *line,
1014 int init)
1015{
1016 struct nvm_tgt_dev *dev = pblk->dev;
1017 struct nvm_geo *geo = &dev->geo;
1018 struct pblk_line_meta *lm = &pblk->lm;
1019 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1020 int nr_bb = 0;
1021 u64 off;
1022 int bit = -1;
1023
1024 line->sec_in_line = lm->sec_per_line;
1025
1026 /* Capture bad block information on line mapping bitmaps */
1027 while ((bit = find_next_bit(line->blk_bitmap, lm->blk_per_line,
1028 bit + 1)) < lm->blk_per_line) {
1029 off = bit * geo->sec_per_pl;
1030 bitmap_shift_left(l_mg->bb_aux, l_mg->bb_template, off,
1031 lm->sec_per_line);
1032 bitmap_or(line->map_bitmap, line->map_bitmap, l_mg->bb_aux,
1033 lm->sec_per_line);
1034 line->sec_in_line -= geo->sec_per_blk;
1035 if (bit >= lm->emeta_bb)
1036 nr_bb++;
1037 }
1038
1039 /* Mark smeta metadata sectors as bad sectors */
1040 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1041 off = bit * geo->sec_per_pl;
1042retry_smeta:
1043 bitmap_set(line->map_bitmap, off, lm->smeta_sec);
1044 line->sec_in_line -= lm->smeta_sec;
1045 line->smeta_ssec = off;
1046 line->cur_sec = off + lm->smeta_sec;
1047
1048 if (init && pblk_line_submit_smeta_io(pblk, line, off, WRITE)) {
1049 pr_debug("pblk: line smeta I/O failed. Retry\n");
1050 off += geo->sec_per_pl;
1051 goto retry_smeta;
1052 }
1053
1054 bitmap_copy(line->invalid_bitmap, line->map_bitmap, lm->sec_per_line);
1055
1056 /* Mark emeta metadata sectors as bad sectors. We need to consider bad
1057 * blocks to make sure that there are enough sectors to store emeta
1058 */
1059 bit = lm->sec_per_line;
1060 off = lm->sec_per_line - lm->emeta_sec[0];
1061 bitmap_set(line->invalid_bitmap, off, lm->emeta_sec[0]);
1062 while (nr_bb) {
1063 off -= geo->sec_per_pl;
1064 if (!test_bit(off, line->invalid_bitmap)) {
1065 bitmap_set(line->invalid_bitmap, off, geo->sec_per_pl);
1066 nr_bb--;
1067 }
1068 }
1069
1070 line->sec_in_line -= lm->emeta_sec[0];
1071 line->emeta_ssec = off;
1072 line->nr_valid_lbas = 0;
1073 line->left_msecs = line->sec_in_line;
1074 *line->vsc = cpu_to_le32(line->sec_in_line);
1075
1076 if (lm->sec_per_line - line->sec_in_line !=
1077 bitmap_weight(line->invalid_bitmap, lm->sec_per_line)) {
1078 spin_lock(&line->lock);
1079 line->state = PBLK_LINESTATE_BAD;
1080 spin_unlock(&line->lock);
1081
1082 list_add_tail(&line->list, &l_mg->bad_list);
1083 pr_err("pblk: unexpected line %d is bad\n", line->id);
1084
1085 return 0;
1086 }
1087
1088 return 1;
1089}
1090
1091static int pblk_line_prepare(struct pblk *pblk, struct pblk_line *line)
1092{
1093 struct pblk_line_meta *lm = &pblk->lm;
1094 int blk_in_line = atomic_read(&line->blk_in_line);
1095
1096 line->map_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1097 if (!line->map_bitmap)
1098 return -ENOMEM;
1099 memset(line->map_bitmap, 0, lm->sec_bitmap_len);
1100
1101 /* invalid_bitmap is special since it is used when line is closed. No
1102 * need to zero it; it will be initialized using bb info from
1103 * map_bitmap
1104 */
1105 line->invalid_bitmap = mempool_alloc(pblk->line_meta_pool, GFP_ATOMIC);
1106 if (!line->invalid_bitmap) {
1107 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1108 return -ENOMEM;
1109 }
1110
1111 spin_lock(&line->lock);
1112 if (line->state != PBLK_LINESTATE_FREE) {
1113 spin_unlock(&line->lock);
1114 WARN(1, "pblk: corrupted line state\n");
1115 return -EINTR;
1116 }
1117 line->state = PBLK_LINESTATE_OPEN;
1118
1119 atomic_set(&line->left_eblks, blk_in_line);
1120 atomic_set(&line->left_seblks, blk_in_line);
1121
1122 line->meta_distance = lm->meta_distance;
1123 spin_unlock(&line->lock);
1124
1125 /* Bad blocks do not need to be erased */
1126 bitmap_copy(line->erase_bitmap, line->blk_bitmap, lm->blk_per_line);
1127
1128 kref_init(&line->ref);
1129
1130 return 0;
1131}
1132
1133int pblk_line_recov_alloc(struct pblk *pblk, struct pblk_line *line)
1134{
1135 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1136 int ret;
1137
1138 spin_lock(&l_mg->free_lock);
1139 l_mg->data_line = line;
1140 list_del(&line->list);
1141
1142 ret = pblk_line_prepare(pblk, line);
1143 if (ret) {
1144 list_add(&line->list, &l_mg->free_list);
1145 spin_unlock(&l_mg->free_lock);
1146 return ret;
1147 }
1148 spin_unlock(&l_mg->free_lock);
1149
1150 pblk_rl_free_lines_dec(&pblk->rl, line);
1151
1152 if (!pblk_line_init_bb(pblk, line, 0)) {
1153 list_add(&line->list, &l_mg->free_list);
1154 return -EINTR;
1155 }
1156
1157 return 0;
1158}
1159
1160void pblk_line_recov_close(struct pblk *pblk, struct pblk_line *line)
1161{
1162 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1163 line->map_bitmap = NULL;
1164 line->smeta = NULL;
1165 line->emeta = NULL;
1166}
1167
1168struct pblk_line *pblk_line_get(struct pblk *pblk)
1169{
1170 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1171 struct pblk_line_meta *lm = &pblk->lm;
1172 struct pblk_line *line = NULL;
1173 int bit;
1174
1175 lockdep_assert_held(&l_mg->free_lock);
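	/*
	 * Callers (pblk_line_get_first_data(), pblk_line_replace_data() and
	 * pblk_line_retry()) hold l_mg->free_lock while taking a line off the
	 * free list, which the assertion above documents and enforces.
	 */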
1176
1177retry_get:
1178 if (list_empty(&l_mg->free_list)) {
1179 pr_err("pblk: no free lines\n");
1180 goto out;
1181 }
1182
1183 line = list_first_entry(&l_mg->free_list, struct pblk_line, list);
1184 list_del(&line->list);
1185 l_mg->nr_free_lines--;
1186
1187 bit = find_first_zero_bit(line->blk_bitmap, lm->blk_per_line);
1188 if (unlikely(bit >= lm->blk_per_line)) {
1189 spin_lock(&line->lock);
1190 line->state = PBLK_LINESTATE_BAD;
1191 spin_unlock(&line->lock);
1192
1193 list_add_tail(&line->list, &l_mg->bad_list);
1194
1195 pr_debug("pblk: line %d is bad\n", line->id);
1196 goto retry_get;
1197 }
1198
1199 if (pblk_line_prepare(pblk, line)) {
1200 pr_err("pblk: failed to prepare line %d\n", line->id);
1201 list_add(&line->list, &l_mg->free_list);
1202 return NULL;
1203 }
1204
1205out:
1206 return line;
1207}
1208
1209static struct pblk_line *pblk_line_retry(struct pblk *pblk,
1210 struct pblk_line *line)
1211{
1212 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1213 struct pblk_line *retry_line;
1214
1215 spin_lock(&l_mg->free_lock);
1216 retry_line = pblk_line_get(pblk);
1217 if (!retry_line) {
1218 l_mg->data_line = NULL;
1219 spin_unlock(&l_mg->free_lock);
1220 return NULL;
1221 }
1222
1223 retry_line->smeta = line->smeta;
1224 retry_line->emeta = line->emeta;
1225 retry_line->meta_line = line->meta_line;
1226
1227 pblk_line_free(pblk, line);
1228 l_mg->data_line = retry_line;
1229 spin_unlock(&l_mg->free_lock);
1230
1231 if (pblk_line_erase(pblk, retry_line)) {
1232 spin_lock(&l_mg->free_lock);
1233 l_mg->data_line = NULL;
1234 spin_unlock(&l_mg->free_lock);
1235 return NULL;
1236 }
1237
1238 pblk_rl_free_lines_dec(&pblk->rl, retry_line);
1239
1240 return retry_line;
1241}
1242
1243struct pblk_line *pblk_line_get_first_data(struct pblk *pblk)
1244{
1245 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1246 struct pblk_line *line;
1247 int is_next = 0;
1248
1249 spin_lock(&l_mg->free_lock);
1250 line = pblk_line_get(pblk);
1251 if (!line) {
1252 spin_unlock(&l_mg->free_lock);
1253 return NULL;
1254 }
1255
1256 line->seq_nr = l_mg->d_seq_nr++;
1257 line->type = PBLK_LINETYPE_DATA;
1258 l_mg->data_line = line;
1259
1260 pblk_line_setup_metadata(line, l_mg, &pblk->lm);
1261
1262 /* Allocate next line for preparation */
1263 l_mg->data_next = pblk_line_get(pblk);
1264 if (l_mg->data_next) {
1265 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1266 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1267 is_next = 1;
1268 }
1269 spin_unlock(&l_mg->free_lock);
1270
1271 pblk_rl_free_lines_dec(&pblk->rl, line);
1272 if (is_next)
1273 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1274
1275 if (pblk_line_erase(pblk, line))
1276 return NULL;
1277
1278retry_setup:
1279 if (!pblk_line_init_metadata(pblk, line, NULL)) {
1280 line = pblk_line_retry(pblk, line);
1281 if (!line)
1282 return NULL;
1283
1284 goto retry_setup;
1285 }
1286
1287 if (!pblk_line_init_bb(pblk, line, 1)) {
1288 line = pblk_line_retry(pblk, line);
1289 if (!line)
1290 return NULL;
1291
1292 goto retry_setup;
1293 }
1294
1295 return line;
1296}
1297
1298struct pblk_line *pblk_line_replace_data(struct pblk *pblk)
1299{
1300 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1301 struct pblk_line *cur, *new;
1302 unsigned int left_seblks;
1303 int is_next = 0;
1304
1305 cur = l_mg->data_line;
1306 new = l_mg->data_next;
1307 if (!new)
1308 return NULL;
1309 l_mg->data_line = new;
1310
1311retry_line:
1312 left_seblks = atomic_read(&new->left_seblks);
1313 if (left_seblks) {
1314 /* If line is not fully erased, erase it */
1315 if (atomic_read(&new->left_eblks)) {
1316 if (pblk_line_erase(pblk, new))
1317 return NULL;
1318 } else {
1319 io_schedule();
1320 }
1321 goto retry_line;
1322 }
1323
1324 spin_lock(&l_mg->free_lock);
1325 /* Allocate next line for preparation */
1326 l_mg->data_next = pblk_line_get(pblk);
1327 if (l_mg->data_next) {
1328 l_mg->data_next->seq_nr = l_mg->d_seq_nr++;
1329 l_mg->data_next->type = PBLK_LINETYPE_DATA;
1330 is_next = 1;
1331 }
1332
1333 pblk_line_setup_metadata(new, l_mg, &pblk->lm);
1334 spin_unlock(&l_mg->free_lock);
1335
1336 if (is_next)
1337 pblk_rl_free_lines_dec(&pblk->rl, l_mg->data_next);
1338
1339retry_setup:
1340 if (!pblk_line_init_metadata(pblk, new, cur)) {
1341 new = pblk_line_retry(pblk, new);
1342 if (!new)
1343 return NULL;
1344
1345 goto retry_setup;
1346 }
1347
1348 if (!pblk_line_init_bb(pblk, new, 1)) {
1349 new = pblk_line_retry(pblk, new);
1350 if (!new)
1351 return NULL;
1352
1353 goto retry_setup;
1354 }
1355
1356 return new;
1357}
1358
1359void pblk_line_free(struct pblk *pblk, struct pblk_line *line)
1360{
1361 if (line->map_bitmap)
1362 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1363 if (line->invalid_bitmap)
1364 mempool_free(line->invalid_bitmap, pblk->line_meta_pool);
1365
1366 *line->vsc = cpu_to_le32(EMPTY_ENTRY);
1367
1368 line->map_bitmap = NULL;
1369 line->invalid_bitmap = NULL;
1370 line->smeta = NULL;
1371 line->emeta = NULL;
1372}
1373
1374void pblk_line_put(struct kref *ref)
1375{
1376 struct pblk_line *line = container_of(ref, struct pblk_line, ref);
1377 struct pblk *pblk = line->pblk;
1378 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1379
1380 spin_lock(&line->lock);
1381 WARN_ON(line->state != PBLK_LINESTATE_GC);
1382 line->state = PBLK_LINESTATE_FREE;
1383 line->gc_group = PBLK_LINEGC_NONE;
1384 pblk_line_free(pblk, line);
1385 spin_unlock(&line->lock);
1386
1387 spin_lock(&l_mg->free_lock);
1388 list_add_tail(&line->list, &l_mg->free_list);
1389 l_mg->nr_free_lines++;
1390 spin_unlock(&l_mg->free_lock);
1391
1392 pblk_rl_free_lines_inc(&pblk->rl, line);
1393}
1394
1395int pblk_blk_erase_async(struct pblk *pblk, struct ppa_addr ppa)
1396{
1397 struct nvm_rq *rqd;
1398 int err;
1399
1400 rqd = mempool_alloc(pblk->g_rq_pool, GFP_KERNEL);
1401 memset(rqd, 0, pblk_g_rq_size);
1402
1403 pblk_setup_e_rq(pblk, rqd, ppa);
1404
1405 rqd->end_io = pblk_end_io_erase;
1406 rqd->private = pblk;
1407
1408 /* The write thread schedules erases so that it minimizes disturbances
1409 * with writes. Thus, there is no need to take the LUN semaphore.
1410 */
1411 err = pblk_submit_io(pblk, rqd);
1412 if (err) {
1413 struct nvm_tgt_dev *dev = pblk->dev;
1414 struct nvm_geo *geo = &dev->geo;
1415
1416 pr_err("pblk: could not async erase line:%d,blk:%d\n",
1417 pblk_dev_ppa_to_line(ppa),
1418 pblk_dev_ppa_to_pos(geo, ppa));
1419 }
1420
1421 return err;
1422}
1423
1424struct pblk_line *pblk_line_get_data(struct pblk *pblk)
1425{
1426 return pblk->l_mg.data_line;
1427}
1428
1429/* For now, always erase next line */
1430struct pblk_line *pblk_line_get_erase(struct pblk *pblk)
1431{
1432 return pblk->l_mg.data_next;
1433}
1434
1435int pblk_line_is_full(struct pblk_line *line)
1436{
1437 return (line->left_msecs == 0);
1438}
1439
1440void pblk_line_close(struct pblk *pblk, struct pblk_line *line)
1441{
1442 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1443 struct pblk_line_meta *lm = &pblk->lm;
1444 struct list_head *move_list;
1445
1446 WARN(!bitmap_full(line->map_bitmap, lm->sec_per_line),
1447 "pblk: corrupt closed line %d\n", line->id);
1448
1449 spin_lock(&l_mg->free_lock);
1450 WARN_ON(!test_and_clear_bit(line->meta_line, &l_mg->meta_bitmap));
1451 spin_unlock(&l_mg->free_lock);
1452
1453 spin_lock(&l_mg->gc_lock);
1454 spin_lock(&line->lock);
1455 WARN_ON(line->state != PBLK_LINESTATE_OPEN);
1456 line->state = PBLK_LINESTATE_CLOSED;
1457 move_list = pblk_line_gc_list(pblk, line);
1458
1459 list_add_tail(&line->list, move_list);
1460
1461 mempool_free(line->map_bitmap, pblk->line_meta_pool);
1462 line->map_bitmap = NULL;
1463 line->smeta = NULL;
1464 line->emeta = NULL;
1465
1466 spin_unlock(&line->lock);
1467 spin_unlock(&l_mg->gc_lock);
1468}
1469
1470void pblk_line_close_meta(struct pblk *pblk, struct pblk_line *line)
1471{
1472 struct pblk_line_mgmt *l_mg = &pblk->l_mg;
1473 struct pblk_line_meta *lm = &pblk->lm;
1474 struct pblk_emeta *emeta = line->emeta;
1475 struct line_emeta *emeta_buf = emeta->buf;
1476
1477 /* No need for exact vsc value; avoid a big line lock and take an approximation. */
1478 memcpy(emeta_to_vsc(pblk, emeta_buf), l_mg->vsc_list, lm->vsc_list_len);
1479 memcpy(emeta_to_bb(emeta_buf), line->blk_bitmap, lm->blk_bitmap_len);
1480
1481 emeta_buf->nr_valid_lbas = cpu_to_le64(line->nr_valid_lbas);
1482 emeta_buf->crc = cpu_to_le32(pblk_calc_emeta_crc(pblk, emeta_buf));
1483
1484 spin_lock(&l_mg->close_lock);
1485 spin_lock(&line->lock);
1486 list_add_tail(&line->list, &l_mg->emeta_list);
1487 spin_unlock(&line->lock);
1488 spin_unlock(&l_mg->close_lock);
1489}
1490
1491void pblk_line_close_ws(struct work_struct *work)
1492{
1493 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1494 ws);
1495 struct pblk *pblk = line_ws->pblk;
1496 struct pblk_line *line = line_ws->line;
1497
1498 pblk_line_close(pblk, line);
1499 mempool_free(line_ws, pblk->line_ws_pool);
1500}
1501
1502void pblk_line_mark_bb(struct work_struct *work)
1503{
1504 struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
1505 ws);
1506 struct pblk *pblk = line_ws->pblk;
1507 struct nvm_tgt_dev *dev = pblk->dev;
1508 struct ppa_addr *ppa = line_ws->priv;
1509 int ret;
1510
1511 ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
1512 if (ret) {
1513 struct pblk_line *line;
1514 int pos;
1515
1516 line = &pblk->lines[pblk_dev_ppa_to_line(*ppa)];
1517 pos = pblk_dev_ppa_to_pos(&dev->geo, *ppa);
1518
1519 pr_err("pblk: failed to mark bb, line:%d, pos:%d\n",
1520 line->id, pos);
1521 }
1522
1523 kfree(ppa);
1524 mempool_free(line_ws, pblk->line_ws_pool);
1525}
1526
1527void pblk_line_run_ws(struct pblk *pblk, struct pblk_line *line, void *priv,
1528 void (*work)(struct work_struct *))
1529{
1530 struct pblk_line_ws *line_ws;
1531
1532 line_ws = mempool_alloc(pblk->line_ws_pool, GFP_ATOMIC);
1533 if (!line_ws)
1534 return;
1535
1536 line_ws->pblk = pblk;
1537 line_ws->line = line;
1538 line_ws->priv = priv;
1539
1540 INIT_WORK(&line_ws->ws, work);
1541 queue_work(pblk->kw_wq, &line_ws->ws);
1542}
1543
1544void pblk_down_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1545 unsigned long *lun_bitmap)
1546{
1547 struct nvm_tgt_dev *dev = pblk->dev;
1548 struct nvm_geo *geo = &dev->geo;
1549 struct pblk_lun *rlun;
1550 int pos = pblk_ppa_to_pos(geo, ppa_list[0]);
1551 int ret;
1552
1553 /*
1554 * Only send one inflight I/O per LUN. Since we map at a page
1555 * granularity, all ppas in the I/O will map to the same LUN
1556 */
1557#ifdef CONFIG_NVM_DEBUG
1558 int i;
1559
1560 for (i = 1; i < nr_ppas; i++)
1561 WARN_ON(ppa_list[0].g.lun != ppa_list[i].g.lun ||
1562 ppa_list[0].g.ch != ppa_list[i].g.ch);
1563#endif
1564 /* If the LUN has been locked for this same request, do not attempt to
1565 * lock it again
1566 */
1567 if (test_and_set_bit(pos, lun_bitmap))
1568 return;
1569
1570 rlun = &pblk->luns[pos];
1571 ret = down_timeout(&rlun->wr_sem, msecs_to_jiffies(5000));
1572 if (ret) {
1573 switch (ret) {
1574 case -ETIME:
1575 pr_err("pblk: lun semaphore timed out\n");
1576 break;
1577 case -EINTR:
1578 pr_err("pblk: lun semaphore timed out\n");
1579 break;
1580 }
1581 }
1582}
1583
1584void pblk_up_rq(struct pblk *pblk, struct ppa_addr *ppa_list, int nr_ppas,
1585 unsigned long *lun_bitmap)
1586{
1587 struct nvm_tgt_dev *dev = pblk->dev;
1588 struct nvm_geo *geo = &dev->geo;
1589 struct pblk_lun *rlun;
1590 int nr_luns = geo->nr_luns;
1591 int bit = -1;
1592
1593 while ((bit = find_next_bit(lun_bitmap, nr_luns, bit + 1)) < nr_luns) {
1594 rlun = &pblk->luns[bit];
1595 up(&rlun->wr_sem);
1596 }
1597
1598 kfree(lun_bitmap);
1599}
1600
1601void pblk_update_map(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1602{
1603 struct ppa_addr l2p_ppa;
1604
1605 /* logic error: lba out-of-bounds. Ignore update */
1606 if (!(lba < pblk->rl.nr_secs)) {
1607 WARN(1, "pblk: corrupted L2P map request\n");
1608 return;
1609 }
1610
1611 spin_lock(&pblk->trans_lock);
1612 l2p_ppa = pblk_trans_map_get(pblk, lba);
1613
1614 if (!pblk_addr_in_cache(l2p_ppa) && !pblk_ppa_empty(l2p_ppa))
1615 pblk_map_invalidate(pblk, l2p_ppa);
1616
1617 pblk_trans_map_set(pblk, lba, ppa);
1618 spin_unlock(&pblk->trans_lock);
1619}
1620
1621void pblk_update_map_cache(struct pblk *pblk, sector_t lba, struct ppa_addr ppa)
1622{
1623#ifdef CONFIG_NVM_DEBUG
1624 /* Callers must ensure that the ppa points to a cache address */
1625 BUG_ON(!pblk_addr_in_cache(ppa));
1626 BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1627#endif
1628
1629 pblk_update_map(pblk, lba, ppa);
1630}
1631
1632int pblk_update_map_gc(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1633 struct pblk_line *gc_line)
1634{
1635 struct ppa_addr l2p_ppa;
1636 int ret = 1;
1637
1638#ifdef CONFIG_NVM_DEBUG
1639 /* Callers must ensure that the ppa points to a cache address */
1640 BUG_ON(!pblk_addr_in_cache(ppa));
1641 BUG_ON(pblk_rb_pos_oob(&pblk->rwb, pblk_addr_to_cacheline(ppa)));
1642#endif
1643
1644 /* logic error: lba out-of-bounds. Ignore update */
1645 if (!(lba < pblk->rl.nr_secs)) {
1646 WARN(1, "pblk: corrupted L2P map request\n");
1647 return 0;
1648 }
1649
1650 spin_lock(&pblk->trans_lock);
1651 l2p_ppa = pblk_trans_map_get(pblk, lba);
1652
1653 /* Prevent updated entries from being overwritten by GC */
1654 if (pblk_addr_in_cache(l2p_ppa) || pblk_ppa_empty(l2p_ppa) ||
1655 pblk_tgt_ppa_to_line(l2p_ppa) != gc_line->id) {
1656 ret = 0;
1657 goto out;
1658 }
1659
1660 pblk_trans_map_set(pblk, lba, ppa);
1661out:
1662 spin_unlock(&pblk->trans_lock);
1663 return ret;
1664}
1665
1666void pblk_update_map_dev(struct pblk *pblk, sector_t lba, struct ppa_addr ppa,
1667 struct ppa_addr entry_line)
1668{
1669 struct ppa_addr l2p_line;
1670
1671#ifdef CONFIG_NVM_DEBUG
1672 /* Callers must ensure that the ppa points to a device address */
1673 BUG_ON(pblk_addr_in_cache(ppa));
1674#endif
1675 /* Invalidate and discard padded entries */
1676 if (lba == ADDR_EMPTY) {
1677#ifdef CONFIG_NVM_DEBUG
1678 atomic_long_inc(&pblk->padded_wb);
1679#endif
1680 pblk_map_invalidate(pblk, ppa);
1681 return;
1682 }
1683
1684 /* logic error: lba out-of-bounds. Ignore update */
1685 if (!(lba < pblk->rl.nr_secs)) {
1686 WARN(1, "pblk: corrupted L2P map request\n");
1687 return;
1688 }
1689
1690 spin_lock(&pblk->trans_lock);
1691 l2p_line = pblk_trans_map_get(pblk, lba);
1692
1693 /* Do not update L2P if the cacheline has been updated. In this case,
1694 * the mapped ppa must be invalidated
1695 */
1696 if (l2p_line.ppa != entry_line.ppa) {
1697 if (!pblk_ppa_empty(ppa))
1698 pblk_map_invalidate(pblk, ppa);
1699 goto out;
1700 }
1701
1702#ifdef CONFIG_NVM_DEBUG
1703 WARN_ON(!pblk_addr_in_cache(l2p_line) && !pblk_ppa_empty(l2p_line));
1704#endif
1705
1706 pblk_trans_map_set(pblk, lba, ppa);
1707out:
1708 spin_unlock(&pblk->trans_lock);
1709}
1710
1711void pblk_lookup_l2p_seq(struct pblk *pblk, struct ppa_addr *ppas,
1712 sector_t blba, int nr_secs)
1713{
1714 int i;
1715
1716 spin_lock(&pblk->trans_lock);
1717 for (i = 0; i < nr_secs; i++)
1718 ppas[i] = pblk_trans_map_get(pblk, blba + i);
1719 spin_unlock(&pblk->trans_lock);
1720}
1721
1722void pblk_lookup_l2p_rand(struct pblk *pblk, struct ppa_addr *ppas,
1723 u64 *lba_list, int nr_secs)
1724{
1725 sector_t lba;
1726 int i;
1727
1728 spin_lock(&pblk->trans_lock);
1729 for (i = 0; i < nr_secs; i++) {
1730 lba = lba_list[i];
1731 if (lba == ADDR_EMPTY) {
1732 ppas[i].ppa = ADDR_EMPTY;
1733 } else {
1734 /* logic error: lba out-of-bounds. Ignore update */
1735 if (!(lba < pblk->rl.nr_secs)) {
1736 WARN(1, "pblk: corrupted L2P map request\n");
1737 continue;
1738 }
1739 ppas[i] = pblk_trans_map_get(pblk, lba);
1740 }
1741 }
1742 spin_unlock(&pblk->trans_lock);
1743}