Commit | Line | Data |
---|---|---|
a4bd217b JG |
1 | /* |
2 | * Copyright (C) 2015 IT University of Copenhagen (rrpc.c) | |
3 | * Copyright (C) 2016 CNEX Labs | |
4 | * Initial release: Javier Gonzalez <javier@cnexlabs.com> | |
5 | * Matias Bjorling <matias@cnexlabs.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License version | |
9 | * 2 as published by the Free Software Foundation. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * Implementation of a physical block-device target for Open-channel SSDs. | |
17 | * | |
18 | * pblk-init.c - pblk's initialization. | |
19 | */ | |
20 | ||
21 | #include "pblk.h" | |
22 | ||
084ec9ba JG |
/*
 * Slab caches backing the per-instance mempools. They are file-scope, i.e.
 * shared by every pblk instance created through this module; they are created
 * in pblk_init_global_caches() and destroyed in pblk_core_free().
 */
static struct kmem_cache *pblk_blk_ws_cache;
static struct kmem_cache *pblk_rec_cache;
static struct kmem_cache *pblk_g_rq_cache;
static struct kmem_cache *pblk_w_rq_cache;
static struct kmem_cache *pblk_line_meta_cache;

/* Taken for writing while the caches are created and while a target exits */
static DECLARE_RWSEM(pblk_lock);

/* Allocated at module load and released at module unload; not static */
struct bio_set *pblk_bio_set;
a4bd217b JG |
27 | |
28 | static int pblk_rw_io(struct request_queue *q, struct pblk *pblk, | |
29 | struct bio *bio) | |
30 | { | |
31 | int ret; | |
32 | ||
33 | /* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap | |
34 | * constraint. Writes can be of arbitrary size. | |
35 | */ | |
36 | if (bio_data_dir(bio) == READ) { | |
af67c31f | 37 | blk_queue_split(q, &bio); |
a4bd217b JG |
38 | ret = pblk_submit_read(pblk, bio); |
39 | if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED)) | |
40 | bio_put(bio); | |
41 | ||
42 | return ret; | |
43 | } | |
44 | ||
45 | /* Prevent deadlock in the case of a modest LUN configuration and large | |
46 | * user I/Os. Unless stalled, the rate limiter leaves at least 256KB | |
47 | * available for user I/O. | |
48 | */ | |
49 | if (unlikely(pblk_get_secs(bio) >= pblk_rl_sysfs_rate_show(&pblk->rl))) | |
af67c31f | 50 | blk_queue_split(q, &bio); |
a4bd217b JG |
51 | |
52 | return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER); | |
53 | } | |
54 | ||
55 | static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio) | |
56 | { | |
57 | struct pblk *pblk = q->queuedata; | |
58 | ||
59 | if (bio_op(bio) == REQ_OP_DISCARD) { | |
60 | pblk_discard(pblk, bio); | |
61 | if (!(bio->bi_opf & REQ_PREFLUSH)) { | |
62 | bio_endio(bio); | |
63 | return BLK_QC_T_NONE; | |
64 | } | |
65 | } | |
66 | ||
67 | switch (pblk_rw_io(q, pblk, bio)) { | |
68 | case NVM_IO_ERR: | |
69 | bio_io_error(bio); | |
70 | break; | |
71 | case NVM_IO_DONE: | |
72 | bio_endio(bio); | |
73 | break; | |
74 | } | |
75 | ||
76 | return BLK_QC_T_NONE; | |
77 | } | |
78 | ||
79 | static void pblk_l2p_free(struct pblk *pblk) | |
80 | { | |
81 | vfree(pblk->trans_map); | |
82 | } | |
83 | ||
84 | static int pblk_l2p_init(struct pblk *pblk) | |
85 | { | |
86 | sector_t i; | |
87 | struct ppa_addr ppa; | |
88 | int entry_size = 8; | |
89 | ||
90 | if (pblk->ppaf_bitsize < 32) | |
91 | entry_size = 4; | |
92 | ||
93 | pblk->trans_map = vmalloc(entry_size * pblk->rl.nr_secs); | |
94 | if (!pblk->trans_map) | |
95 | return -ENOMEM; | |
96 | ||
97 | pblk_ppa_set_empty(&ppa); | |
98 | ||
99 | for (i = 0; i < pblk->rl.nr_secs; i++) | |
100 | pblk_trans_map_set(pblk, i, ppa); | |
101 | ||
102 | return 0; | |
103 | } | |
104 | ||
105 | static void pblk_rwb_free(struct pblk *pblk) | |
106 | { | |
107 | if (pblk_rb_tear_down_check(&pblk->rwb)) | |
108 | pr_err("pblk: write buffer error on tear down\n"); | |
109 | ||
110 | pblk_rb_data_free(&pblk->rwb); | |
111 | vfree(pblk_rb_entries_ref(&pblk->rwb)); | |
112 | } | |
113 | ||
114 | static int pblk_rwb_init(struct pblk *pblk) | |
115 | { | |
116 | struct nvm_tgt_dev *dev = pblk->dev; | |
117 | struct nvm_geo *geo = &dev->geo; | |
118 | struct pblk_rb_entry *entries; | |
119 | unsigned long nr_entries; | |
120 | unsigned int power_size, power_seg_sz; | |
121 | ||
122 | nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer); | |
123 | ||
124 | entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry)); | |
125 | if (!entries) | |
126 | return -ENOMEM; | |
127 | ||
128 | power_size = get_count_order(nr_entries); | |
129 | power_seg_sz = get_count_order(geo->sec_size); | |
130 | ||
131 | return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz); | |
132 | } | |
133 | ||
/* Minimum pages needed within a lun; sizes the page mempool in pblk_core_init() */
#define PAGE_POOL_SIZE 16
/* NOTE(review): not referenced in the visible part of this file - confirm use */
#define ADDR_POOL_SIZE 64
137 | ||
138 | static int pblk_set_ppaf(struct pblk *pblk) | |
139 | { | |
140 | struct nvm_tgt_dev *dev = pblk->dev; | |
141 | struct nvm_geo *geo = &dev->geo; | |
142 | struct nvm_addr_format ppaf = geo->ppaf; | |
143 | int power_len; | |
144 | ||
145 | /* Re-calculate channel and lun format to adapt to configuration */ | |
146 | power_len = get_count_order(geo->nr_chnls); | |
147 | if (1 << power_len != geo->nr_chnls) { | |
148 | pr_err("pblk: supports only power-of-two channel config.\n"); | |
149 | return -EINVAL; | |
150 | } | |
151 | ppaf.ch_len = power_len; | |
152 | ||
153 | power_len = get_count_order(geo->luns_per_chnl); | |
154 | if (1 << power_len != geo->luns_per_chnl) { | |
155 | pr_err("pblk: supports only power-of-two LUN config.\n"); | |
156 | return -EINVAL; | |
157 | } | |
158 | ppaf.lun_len = power_len; | |
159 | ||
160 | pblk->ppaf.sec_offset = 0; | |
161 | pblk->ppaf.pln_offset = ppaf.sect_len; | |
162 | pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len; | |
163 | pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len; | |
164 | pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len; | |
165 | pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len; | |
166 | pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1; | |
167 | pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) << | |
168 | pblk->ppaf.pln_offset; | |
169 | pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) << | |
170 | pblk->ppaf.ch_offset; | |
171 | pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) << | |
172 | pblk->ppaf.lun_offset; | |
173 | pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) << | |
174 | pblk->ppaf.pg_offset; | |
175 | pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) << | |
176 | pblk->ppaf.blk_offset; | |
177 | ||
178 | pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len; | |
179 | ||
180 | return 0; | |
181 | } | |
182 | ||
183 | static int pblk_init_global_caches(struct pblk *pblk) | |
184 | { | |
185 | char cache_name[PBLK_CACHE_NAME_LEN]; | |
186 | ||
187 | down_write(&pblk_lock); | |
188 | pblk_blk_ws_cache = kmem_cache_create("pblk_blk_ws", | |
189 | sizeof(struct pblk_line_ws), 0, 0, NULL); | |
190 | if (!pblk_blk_ws_cache) { | |
191 | up_write(&pblk_lock); | |
192 | return -ENOMEM; | |
193 | } | |
194 | ||
195 | pblk_rec_cache = kmem_cache_create("pblk_rec", | |
196 | sizeof(struct pblk_rec_ctx), 0, 0, NULL); | |
197 | if (!pblk_rec_cache) { | |
198 | kmem_cache_destroy(pblk_blk_ws_cache); | |
199 | up_write(&pblk_lock); | |
200 | return -ENOMEM; | |
201 | } | |
202 | ||
084ec9ba | 203 | pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size, |
a4bd217b | 204 | 0, 0, NULL); |
084ec9ba | 205 | if (!pblk_g_rq_cache) { |
a4bd217b JG |
206 | kmem_cache_destroy(pblk_blk_ws_cache); |
207 | kmem_cache_destroy(pblk_rec_cache); | |
208 | up_write(&pblk_lock); | |
209 | return -ENOMEM; | |
210 | } | |
211 | ||
212 | pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size, | |
213 | 0, 0, NULL); | |
214 | if (!pblk_w_rq_cache) { | |
215 | kmem_cache_destroy(pblk_blk_ws_cache); | |
216 | kmem_cache_destroy(pblk_rec_cache); | |
084ec9ba | 217 | kmem_cache_destroy(pblk_g_rq_cache); |
a4bd217b JG |
218 | up_write(&pblk_lock); |
219 | return -ENOMEM; | |
220 | } | |
221 | ||
222 | snprintf(cache_name, sizeof(cache_name), "pblk_line_m_%s", | |
223 | pblk->disk->disk_name); | |
224 | pblk_line_meta_cache = kmem_cache_create(cache_name, | |
225 | pblk->lm.sec_bitmap_len, 0, 0, NULL); | |
226 | if (!pblk_line_meta_cache) { | |
227 | kmem_cache_destroy(pblk_blk_ws_cache); | |
228 | kmem_cache_destroy(pblk_rec_cache); | |
084ec9ba | 229 | kmem_cache_destroy(pblk_g_rq_cache); |
a4bd217b JG |
230 | kmem_cache_destroy(pblk_w_rq_cache); |
231 | up_write(&pblk_lock); | |
232 | return -ENOMEM; | |
233 | } | |
234 | up_write(&pblk_lock); | |
235 | ||
236 | return 0; | |
237 | } | |
238 | ||
239 | static int pblk_core_init(struct pblk *pblk) | |
240 | { | |
241 | struct nvm_tgt_dev *dev = pblk->dev; | |
242 | struct nvm_geo *geo = &dev->geo; | |
243 | int max_write_ppas; | |
244 | int mod; | |
245 | ||
246 | pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE); | |
247 | max_write_ppas = pblk->min_write_pgs * geo->nr_luns; | |
248 | pblk->max_write_pgs = (max_write_ppas < nvm_max_phys_sects(dev)) ? | |
249 | max_write_ppas : nvm_max_phys_sects(dev); | |
250 | pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg * | |
251 | geo->nr_planes * geo->nr_luns; | |
252 | ||
c2e9f5d4 JG |
253 | pblk_set_sec_per_write(pblk, pblk->min_write_pgs); |
254 | ||
a4bd217b JG |
255 | if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) { |
256 | pr_err("pblk: cannot support device max_phys_sect\n"); | |
257 | return -EINVAL; | |
258 | } | |
259 | ||
260 | div_u64_rem(geo->sec_per_blk, pblk->min_write_pgs, &mod); | |
261 | if (mod) { | |
262 | pr_err("pblk: bad configuration of sectors/pages\n"); | |
263 | return -EINVAL; | |
264 | } | |
265 | ||
266 | if (pblk_init_global_caches(pblk)) | |
267 | return -ENOMEM; | |
268 | ||
269 | pblk->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0); | |
270 | if (!pblk->page_pool) | |
271 | return -ENOMEM; | |
272 | ||
273 | pblk->line_ws_pool = mempool_create_slab_pool(geo->nr_luns, | |
274 | pblk_blk_ws_cache); | |
275 | if (!pblk->line_ws_pool) | |
276 | goto free_page_pool; | |
277 | ||
278 | pblk->rec_pool = mempool_create_slab_pool(geo->nr_luns, pblk_rec_cache); | |
279 | if (!pblk->rec_pool) | |
280 | goto free_blk_ws_pool; | |
281 | ||
084ec9ba JG |
282 | pblk->g_rq_pool = mempool_create_slab_pool(64, pblk_g_rq_cache); |
283 | if (!pblk->g_rq_pool) | |
a4bd217b JG |
284 | goto free_rec_pool; |
285 | ||
286 | pblk->w_rq_pool = mempool_create_slab_pool(64, pblk_w_rq_cache); | |
287 | if (!pblk->w_rq_pool) | |
084ec9ba | 288 | goto free_g_rq_pool; |
a4bd217b JG |
289 | |
290 | pblk->line_meta_pool = | |
291 | mempool_create_slab_pool(16, pblk_line_meta_cache); | |
292 | if (!pblk->line_meta_pool) | |
293 | goto free_w_rq_pool; | |
294 | ||
295 | pblk->kw_wq = alloc_workqueue("pblk-aux-wq", | |
296 | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | |
297 | if (!pblk->kw_wq) | |
298 | goto free_line_meta_pool; | |
299 | ||
300 | if (pblk_set_ppaf(pblk)) | |
301 | goto free_kw_wq; | |
302 | ||
303 | if (pblk_rwb_init(pblk)) | |
304 | goto free_kw_wq; | |
305 | ||
306 | INIT_LIST_HEAD(&pblk->compl_list); | |
307 | return 0; | |
308 | ||
309 | free_kw_wq: | |
310 | destroy_workqueue(pblk->kw_wq); | |
311 | free_line_meta_pool: | |
312 | mempool_destroy(pblk->line_meta_pool); | |
313 | free_w_rq_pool: | |
314 | mempool_destroy(pblk->w_rq_pool); | |
084ec9ba JG |
315 | free_g_rq_pool: |
316 | mempool_destroy(pblk->g_rq_pool); | |
a4bd217b JG |
317 | free_rec_pool: |
318 | mempool_destroy(pblk->rec_pool); | |
319 | free_blk_ws_pool: | |
320 | mempool_destroy(pblk->line_ws_pool); | |
321 | free_page_pool: | |
322 | mempool_destroy(pblk->page_pool); | |
323 | return -ENOMEM; | |
324 | } | |
325 | ||
326 | static void pblk_core_free(struct pblk *pblk) | |
327 | { | |
328 | if (pblk->kw_wq) | |
329 | destroy_workqueue(pblk->kw_wq); | |
330 | ||
331 | mempool_destroy(pblk->page_pool); | |
332 | mempool_destroy(pblk->line_ws_pool); | |
333 | mempool_destroy(pblk->rec_pool); | |
084ec9ba | 334 | mempool_destroy(pblk->g_rq_pool); |
a4bd217b JG |
335 | mempool_destroy(pblk->w_rq_pool); |
336 | mempool_destroy(pblk->line_meta_pool); | |
337 | ||
338 | kmem_cache_destroy(pblk_blk_ws_cache); | |
339 | kmem_cache_destroy(pblk_rec_cache); | |
084ec9ba | 340 | kmem_cache_destroy(pblk_g_rq_cache); |
a4bd217b JG |
341 | kmem_cache_destroy(pblk_w_rq_cache); |
342 | kmem_cache_destroy(pblk_line_meta_cache); | |
343 | } | |
344 | ||
345 | static void pblk_luns_free(struct pblk *pblk) | |
346 | { | |
347 | kfree(pblk->luns); | |
348 | } | |
349 | ||
350 | static void pblk_lines_free(struct pblk *pblk) | |
351 | { | |
352 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
353 | struct pblk_line *line; | |
354 | int i; | |
355 | ||
356 | spin_lock(&l_mg->free_lock); | |
357 | for (i = 0; i < l_mg->nr_lines; i++) { | |
358 | line = &pblk->lines[i]; | |
359 | ||
360 | pblk_line_free(pblk, line); | |
361 | kfree(line->blk_bitmap); | |
362 | kfree(line->erase_bitmap); | |
363 | } | |
364 | spin_unlock(&l_mg->free_lock); | |
365 | } | |
366 | ||
367 | static void pblk_line_meta_free(struct pblk *pblk) | |
368 | { | |
369 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
370 | int i; | |
371 | ||
372 | kfree(l_mg->bb_template); | |
373 | kfree(l_mg->bb_aux); | |
374 | ||
375 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
376 | pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); | |
377 | pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); | |
378 | } | |
379 | ||
380 | kfree(pblk->lines); | |
381 | } | |
382 | ||
383 | static int pblk_bb_discovery(struct nvm_tgt_dev *dev, struct pblk_lun *rlun) | |
384 | { | |
385 | struct nvm_geo *geo = &dev->geo; | |
386 | struct ppa_addr ppa; | |
387 | u8 *blks; | |
388 | int nr_blks, ret; | |
389 | ||
390 | nr_blks = geo->blks_per_lun * geo->plane_mode; | |
391 | blks = kmalloc(nr_blks, GFP_KERNEL); | |
392 | if (!blks) | |
393 | return -ENOMEM; | |
394 | ||
395 | ppa.ppa = 0; | |
396 | ppa.g.ch = rlun->bppa.g.ch; | |
397 | ppa.g.lun = rlun->bppa.g.lun; | |
398 | ||
399 | ret = nvm_get_tgt_bb_tbl(dev, ppa, blks); | |
400 | if (ret) | |
401 | goto out; | |
402 | ||
403 | nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks); | |
404 | if (nr_blks < 0) { | |
a4bd217b | 405 | ret = nr_blks; |
5136a4fd | 406 | goto out; |
a4bd217b JG |
407 | } |
408 | ||
409 | rlun->bb_list = blks; | |
410 | ||
5136a4fd | 411 | return 0; |
a4bd217b | 412 | out: |
5136a4fd | 413 | kfree(blks); |
a4bd217b JG |
414 | return ret; |
415 | } | |
416 | ||
417 | static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line) | |
418 | { | |
419 | struct pblk_line_meta *lm = &pblk->lm; | |
420 | struct pblk_lun *rlun; | |
421 | int bb_cnt = 0; | |
422 | int i; | |
423 | ||
424 | line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); | |
425 | if (!line->blk_bitmap) | |
426 | return -ENOMEM; | |
427 | ||
428 | line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL); | |
429 | if (!line->erase_bitmap) { | |
430 | kfree(line->blk_bitmap); | |
431 | return -ENOMEM; | |
432 | } | |
433 | ||
434 | for (i = 0; i < lm->blk_per_line; i++) { | |
435 | rlun = &pblk->luns[i]; | |
436 | if (rlun->bb_list[line->id] == NVM_BLK_T_FREE) | |
437 | continue; | |
438 | ||
439 | set_bit(i, line->blk_bitmap); | |
440 | bb_cnt++; | |
441 | } | |
442 | ||
443 | return bb_cnt; | |
444 | } | |
445 | ||
446 | static int pblk_luns_init(struct pblk *pblk, struct ppa_addr *luns) | |
447 | { | |
448 | struct nvm_tgt_dev *dev = pblk->dev; | |
449 | struct nvm_geo *geo = &dev->geo; | |
450 | struct pblk_lun *rlun; | |
451 | int i, ret; | |
452 | ||
453 | /* TODO: Implement unbalanced LUN support */ | |
454 | if (geo->luns_per_chnl < 0) { | |
455 | pr_err("pblk: unbalanced LUN config.\n"); | |
456 | return -EINVAL; | |
457 | } | |
458 | ||
459 | pblk->luns = kcalloc(geo->nr_luns, sizeof(struct pblk_lun), GFP_KERNEL); | |
460 | if (!pblk->luns) | |
461 | return -ENOMEM; | |
462 | ||
463 | for (i = 0; i < geo->nr_luns; i++) { | |
464 | /* Stripe across channels */ | |
465 | int ch = i % geo->nr_chnls; | |
466 | int lun_raw = i / geo->nr_chnls; | |
467 | int lunid = lun_raw + ch * geo->luns_per_chnl; | |
468 | ||
469 | rlun = &pblk->luns[i]; | |
470 | rlun->bppa = luns[lunid]; | |
471 | ||
472 | sema_init(&rlun->wr_sem, 1); | |
473 | ||
474 | ret = pblk_bb_discovery(dev, rlun); | |
475 | if (ret) { | |
476 | while (--i >= 0) | |
477 | kfree(pblk->luns[i].bb_list); | |
478 | return ret; | |
479 | } | |
480 | } | |
481 | ||
482 | return 0; | |
483 | } | |
484 | ||
485 | static int pblk_lines_configure(struct pblk *pblk, int flags) | |
486 | { | |
487 | struct pblk_line *line = NULL; | |
488 | int ret = 0; | |
489 | ||
490 | if (!(flags & NVM_TARGET_FACTORY)) { | |
491 | line = pblk_recov_l2p(pblk); | |
492 | if (IS_ERR(line)) { | |
493 | pr_err("pblk: could not recover l2p table\n"); | |
494 | ret = -EFAULT; | |
495 | } | |
496 | } | |
497 | ||
498 | if (!line) { | |
499 | /* Configure next line for user data */ | |
500 | line = pblk_line_get_first_data(pblk); | |
501 | if (!line) { | |
502 | pr_err("pblk: line list corrupted\n"); | |
503 | ret = -EFAULT; | |
504 | } | |
505 | } | |
506 | ||
507 | return ret; | |
508 | } | |
509 | ||
510 | /* See comment over struct line_emeta definition */ | |
511 | static unsigned int calc_emeta_len(struct pblk *pblk, struct pblk_line_meta *lm) | |
512 | { | |
513 | return (sizeof(struct line_emeta) + | |
514 | ((lm->sec_per_line - lm->emeta_sec) * sizeof(u64)) + | |
515 | (pblk->l_mg.nr_lines * sizeof(u32)) + | |
516 | lm->blk_bitmap_len); | |
517 | } | |
518 | ||
519 | static void pblk_set_provision(struct pblk *pblk, long nr_free_blks) | |
520 | { | |
521 | struct nvm_tgt_dev *dev = pblk->dev; | |
522 | struct nvm_geo *geo = &dev->geo; | |
523 | sector_t provisioned; | |
524 | ||
525 | pblk->over_pct = 20; | |
526 | ||
527 | provisioned = nr_free_blks; | |
528 | provisioned *= (100 - pblk->over_pct); | |
529 | sector_div(provisioned, 100); | |
530 | ||
531 | /* Internally pblk manages all free blocks, but all calculations based | |
532 | * on user capacity consider only provisioned blocks | |
533 | */ | |
534 | pblk->rl.total_blocks = nr_free_blks; | |
535 | pblk->rl.nr_secs = nr_free_blks * geo->sec_per_blk; | |
536 | pblk->capacity = provisioned * geo->sec_per_blk; | |
537 | atomic_set(&pblk->rl.free_blocks, nr_free_blks); | |
538 | } | |
539 | ||
540 | static int pblk_lines_init(struct pblk *pblk) | |
541 | { | |
542 | struct nvm_tgt_dev *dev = pblk->dev; | |
543 | struct nvm_geo *geo = &dev->geo; | |
544 | struct pblk_line_mgmt *l_mg = &pblk->l_mg; | |
545 | struct pblk_line_meta *lm = &pblk->lm; | |
546 | struct pblk_line *line; | |
547 | unsigned int smeta_len, emeta_len; | |
d624f371 | 548 | long nr_bad_blks, nr_free_blks; |
a4bd217b JG |
549 | int bb_distance; |
550 | int i; | |
1c6286f2 | 551 | int ret; |
a4bd217b JG |
552 | |
553 | lm->sec_per_line = geo->sec_per_blk * geo->nr_luns; | |
554 | lm->blk_per_line = geo->nr_luns; | |
555 | lm->blk_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); | |
556 | lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long); | |
557 | lm->lun_bitmap_len = BITS_TO_LONGS(geo->nr_luns) * sizeof(long); | |
558 | lm->high_thrs = lm->sec_per_line / 2; | |
559 | lm->mid_thrs = lm->sec_per_line / 4; | |
560 | ||
561 | /* Calculate necessary pages for smeta. See comment over struct | |
562 | * line_smeta definition | |
563 | */ | |
564 | lm->smeta_len = sizeof(struct line_smeta) + | |
565 | PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; | |
566 | ||
567 | i = 1; | |
568 | add_smeta_page: | |
569 | lm->smeta_sec = i * geo->sec_per_pl; | |
570 | lm->smeta_len = lm->smeta_sec * geo->sec_size; | |
571 | ||
572 | smeta_len = sizeof(struct line_smeta) + | |
573 | PBLK_LINE_NR_LUN_BITMAP * lm->lun_bitmap_len; | |
574 | if (smeta_len > lm->smeta_len) { | |
575 | i++; | |
576 | goto add_smeta_page; | |
577 | } | |
578 | ||
579 | /* Calculate necessary pages for emeta. See comment over struct | |
580 | * line_emeta definition | |
581 | */ | |
582 | i = 1; | |
583 | add_emeta_page: | |
584 | lm->emeta_sec = i * geo->sec_per_pl; | |
585 | lm->emeta_len = lm->emeta_sec * geo->sec_size; | |
586 | ||
587 | emeta_len = calc_emeta_len(pblk, lm); | |
588 | if (emeta_len > lm->emeta_len) { | |
589 | i++; | |
590 | goto add_emeta_page; | |
591 | } | |
592 | lm->emeta_bb = geo->nr_luns - i; | |
593 | ||
d624f371 JG |
594 | lm->min_blk_line = 1 + DIV_ROUND_UP(lm->smeta_sec + lm->emeta_sec, |
595 | geo->sec_per_blk); | |
a4bd217b JG |
596 | |
597 | l_mg->nr_lines = geo->blks_per_lun; | |
598 | l_mg->log_line = l_mg->data_line = NULL; | |
599 | l_mg->l_seq_nr = l_mg->d_seq_nr = 0; | |
600 | l_mg->nr_free_lines = 0; | |
601 | bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES); | |
602 | ||
603 | /* smeta is always small enough to fit on a kmalloc memory allocation, | |
604 | * emeta depends on the number of LUNs allocated to the pblk instance | |
605 | */ | |
606 | l_mg->smeta_alloc_type = PBLK_KMALLOC_META; | |
607 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
608 | l_mg->sline_meta[i].meta = kmalloc(lm->smeta_len, GFP_KERNEL); | |
609 | if (!l_mg->sline_meta[i].meta) | |
610 | while (--i >= 0) { | |
611 | kfree(l_mg->sline_meta[i].meta); | |
612 | ret = -ENOMEM; | |
613 | goto fail; | |
614 | } | |
615 | } | |
616 | ||
617 | if (lm->emeta_len > KMALLOC_MAX_CACHE_SIZE) { | |
618 | l_mg->emeta_alloc_type = PBLK_VMALLOC_META; | |
619 | ||
620 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
621 | l_mg->eline_meta[i].meta = vmalloc(lm->emeta_len); | |
622 | if (!l_mg->eline_meta[i].meta) | |
623 | while (--i >= 0) { | |
624 | vfree(l_mg->eline_meta[i].meta); | |
625 | ret = -ENOMEM; | |
626 | goto fail; | |
627 | } | |
628 | } | |
629 | } else { | |
630 | l_mg->emeta_alloc_type = PBLK_KMALLOC_META; | |
631 | ||
632 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
633 | l_mg->eline_meta[i].meta = | |
634 | kmalloc(lm->emeta_len, GFP_KERNEL); | |
635 | if (!l_mg->eline_meta[i].meta) | |
636 | while (--i >= 0) { | |
637 | kfree(l_mg->eline_meta[i].meta); | |
638 | ret = -ENOMEM; | |
639 | goto fail; | |
640 | } | |
641 | } | |
642 | } | |
643 | ||
644 | l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); | |
1c6286f2 DC |
645 | if (!l_mg->bb_template) { |
646 | ret = -ENOMEM; | |
a4bd217b | 647 | goto fail_free_meta; |
1c6286f2 | 648 | } |
a4bd217b JG |
649 | |
650 | l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL); | |
1c6286f2 DC |
651 | if (!l_mg->bb_aux) { |
652 | ret = -ENOMEM; | |
a4bd217b | 653 | goto fail_free_bb_template; |
1c6286f2 | 654 | } |
a4bd217b JG |
655 | |
656 | bb_distance = (geo->nr_luns) * geo->sec_per_pl; | |
657 | for (i = 0; i < lm->sec_per_line; i += bb_distance) | |
658 | bitmap_set(l_mg->bb_template, i, geo->sec_per_pl); | |
659 | ||
660 | INIT_LIST_HEAD(&l_mg->free_list); | |
661 | INIT_LIST_HEAD(&l_mg->corrupt_list); | |
662 | INIT_LIST_HEAD(&l_mg->bad_list); | |
663 | INIT_LIST_HEAD(&l_mg->gc_full_list); | |
664 | INIT_LIST_HEAD(&l_mg->gc_high_list); | |
665 | INIT_LIST_HEAD(&l_mg->gc_mid_list); | |
666 | INIT_LIST_HEAD(&l_mg->gc_low_list); | |
667 | INIT_LIST_HEAD(&l_mg->gc_empty_list); | |
668 | ||
669 | l_mg->gc_lists[0] = &l_mg->gc_high_list; | |
670 | l_mg->gc_lists[1] = &l_mg->gc_mid_list; | |
671 | l_mg->gc_lists[2] = &l_mg->gc_low_list; | |
672 | ||
673 | spin_lock_init(&l_mg->free_lock); | |
674 | spin_lock_init(&l_mg->gc_lock); | |
675 | ||
676 | pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line), | |
677 | GFP_KERNEL); | |
1c6286f2 DC |
678 | if (!pblk->lines) { |
679 | ret = -ENOMEM; | |
a4bd217b | 680 | goto fail_free_bb_aux; |
1c6286f2 | 681 | } |
a4bd217b JG |
682 | |
683 | nr_free_blks = 0; | |
684 | for (i = 0; i < l_mg->nr_lines; i++) { | |
a44f53fa JG |
685 | int blk_in_line; |
686 | ||
a4bd217b JG |
687 | line = &pblk->lines[i]; |
688 | ||
689 | line->pblk = pblk; | |
690 | line->id = i; | |
691 | line->type = PBLK_LINETYPE_FREE; | |
692 | line->state = PBLK_LINESTATE_FREE; | |
693 | line->gc_group = PBLK_LINEGC_NONE; | |
694 | spin_lock_init(&line->lock); | |
695 | ||
696 | nr_bad_blks = pblk_bb_line(pblk, line); | |
1c6286f2 DC |
697 | if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line) { |
698 | ret = -EINVAL; | |
a4bd217b | 699 | goto fail_free_lines; |
1c6286f2 | 700 | } |
a4bd217b | 701 | |
a44f53fa JG |
702 | blk_in_line = lm->blk_per_line - nr_bad_blks; |
703 | if (blk_in_line < lm->min_blk_line) { | |
a4bd217b JG |
704 | line->state = PBLK_LINESTATE_BAD; |
705 | list_add_tail(&line->list, &l_mg->bad_list); | |
706 | continue; | |
707 | } | |
708 | ||
a44f53fa JG |
709 | nr_free_blks += blk_in_line; |
710 | atomic_set(&line->blk_in_line, blk_in_line); | |
a4bd217b JG |
711 | |
712 | l_mg->nr_free_lines++; | |
713 | list_add_tail(&line->list, &l_mg->free_list); | |
714 | } | |
715 | ||
716 | pblk_set_provision(pblk, nr_free_blks); | |
717 | ||
a4bd217b JG |
718 | /* Cleanup per-LUN bad block lists - managed within lines on run-time */ |
719 | for (i = 0; i < geo->nr_luns; i++) | |
720 | kfree(pblk->luns[i].bb_list); | |
721 | ||
722 | return 0; | |
723 | fail_free_lines: | |
724 | kfree(pblk->lines); | |
725 | fail_free_bb_aux: | |
726 | kfree(l_mg->bb_aux); | |
727 | fail_free_bb_template: | |
728 | kfree(l_mg->bb_template); | |
729 | fail_free_meta: | |
730 | for (i = 0; i < PBLK_DATA_LINES; i++) { | |
731 | pblk_mfree(l_mg->sline_meta[i].meta, l_mg->smeta_alloc_type); | |
732 | pblk_mfree(l_mg->eline_meta[i].meta, l_mg->emeta_alloc_type); | |
733 | } | |
734 | fail: | |
735 | for (i = 0; i < geo->nr_luns; i++) | |
736 | kfree(pblk->luns[i].bb_list); | |
737 | ||
738 | return ret; | |
739 | } | |
740 | ||
741 | static int pblk_writer_init(struct pblk *pblk) | |
742 | { | |
743 | setup_timer(&pblk->wtimer, pblk_write_timer_fn, (unsigned long)pblk); | |
744 | mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100)); | |
745 | ||
746 | pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t"); | |
747 | if (IS_ERR(pblk->writer_ts)) { | |
748 | pr_err("pblk: could not allocate writer kthread\n"); | |
1c6286f2 | 749 | return PTR_ERR(pblk->writer_ts); |
a4bd217b JG |
750 | } |
751 | ||
752 | return 0; | |
753 | } | |
754 | ||
755 | static void pblk_writer_stop(struct pblk *pblk) | |
756 | { | |
757 | if (pblk->writer_ts) | |
758 | kthread_stop(pblk->writer_ts); | |
759 | del_timer(&pblk->wtimer); | |
760 | } | |
761 | ||
/*
 * Release every allocation owned by a pblk instance, then the instance
 * itself. The write buffer is not covered here; pblk_tear_down() frees it.
 */
static void pblk_free(struct pblk *pblk)
{
	/* LUN array */
	pblk_luns_free(pblk);

	/* per-line state, then the line metadata and the line array */
	pblk_lines_free(pblk);
	pblk_line_meta_free(pblk);

	/* workqueue, mempools and slab caches */
	pblk_core_free(pblk);

	/* L2P translation map */
	pblk_l2p_free(pblk);

	kfree(pblk);
}
772 | ||
773 | static void pblk_tear_down(struct pblk *pblk) | |
774 | { | |
775 | pblk_flush_writer(pblk); | |
776 | pblk_writer_stop(pblk); | |
777 | pblk_rb_sync_l2p(&pblk->rwb); | |
778 | pblk_recov_pad(pblk); | |
779 | pblk_rwb_free(pblk); | |
780 | pblk_rl_free(&pblk->rl); | |
781 | ||
782 | pr_debug("pblk: consistent tear down\n"); | |
783 | } | |
784 | ||
785 | static void pblk_exit(void *private) | |
786 | { | |
787 | struct pblk *pblk = private; | |
788 | ||
789 | down_write(&pblk_lock); | |
790 | pblk_gc_exit(pblk); | |
791 | pblk_tear_down(pblk); | |
792 | pblk_free(pblk); | |
793 | up_write(&pblk_lock); | |
794 | } | |
795 | ||
796 | static sector_t pblk_capacity(void *private) | |
797 | { | |
798 | struct pblk *pblk = private; | |
799 | ||
800 | return pblk->capacity * NR_PHY_IN_LOG; | |
801 | } | |
802 | ||
803 | static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk, | |
804 | int flags) | |
805 | { | |
806 | struct nvm_geo *geo = &dev->geo; | |
807 | struct request_queue *bqueue = dev->q; | |
808 | struct request_queue *tqueue = tdisk->queue; | |
809 | struct pblk *pblk; | |
810 | int ret; | |
811 | ||
812 | if (dev->identity.dom & NVM_RSP_L2P) { | |
813 | pr_err("pblk: device-side L2P table not supported. (%x)\n", | |
814 | dev->identity.dom); | |
815 | return ERR_PTR(-EINVAL); | |
816 | } | |
817 | ||
818 | pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL); | |
819 | if (!pblk) | |
820 | return ERR_PTR(-ENOMEM); | |
821 | ||
822 | pblk->dev = dev; | |
823 | pblk->disk = tdisk; | |
824 | ||
825 | spin_lock_init(&pblk->trans_lock); | |
826 | spin_lock_init(&pblk->lock); | |
827 | ||
828 | if (flags & NVM_TARGET_FACTORY) | |
829 | pblk_setup_uuid(pblk); | |
830 | ||
831 | #ifdef CONFIG_NVM_DEBUG | |
832 | atomic_long_set(&pblk->inflight_writes, 0); | |
833 | atomic_long_set(&pblk->padded_writes, 0); | |
834 | atomic_long_set(&pblk->padded_wb, 0); | |
835 | atomic_long_set(&pblk->nr_flush, 0); | |
836 | atomic_long_set(&pblk->req_writes, 0); | |
837 | atomic_long_set(&pblk->sub_writes, 0); | |
838 | atomic_long_set(&pblk->sync_writes, 0); | |
839 | atomic_long_set(&pblk->compl_writes, 0); | |
840 | atomic_long_set(&pblk->inflight_reads, 0); | |
db7ada33 | 841 | atomic_long_set(&pblk->cache_reads, 0); |
a4bd217b JG |
842 | atomic_long_set(&pblk->sync_reads, 0); |
843 | atomic_long_set(&pblk->recov_writes, 0); | |
844 | atomic_long_set(&pblk->recov_writes, 0); | |
845 | atomic_long_set(&pblk->recov_gc_writes, 0); | |
846 | #endif | |
847 | ||
848 | atomic_long_set(&pblk->read_failed, 0); | |
849 | atomic_long_set(&pblk->read_empty, 0); | |
850 | atomic_long_set(&pblk->read_high_ecc, 0); | |
851 | atomic_long_set(&pblk->read_failed_gc, 0); | |
852 | atomic_long_set(&pblk->write_failed, 0); | |
853 | atomic_long_set(&pblk->erase_failed, 0); | |
854 | ||
855 | ret = pblk_luns_init(pblk, dev->luns); | |
856 | if (ret) { | |
857 | pr_err("pblk: could not initialize luns\n"); | |
858 | goto fail; | |
859 | } | |
860 | ||
861 | ret = pblk_lines_init(pblk); | |
862 | if (ret) { | |
863 | pr_err("pblk: could not initialize lines\n"); | |
864 | goto fail_free_luns; | |
865 | } | |
866 | ||
867 | ret = pblk_core_init(pblk); | |
868 | if (ret) { | |
869 | pr_err("pblk: could not initialize core\n"); | |
870 | goto fail_free_line_meta; | |
871 | } | |
872 | ||
873 | ret = pblk_l2p_init(pblk); | |
874 | if (ret) { | |
875 | pr_err("pblk: could not initialize maps\n"); | |
876 | goto fail_free_core; | |
877 | } | |
878 | ||
879 | ret = pblk_lines_configure(pblk, flags); | |
880 | if (ret) { | |
881 | pr_err("pblk: could not configure lines\n"); | |
882 | goto fail_free_l2p; | |
883 | } | |
884 | ||
885 | ret = pblk_writer_init(pblk); | |
886 | if (ret) { | |
887 | pr_err("pblk: could not initialize write thread\n"); | |
888 | goto fail_free_lines; | |
889 | } | |
890 | ||
891 | ret = pblk_gc_init(pblk); | |
892 | if (ret) { | |
893 | pr_err("pblk: could not initialize gc\n"); | |
894 | goto fail_stop_writer; | |
895 | } | |
896 | ||
897 | /* inherit the size from the underlying device */ | |
898 | blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue)); | |
899 | blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue)); | |
900 | ||
901 | blk_queue_write_cache(tqueue, true, false); | |
902 | ||
903 | tqueue->limits.discard_granularity = geo->pgs_per_blk * geo->pfpg_size; | |
904 | tqueue->limits.discard_alignment = 0; | |
905 | blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9); | |
906 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, tqueue); | |
907 | ||
908 | pr_info("pblk init: luns:%u, lines:%d, secs:%llu, buf entries:%u\n", | |
909 | geo->nr_luns, pblk->l_mg.nr_lines, | |
910 | (unsigned long long)pblk->rl.nr_secs, | |
911 | pblk->rwb.nr_entries); | |
912 | ||
913 | wake_up_process(pblk->writer_ts); | |
914 | return pblk; | |
915 | ||
916 | fail_stop_writer: | |
917 | pblk_writer_stop(pblk); | |
918 | fail_free_lines: | |
919 | pblk_lines_free(pblk); | |
920 | fail_free_l2p: | |
921 | pblk_l2p_free(pblk); | |
922 | fail_free_core: | |
923 | pblk_core_free(pblk); | |
924 | fail_free_line_meta: | |
925 | pblk_line_meta_free(pblk); | |
926 | fail_free_luns: | |
927 | pblk_luns_free(pblk); | |
928 | fail: | |
929 | kfree(pblk); | |
930 | return ERR_PTR(ret); | |
931 | } | |
932 | ||
933 | /* physical block device target */ | |
934 | static struct nvm_tgt_type tt_pblk = { | |
935 | .name = "pblk", | |
936 | .version = {1, 0, 0}, | |
937 | ||
938 | .make_rq = pblk_make_rq, | |
939 | .capacity = pblk_capacity, | |
940 | ||
941 | .init = pblk_init, | |
942 | .exit = pblk_exit, | |
943 | ||
944 | .sysfs_init = pblk_sysfs_init, | |
945 | .sysfs_exit = pblk_sysfs_exit, | |
946 | }; | |
947 | ||
948 | static int __init pblk_module_init(void) | |
949 | { | |
b25d5237 N |
950 | int ret; |
951 | ||
952 | pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0); | |
953 | if (!pblk_bio_set) | |
954 | return -ENOMEM; | |
955 | ret = nvm_register_tgt_type(&tt_pblk); | |
956 | if (ret) | |
957 | bioset_free(pblk_bio_set); | |
958 | return ret; | |
a4bd217b JG |
959 | } |
960 | ||
961 | static void pblk_module_exit(void) | |
962 | { | |
b25d5237 | 963 | bioset_free(pblk_bio_set); |
a4bd217b JG |
964 | nvm_unregister_tgt_type(&tt_pblk); |
965 | } | |
966 | ||
967 | module_init(pblk_module_init); | |
968 | module_exit(pblk_module_exit); | |
969 | MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>"); | |
970 | MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>"); | |
971 | MODULE_LICENSE("GPL v2"); | |
972 | MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs"); |