// SPDX-License-Identifier: GPL-2.0
/*
 * Bad block management
 *
 * - Heavily based on MD badblocks code from Neil Brown
 *
 * Copyright (c) 2015, Intel Corporation.
 */

#include <linux/badblocks.h>
#include <linux/seqlock.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/stddef.h>
#include <linux/types.h>
#include <linux/slab.h>
18 | ||
1726c774 CL |
19 | /* |
20 | * The purpose of badblocks set/clear is to manage bad blocks ranges which are | |
21 | * identified by LBA addresses. | |
22 | * | |
23 | * When the caller of badblocks_set() wants to set a range of bad blocks, the | |
24 | * setting range can be acked or unacked. And the setting range may merge, | |
25 | * overwrite, skip the overlapped already set range, depends on who they are | |
26 | * overlapped or adjacent, and the acknowledgment type of the ranges. It can be | |
27 | * more complicated when the setting range covers multiple already set bad block | |
28 | * ranges, with restrictions of maximum length of each bad range and the bad | |
29 | * table space limitation. | |
30 | * | |
31 | * It is difficult and unnecessary to take care of all the possible situations, | |
32 | * for setting a large range of bad blocks, we can handle it by dividing the | |
33 | * large range into smaller ones when encounter overlap, max range length or | |
34 | * bad table full conditions. Every time only a smaller piece of the bad range | |
35 | * is handled with a limited number of conditions how it is interacted with | |
36 | * possible overlapped or adjacent already set bad block ranges. Then the hard | |
37 | * complicated problem can be much simpler to handle in proper way. | |
38 | * | |
39 | * When setting a range of bad blocks to the bad table, the simplified situations | |
40 | * to be considered are, (The already set bad blocks ranges are naming with | |
41 | * prefix E, and the setting bad blocks range is naming with prefix S) | |
 *
 * 1) A setting range is not overlapped or adjacent to any other already set
 *    bad block range.
 *                       +--------+
 *                       |    S   |
 *                       +--------+
 *    +-------------+                  +-------------+
 *    |      E1     |                  |      E2     |
 *    +-------------+                  +-------------+
 *    For this situation if the bad blocks table is not full, just allocate a
 *    free slot from the bad blocks table to mark the setting range S. The
 *    result is,
 *    +-------------+   +--------+   +-------------+
 *    |      E1     |   |    S   |   |      E2     |
 *    +-------------+   +--------+   +-------------+
 * 2) A setting range starts exactly at a start LBA of an already set bad
 *    blocks range.
 * 2.1) The setting range size < already set range size
 *    +--------+
 *    |    S   |
 *    +--------+
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 2.1.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 2.1.2) If S is unacked and E is acked, the setting will be denied, and the
 *    result is,
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 2.1.3) If S is acked and E is unacked, range S can overwrite E. An extra
 *    slot from the bad blocks table will be allocated for S, and the head of
 *    E will move to the end of the inserted range S. The result is,
 *    +--------+----+
 *    |    S   |  E |
 *    +--------+----+
 * 2.2) The setting range size == already set range size
 * 2.2.1) If S and E are both acked or both unacked ranges, the setting range
 *    S can be merged into the existing bad range E. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 2.2.2) If S is unacked and E is acked, the setting will be denied, and the
 *    result is,
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 2.2.3) If S is acked and E is unacked, range S can overwrite all of bad
 *    blocks range E. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 2.3) The setting range size > already set range size
 *    +-------------------+
 *    |         S         |
 *    +-------------------+
 *    +-------------+
 *    |      E      |
 *    +-------------+
 *    For such a situation, the setting range S can be treated as two parts:
 *    the first part (S1) is the same size as the already set range E, and
 *    the second part (S2) is the rest of the setting range.
 *    +-------------+-----+        +-------------+  +-----+
 *    |      S1     |  S2 |        |      S1     |  |  S2 |
 *    +-------------+-----+  ===>  +-------------+  +-----+
 *    +-------------+              +-------------+
 *    |      E      |              |      E      |
 *    +-------------+              +-------------+
 *    Now we only focus on how to handle the setting range S1 and the already
 *    set range E, which is already explained in 2.2); the rest (S2) will be
 *    handled later in the next loop.
 * 3) A setting range starts before the start LBA of an already set bad blocks
 *    range.
 *    +-------------+
 *    |      S      |
 *    +-------------+
 *          +-------------+
 *          |      E      |
 *          +-------------+
 *    For this situation, the setting range S can be divided into two parts:
 *    the first (S1) ends at the start LBA of the already set range E, and
 *    the second part (S2) starts exactly at the start LBA of range E.
 *    +----+---------+        +----+  +---------+
 *    | S1 |    S2   |        | S1 |  |    S2   |
 *    +----+---------+  ===>  +----+  +---------+
 *          +-------------+          +-------------+
 *          |      E      |          |      E      |
 *          +-------------+          +-------------+
 *    Only the first part S1 is handled in this loop, which is a similar
 *    condition to 1). The rest part S2 has exactly the same start LBA as the
 *    already set range E, so it will be handled in the next loop by one of
 *    the situations in 2).
 * 4) A setting range starts after the start LBA of an already set bad blocks
 *    range.
 * 4.1) If the setting range S exactly matches the tail part of the already
 *    set bad blocks range E, as the following chart shows,
 *        +---------+
 *        |    S    |
 *        +---------+
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 4.1.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *    +-------------+
 *    |      S      |
 *    +-------------+
 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is,
 *    +-------------+
 *    |      E      |
 *    +-------------+
 * 4.1.3) If range E is unacked and the setting range S is acked, then S may
 *    overwrite the overlapped range of E. The result is,
 *    +---+---------+
 *    | E |    S    |
 *    +---+---------+
 * 4.2) If the setting range S stays in the middle of an already set range E,
 *    as the following chart shows,
 *         +----+
 *         |  S |
 *         +----+
 *    +--------------+
 *    |       E      |
 *    +--------------+
 * 4.2.1) If ranges S and E have the same acknowledge value (both acked or
 *    both unacked), they will be merged into one. The result is,
 *    +--------------+
 *    |       S      |
 *    +--------------+
 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
 *    request of S will be rejected. The result is also,
 *    +--------------+
 *    |       E      |
 *    +--------------+
 * 4.2.3) If range E is unacked and the setting range S is acked, then S will
 *    be inserted into the middle of E and split the previous range E into
 *    two parts (E1 and E2). The result is,
 *    +----+----+----+
 *    | E1 |  S | E2 |
 *    +----+----+----+
 * 4.3) If the setting bad blocks range S is overlapped with an already set
 *    bad blocks range E, such that S starts after the start LBA of E and
 *    ends after the end LBA of E, as the following chart shows,
 *          +-------------------+
 *          |          S        |
 *          +-------------------+
 *    +-------------+
 *    |      E      |
 *    +-------------+
 *    For this situation the range S can be divided into two parts: the first
 *    part (S1) ends at the end LBA of range E, and the second part (S2)
 *    covers the rest of the original S.
 *    +---------+---------+        +---------+  +---------+
 *    |    S1   |    S2   |        |    S1   |  |    S2   |
 *    +---------+---------+  ===>  +---------+  +---------+
 *    +-------------+              +-------------+
 *    |      E      |              |      E      |
 *    +-------------+              +-------------+
 *    Now the setting range S1 and the already set range E can be handled as
 *    in situation 4.1); the rest range S2 will be handled in the next loop
 *    and is ignored in this one.
 * 5) A setting bad blocks range S is adjacent to one or more already set bad
 *    blocks range(s), and they are all acked or all unacked ranges.
 * 5.1) Front merge: the already set bad blocks range E is before the setting
 *    range S and they are adjacent,
 *            +------+
 *            |   S  |
 *            +------+
 *    +-------+
 *    |   E   |
 *    +-------+
 * 5.1.1) When the total size of ranges S and E <= BB_MAX_LEN and their
 *    acknowledge values are the same, the setting range S can front merge
 *    into range E. The result is,
 *    +--------------+
 *    |       S      |
 *    +--------------+
 * 5.1.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S right after the already set range E into the bad blocks table.
 *    The result is,
 *    +--------+------+
 *    |    E   |   S  |
 *    +--------+------+
 * 6) Special cases which the above conditions cannot handle
 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad
 *    table
 *    +-------------------------------------------------------+
 *    |                            S                          |
 *    +-------------------------------------------------------+
 *    |<----- BB_MAX_LEN ----->|
 *    +-----+     +-----+                               +-----+
 *    |  E1 |     |  E2 |                               |  E3 |
 *    +-----+     +-----+                               +-----+
 *    In the above example, when the bad blocks table is full, inserting the
 *    first part of the setting range S will fail because no more slots can
 *    be allocated from the bad blocks table. In this situation a proper
 *    setting method should go through all the setting bad blocks range and
 *    look for a chance to merge already set ranges into fewer ones. When a
 *    slot becomes available in the bad blocks table, retry to handle as many
 *    of the remaining setting bad blocks ranges as possible.
 *    +------------------------+
 *    |           S3           |
 *    +------------------------+
 *    |<----- BB_MAX_LEN ----->|
 *    +-----+-----+-----+---+-----+--+
 *    |        S1       |     S2     |
 *    +-----+-----+-----+---+-----+--+
 *    The above chart shows that although the first part (S3) cannot be
 *    inserted due to no space in the bad blocks table, the following E1, E2
 *    and E3 ranges can be merged with the rest of S into the fewer ranges S1
 *    and S2. Now there is one free slot in the bad blocks table.
 *    +------------------------+-----+-----+-----+---+-----+--+
 *    |           S3           |        S1       |     S2     |
 *    +------------------------+-----+-----+-----+---+-----+--+
 *    Since the bad blocks table is not full anymore, retry the original
 *    setting range S. Now the setting range S3 can be inserted into the bad
 *    blocks table with the slot freed by the multiple-ranges merge.
 * 6.2) Front merge after overwrite
 *    In the following example, in the bad blocks table, E1 is an acked bad
 *    blocks range and E2 is an unacked bad blocks range, therefore they are
 *    not able to merge into a larger range. The setting bad blocks range S
 *    is acked, therefore part of E2 can be overwritten by S.
 *                  +--------+
 *                  |    S   |            acknowledged
 *                  +--------+                S: 1
 *          +-------+-------------+          E1: 1
 *          |   E1  |      E2     |          E2: 0
 *          +-------+-------------+
 *    With the simplified routines above, after overwriting part of E2 with
 *    S, the bad blocks table should be (E3 is the remaining part of E2 which
 *    is not overwritten by S),
 *                                        acknowledged
 *          +-------+--------+----+           S: 1
 *          |   E1  |    S   | E3 |          E1: 1
 *          +-------+--------+----+          E3: 0
 *    The above result is correct but not perfect. Ranges E1 and S in the bad
 *    blocks table are both acked; merging them into one larger range may
 *    occupy less bad blocks table space and make badblocks_check() faster.
 *    Therefore in such a situation, after overwriting range S, the previous
 *    range E1 should be checked for a possible front combination. Then the
 *    ideal result can be,
 *          +----------------+----+      acknowledged
 *          |        E1      | E3 |          E1: 1
 *          +----------------+----+          E3: 0
 * 6.3) Behind merge: the already set bad blocks range E is behind the
 *    setting range S and they are adjacent. Normally we don't need to care
 *    about this because the front merge handles it while going through range
 *    S from head to tail, except for the tail part of range S. When the
 *    setting range S is fully handled, none of the above simplified routines
 *    checks whether the tail LBA of range S is adjacent to the next already
 *    set range, so the two are not merged even when it is possible.
 *    +------+
 *    |   S  |
 *    +------+
 *           +-------+
 *           |   E   |
 *           +-------+
 *    For the above special situation, when the setting range S is fully
 *    handled and the loop ends, an extra check is necessary for whether the
 *    next already set range E is right after S and mergeable.
 * 6.3.1) When the total size of ranges E and S <= BB_MAX_LEN and their
 *    acknowledge values are the same, the setting range S can behind merge
 *    into range E. The result is,
 *    +--------------+
 *    |       S      |
 *    +--------------+
 * 6.3.2) Otherwise these two ranges cannot merge; just insert the setting
 *    range S in front of the already set range E in the bad blocks table.
 *    The result is,
 *    +------+-------+
 *    |   S  |   E   |
 *    +------+-------+
 *
 * All the above 5 simplified situations and 3 special cases may cover 99%+
 * of the bad block range setting conditions. If some rare corner case is not
 * considered and optimized, it won't hurt: badblocks_set() may fail due to
 * no space, or some ranges may not be merged to save bad blocks table space.
 *
 * Inside badblocks_set() each loop starts by jumping to the re_insert label;
 * every new loop calls prev_badblocks() to find an already set range which
 * starts before or at the current setting range. Since the setting bad
 * blocks range is handled from head to tail, in most cases it is unnecessary
 * to do the binary search inside prev_badblocks(); it is possible to provide
 * a hint to prev_badblocks() for a fast path, so the expensive binary search
 * can be avoided. In my test with the hint to prev_badblocks(), except for
 * the first loop, all the remaining calls to prev_badblocks() could take the
 * fast path and return the correct bad blocks table index immediately.
 *
 *
 * Clearing a bad blocks range from the bad block table is based on a similar
 * idea to setting, but is much simpler. The only thing to note is that when
 * the clearing range hits the middle of a bad block range, the existing bad
 * block range will split into two, and one more item has to be added into
 * the bad block table. The simplified situations to be considered are (the
 * already set bad block ranges in the bad block table are named with prefix
 * E, and the clearing bad blocks range is named with prefix C):
 *
 * 1) A clearing range is not overlapped with any already set range in the
 *    bad block table.
 *    +-----+          |     +-----+          |           +-----+
 *    |  C  |          |     |  C  |          |           |  C  |
 *    +-----+     or   |     +-----+     or   |           +-----+
 *            +---+    |   +----+  +----+     |   +---+
 *            | E |    |   | E1 |  | E2 |     |   | E |
 *            +---+    |   +----+  +----+     |   +---+
 *    For the above situations, no bad block is cleared and no failure
 *    happens; simply return 0.
 * 2) The clearing range hits the middle of an already set bad blocks range
 *    in the bad block table.
 *          +---+
 *          | C |
 *          +---+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    In this situation, if the bad block table is not full, the range E will
 *    be split into two ranges E1 and E2. The result is,
 *    +------+   +------+
 *    |  E1  |   |  E2  |
 *    +------+   +------+
 * 3) The clearing range starts exactly at the same LBA as an already set bad
 *    block range from the bad block table.
 * 3.1) Partially covered at the head part
 *    +------------+
 *    |      C     |
 *    +------------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For this situation, the overlapped already set range will update its
 *    start LBA to the end of C and shrink to BB_LEN(E) - BB_LEN(C). No item
 *    is deleted from the bad block table. The result is,
 *                 +----+
 *                 | E1 |
 *                 +----+
 * 3.2) Exactly fully covered
 *    +-----------------+
 *    |         C       |
 *    +-----------------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For this situation the whole bad blocks range E will be cleared and its
 *    corresponding item is deleted from the bad block table.
 * 4) The clearing range ends exactly at the same LBA as an already set bad
 *    block range.
 *           +-------+
 *           |   C   |
 *           +-------+
 *    +-----------------+
 *    |         E       |
 *    +-----------------+
 *    For the above situation, the already set range E is updated to shrink
 *    its end to the start of C, reducing its length to
 *    BB_LEN(E) - BB_LEN(C). The result is,
 *    +---------+
 *    |    E    |
 *    +---------+
 * 5) The clearing range is partially overlapped with an already set bad
 *    block range from the bad block table.
 * 5.1) The already set bad block range is front overlapped with the clearing
 *    range.
 *    +----------+
 *    |     C    |
 *    +----------+
 *          +------------+
 *          |      E     |
 *          +------------+
 *    For such a situation, the clearing range C can be treated as two parts:
 *    the first part ends at the start LBA of range E, and the second part
 *    starts at the same LBA as range E.
 *    +----+-----+        +----+  +-----+
 *    | C1 |  C2 |        | C1 |  |  C2 |
 *    +----+-----+  ===>  +----+  +-----+
 *          +------------+        +------------+
 *          |      E     |        |      E     |
 *          +------------+        +------------+
 *    Now the first part C1 can be handled as condition 1), and the second
 *    part C2 can be handled as condition 3.1) in the next loop.
 * 5.2) The already set bad block range is behind overlapped with the
 *    clearing range.
 *          +----------+
 *          |     C    |
 *          +----------+
 *    +------------+
 *    |      E     |
 *    +------------+
 *    For such a situation, the clearing range C can be treated as two parts:
 *    the first part C1 ends at the same end LBA as range E, and the second
 *    part starts at the end LBA of range E.
 *    +----+-----+        +----+  +-----+
 *    | C1 |  C2 |        | C1 |  |  C2 |
 *    +----+-----+  ===>  +----+  +-----+
 *    +------------+      +------------+
 *    |      E     |      |      E     |
 *    +------------+      +------------+
 *    Now the first part, clearing range C1, can be handled as condition 4),
 *    and the second part, clearing range C2, can be handled as condition 1)
 *    in the next loop.
 *
 * All bad blocks range clearing can be simplified into the above 5
 * situations by only handling the head part of the clearing range in each
 * run of the while-loop. The idea is similar to bad blocks range setting but
 * much simpler.
 */
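
/*
 * For reference, each entry in the bad table is a single u64, laid out as
 * the badblocks_check() kernel-doc below describes and as the BB_*() macros
 * in include/linux/badblocks.h implement: the most significant bit is the
 * 'acknowledged' flag, the next 54 bits hold the start sector, and the low
 * 9 bits hold the length minus one (1 to BB_MAX_LEN == 512 sectors). A
 * worked example of the encoding, as a sketch (the authoritative masks live
 * in the header):
 *
 *	u64 e = BB_MAKE(1024, 8, 1);
 *
 *	BB_OFFSET(e);	// 1024, the start sector
 *	BB_LEN(e);	// 8, the number of bad sectors
 *	BB_ACK(e);	// 1, the range is acknowledged
 */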

/*
 * Find the range that starts at-or-before 's' from the bad table. The
 * search starts from index 'hint' and checks at most two slots ('hint'
 * and 'hint + 1') in the bad table.
 */
static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
{
	int hint_end = hint + 2;
	u64 *p = bb->page;
	int ret = -1;

	while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
	       (BB_OFFSET(p[hint]) <= s)) {
		if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
			ret = hint;
			break;
		}
		hint++;
	}

	return ret;
}

/*
 * Find the range that starts at-or-before bad->start. If 'hint' is
 * provided (hint >= 0) then search in the bad table from hint first. It
 * is very probable that the wanted bad range can be found from the hint
 * index, and then the unnecessary while-loop iterations can be avoided.
 */
static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
			  int hint)
{
	sector_t s = bad->start;
	int ret = -1;
	int lo, hi;
	u64 *p;

	if (!bb->count)
		goto out;

	if (hint >= 0) {
		ret = prev_by_hint(bb, s, hint);
		if (ret >= 0)
			goto out;
	}

	lo = 0;
	hi = bb->count;
	p = bb->page;

	/* The following bisect search might be unnecessary */
	if (BB_OFFSET(p[lo]) > s)
		return -1;
	if (BB_OFFSET(p[hi - 1]) <= s)
		return hi - 1;

	/* Do bisect search in bad table */
	while (hi - lo > 1) {
		int mid = (lo + hi)/2;
		sector_t a = BB_OFFSET(p[mid]);

		if (a == s) {
			ret = mid;
			goto out;
		}

		if (a < s)
			lo = mid;
		else
			hi = mid;
	}

	if (BB_OFFSET(p[lo]) <= s)
		ret = lo;
out:
	return ret;
}
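
/*
 * For illustration, assume the bad table holds three ranges whose start
 * offsets are 16, 64 and 128 (indexes 0, 1 and 2). A sketch of what
 * prev_badblocks() then returns for different values of bad->start:
 *
 *	bad->start == 8    ->  -1  (starts before all ranges)
 *	bad->start == 16   ->   0  (exactly at the start of p[0])
 *	bad->start == 70   ->   1  (p[1] is the last range at-or-before)
 *	bad->start == 500  ->   2  (starts after all ranges)
 */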

/*
 * Return 'true' if the range indicated by 'bad' can be backward merged
 * with the bad range (from the bad table) indexed by 'behind'.
 */
static bool can_merge_behind(struct badblocks *bb,
			     struct badblocks_context *bad, int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;

	if ((s < BB_OFFSET(p[behind])) &&
	    ((s + sectors) >= BB_OFFSET(p[behind])) &&
	    ((BB_END(p[behind]) - s) <= BB_MAX_LEN) &&
	    BB_ACK(p[behind]) == bad->ack)
		return true;
	return false;
}

/*
 * Do backward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'behind'. The return value is the
 * number of sectors merged from bad->len.
 */
static int behind_merge(struct badblocks *bb, struct badblocks_context *bad,
			int behind)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s >= BB_OFFSET(p[behind]));
	WARN_ON((s + sectors) < BB_OFFSET(p[behind]));

	if (s < BB_OFFSET(p[behind])) {
		merged = BB_OFFSET(p[behind]) - s;
		p[behind] = BB_MAKE(s, BB_LEN(p[behind]) + merged, bad->ack);

		WARN_ON((BB_LEN(p[behind]) + merged) >= BB_MAX_LEN);
	}

	return merged;
}

/*
 * Return 'true' if the range indicated by 'bad' can be forward merged
 * with the bad range (from the bad table) indexed by 'prev'.
 */
static bool can_merge_front(struct badblocks *bb, int prev,
			    struct badblocks_context *bad)
{
	sector_t s = bad->start;
	u64 *p = bb->page;

	if (BB_ACK(p[prev]) == bad->ack &&
	    (s < BB_END(p[prev]) ||
	     (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
		return true;
	return false;
}

/*
 * Do forward merge for the range indicated by 'bad' and the bad range
 * (from the bad table) indexed by 'prev'. The return value is the
 * number of sectors merged from bad->len.
 */
static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int merged = 0;

	WARN_ON(s > BB_END(p[prev]));

	if (s < BB_END(p[prev])) {
		merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
	} else {
		merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
		if ((prev + 1) < bb->count &&
		    merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
			merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
		}

		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + merged, bad->ack);
	}

	return merged;
}
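
/*
 * A worked example of the adjacent-merge arithmetic above, as a sketch:
 * assume p[prev] covers sectors [100, 110) (BB_LEN == 10) and 'bad'
 * starts at sector 110 with bad->len == 600. The merge is capped both
 * by BB_MAX_LEN and by the start of p[prev + 1], if that range exists:
 *
 *	merged = min(600, BB_MAX_LEN - 10);	// at most 502 sectors
 *	// if p[prev + 1] starts at sector 300, it is capped further:
 *	merged = 300 - 110;			// 190 sectors
 *
 * so p[prev] grows to [100, 300) and the remaining 410 sectors of 'bad'
 * are left for the following loops in _badblocks_set().
 */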

/*
 * 'Combine' is a special case which can_merge_front() is not able to
 * handle: If a bad range (indexed by 'prev' from the bad table) exactly
 * starts at bad->start, and the bad range ahead of 'prev' (indexed by
 * 'prev - 1' from the bad table) exactly ends where 'prev' starts, and
 * the sum of their lengths does not exceed the BB_MAX_LEN limitation,
 * then these two bad ranges (from the bad table) can be combined.
 *
 * Return 'true' if the bad ranges indexed by 'prev' and 'prev - 1' from
 * the bad table can be combined.
 */
static bool can_combine_front(struct badblocks *bb, int prev,
			      struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if ((prev > 0) &&
	    (BB_OFFSET(p[prev]) == bad->start) &&
	    (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
	    (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
	    (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
		return true;
	return false;
}

/*
 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from the bad
 * table) into one larger bad range, and the new range is indexed by
 * 'prev - 1'.
 * The caller of front_combine() will decrease bb->count, therefore
 * it is unnecessary to clear p[prev] after the front merge.
 */
static void front_combine(struct badblocks *bb, int prev)
{
	u64 *p = bb->page;

	p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
			      BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
			      BB_ACK(p[prev]));
	if ((prev + 1) < bb->count)
		memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly forward
 * overlapped with the bad range (from the bad table) indexed by
 * 'front'. Exactly forward overlap means the bad range (from the bad
 * table) indexed by 'front' does not cover the whole range indicated
 * by 'bad'.
 */
static bool overlap_front(struct badblocks *bb, int front,
			  struct badblocks_context *bad)
{
	u64 *p = bb->page;

	if (bad->start >= BB_OFFSET(p[front]) &&
	    bad->start < BB_END(p[front]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' is exactly backward
 * overlapped with the bad range (from the bad table) indexed by
 * 'behind'.
 */
static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
			   int behind)
{
	u64 *p = bb->page;

	if (bad->start < BB_OFFSET(p[behind]) &&
	    (bad->start + bad->len) > BB_OFFSET(p[behind]))
		return true;
	return false;
}

/*
 * Return 'true' if the range indicated by 'bad' can overwrite the bad
 * range (from the bad table) indexed by 'prev'.
 *
 * The range indicated by 'bad' can overwrite the bad range indexed by
 * 'prev' when,
 * 1) The whole range indicated by 'bad' can cover part or all of the
 *    bad range (from the bad table) indexed by 'prev'.
 * 2) The ack value of 'bad' is larger than or equal to the ack value
 *    of the bad range 'prev'.
 *
 * If the overwrite doesn't cover the whole bad range (from the bad
 * table) indexed by 'prev', new range(s) might be split from the
 * existing bad range,
 * 1) The overwrite covers the head or tail part of the existing bad
 *    range: 1 extra bad range will be split off and added into the
 *    bad table.
 * 2) The overwrite covers the middle of the existing bad range: 2
 *    extra bad ranges will be split off (ahead of and after the
 *    overwritten range) and added into the bad table.
 * The number of extra split ranges of the overwrite is stored in
 * 'extra' and returned to the caller.
 */
static bool can_front_overwrite(struct badblocks *bb, int prev,
				struct badblocks_context *bad, int *extra)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(!overlap_front(bb, prev, bad));

	if (BB_ACK(p[prev]) >= bad->ack)
		return false;

	if (BB_END(p[prev]) <= (bad->start + bad->len)) {
		len = BB_END(p[prev]) - bad->start;
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 0;
		else
			*extra = 1;

		bad->len = len;
	} else {
		if (BB_OFFSET(p[prev]) == bad->start)
			*extra = 1;
		else
			/*
			 * prev range will be split into two; besides the
			 * overwritten one, an extra slot is needed from
			 * the bad table.
			 */
			*extra = 2;
	}

	if ((bb->count + (*extra)) >= MAX_BADBLOCKS)
		return false;

	return true;
}

/*
 * Do the overwrite from the range indicated by 'bad' to the bad range
 * (from the bad table) indexed by 'prev'.
 * The previously called can_front_overwrite() will provide how many
 * extra bad range(s) might be split and added into the bad table. All
 * the splitting cases in the bad table will be handled here.
 */
static int front_overwrite(struct badblocks *bb, int prev,
			   struct badblocks_context *bad, int extra)
{
	u64 *p = bb->page;
	sector_t orig_end = BB_END(p[prev]);
	int orig_ack = BB_ACK(p[prev]);

	switch (extra) {
	case 0:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
				  bad->ack);
		break;
	case 1:
		if (BB_OFFSET(p[prev]) == bad->start) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->len, bad->ack);
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start + bad->len,
					      orig_end - BB_END(p[prev]),
					      orig_ack);
		} else {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  bad->start - BB_OFFSET(p[prev]),
					  orig_ack);
			/*
			 * prev + 2 -> prev + 1 + 1, which is for,
			 * 1) prev + 1: the slot index of the previous one
			 * 2) + 1: one more slot for extra being 1.
			 */
			memmove(p + prev + 2, p + prev + 1,
				(bb->count - prev - 1) * 8);
			p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		}
		break;
	case 2:
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  bad->start - BB_OFFSET(p[prev]),
				  orig_ack);
		/*
		 * prev + 3 -> prev + 1 + 2, which is for,
		 * 1) prev + 1: the slot index of the previous one
		 * 2) + 2: two more slots for extra being 2.
		 */
		memmove(p + prev + 3, p + prev + 1,
			(bb->count - prev - 1) * 8);
		p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
		p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
				      orig_end - BB_END(p[prev + 1]),
				      orig_ack);
		break;
	default:
		break;
	}

	return bad->len;
}
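
/*
 * A worked example of the 'extra == 2' case above, as a sketch: assume
 * p[prev] is the unacked range [100, 200) and 'bad' is the acked range
 * [120, 128) (start 120, len 8). The overwrite splits p[prev] into
 * three slots, consuming two extra entries from the bad table:
 *
 *	p[prev]     == BB_MAKE(100, 20, 0);	// head [100, 120), unacked
 *	p[prev + 1] == BB_MAKE(120,  8, 1);	// bad  [120, 128), acked
 *	p[prev + 2] == BB_MAKE(128, 72, 0);	// tail [128, 200), unacked
 */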

/*
 * Explicitly insert a range indicated by 'bad' into the bad table, at
 * the location indexed by 'at'.
 */
static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
{
	u64 *p = bb->page;
	int len;

	WARN_ON(badblocks_full(bb));

	len = min_t(sector_t, bad->len, BB_MAX_LEN);
	if (at < bb->count)
		memmove(p + at + 1, p + at, (bb->count - at) * 8);
	p[at] = BB_MAKE(bad->start, len, bad->ack);

	return len;
}

static void badblocks_update_acked(struct badblocks *bb)
{
	bool unacked = false;
	u64 *p = bb->page;
	int i;

	if (!bb->unacked_exist)
		return;

	for (i = 0; i < bb->count; i++) {
		if (!BB_ACK(p[i])) {
			unacked = true;
			break;
		}
	}

	if (!unacked)
		bb->unacked_exist = 0;
}

/* Do the exact work to set a bad block range into the bad block table */
static int _badblocks_set(struct badblocks *bb, sector_t s, int sectors,
			  int acknowledged)
{
	int retried = 0, space_desired = 0;
	int orig_len, len = 0, added = 0;
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	sector_t orig_start;
	unsigned long flags;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		/* round the start down, and the end up */
		sector_t next = s + sectors;

		s = round_down(s, 1 << bb->shift);
		next = round_up(next, 1 << bb->shift);
		sectors = next - s;
	}

	write_seqlock_irqsave(&bb->lock, flags);

	orig_start = s;
	orig_len = sectors;
	bad.ack = acknowledged;
	p = bb->page;

re_insert:
	bad.start = s;
	bad.len = sectors;
	len = 0;

	if (badblocks_empty(bb)) {
		len = insert_at(bb, 0, &bad);
		bb->count++;
		added++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start before all badblocks */
	if (prev < 0) {
		if (!badblocks_full(bb)) {
			/* insert on the first */
			if (bad.len > (BB_OFFSET(p[0]) - bad.start))
				bad.len = BB_OFFSET(p[0]) - bad.start;
			len = insert_at(bb, 0, &bad);
			bb->count++;
			added++;
			hint = 0;
			goto update_sectors;
		}

		/* No space, try to merge */
		if (overlap_behind(bb, &bad, 0)) {
			if (can_merge_behind(bb, &bad, 0)) {
				len = behind_merge(bb, &bad, 0);
				added++;
			} else {
				len = BB_OFFSET(p[0]) - s;
				space_desired = 1;
			}
			hint = 0;
			goto update_sectors;
		}

		/* no table space and give up */
		goto out;
	}

	/* in case p[prev-1] can be merged with p[prev] */
	if (can_combine_front(bb, prev, &bad)) {
		front_combine(bb, prev);
		bb->count--;
		added++;
		hint = prev;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if (can_merge_front(bb, prev, &bad)) {
			len = front_merge(bb, prev, &bad);
			added++;
		} else {
			int extra = 0;

			if (!can_front_overwrite(bb, prev, &bad, &extra)) {
				len = min_t(sector_t,
					    BB_END(p[prev]) - s, sectors);
				hint = prev;
				goto update_sectors;
			}

			len = front_overwrite(bb, prev, &bad, extra);
			added++;
			bb->count += extra;

			if (can_combine_front(bb, prev, &bad)) {
				front_combine(bb, prev);
				bb->count--;
			}
		}
		hint = prev;
		goto update_sectors;
	}

	if (can_merge_front(bb, prev, &bad)) {
		len = front_merge(bb, prev, &bad);
		added++;
		hint = prev;
		goto update_sectors;
	}

	/* if no space in table, still try to merge in the covered range */
	if (badblocks_full(bb)) {
		/* skip the cannot-merge range */
		if (((prev + 1) < bb->count) &&
		    overlap_behind(bb, &bad, prev + 1) &&
		    ((s + sectors) >= BB_END(p[prev + 1]))) {
			len = BB_END(p[prev + 1]) - s;
			hint = prev + 1;
			goto update_sectors;
		}

		/* no retry any more */
		len = sectors;
		space_desired = 1;
		hint = -1;
		goto update_sectors;
	}

	/* cannot merge and there is space in bad table */
	if ((prev + 1) < bb->count &&
	    overlap_behind(bb, &bad, prev + 1))
		bad.len = min_t(sector_t,
				bad.len, BB_OFFSET(p[prev + 1]) - bad.start);

	len = insert_at(bb, prev + 1, &bad);
	bb->count++;
	added++;
	hint = prev + 1;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_insert;

	WARN_ON(sectors < 0);

	/*
	 * Check whether the following already set range can be
	 * merged. (prev < 0) condition is not handled here,
	 * because it's already complicated enough.
	 */
	if (prev >= 0 &&
	    (prev + 1) < bb->count &&
	    BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
	    (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
	    BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
		p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
				  BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
				  BB_ACK(p[prev]));

		if ((prev + 2) < bb->count)
			memmove(p + prev + 1, p + prev + 2,
				(bb->count - (prev + 2)) * 8);
		bb->count--;
	}

	if (space_desired && !badblocks_full(bb)) {
		s = orig_start;
		sectors = orig_len;
		space_desired = 0;
		if (retried++ < 3)
			goto re_insert;
	}

out:
	if (added) {
		set_changed(bb);

		if (!acknowledged)
			bb->unacked_exist = 1;
		else
			badblocks_update_acked(bb);
	}

	write_sequnlock_irqrestore(&bb->lock, flags);

	if (!added)
		rv = 1;

	return rv;
}

/*
 * Clear the bad block range from the bad block table which is front
 * overlapped with the clearing range. The return value is how many
 * sectors are cleared from the already set bad block range. If the
 * whole bad block range is covered by the clearing range and fully
 * cleared, 'deleted' is set to 1 for the caller to reduce bb->count.
 */
static int front_clear(struct badblocks *bb, int prev,
		       struct badblocks_context *bad, int *deleted)
{
	sector_t sectors = bad->len;
	sector_t s = bad->start;
	u64 *p = bb->page;
	int cleared = 0;

	*deleted = 0;
	if (s == BB_OFFSET(p[prev])) {
		if (BB_LEN(p[prev]) > sectors) {
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
					  BB_LEN(p[prev]) - sectors,
					  BB_ACK(p[prev]));
			cleared = sectors;
		} else {
			/* BB_LEN(p[prev]) <= sectors */
			cleared = BB_LEN(p[prev]);
			if ((prev + 1) < bb->count)
				memmove(p + prev, p + prev + 1,
					(bb->count - prev - 1) * 8);
			*deleted = 1;
		}
	} else if (s > BB_OFFSET(p[prev])) {
		if (BB_END(p[prev]) <= (s + sectors)) {
			cleared = BB_END(p[prev]) - s;
			p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
					  s - BB_OFFSET(p[prev]),
					  BB_ACK(p[prev]));
		} else {
			/* Splitting is handled in front_splitting_clear() */
			BUG();
		}
	}

	return cleared;
}

/*
 * Handle the condition that the clearing range hits the middle of an
 * already set bad block range from the bad block table. In this
 * condition the existing bad block range is split into two after the
 * middle part is cleared.
 */
static int front_splitting_clear(struct badblocks *bb, int prev,
				 struct badblocks_context *bad)
{
	u64 *p = bb->page;
	u64 end = BB_END(p[prev]);
	int ack = BB_ACK(p[prev]);
	sector_t sectors = bad->len;
	sector_t s = bad->start;

	p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
			  s - BB_OFFSET(p[prev]),
			  ack);
	memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
	p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
	return sectors;
}
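
/*
 * A worked example of the splitting clear above, as a sketch: assume
 * p[prev] covers [100, 200) and the clearing range 'bad' is [120, 128)
 * (start 120, len 8). After the split the table holds,
 *
 *	p[prev]     == BB_MAKE(100, 20, ack);	// head [100, 120)
 *	p[prev + 1] == BB_MAKE(128, 72, ack);	// tail [128, 200)
 *
 * and the caller accounts for the extra slot by increasing bb->count.
 */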

/* Do the exact work to clear a bad block range from the bad block table */
static int _badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	struct badblocks_context bad;
	int prev = -1, hint = -1;
	int len = 0, cleared = 0;
	int rv = 0;
	u64 *p;

	if (bb->shift < 0)
		/* badblocks are disabled */
		return 1;

	if (sectors == 0)
		/* Invalid sectors number */
		return 1;

	if (bb->shift) {
		sector_t target;

		/*
		 * When clearing we round the start up and the end down.
		 * This should not matter as the shift should align with
		 * the block size and no rounding should ever be needed.
		 * However it is better to think a block is bad when it
		 * isn't than to think a block is not bad when it is.
		 */
		target = s + sectors;
		s = round_up(s, 1 << bb->shift);
		target = round_down(target, 1 << bb->shift);
		sectors = target - s;
	}

	write_seqlock_irq(&bb->lock);

	bad.ack = true;
	p = bb->page;

re_clear:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* Start before all badblocks */
	if (prev < 0) {
		if (overlap_behind(bb, &bad, 0)) {
			len = BB_OFFSET(p[0]) - s;
			hint = 0;
		} else {
			len = sectors;
		}
		/*
		 * Both situations clear a non-bad range and should be
		 * treated as successful.
		 */
		cleared++;
		goto update_sectors;
	}

	/* Start after all badblocks */
	if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		cleared++;
		goto update_sectors;
	}

	/* The clear will split a bad record but the table is full */
	if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
	    (BB_END(p[prev]) > (bad.start + sectors))) {
		len = sectors;
		goto update_sectors;
	}

	if (overlap_front(bb, prev, &bad)) {
		if ((BB_OFFSET(p[prev]) < bad.start) &&
		    (BB_END(p[prev]) > (bad.start + bad.len))) {
			/* Splitting */
			if ((bb->count + 1) < MAX_BADBLOCKS) {
				len = front_splitting_clear(bb, prev, &bad);
				bb->count += 1;
				cleared++;
			} else {
				/* No space to split, give up */
				len = sectors;
			}
		} else {
			int deleted = 0;

			len = front_clear(bb, prev, &bad, &deleted);
			bb->count -= deleted;
			cleared++;
			hint = prev;
		}

		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		/* Clearing a non-bad range should be treated as successful */
		cleared++;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;
	/* Clearing a non-bad range should be treated as successful */
	cleared++;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_clear;

	WARN_ON(sectors < 0);

	if (cleared) {
		badblocks_update_acked(bb);
		set_changed(bb);
	}

	write_sequnlock_irq(&bb->lock);

	if (!cleared)
		rv = 1;

	return rv;
}

/* Do the exact work to check a bad block range from the bad block table */
static int _badblocks_check(struct badblocks *bb, sector_t s, int sectors,
			    sector_t *first_bad, int *bad_sectors)
{
	int unacked_badblocks, acked_badblocks;
	int prev = -1, hint = -1, set = 0;
	struct badblocks_context bad;
	unsigned int seq;
	int len, rv;
	u64 *p;

	WARN_ON(bb->shift < 0 || sectors == 0);

	if (bb->shift > 0) {
		sector_t target;

		/* round the start down, and the end up */
		target = s + sectors;
		s = round_down(s, 1 << bb->shift);
		target = round_up(target, 1 << bb->shift);
		sectors = target - s;
	}

retry:
	seq = read_seqbegin(&bb->lock);

	p = bb->page;
	unacked_badblocks = 0;
	acked_badblocks = 0;

re_check:
	bad.start = s;
	bad.len = sectors;

	if (badblocks_empty(bb)) {
		len = sectors;
		goto update_sectors;
	}

	prev = prev_badblocks(bb, &bad, hint);

	/* start after all badblocks */
	if ((prev >= 0) &&
	    ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
		len = sectors;
		goto update_sectors;
	}

	/* Overlapped with front badblocks record */
	if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
		if (BB_ACK(p[prev]))
			acked_badblocks++;
		else
			unacked_badblocks++;

		if (BB_END(p[prev]) >= (s + sectors))
			len = sectors;
		else
			len = BB_END(p[prev]) - s;

		if (set == 0) {
			*first_bad = BB_OFFSET(p[prev]);
			*bad_sectors = BB_LEN(p[prev]);
			set = 1;
		}
		goto update_sectors;
	}

	/* Not front overlap, but behind overlap */
	if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
		len = BB_OFFSET(p[prev + 1]) - bad.start;
		hint = prev + 1;
		goto update_sectors;
	}

	/* Does not cover any badblocks range in the table */
	len = sectors;

update_sectors:
	s += len;
	sectors -= len;

	if (sectors > 0)
		goto re_check;

	WARN_ON(sectors < 0);

	if (unacked_badblocks > 0)
		rv = -1;
	else if (acked_badblocks > 0)
		rv = 1;
	else
		rv = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return rv;
}

/**
 * badblocks_check() - check a given range for bad sectors
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		sector (start) at which to check for badblocks
 * @sectors:	number of sectors to check for badblocks
 * @first_bad:	pointer to store location of the first badblock
 * @bad_sectors: pointer to store number of badblocks after @first_bad
 *
 * We can record which blocks on each device are 'bad' and so just
 * fail those blocks, or that stripe, rather than the whole device.
 * Entries in the bad-block table are 64bits wide.  This comprises:
 * Length of bad-range, in sectors: 0-511 for lengths 1-512
 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
 *  A 'shift' can be set so that larger blocks are tracked and
 *  consequently larger devices can be covered.
 * 'Acknowledged' flag - 1 bit. - the most significant bit.
 *
 * Locking of the bad-block table uses a seqlock so badblocks_check
 * might need to retry if it is very unlucky.
 * We will sometimes want to check for bad blocks in a bi_end_io function,
 * so we use the write_seqlock_irq variant.
 *
 * When looking for a bad block we specify a range and want to
 * know if any block in the range is bad.  So we binary-search
 * to the last range that starts at-or-before the given endpoint
 * (or "before the sector after the target range"),
 * then see if it ends after the given start.
 *
 * Return:
 *  0: there are no known bad blocks in the range
 *  1: there are known bad blocks which are all acknowledged
 * -1: there are bad blocks which have not yet been acknowledged in metadata,
 * plus the start/length of the first bad section we overlap.
 */
int badblocks_check(struct badblocks *bb, sector_t s, int sectors,
		    sector_t *first_bad, int *bad_sectors)
{
	return _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
}
EXPORT_SYMBOL_GPL(badblocks_check);

/**
 * badblocks_set() - Add a range of bad blocks to the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to mark as bad
 * @sectors:	number of sectors to mark as bad
 * @acknowledged: whether to mark the bad sectors as acknowledged
 *
 * This might extend the table, or might contract it if two adjacent ranges
 * can be merged. We binary-search to find the 'insertion' point, then
 * decide how best to handle it.
 *
 * Return:
 *  0: success
 *  1: failed to set badblocks (out of space)
 */
int badblocks_set(struct badblocks *bb, sector_t s, int sectors,
		  int acknowledged)
{
	return _badblocks_set(bb, s, sectors, acknowledged);
}
EXPORT_SYMBOL_GPL(badblocks_set);
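
/*
 * A minimal illustrative caller of the two exported helpers above,
 * assuming a struct badblocks 'bb' already set up with badblocks_init();
 * this is a sketch, not code from any particular driver:
 *
 *	sector_t first_bad;
 *	int bad_sectors;
 *
 *	if (badblocks_set(&bb, 1024, 16, 1))
 *		pr_warn("bad table full, range not recorded\n");
 *
 *	switch (badblocks_check(&bb, 1000, 64, &first_bad, &bad_sectors)) {
 *	case -1:	// unacknowledged bad blocks in [1000, 1064)
 *	case 1:		// bad blocks present, but all acknowledged
 *		// first_bad/bad_sectors describe the first overlap
 *		break;
 *	case 0:		// no known bad blocks in the range
 *		break;
 *	}
 */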

/**
 * badblocks_clear() - Remove a range of bad blocks from the table.
 * @bb:		the badblocks structure that holds all badblock information
 * @s:		first sector to clear
 * @sectors:	number of sectors to clear
 *
 * This may involve extending the table if we split a region,
 * but it must not fail.  So if the table becomes full, we just
 * drop the remove request.
 *
 * Return:
 *  0: success
 *  1: failed to clear badblocks
 */
int badblocks_clear(struct badblocks *bb, sector_t s, int sectors)
{
	return _badblocks_clear(bb, s, sectors);
}
EXPORT_SYMBOL_GPL(badblocks_clear);

/**
 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
 * @bb:		the badblocks structure that holds all badblock information
 *
 * This only succeeds if ->changed is clear.  It is used by
 * in-kernel metadata updates.
 */
void ack_all_badblocks(struct badblocks *bb)
{
	if (bb->page == NULL || bb->changed)
		/* no point even trying */
		return;
	write_seqlock_irq(&bb->lock);

	if (bb->changed == 0 && bb->unacked_exist) {
		u64 *p = bb->page;
		int i;

		for (i = 0; i < bb->count; i++) {
			if (!BB_ACK(p[i])) {
				sector_t start = BB_OFFSET(p[i]);
				int len = BB_LEN(p[i]);

				p[i] = BB_MAKE(start, len, 1);
			}
		}
		bb->unacked_exist = 0;
	}
	write_sequnlock_irq(&bb->lock);
}
EXPORT_SYMBOL_GPL(ack_all_badblocks);

/**
 * badblocks_show() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer to fill for sysfs
 * @unack:	whether to show unacknowledged badblocks
 *
 * Return:
 *  Length of returned data
 */
ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
{
	size_t len;
	int i;
	u64 *p = bb->page;
	unsigned seq;

	if (bb->shift < 0)
		return 0;

retry:
	seq = read_seqbegin(&bb->lock);

	len = 0;
	i = 0;

	while (len < PAGE_SIZE && i < bb->count) {
		sector_t s = BB_OFFSET(p[i]);
		unsigned int length = BB_LEN(p[i]);
		int ack = BB_ACK(p[i]);

		i++;

		if (unack && ack)
			continue;

		len += snprintf(page + len, PAGE_SIZE - len, "%llu %u\n",
				(unsigned long long)s << bb->shift,
				length << bb->shift);
	}
	if (unack && len == 0)
		bb->unacked_exist = 0;

	if (read_seqretry(&bb->lock, seq))
		goto retry;

	return len;
}
EXPORT_SYMBOL_GPL(badblocks_show);

/**
 * badblocks_store() - sysfs access to bad-blocks list
 * @bb:		the badblocks structure that holds all badblock information
 * @page:	buffer received from sysfs
 * @len:	length of data received from sysfs
 * @unack:	whether to store the badblocks as unacknowledged
 *
 * Return:
 *  Length of the buffer processed or -ve error.
 */
ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
			int unack)
{
	unsigned long long sector;
	int length;
	char newline;

	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
	case 3:
		if (newline != '\n')
			return -EINVAL;
		fallthrough;
	case 2:
		if (length <= 0)
			return -EINVAL;
		break;
	default:
		return -EINVAL;
	}

	if (badblocks_set(bb, sector, length, !unack))
		return -ENOSPC;
	else
		return len;
}
EXPORT_SYMBOL_GPL(badblocks_store);
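
/*
 * The text format accepted by badblocks_store() above is the same one
 * badblocks_show() emits: a "start_sector length" pair, e.g. writing
 * "1024 16\n" records 16 sectors starting at sector 1024. The range is
 * stored as acknowledged when 'unack' is 0, and as unacknowledged
 * otherwise.
 */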
1573 | ||
16263ff6 DW |
1574 | static int __badblocks_init(struct device *dev, struct badblocks *bb, |
1575 | int enable) | |
9e0e252a | 1576 | { |
16263ff6 | 1577 | bb->dev = dev; |
9e0e252a VV |
1578 | bb->count = 0; |
1579 | if (enable) | |
1580 | bb->shift = 0; | |
1581 | else | |
1582 | bb->shift = -1; | |
16263ff6 DW |
1583 | if (dev) |
1584 | bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL); | |
1585 | else | |
1586 | bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL); | |
1587 | if (!bb->page) { | |
9e0e252a VV |
1588 | bb->shift = -1; |
1589 | return -ENOMEM; | |
1590 | } | |
1591 | seqlock_init(&bb->lock); | |
1592 | ||
1593 | return 0; | |
1594 | } | |
16263ff6 DW |
1595 | |
1596 | /** | |
1597 | * badblocks_init() - initialize the badblocks structure | |
1598 | * @bb: the badblocks structure that holds all badblock information | |
1599 | * @enable: weather to enable badblocks accounting | |
1600 | * | |
1601 | * Return: | |
1602 | * 0: success | |
1603 | * -ve errno: on error | |
1604 | */ | |
1605 | int badblocks_init(struct badblocks *bb, int enable) | |
1606 | { | |
1607 | return __badblocks_init(NULL, bb, enable); | |
1608 | } | |
9e0e252a VV |
1609 | EXPORT_SYMBOL_GPL(badblocks_init); |

int devm_init_badblocks(struct device *dev, struct badblocks *bb)
{
	if (!bb)
		return -EINVAL;
	return __badblocks_init(dev, bb, 1);
}
EXPORT_SYMBOL_GPL(devm_init_badblocks);
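
/*
 * Lifecycle sketch, assuming a hypothetical caller embedding a struct
 * badblocks: initialize once, use the set/clear/check calls above, then
 * free the table with badblocks_exit() below (devm_init_badblocks()
 * instead ties the page allocation to a device's lifetime):
 *
 *	struct badblocks bb;
 *
 *	if (badblocks_init(&bb, 1))	// returns -ENOMEM on failure
 *		return -ENOMEM;
 *	...
 *	badblocks_exit(&bb);
 */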

/**
 * badblocks_exit() - free the badblocks structure
 * @bb:		the badblocks structure that holds all badblock information
 */
void badblocks_exit(struct badblocks *bb)
{
	if (!bb)
		return;
	if (bb->dev)
		devm_kfree(bb->dev, bb->page);
	else
		kfree(bb->page);
	bb->page = NULL;
}
EXPORT_SYMBOL_GPL(badblocks_exit);