Commit | Line | Data |
---|---|---|
de1ba09b AM |
1 | Fault injection capabilities infrastructure |
2 | =========================================== | |
3 | ||
1892ce4c | 4 | See also drivers/md/md-faulty.c and "every_nth" module option for scsi_debug. |
de1ba09b AM |
5 | |
6 | ||
7 | Available fault injection capabilities | |
8 | -------------------------------------- | |
9 | ||
10 | o failslab | |
11 | ||
12 | injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), ...) | |
13 | ||
14 | o fail_page_alloc | |
15 | ||
16 | injects page allocation failures. (alloc_pages(), get_free_pages(), ...) | |
17 | ||
ab51fbab DB |
18 | o fail_futex |
19 | ||
20 | injects futex deadlock and uaddr fault errors. | |
21 | ||
de1ba09b AM |
22 | o fail_make_request |
23 | ||
5d0ffa2b | 24 | injects disk IO errors on devices permitted by setting |
de1ba09b AM |
25 | /sys/block/<device>/make-it-fail or |
26 | /sys/block/<device>/<partition>/make-it-fail. (generic_make_request()) | |
27 | ||
1e4cb22b PF |
28 | o fail_mmc_request |
29 | ||
30 | injects MMC data errors on devices permitted by setting | |
31 | debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request | |
32 | ||
4b1a29a7 MH |
33 | o fail_function |
34 | ||
35 | injects error return on specific functions, which are marked by | |
36 | ALLOW_ERROR_INJECTION() macro, by setting debugfs entries | |
37 | under /sys/kernel/debug/fail_function. No boot option supported. | |
38 | ||
cf4182f3 TT |
39 | o NVMe fault injection |
40 | ||
41 | injects NVMe status code and retry flag on devices permitted by setting | |
42 | debugfs entries under /sys/kernel/debug/nvme*/fault_inject. The default | |
43 | status code is NVME_SC_INVALID_OPCODE with no retry. The status code and | |
44 | retry flag can be set via the debugfs. | |
45 | ||
46 | ||
de1ba09b AM |
47 | Configure fault-injection capabilities behavior |
48 | ----------------------------------------------- | |
49 | ||
50 | o debugfs entries | |
51 | ||
52 | fault-inject-debugfs kernel module provides some debugfs entries for runtime | |
53 | configuration of fault-injection capabilities. | |
54 | ||
156f5a78 | 55 | - /sys/kernel/debug/fail*/probability: |
de1ba09b AM |
56 | |
57 | likelihood of failure injection, in percent. | |
58 | Format: <percent> | |
59 | ||
5d0ffa2b DM |
60 | Note that one-failure-per-hundred is a very high error rate |
61 | for some testcases. Consider setting probability=100 and configure | |
156f5a78 | 62 | /sys/kernel/debug/fail*/interval for such testcases. |
de1ba09b | 63 | |
156f5a78 | 64 | - /sys/kernel/debug/fail*/interval: |
de1ba09b AM |
65 | |
66 | specifies the interval between failures, for calls to | |
67 | should_fail() that pass all the other tests. | |
68 | ||
69 | Note that if you enable this, by setting interval>1, you will | |
70 | probably want to set probability=100. | |
71 | ||
156f5a78 | 72 | - /sys/kernel/debug/fail*/times: |
de1ba09b AM |
73 | |
74 | specifies how many times failures may happen at most. | |
75 | A value of -1 means "no limit". | |
76 | ||
156f5a78 | 77 | - /sys/kernel/debug/fail*/space: |
de1ba09b AM |
78 | |
79 | specifies an initial resource "budget", decremented by "size" | |
80 | on each call to should_fail(,size). Failure injection is | |
81 | suppressed until "space" reaches zero. | |
82 | ||
156f5a78 | 83 | - /sys/kernel/debug/fail*/verbose: |
de1ba09b AM |
84 | |
85 | Format: { 0 | 1 | 2 } | |
5d0ffa2b DM |
86 | specifies the verbosity of the messages when failure is |
87 | injected. '0' means no messages; '1' will print only a single | |
88 | log line per failure; '2' will print a call trace too -- useful | |
89 | to debug the problems revealed by fault injection. | |
de1ba09b | 90 | |
156f5a78 | 91 | - /sys/kernel/debug/fail*/task-filter: |
de1ba09b | 92 | |
5d0ffa2b DM |
93 | Format: { 'Y' | 'N' } |
94 | A value of 'N' disables filtering by process (default). | |
de1ba09b AM |
95 | A value of 'Y' limits failures to only processes indicated by |
96 | /proc/<pid>/make-it-fail==1. | |
97 | ||
156f5a78 GL |
98 | - /sys/kernel/debug/fail*/require-start: |
99 | - /sys/kernel/debug/fail*/require-end: | |
100 | - /sys/kernel/debug/fail*/reject-start: | |
101 | - /sys/kernel/debug/fail*/reject-end: | |
de1ba09b AM |
102 | |
103 | specifies the range of virtual addresses tested during | |
104 | stacktrace walking. Failure is injected only if some caller | |
329409ae AM |
105 | in the walked stacktrace lies within the required range, and |
106 | none lies within the rejected range. | |
107 | Default required range is [0,ULONG_MAX) (whole of virtual address space). | |
108 | Default rejected range is [0,0). | |
de1ba09b | 109 | |
156f5a78 | 110 | - /sys/kernel/debug/fail*/stacktrace-depth: |
de1ba09b AM |
111 | |
112 | specifies the maximum stacktrace depth walked during search | |
5d0ffa2b DM |
113 | for a caller within [require-start,require-end) OR |
114 | [reject-start,reject-end). | |
de1ba09b | 115 | |
156f5a78 | 116 | - /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem: |
de1ba09b | 117 | |
5d0ffa2b DM |
118 | Format: { 'Y' | 'N' } |
119 | default is 'N', setting it to 'Y' won't inject failures into | |
de1ba09b AM |
120 | highmem/user allocations. |
121 | ||
156f5a78 GL |
122 | - /sys/kernel/debug/failslab/ignore-gfp-wait: |
123 | - /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait: | |
de1ba09b | 124 | |
5d0ffa2b DM |
125 | Format: { 'Y' | 'N' } |
126 | default is 'N', setting it to 'Y' will inject failures | |
de1ba09b AM |
127 | only into non-sleep allocations (GFP_ATOMIC allocations). |
128 | ||
156f5a78 | 129 | - /sys/kernel/debug/fail_page_alloc/min-order: |
54114994 AM |
130 | |
131 | specifies the minimum page allocation order for which failures | |
132 | are injected. | |
133 | ||
ab51fbab DB |
134 | - /sys/kernel/debug/fail_futex/ignore-private: |
135 | ||
136 | Format: { 'Y' | 'N' } | |
137 | default is 'N', setting it to 'Y' will disable failure injections | |
138 | when dealing with private (address space) futexes. | |
139 | ||
4b1a29a7 MH |
140 | - /sys/kernel/debug/fail_function/inject: |
141 | ||
142 | Format: { 'function-name' | '!function-name' | '' } | |
143 | specifies the target function of error injection by name. | |
144 | If the function name is prefixed with '!', the given function is | |
145 | removed from the injection list. If nothing is specified (''), | |
146 | the injection list is cleared. | |
147 | ||
148 | - /sys/kernel/debug/fail_function/injectable: | |
149 | ||
150 | (read only) shows error injectable functions and what type of | |
151 | error values can be specified. The error type will be one of | |
152 | the following: | |
153 | - NULL: retval must be 0. | |
154 | - ERRNO: retval must be -1 to -MAX_ERRNO (-4096). | |
155 | - ERR_NULL: retval must be 0 or -1 to -MAX_ERRNO (-4096). | |
156 | ||
157 | - /sys/kernel/debug/fail_function/<function-name>/retval: | |
158 | ||
159 | specifies the "error" return value to inject into the given | |
160 | function. This file is created when the user adds a new | |
161 | injection entry. | |
162 | ||
de1ba09b AM |
163 | o Boot option |
164 | ||
165 | In order to inject faults while debugfs is not available (early boot time), | |
166 | use the boot option: | |
167 | ||
168 | failslab= | |
169 | fail_page_alloc= | |
1e4cb22b | 170 | fail_make_request= |
ab51fbab | 171 | fail_futex= |
199e3f4b | 172 | mmc_core.fail_request=<interval>,<probability>,<space>,<times> |
de1ba09b | 173 | |
e41d5818 DV |
174 | o proc entries |
175 | ||
168c42bc AM |
176 | - /proc/<pid>/fail-nth: |
177 | - /proc/self/task/<tid>/fail-nth: | |
e41d5818 | 178 | |
9049f2f6 | 179 | Writing an integer N to this file makes the N-th call in the task fail. |
bfc74093 AM |
180 | Reading from this file returns an integer value. A value of '0' indicates |
181 | that the fault setup with a previous write to this file was injected. | |
182 | A positive integer N indicates that the fault wasn't yet injected. | |
e41d5818 DV |
183 | Note that this file enables all types of faults (slab, futex, etc). |
184 | This setting takes precedence over all other generic debugfs settings | |
185 | like probability, interval, times, etc. But per-capability settings | |
186 | (e.g. fail_futex/ignore-private) take precedence over it. | |
187 | ||
188 | This feature is intended for systematic testing of faults in a single | |
189 | system call. See an example below. | |
190 | ||
de1ba09b AM |
191 | How to add new fault injection capability |
192 | ----------------------------------------- | |
193 | ||
194 | o #include <linux/fault-inject.h> | |
195 | ||
196 | o define the fault attributes | |
197 | ||
198 | DECLARE_FAULT_INJECTION(name); | |
199 | ||
200 | Please see the definition of struct fault_attr in fault-inject.h | |
201 | for details. | |
202 | ||
5d0ffa2b | 203 | o provide a way to configure fault attributes |
de1ba09b AM |
204 | |
205 | - boot option | |
206 | ||
207 | If you need to enable the fault injection capability from boot time, you can | |
5d0ffa2b | 208 | provide boot option to configure it. There is a helper function for it: |
de1ba09b | 209 | |
5d0ffa2b | 210 | setup_fault_attr(attr, str); |
de1ba09b AM |
211 | |
212 | - debugfs entries | |
213 | ||
214 | failslab, fail_page_alloc, and fail_make_request use this way. | |
5d0ffa2b | 215 | Helper functions: |
de1ba09b | 216 | |
dd48c085 | 217 | fault_create_debugfs_attr(name, parent, attr); |
de1ba09b AM |
218 | |
219 | - module parameters | |
220 | ||
221 | If the scope of the fault injection capability is limited to a | |
222 | single kernel module, it is better to provide module parameters to | |
223 | configure the fault attributes. | |
224 | ||
225 | o add a hook to insert failures | |
226 | ||
5d0ffa2b | 227 | Upon should_fail() returning true, client code should inject a failure. |
de1ba09b | 228 | |
5d0ffa2b | 229 | should_fail(attr, size); |
de1ba09b AM |
230 | |
231 | Application Examples | |
232 | -------------------- | |
233 | ||
18584870 | 234 | o Inject slab allocation failures into module init/exit code |
de1ba09b | 235 | |
de1ba09b AM |
236 | #!/bin/bash |
237 | ||
18584870 | 238 | FAILTYPE=failslab |
156f5a78 GL |
239 | echo Y > /sys/kernel/debug/$FAILTYPE/task-filter |
240 | echo 10 > /sys/kernel/debug/$FAILTYPE/probability | |
241 | echo 100 > /sys/kernel/debug/$FAILTYPE/interval | |
242 | echo -1 > /sys/kernel/debug/$FAILTYPE/times | |
243 | echo 0 > /sys/kernel/debug/$FAILTYPE/space | |
244 | echo 2 > /sys/kernel/debug/$FAILTYPE/verbose | |
245 | echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait | |
de1ba09b | 246 | |
18584870 | 247 | faulty_system() |
de1ba09b | 248 | { |
18584870 | 249 | bash -c "echo 1 > /proc/self/make-it-fail && exec $*" |
de1ba09b AM |
250 | } |
251 | ||
18584870 AM |
252 | if [ $# -eq 0 ] |
253 | then | |
254 | echo "Usage: $0 modulename [ modulename ... ]" | |
255 | exit 1 | |
256 | fi | |
257 | ||
258 | for m in $* | |
259 | do | |
260 | echo inserting $m... | |
261 | faulty_system modprobe $m | |
de1ba09b | 262 | |
18584870 AM |
263 | echo removing $m... |
264 | faulty_system modprobe -r $m | |
265 | done | |
de1ba09b AM |
266 | |
267 | ------------------------------------------------------------------------------ | |
268 | ||
18584870 | 269 | o Inject page allocation failures only for a specific module |
de1ba09b | 270 | |
de1ba09b AM |
271 | #!/bin/bash |
272 | ||
18584870 AM |
273 | FAILTYPE=fail_page_alloc |
274 | module=$1 | |
de1ba09b | 275 | |
18584870 AM |
276 | if [ -z $module ] |
277 | then | |
278 | echo "Usage: $0 <modulename>" | |
279 | exit 1 | |
280 | fi | |
de1ba09b | 281 | |
18584870 | 282 | modprobe $module |
de1ba09b | 283 | |
18584870 AM |
284 | if [ ! -d /sys/module/$module/sections ] |
285 | then | |
286 | echo Module $module is not loaded | |
287 | exit 1 | |
288 | fi | |
289 | ||
156f5a78 GL |
290 | cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start |
291 | cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end | |
18584870 | 292 | |
156f5a78 GL |
293 | echo N > /sys/kernel/debug/$FAILTYPE/task-filter |
294 | echo 10 > /sys/kernel/debug/$FAILTYPE/probability | |
295 | echo 100 > /sys/kernel/debug/$FAILTYPE/interval | |
296 | echo -1 > /sys/kernel/debug/$FAILTYPE/times | |
297 | echo 0 > /sys/kernel/debug/$FAILTYPE/space | |
298 | echo 2 > /sys/kernel/debug/$FAILTYPE/verbose | |
299 | echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait | |
300 | echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem | |
301 | echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth | |
18584870 | 302 | |
156f5a78 | 303 | trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT |
18584870 AM |
304 | |
305 | echo "Injecting errors into the module $module... (interrupt to stop)" | |
306 | sleep 1000000 | |
de1ba09b | 307 | |
4b1a29a7 MH |
308 | ------------------------------------------------------------------------------ |
309 | ||
310 | o Inject open_ctree error while mounting btrfs | |
311 | ||
312 | #!/bin/bash | |
313 | ||
314 | rm -f testfile.img | |
315 | dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 | |
316 | DEVICE=$(losetup --show -f testfile.img) | |
317 | mkfs.btrfs -f $DEVICE | |
318 | mkdir -p tmpmnt | |
319 | ||
320 | FAILTYPE=fail_function | |
321 | FAILFUNC=open_ctree | |
322 | echo $FAILFUNC > /sys/kernel/debug/$FAILTYPE/inject | |
323 | echo -12 > /sys/kernel/debug/$FAILTYPE/$FAILFUNC/retval | |
324 | echo N > /sys/kernel/debug/$FAILTYPE/task-filter | |
325 | echo 100 > /sys/kernel/debug/$FAILTYPE/probability | |
326 | echo 0 > /sys/kernel/debug/$FAILTYPE/interval | |
327 | echo -1 > /sys/kernel/debug/$FAILTYPE/times | |
328 | echo 0 > /sys/kernel/debug/$FAILTYPE/space | |
329 | echo 1 > /sys/kernel/debug/$FAILTYPE/verbose | |
330 | ||
331 | mount -t btrfs $DEVICE tmpmnt | |
332 | if [ $? -ne 0 ] | |
333 | then | |
334 | echo "SUCCESS!" | |
335 | else | |
336 | echo "FAILED!" | |
337 | umount tmpmnt | |
338 | fi | |
339 | ||
340 | echo > /sys/kernel/debug/$FAILTYPE/inject | |
341 | ||
342 | rmdir tmpmnt | |
343 | losetup -d $DEVICE | |
344 | rm testfile.img | |
345 | ||
346 | ||
c24aa64d AM |
347 | Tool to run command with failslab or fail_page_alloc |
348 | ---------------------------------------------------- | |
349 | In order to make it easier to accomplish the tasks mentioned above, we can use | |
350 | tools/testing/fault-injection/failcmd.sh. Please run a command | |
351 | "./tools/testing/fault-injection/failcmd.sh --help" for more information and | |
352 | see the following examples. | |
353 | ||
354 | Examples: | |
355 | ||
356 | Run a command "make -C tools/testing/selftests/ run_tests" with injecting slab | |
357 | allocation failure. | |
358 | ||
359 | # ./tools/testing/fault-injection/failcmd.sh \ | |
360 | -- make -C tools/testing/selftests/ run_tests | |
361 | ||
362 | Same as above, except allow at most 100 failures instead of the default of at | |
363 | most one. | |
364 | ||
365 | # ./tools/testing/fault-injection/failcmd.sh --times=100 \ | |
366 | -- make -C tools/testing/selftests/ run_tests | |
367 | ||
368 | Same as above except to inject page allocation failure instead of slab | |
369 | allocation failure. | |
370 | ||
371 | # env FAILCMD_TYPE=fail_page_alloc \ | |
372 | ./tools/testing/fault-injection/failcmd.sh --times=100 \ | |
373 | -- make -C tools/testing/selftests/ run_tests | |
e41d5818 DV |
374 | |
375 | Systematic faults using fail-nth | |
376 | --------------------------------- | |
377 | ||
378 | The following code systematically faults 1-st, 2-nd, 3-rd and so on | |
379 | capabilities in the socketpair() system call. | |
380 | ||
381 | #include <sys/types.h> | |
382 | #include <sys/stat.h> | |
383 | #include <sys/socket.h> | |
384 | #include <sys/syscall.h> | |
385 | #include <fcntl.h> | |
386 | #include <unistd.h> | |
387 | #include <string.h> | |
388 | #include <stdlib.h> | |
389 | #include <stdio.h> | |
390 | #include <errno.h> | |
391 | ||
392 | int main() | |
393 | { | |
394 | int i, err, res, fail_nth, fds[2]; | |
395 | char buf[128]; | |
396 | ||
397 | system("echo N > /sys/kernel/debug/failslab/ignore-gfp-wait"); | |
398 | sprintf(buf, "/proc/self/task/%ld/fail-nth", syscall(SYS_gettid)); | |
399 | fail_nth = open(buf, O_RDWR); | |
9049f2f6 | 400 | for (i = 1;; i++) { |
e41d5818 DV |
401 | sprintf(buf, "%d", i); |
402 | write(fail_nth, buf, strlen(buf)); | |
403 | res = socketpair(AF_LOCAL, SOCK_STREAM, 0, fds); | |
404 | err = errno; | |
bfc74093 | 405 | pread(fail_nth, buf, sizeof(buf), 0); |
e41d5818 DV |
406 | if (res == 0) { |
407 | close(fds[0]); | |
408 | close(fds[1]); | |
409 | } | |
bfc74093 AM |
410 | printf("%d-th fault %c: res=%d/%d\n", i, atoi(buf) ? 'N' : 'Y', |
411 | res, err); | |
412 | if (atoi(buf)) | |
e41d5818 DV |
413 | break; |
414 | } | |
415 | return 0; | |
416 | } | |
417 | ||
418 | An example output: | |
419 | ||
e41d5818 DV |
420 | 1-th fault Y: res=-1/23 |
421 | 2-th fault Y: res=-1/23 | |
422 | 3-th fault Y: res=-1/12 | |
423 | 4-th fault Y: res=-1/12 | |
424 | 5-th fault Y: res=-1/23 | |
425 | 6-th fault Y: res=-1/23 | |
426 | 7-th fault Y: res=-1/23 | |
427 | 8-th fault Y: res=-1/12 | |
428 | 9-th fault Y: res=-1/12 | |
429 | 10-th fault Y: res=-1/12 | |
430 | 11-th fault Y: res=-1/12 | |
431 | 12-th fault Y: res=-1/12 | |
432 | 13-th fault Y: res=-1/12 | |
433 | 14-th fault Y: res=-1/12 | |
434 | 15-th fault Y: res=-1/12 | |
435 | 16-th fault N: res=0/12 |