sched: Introduce hierarchal order on shares update list
[linux-2.6-block.git] / kernel / sysctl.c
CommitLineData
1da177e4
LT
1/*
2 * sysctl.c: General linux system control interface
3 *
4 * Begun 24 March 1995, Stephen Tweedie
5 * Added /proc support, Dec 1995
6 * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7 * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8 * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9 * Dynamic registration fixes, Stephen Tweedie.
10 * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11 * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12 * Horn.
13 * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14 * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15 * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16 * Wendling.
17 * The list_for_each() macro wasn't appropriate for the sysctl loop.
18 * Removed it and replaced it with older style, 03/23/00, Bill Wendling
19 */
20
1da177e4
LT
21#include <linux/module.h>
22#include <linux/mm.h>
23#include <linux/swap.h>
24#include <linux/slab.h>
25#include <linux/sysctl.h>
d33ed52d 26#include <linux/signal.h>
1da177e4 27#include <linux/proc_fs.h>
72c2d582 28#include <linux/security.h>
1da177e4 29#include <linux/ctype.h>
dfec072e 30#include <linux/kmemcheck.h>
62239ac2 31#include <linux/fs.h>
1da177e4
LT
32#include <linux/init.h>
33#include <linux/kernel.h>
0296b228 34#include <linux/kobject.h>
20380731 35#include <linux/net.h>
1da177e4
LT
36#include <linux/sysrq.h>
37#include <linux/highuid.h>
38#include <linux/writeback.h>
3fff4c42 39#include <linux/ratelimit.h>
76ab0f53 40#include <linux/compaction.h>
1da177e4 41#include <linux/hugetlb.h>
1da177e4 42#include <linux/initrd.h>
0b77f5bf 43#include <linux/key.h>
1da177e4
LT
44#include <linux/times.h>
45#include <linux/limits.h>
46#include <linux/dcache.h>
6e006701 47#include <linux/dnotify.h>
1da177e4 48#include <linux/syscalls.h>
c748e134 49#include <linux/vmstat.h>
c255d844
PM
50#include <linux/nfs_fs.h>
51#include <linux/acpi.h>
10a0a8d4 52#include <linux/reboot.h>
b0fc494f 53#include <linux/ftrace.h>
cdd6c482 54#include <linux/perf_event.h>
b2be84df 55#include <linux/kprobes.h>
b492e95b 56#include <linux/pipe_fs_i.h>
8e4228e1 57#include <linux/oom.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60#include <asm/processor.h>
61
29cbc78b
AK
62#ifdef CONFIG_X86
63#include <asm/nmi.h>
0741f4d2 64#include <asm/stacktrace.h>
6e7c4025 65#include <asm/io.h>
29cbc78b 66#endif
c55b7c3e
DY
67#ifdef CONFIG_BSD_PROCESS_ACCT
68#include <linux/acct.h>
69#endif
4f0e056f
DY
70#ifdef CONFIG_RT_MUTEXES
71#include <linux/rtmutex.h>
72#endif
2edf5e49
DY
73#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
74#include <linux/lockdep.h>
75#endif
15485a46
DY
76#ifdef CONFIG_CHR_DEV_SG
77#include <scsi/sg.h>
78#endif
29cbc78b 79
58687acb 80#ifdef CONFIG_LOCKUP_DETECTOR
504d7cf1
DZ
81#include <linux/nmi.h>
82#endif
83
7058cb02 84
1da177e4
LT
85#if defined(CONFIG_SYSCTL)
86
87/* External variables not in a header file. */
1da177e4
LT
88extern int sysctl_overcommit_memory;
89extern int sysctl_overcommit_ratio;
90extern int max_threads;
1da177e4 91extern int core_uses_pid;
d6e71144 92extern int suid_dumpable;
1da177e4 93extern char core_pattern[];
a293980c 94extern unsigned int core_pipe_limit;
1da177e4
LT
95extern int pid_max;
96extern int min_free_kbytes;
1da177e4 97extern int pid_max_min, pid_max_max;
9d0243bc 98extern int sysctl_drop_caches;
8ad4b1fb 99extern int percpu_pagelist_fraction;
bebfa101 100extern int compat_log;
9745512c 101extern int latencytop_enabled;
eceea0b3 102extern int sysctl_nr_open_min, sysctl_nr_open_max;
dd8632a1
PM
103#ifndef CONFIG_MMU
104extern int sysctl_nr_trim_pages;
105#endif
cb684b5b 106#ifdef CONFIG_BLOCK
5e605b64 107extern int blk_iopoll_enabled;
cb684b5b 108#endif
1da177e4 109
/*
 * File-local values whose addresses are handed to table entries through
 * .extra1/.extra2 (or, for ngroups_max, through .data).
 * sixty and neg_one are only built with CONFIG_LOCKUP_DETECTOR, where the
 * "watchdog_thresh" entry references them; ten_thousand is only built with
 * CONFIG_PRINTK, where the "printk_delay" entry references it.
 */
c4f3b63f 110/* Constants used for minimum and maximum */
2508ce18 111#ifdef CONFIG_LOCKUP_DETECTOR
c4f3b63f 112static int sixty = 60;
9383d967 113static int neg_one = -1;
c4f3b63f
RT
114#endif
115
c4f3b63f 116static int zero;
cd5f9a4c
LT
117static int __maybe_unused one = 1;
118static int __maybe_unused two = 2;
fc3501d4 119static unsigned long one_ul = 1;
c4f3b63f 120static int one_hundred = 100;
af91322e
DY
121#ifdef CONFIG_PRINTK
122static int ten_thousand = 10000;
123#endif
c4f3b63f 124
9e4a5bda
AR
125/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
126static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
127
1da177e4
LT
128/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
129static int maxolduid = 65535;
130static int minolduid;
8ad4b1fb 131static int min_percpu_pagelist_fract = 8;
1da177e4
LT
132
/* Backing variable of the read-only (0444) "ngroups_max" entry in kern_table. */
133static int ngroups_max = NGROUPS_MAX;
134
d14f1729
DY
135#ifdef CONFIG_INOTIFY_USER
136#include <linux/inotify.h>
137#endif
72c57ed5 138#ifdef CONFIG_SPARC
17f04fbb 139#include <asm/system.h>
1da177e4
LT
140#endif
141
0871420f
DM
142#ifdef CONFIG_SPARC64
143extern int sysctl_tsb_ratio;
144#endif
145
1da177e4
LT
146#ifdef __hppa__
147extern int pwrsw_enabled;
148extern int unaligned_enabled;
149#endif
150
347a8dc3 151#ifdef CONFIG_S390
1da177e4
LT
152#ifdef CONFIG_MATHEMU
153extern int sysctl_ieee_emulation_warnings;
154#endif
155extern int sysctl_userprocess_debug;
951f22d5 156extern int spin_retry;
1da177e4
LT
157#endif
158
d2b176ed
JS
159#ifdef CONFIG_IA64
160extern int no_unaligned_warning;
88fc241f 161extern int unaligned_dump_stack;
d2b176ed
JS
162#endif
163
d6f8ff73 164#ifdef CONFIG_PROC_SYSCTL
8d65af78 165static int proc_do_cad_pid(struct ctl_table *table, int write,
9ec52099 166 void __user *buffer, size_t *lenp, loff_t *ppos);
8d65af78 167static int proc_taint(struct ctl_table *table, int write,
34f5a398 168 void __user *buffer, size_t *lenp, loff_t *ppos);
d6f8ff73 169#endif
9ec52099 170
97f5f0cd
DT
171#ifdef CONFIG_MAGIC_SYSRQ
172static int __sysrq_enabled; /* Note: sysrq code uses its own private copy */
173
/*
 * proc handler installed on the "sysrq" entry of kern_table, whose .data
 * is &__sysrq_enabled.
 *
 * Delegates the read/write of the integer to proc_dointvec() and returns
 * its error code unchanged if it fails.  After a successful write the
 * current __sysrq_enabled value is passed to sysrq_toggle_support().
 * Returns 0 on success.
 */
174static int sysrq_sysctl_handler(ctl_table *table, int write,
175 void __user *buffer, size_t *lenp,
176 loff_t *ppos)
177{
178 int error;
179
180 error = proc_dointvec(table, write, buffer, lenp, ppos);
181 if (error)
182 return error;
183
/* Reads need no follow-up; only a write hands the value to the sysrq code. */
184 if (write)
185 sysrq_toggle_support(__sysrq_enabled);
186
187 return 0;
188}
189
190#endif
191
d8217f07 192static struct ctl_table root_table[];
e51b6ba0
EB
193static struct ctl_table_root sysctl_table_root;
/*
 * Statically initialised header for root_table.  Its list entry is
 * initialised against sysctl_table_root.default_set.list, and the
 * matching initializer in sysctl_table_root points back at .ctl_entry
 * here, so the two objects reference each other from their initializers.
 * .root and .set also point into sysctl_table_root.
 */
194static struct ctl_table_header root_table_header = {
b380b0d4 195 .count = 1,
e51b6ba0 196 .ctl_table = root_table,
73455092 197 .ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
e51b6ba0 198 .root = &sysctl_table_root,
73455092 199 .set = &sysctl_table_root.default_set,
e51b6ba0
EB
200};
/*
 * The sysctl table root.  .root_list is initialised as a list head on
 * itself; .default_set.list is initialised against
 * root_table_header.ctl_entry (root_table_header in turn references
 * this object's default_set).
 */
201static struct ctl_table_root sysctl_table_root = {
202 .root_list = LIST_HEAD_INIT(sysctl_table_root.root_list),
73455092 203 .default_set.list = LIST_HEAD_INIT(root_table_header.ctl_entry),
e51b6ba0 204};
1da177e4 205
d8217f07
EB
206static struct ctl_table kern_table[];
207static struct ctl_table vm_table[];
208static struct ctl_table fs_table[];
209static struct ctl_table debug_table[];
210static struct ctl_table dev_table[];
211extern struct ctl_table random_table[];
7ef9964e
DL
212#ifdef CONFIG_EPOLL
213extern struct ctl_table epoll_table[];
214#endif
1da177e4
LT
215
216#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
217int sysctl_legacy_va_layout;
218#endif
219
1da177e4
LT
220/* The default sysctl tables: */
221
/*
 * Top-level table: five directory entries ("kernel", "vm", "fs", "debug",
 * "dev"), each with mode 0555 and a .child pointer to the table that
 * holds its contents.  The last initializer is empty.
 */
d8217f07 222static struct ctl_table root_table[] = {
1da177e4 223 {
1da177e4
LT
224 .procname = "kernel",
225 .mode = 0555,
226 .child = kern_table,
227 },
228 {
1da177e4
LT
229 .procname = "vm",
230 .mode = 0555,
231 .child = vm_table,
232 },
1da177e4 233 {
1da177e4
LT
234 .procname = "fs",
235 .mode = 0555,
236 .child = fs_table,
237 },
238 {
1da177e4
LT
239 .procname = "debug",
240 .mode = 0555,
241 .child = debug_table,
242 },
243 {
1da177e4
LT
244 .procname = "dev",
245 .mode = 0555,
246 .child = dev_table,
247 },
2be7fe07
AM
248/*
249 * NOTE: do not add new entries to this table unless you have read
250 * Documentation/sysctl/ctl_unnumbered.txt
251 */
6fce56ec 252 { }
1da177e4
LT
253};
254
/*
 * Bounds referenced through .extra1/.extra2 by the CONFIG_SCHED_DEBUG
 * entries of kern_table: the granularity pair is shared by
 * "sched_min_granularity_ns" and "sched_latency_ns", the wakeup pair is
 * used by "sched_wakeup_granularity_ns", and the scaling pair by
 * "sched_tunable_scaling".
 */
77e54a1f 255#ifdef CONFIG_SCHED_DEBUG
73c4efd2
ED
256static int min_sched_granularity_ns = 100000; /* 100 usecs */
257static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
258static int min_wakeup_granularity_ns; /* 0 usecs */
259static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
1983a922
CE
260static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
261static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
77e54a1f
IM
262#endif
263
5e771905
MG
/* Bounds for an extfrag threshold; not referenced in the part of the file shown here. */
264#ifdef CONFIG_COMPACTION
265static int min_extfrag_threshold;
266static int max_extfrag_threshold = 1000;
267#endif
268
/*
 * kern_table: the entries hung off the "kernel" directory (root_table's
 * "kernel" entry sets .child = kern_table).
 *
 * Each initializer gives the entry's .procname and .mode plus either a
 * .child table ("random", "keys") or a .proc_handler, usually together
 * with the .data/.maxlen it operates on.  .extra1/.extra2 point at bound
 * variables for the handlers that take them (presumably lower/upper
 * limits -- the handlers themselves are not defined in this part of the
 * file).  Entries inside #ifdef/#if blocks are only compiled in for that
 * configuration or architecture.  The final initializer is empty.
 */
d8217f07 269static struct ctl_table kern_table[] = {
2bba22c5 270 {
2bba22c5
MG
271 .procname = "sched_child_runs_first",
272 .data = &sysctl_sched_child_runs_first,
273 .maxlen = sizeof(unsigned int),
274 .mode = 0644,
6d456111 275 .proc_handler = proc_dointvec,
2bba22c5 276 },
77e54a1f
IM
277#ifdef CONFIG_SCHED_DEBUG
278 {
b2be5e96
PZ
279 .procname = "sched_min_granularity_ns",
280 .data = &sysctl_sched_min_granularity,
77e54a1f
IM
281 .maxlen = sizeof(unsigned int),
282 .mode = 0644,
702a7c76 283 .proc_handler = sched_proc_update_handler,
b2be5e96
PZ
284 .extra1 = &min_sched_granularity_ns,
285 .extra2 = &max_sched_granularity_ns,
77e54a1f 286 },
21805085 287 {
21805085
PZ
288 .procname = "sched_latency_ns",
289 .data = &sysctl_sched_latency,
290 .maxlen = sizeof(unsigned int),
291 .mode = 0644,
702a7c76 292 .proc_handler = sched_proc_update_handler,
21805085
PZ
293 .extra1 = &min_sched_granularity_ns,
294 .extra2 = &max_sched_granularity_ns,
295 },
77e54a1f 296 {
77e54a1f
IM
297 .procname = "sched_wakeup_granularity_ns",
298 .data = &sysctl_sched_wakeup_granularity,
299 .maxlen = sizeof(unsigned int),
300 .mode = 0644,
702a7c76 301 .proc_handler = sched_proc_update_handler,
77e54a1f
IM
302 .extra1 = &min_wakeup_granularity_ns,
303 .extra2 = &max_wakeup_granularity_ns,
304 },
1983a922 305 {
1983a922
CE
306 .procname = "sched_tunable_scaling",
307 .data = &sysctl_sched_tunable_scaling,
308 .maxlen = sizeof(enum sched_tunable_scaling),
309 .mode = 0644,
702a7c76 310 .proc_handler = sched_proc_update_handler,
1983a922
CE
311 .extra1 = &min_sched_tunable_scaling,
312 .extra2 = &max_sched_tunable_scaling,
2398f2c6 313 },
da84d961 314 {
da84d961
IM
315 .procname = "sched_migration_cost",
316 .data = &sysctl_sched_migration_cost,
317 .maxlen = sizeof(unsigned int),
318 .mode = 0644,
6d456111 319 .proc_handler = proc_dointvec,
da84d961 320 },
b82d9fdd 321 {
b82d9fdd
PZ
322 .procname = "sched_nr_migrate",
323 .data = &sysctl_sched_nr_migrate,
324 .maxlen = sizeof(unsigned int),
fa85ae24 325 .mode = 0644,
6d456111 326 .proc_handler = proc_dointvec,
fa85ae24 327 },
e9e9250b 328 {
e9e9250b
PZ
329 .procname = "sched_time_avg",
330 .data = &sysctl_sched_time_avg,
331 .maxlen = sizeof(unsigned int),
332 .mode = 0644,
6d456111 333 .proc_handler = proc_dointvec,
e9e9250b 334 },
cd1bb94b 335 {
cd1bb94b
AB
336 .procname = "timer_migration",
337 .data = &sysctl_timer_migration,
338 .maxlen = sizeof(unsigned int),
339 .mode = 0644,
6d456111 340 .proc_handler = proc_dointvec_minmax,
bfdb4d9f
AB
341 .extra1 = &zero,
342 .extra2 = &one,
fa85ae24 343 },
1fc84aaa 344#endif
9f0c1e56 345 {
9f0c1e56
PZ
346 .procname = "sched_rt_period_us",
347 .data = &sysctl_sched_rt_period,
348 .maxlen = sizeof(unsigned int),
349 .mode = 0644,
6d456111 350 .proc_handler = sched_rt_handler,
9f0c1e56
PZ
351 },
352 {
9f0c1e56
PZ
353 .procname = "sched_rt_runtime_us",
354 .data = &sysctl_sched_rt_runtime,
355 .maxlen = sizeof(int),
356 .mode = 0644,
6d456111 357 .proc_handler = sched_rt_handler,
9f0c1e56 358 },
1799e35d 359 {
1799e35d
IM
360 .procname = "sched_compat_yield",
361 .data = &sysctl_sched_compat_yield,
362 .maxlen = sizeof(unsigned int),
363 .mode = 0644,
6d456111 364 .proc_handler = proc_dointvec,
1799e35d 365 },
f20786ff
PZ
366#ifdef CONFIG_PROVE_LOCKING
367 {
f20786ff
PZ
368 .procname = "prove_locking",
369 .data = &prove_locking,
370 .maxlen = sizeof(int),
371 .mode = 0644,
6d456111 372 .proc_handler = proc_dointvec,
f20786ff
PZ
373 },
374#endif
375#ifdef CONFIG_LOCK_STAT
376 {
f20786ff
PZ
377 .procname = "lock_stat",
378 .data = &lock_stat,
379 .maxlen = sizeof(int),
380 .mode = 0644,
6d456111 381 .proc_handler = proc_dointvec,
f20786ff 382 },
77e54a1f 383#endif
1da177e4 384 {
1da177e4
LT
385 .procname = "panic",
386 .data = &panic_timeout,
387 .maxlen = sizeof(int),
388 .mode = 0644,
6d456111 389 .proc_handler = proc_dointvec,
1da177e4
LT
390 },
391 {
1da177e4
LT
392 .procname = "core_uses_pid",
393 .data = &core_uses_pid,
394 .maxlen = sizeof(int),
395 .mode = 0644,
6d456111 396 .proc_handler = proc_dointvec,
1da177e4
LT
397 },
398 {
1da177e4
LT
399 .procname = "core_pattern",
400 .data = core_pattern,
71ce92f3 401 .maxlen = CORENAME_MAX_SIZE,
1da177e4 402 .mode = 0644,
6d456111 403 .proc_handler = proc_dostring,
1da177e4 404 },
a293980c 405 {
a293980c
NH
406 .procname = "core_pipe_limit",
407 .data = &core_pipe_limit,
408 .maxlen = sizeof(unsigned int),
409 .mode = 0644,
6d456111 410 .proc_handler = proc_dointvec,
a293980c 411 },
34f5a398 412#ifdef CONFIG_PROC_SYSCTL
1da177e4 413 {
1da177e4 414 .procname = "tainted",
25ddbb18 415 .maxlen = sizeof(long),
34f5a398 416 .mode = 0644,
6d456111 417 .proc_handler = proc_taint,
1da177e4 418 },
34f5a398 419#endif
9745512c
AV
420#ifdef CONFIG_LATENCYTOP
421 {
422 .procname = "latencytop",
423 .data = &latencytop_enabled,
424 .maxlen = sizeof(int),
425 .mode = 0644,
6d456111 426 .proc_handler = proc_dointvec,
9745512c
AV
427 },
428#endif
1da177e4
LT
429#ifdef CONFIG_BLK_DEV_INITRD
430 {
1da177e4
LT
431 .procname = "real-root-dev",
432 .data = &real_root_dev,
433 .maxlen = sizeof(int),
434 .mode = 0644,
6d456111 435 .proc_handler = proc_dointvec,
1da177e4
LT
436 },
437#endif
45807a1d 438 {
45807a1d
IM
439 .procname = "print-fatal-signals",
440 .data = &print_fatal_signals,
441 .maxlen = sizeof(int),
442 .mode = 0644,
6d456111 443 .proc_handler = proc_dointvec,
45807a1d 444 },
72c57ed5 445#ifdef CONFIG_SPARC
1da177e4 446 {
1da177e4
LT
447 .procname = "reboot-cmd",
448 .data = reboot_command,
449 .maxlen = 256,
450 .mode = 0644,
6d456111 451 .proc_handler = proc_dostring,
1da177e4
LT
452 },
453 {
1da177e4
LT
454 .procname = "stop-a",
455 .data = &stop_a_enabled,
456 .maxlen = sizeof (int),
457 .mode = 0644,
6d456111 458 .proc_handler = proc_dointvec,
1da177e4
LT
459 },
460 {
1da177e4
LT
461 .procname = "scons-poweroff",
462 .data = &scons_pwroff,
463 .maxlen = sizeof (int),
464 .mode = 0644,
6d456111 465 .proc_handler = proc_dointvec,
1da177e4
LT
466 },
467#endif
0871420f
DM
468#ifdef CONFIG_SPARC64
469 {
0871420f
DM
470 .procname = "tsb-ratio",
471 .data = &sysctl_tsb_ratio,
472 .maxlen = sizeof (int),
473 .mode = 0644,
6d456111 474 .proc_handler = proc_dointvec,
0871420f
DM
475 },
476#endif
1da177e4
LT
477#ifdef __hppa__
478 {
1da177e4
LT
479 .procname = "soft-power",
480 .data = &pwrsw_enabled,
481 .maxlen = sizeof (int),
482 .mode = 0644,
6d456111 483 .proc_handler = proc_dointvec,
1da177e4
LT
484 },
485 {
1da177e4
LT
486 .procname = "unaligned-trap",
487 .data = &unaligned_enabled,
488 .maxlen = sizeof (int),
489 .mode = 0644,
6d456111 490 .proc_handler = proc_dointvec,
1da177e4
LT
491 },
492#endif
493 {
1da177e4
LT
494 .procname = "ctrl-alt-del",
495 .data = &C_A_D,
496 .maxlen = sizeof(int),
497 .mode = 0644,
6d456111 498 .proc_handler = proc_dointvec,
1da177e4 499 },
606576ce 500#ifdef CONFIG_FUNCTION_TRACER
b0fc494f 501 {
b0fc494f
SR
502 .procname = "ftrace_enabled",
503 .data = &ftrace_enabled,
504 .maxlen = sizeof(int),
505 .mode = 0644,
6d456111 506 .proc_handler = ftrace_enable_sysctl,
b0fc494f
SR
507 },
508#endif
f38f1d2a
SR
509#ifdef CONFIG_STACK_TRACER
510 {
f38f1d2a
SR
511 .procname = "stack_tracer_enabled",
512 .data = &stack_tracer_enabled,
513 .maxlen = sizeof(int),
514 .mode = 0644,
6d456111 515 .proc_handler = stack_trace_sysctl,
f38f1d2a
SR
516 },
517#endif
944ac425
SR
518#ifdef CONFIG_TRACING
519 {
3299b4dd 520 .procname = "ftrace_dump_on_oops",
944ac425
SR
521 .data = &ftrace_dump_on_oops,
522 .maxlen = sizeof(int),
523 .mode = 0644,
6d456111 524 .proc_handler = proc_dointvec,
944ac425
SR
525 },
526#endif
a1ef5adb 527#ifdef CONFIG_MODULES
1da177e4 528 {
1da177e4
LT
529 .procname = "modprobe",
530 .data = &modprobe_path,
531 .maxlen = KMOD_PATH_LEN,
532 .mode = 0644,
6d456111 533 .proc_handler = proc_dostring,
1da177e4 534 },
3d43321b 535 {
3d43321b
KC
536 .procname = "modules_disabled",
537 .data = &modules_disabled,
538 .maxlen = sizeof(int),
539 .mode = 0644,
540 /* only handle a transition from default "0" to "1" */
6d456111 541 .proc_handler = proc_dointvec_minmax,
3d43321b
KC
542 .extra1 = &one,
543 .extra2 = &one,
544 },
1da177e4 545#endif
94f17cd7 546#ifdef CONFIG_HOTPLUG
1da177e4 547 {
1da177e4 548 .procname = "hotplug",
312c004d
KS
549 .data = &uevent_helper,
550 .maxlen = UEVENT_HELPER_PATH_LEN,
1da177e4 551 .mode = 0644,
6d456111 552 .proc_handler = proc_dostring,
1da177e4
LT
553 },
554#endif
555#ifdef CONFIG_CHR_DEV_SG
556 {
1da177e4
LT
557 .procname = "sg-big-buff",
558 .data = &sg_big_buff,
559 .maxlen = sizeof (int),
560 .mode = 0444,
6d456111 561 .proc_handler = proc_dointvec,
1da177e4
LT
562 },
563#endif
564#ifdef CONFIG_BSD_PROCESS_ACCT
565 {
1da177e4
LT
566 .procname = "acct",
567 .data = &acct_parm,
568 .maxlen = 3*sizeof(int),
569 .mode = 0644,
6d456111 570 .proc_handler = proc_dointvec,
1da177e4
LT
571 },
572#endif
1da177e4
LT
573#ifdef CONFIG_MAGIC_SYSRQ
574 {
1da177e4 575 .procname = "sysrq",
5d6f647f 576 .data = &__sysrq_enabled,
1da177e4
LT
577 .maxlen = sizeof (int),
578 .mode = 0644,
97f5f0cd 579 .proc_handler = sysrq_sysctl_handler,
1da177e4
LT
580 },
581#endif
d6f8ff73 582#ifdef CONFIG_PROC_SYSCTL
1da177e4 583 {
1da177e4 584 .procname = "cad_pid",
9ec52099 585 .data = NULL,
1da177e4
LT
586 .maxlen = sizeof (int),
587 .mode = 0600,
6d456111 588 .proc_handler = proc_do_cad_pid,
1da177e4 589 },
d6f8ff73 590#endif
1da177e4 591 {
1da177e4
LT
592 .procname = "threads-max",
593 .data = &max_threads,
594 .maxlen = sizeof(int),
595 .mode = 0644,
6d456111 596 .proc_handler = proc_dointvec,
1da177e4
LT
597 },
598 {
1da177e4
LT
599 .procname = "random",
600 .mode = 0555,
601 .child = random_table,
602 },
1da177e4 603 {
1da177e4
LT
604 .procname = "overflowuid",
605 .data = &overflowuid,
606 .maxlen = sizeof(int),
607 .mode = 0644,
6d456111 608 .proc_handler = proc_dointvec_minmax,
1da177e4
LT
609 .extra1 = &minolduid,
610 .extra2 = &maxolduid,
611 },
612 {
1da177e4
LT
613 .procname = "overflowgid",
614 .data = &overflowgid,
615 .maxlen = sizeof(int),
616 .mode = 0644,
6d456111 617 .proc_handler = proc_dointvec_minmax,
1da177e4
LT
618 .extra1 = &minolduid,
619 .extra2 = &maxolduid,
620 },
347a8dc3 621#ifdef CONFIG_S390
1da177e4
LT
622#ifdef CONFIG_MATHEMU
623 {
1da177e4
LT
624 .procname = "ieee_emulation_warnings",
625 .data = &sysctl_ieee_emulation_warnings,
626 .maxlen = sizeof(int),
627 .mode = 0644,
6d456111 628 .proc_handler = proc_dointvec,
1da177e4 629 },
1da177e4
LT
630#endif
631 {
1da177e4 632 .procname = "userprocess_debug",
ab3c68ee 633 .data = &show_unhandled_signals,
1da177e4
LT
634 .maxlen = sizeof(int),
635 .mode = 0644,
6d456111 636 .proc_handler = proc_dointvec,
1da177e4
LT
637 },
638#endif
639 {
1da177e4
LT
640 .procname = "pid_max",
641 .data = &pid_max,
642 .maxlen = sizeof (int),
643 .mode = 0644,
6d456111 644 .proc_handler = proc_dointvec_minmax,
1da177e4
LT
645 .extra1 = &pid_max_min,
646 .extra2 = &pid_max_max,
647 },
648 {
1da177e4
LT
649 .procname = "panic_on_oops",
650 .data = &panic_on_oops,
651 .maxlen = sizeof(int),
652 .mode = 0644,
6d456111 653 .proc_handler = proc_dointvec,
1da177e4 654 },
7ef3d2fd
JP
655#if defined CONFIG_PRINTK
656 {
7ef3d2fd
JP
657 .procname = "printk",
658 .data = &console_loglevel,
659 .maxlen = 4*sizeof(int),
660 .mode = 0644,
6d456111 661 .proc_handler = proc_dointvec,
7ef3d2fd 662 },
1da177e4 663 {
1da177e4 664 .procname = "printk_ratelimit",
717115e1 665 .data = &printk_ratelimit_state.interval,
1da177e4
LT
666 .maxlen = sizeof(int),
667 .mode = 0644,
6d456111 668 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
669 },
670 {
1da177e4 671 .procname = "printk_ratelimit_burst",
717115e1 672 .data = &printk_ratelimit_state.burst,
1da177e4
LT
673 .maxlen = sizeof(int),
674 .mode = 0644,
6d456111 675 .proc_handler = proc_dointvec,
1da177e4 676 },
af91322e 677 {
af91322e
DY
678 .procname = "printk_delay",
679 .data = &printk_delay_msec,
680 .maxlen = sizeof(int),
681 .mode = 0644,
6d456111 682 .proc_handler = proc_dointvec_minmax,
af91322e
DY
683 .extra1 = &zero,
684 .extra2 = &ten_thousand,
685 },
7ef3d2fd 686#endif
eaf06b24
DR
687 {
688 .procname = "dmesg_restrict",
689 .data = &dmesg_restrict,
690 .maxlen = sizeof(int),
691 .mode = 0644,
692 .proc_handler = proc_dointvec_minmax,
693 .extra1 = &zero,
694 .extra2 = &one,
695 },
1da177e4 696 {
1da177e4
LT
697 .procname = "ngroups_max",
698 .data = &ngroups_max,
699 .maxlen = sizeof (int),
700 .mode = 0444,
6d456111 701 .proc_handler = proc_dointvec,
1da177e4 702 },
58687acb 703#if defined(CONFIG_LOCKUP_DETECTOR)
504d7cf1 704 {
58687acb
DZ
705 .procname = "watchdog",
706 .data = &watchdog_enabled,
504d7cf1
DZ
707 .maxlen = sizeof (int),
708 .mode = 0644,
58687acb
DZ
709 .proc_handler = proc_dowatchdog_enabled,
710 },
711 {
712 .procname = "watchdog_thresh",
713 .data = &softlockup_thresh,
714 .maxlen = sizeof(int),
715 .mode = 0644,
716 .proc_handler = proc_dowatchdog_thresh,
717 .extra1 = &neg_one,
718 .extra2 = &sixty,
504d7cf1 719 },
2508ce18
DZ
720 {
721 .procname = "softlockup_panic",
722 .data = &softlockup_panic,
723 .maxlen = sizeof(int),
724 .mode = 0644,
725 .proc_handler = proc_dointvec_minmax,
726 .extra1 = &zero,
727 .extra2 = &one,
728 },
504d7cf1 729#endif
58687acb 730#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
1da177e4 731 {
1da177e4
LT
732 .procname = "unknown_nmi_panic",
733 .data = &unknown_nmi_panic,
734 .maxlen = sizeof (int),
735 .mode = 0644,
6d456111 736 .proc_handler = proc_dointvec,
1da177e4 737 },
407984f1 738 {
407984f1
DZ
739 .procname = "nmi_watchdog",
740 .data = &nmi_watchdog_enabled,
741 .maxlen = sizeof (int),
742 .mode = 0644,
6d456111 743 .proc_handler = proc_nmi_enabled,
1da177e4
LT
744 },
745#endif
746#if defined(CONFIG_X86)
8da5adda 747 {
8da5adda
DZ
748 .procname = "panic_on_unrecovered_nmi",
749 .data = &panic_on_unrecovered_nmi,
750 .maxlen = sizeof(int),
751 .mode = 0644,
6d456111 752 .proc_handler = proc_dointvec,
8da5adda 753 },
5211a242 754 {
5211a242
KG
755 .procname = "panic_on_io_nmi",
756 .data = &panic_on_io_nmi,
757 .maxlen = sizeof(int),
758 .mode = 0644,
6d456111 759 .proc_handler = proc_dointvec,
5211a242 760 },
1da177e4 761 {
1da177e4
LT
762 .procname = "bootloader_type",
763 .data = &bootloader_type,
764 .maxlen = sizeof (int),
765 .mode = 0444,
6d456111 766 .proc_handler = proc_dointvec,
1da177e4 767 },
5031296c 768 {
5031296c
PA
769 .procname = "bootloader_version",
770 .data = &bootloader_version,
771 .maxlen = sizeof (int),
772 .mode = 0444,
6d456111 773 .proc_handler = proc_dointvec,
5031296c 774 },
0741f4d2 775 {
0741f4d2
CE
776 .procname = "kstack_depth_to_print",
777 .data = &kstack_depth_to_print,
778 .maxlen = sizeof(int),
779 .mode = 0644,
6d456111 780 .proc_handler = proc_dointvec,
0741f4d2 781 },
6e7c4025 782 {
6e7c4025
IM
783 .procname = "io_delay_type",
784 .data = &io_delay_type,
785 .maxlen = sizeof(int),
786 .mode = 0644,
6d456111 787 .proc_handler = proc_dointvec,
6e7c4025 788 },
1da177e4 789#endif
7a9166e3 790#if defined(CONFIG_MMU)
1da177e4 791 {
1da177e4
LT
792 .procname = "randomize_va_space",
793 .data = &randomize_va_space,
794 .maxlen = sizeof(int),
795 .mode = 0644,
6d456111 796 .proc_handler = proc_dointvec,
1da177e4 797 },
7a9166e3 798#endif
0152fb37 799#if defined(CONFIG_S390) && defined(CONFIG_SMP)
951f22d5 800 {
951f22d5
MS
801 .procname = "spin_retry",
802 .data = &spin_retry,
803 .maxlen = sizeof (int),
804 .mode = 0644,
6d456111 805 .proc_handler = proc_dointvec,
951f22d5 806 },
c255d844 807#endif
673d5b43 808#if defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
c255d844 809 {
c255d844 810 .procname = "acpi_video_flags",
77afcf78 811 .data = &acpi_realmode_flags,
c255d844
PM
812 .maxlen = sizeof (unsigned long),
813 .mode = 0644,
6d456111 814 .proc_handler = proc_doulongvec_minmax,
c255d844 815 },
d2b176ed
JS
816#endif
817#ifdef CONFIG_IA64
818 {
d2b176ed
JS
819 .procname = "ignore-unaligned-usertrap",
820 .data = &no_unaligned_warning,
821 .maxlen = sizeof (int),
822 .mode = 0644,
6d456111 823 .proc_handler = proc_dointvec,
d2b176ed 824 },
88fc241f 825 {
88fc241f
DC
826 .procname = "unaligned-dump-stack",
827 .data = &unaligned_dump_stack,
828 .maxlen = sizeof (int),
829 .mode = 0644,
6d456111 830 .proc_handler = proc_dointvec,
88fc241f 831 },
bebfa101 832#endif
e162b39a
MSB
833#ifdef CONFIG_DETECT_HUNG_TASK
834 {
e162b39a
MSB
835 .procname = "hung_task_panic",
836 .data = &sysctl_hung_task_panic,
837 .maxlen = sizeof(int),
838 .mode = 0644,
6d456111 839 .proc_handler = proc_dointvec_minmax,
e162b39a
MSB
840 .extra1 = &zero,
841 .extra2 = &one,
842 },
82a1fcb9 843 {
82a1fcb9
IM
844 .procname = "hung_task_check_count",
845 .data = &sysctl_hung_task_check_count,
90739081 846 .maxlen = sizeof(unsigned long),
82a1fcb9 847 .mode = 0644,
6d456111 848 .proc_handler = proc_doulongvec_minmax,
82a1fcb9
IM
849 },
850 {
82a1fcb9
IM
851 .procname = "hung_task_timeout_secs",
852 .data = &sysctl_hung_task_timeout_secs,
90739081 853 .maxlen = sizeof(unsigned long),
82a1fcb9 854 .mode = 0644,
6d456111 855 .proc_handler = proc_dohung_task_timeout_secs,
82a1fcb9
IM
856 },
857 {
82a1fcb9
IM
858 .procname = "hung_task_warnings",
859 .data = &sysctl_hung_task_warnings,
90739081 860 .maxlen = sizeof(unsigned long),
82a1fcb9 861 .mode = 0644,
6d456111 862 .proc_handler = proc_doulongvec_minmax,
82a1fcb9 863 },
c4f3b63f 864#endif
bebfa101
AK
865#ifdef CONFIG_COMPAT
866 {
bebfa101
AK
867 .procname = "compat-log",
868 .data = &compat_log,
869 .maxlen = sizeof (int),
870 .mode = 0644,
6d456111 871 .proc_handler = proc_dointvec,
bebfa101 872 },
951f22d5 873#endif
23f78d4a
IM
874#ifdef CONFIG_RT_MUTEXES
875 {
23f78d4a
IM
876 .procname = "max_lock_depth",
877 .data = &max_lock_depth,
878 .maxlen = sizeof(int),
879 .mode = 0644,
6d456111 880 .proc_handler = proc_dointvec,
23f78d4a 881 },
5096add8 882#endif
10a0a8d4 883 {
10a0a8d4
JF
884 .procname = "poweroff_cmd",
885 .data = &poweroff_cmd,
886 .maxlen = POWEROFF_CMD_PATH_LEN,
887 .mode = 0644,
6d456111 888 .proc_handler = proc_dostring,
10a0a8d4 889 },
0b77f5bf
DH
890#ifdef CONFIG_KEYS
891 {
0b77f5bf
DH
892 .procname = "keys",
893 .mode = 0555,
894 .child = key_sysctls,
895 },
896#endif
31a72bce
PM
897#ifdef CONFIG_RCU_TORTURE_TEST
898 {
31a72bce
PM
899 .procname = "rcutorture_runnable",
900 .data = &rcutorture_runnable,
901 .maxlen = sizeof(int),
902 .mode = 0644,
6d456111 903 .proc_handler = proc_dointvec,
31a72bce
PM
904 },
905#endif
cdd6c482 906#ifdef CONFIG_PERF_EVENTS
1ccd1549 907 {
cdd6c482
IM
908 .procname = "perf_event_paranoid",
909 .data = &sysctl_perf_event_paranoid,
910 .maxlen = sizeof(sysctl_perf_event_paranoid),
1ccd1549 911 .mode = 0644,
6d456111 912 .proc_handler = proc_dointvec,
1ccd1549 913 },
c5078f78 914 {
cdd6c482
IM
915 .procname = "perf_event_mlock_kb",
916 .data = &sysctl_perf_event_mlock,
917 .maxlen = sizeof(sysctl_perf_event_mlock),
c5078f78 918 .mode = 0644,
6d456111 919 .proc_handler = proc_dointvec,
c5078f78 920 },
a78ac325 921 {
cdd6c482
IM
922 .procname = "perf_event_max_sample_rate",
923 .data = &sysctl_perf_event_sample_rate,
924 .maxlen = sizeof(sysctl_perf_event_sample_rate),
a78ac325 925 .mode = 0644,
6d456111 926 .proc_handler = proc_dointvec,
a78ac325 927 },
1ccd1549 928#endif
dfec072e
VN
929#ifdef CONFIG_KMEMCHECK
930 {
dfec072e
VN
931 .procname = "kmemcheck",
932 .data = &kmemcheck_enabled,
933 .maxlen = sizeof(int),
934 .mode = 0644,
6d456111 935 .proc_handler = proc_dointvec,
dfec072e
VN
936 },
937#endif
cb684b5b 938#ifdef CONFIG_BLOCK
5e605b64 939 {
5e605b64
JA
940 .procname = "blk_iopoll",
941 .data = &blk_iopoll_enabled,
942 .maxlen = sizeof(int),
943 .mode = 0644,
6d456111 944 .proc_handler = proc_dointvec,
5e605b64 945 },
cb684b5b 946#endif
ed2c12f3
AM
947/*
948 * NOTE: do not add new entries to this table unless you have read
949 * Documentation/sysctl/ctl_unnumbered.txt
950 */
6fce56ec 951 { }
1da177e4
LT
952};
953
d8217f07 954static struct ctl_table vm_table[] = {
1da177e4 955 {
1da177e4
LT
956 .procname = "overcommit_memory",
957 .data = &sysctl_overcommit_memory,
958 .maxlen = sizeof(sysctl_overcommit_memory),
959 .mode = 0644,
6d456111 960 .proc_handler = proc_dointvec,
1da177e4 961 },
fadd8fbd 962 {
fadd8fbd
KH
963 .procname = "panic_on_oom",
964 .data = &sysctl_panic_on_oom,
965 .maxlen = sizeof(sysctl_panic_on_oom),
966 .mode = 0644,
6d456111 967 .proc_handler = proc_dointvec,
fadd8fbd 968 },
fe071d7e 969 {
fe071d7e
DR
970 .procname = "oom_kill_allocating_task",
971 .data = &sysctl_oom_kill_allocating_task,
972 .maxlen = sizeof(sysctl_oom_kill_allocating_task),
973 .mode = 0644,
6d456111 974 .proc_handler = proc_dointvec,
fe071d7e 975 },
fef1bdd6 976 {
fef1bdd6
DR
977 .procname = "oom_dump_tasks",
978 .data = &sysctl_oom_dump_tasks,
979 .maxlen = sizeof(sysctl_oom_dump_tasks),
980 .mode = 0644,
6d456111 981 .proc_handler = proc_dointvec,
fef1bdd6 982 },
1da177e4 983 {
1da177e4
LT
984 .procname = "overcommit_ratio",
985 .data = &sysctl_overcommit_ratio,
986 .maxlen = sizeof(sysctl_overcommit_ratio),
987 .mode = 0644,
6d456111 988 .proc_handler = proc_dointvec,
1da177e4
LT
989 },
990 {
1da177e4
LT
991 .procname = "page-cluster",
992 .data = &page_cluster,
993 .maxlen = sizeof(int),
994 .mode = 0644,
6d456111 995 .proc_handler = proc_dointvec,
1da177e4
LT
996 },
997 {
1da177e4
LT
998 .procname = "dirty_background_ratio",
999 .data = &dirty_background_ratio,
1000 .maxlen = sizeof(dirty_background_ratio),
1001 .mode = 0644,
6d456111 1002 .proc_handler = dirty_background_ratio_handler,
1da177e4
LT
1003 .extra1 = &zero,
1004 .extra2 = &one_hundred,
1005 },
2da02997 1006 {
2da02997
DR
1007 .procname = "dirty_background_bytes",
1008 .data = &dirty_background_bytes,
1009 .maxlen = sizeof(dirty_background_bytes),
1010 .mode = 0644,
6d456111 1011 .proc_handler = dirty_background_bytes_handler,
fc3501d4 1012 .extra1 = &one_ul,
2da02997 1013 },
1da177e4 1014 {
1da177e4
LT
1015 .procname = "dirty_ratio",
1016 .data = &vm_dirty_ratio,
1017 .maxlen = sizeof(vm_dirty_ratio),
1018 .mode = 0644,
6d456111 1019 .proc_handler = dirty_ratio_handler,
1da177e4
LT
1020 .extra1 = &zero,
1021 .extra2 = &one_hundred,
1022 },
2da02997 1023 {
2da02997
DR
1024 .procname = "dirty_bytes",
1025 .data = &vm_dirty_bytes,
1026 .maxlen = sizeof(vm_dirty_bytes),
1027 .mode = 0644,
6d456111 1028 .proc_handler = dirty_bytes_handler,
9e4a5bda 1029 .extra1 = &dirty_bytes_min,
2da02997 1030 },
1da177e4 1031 {
1da177e4 1032 .procname = "dirty_writeback_centisecs",
f6ef9438
BS
1033 .data = &dirty_writeback_interval,
1034 .maxlen = sizeof(dirty_writeback_interval),
1da177e4 1035 .mode = 0644,
6d456111 1036 .proc_handler = dirty_writeback_centisecs_handler,
1da177e4
LT
1037 },
1038 {
1da177e4 1039 .procname = "dirty_expire_centisecs",
f6ef9438
BS
1040 .data = &dirty_expire_interval,
1041 .maxlen = sizeof(dirty_expire_interval),
1da177e4 1042 .mode = 0644,
6d456111 1043 .proc_handler = proc_dointvec,
1da177e4
LT
1044 },
1045 {
1da177e4
LT
1046 .procname = "nr_pdflush_threads",
1047 .data = &nr_pdflush_threads,
1048 .maxlen = sizeof nr_pdflush_threads,
1049 .mode = 0444 /* read-only*/,
6d456111 1050 .proc_handler = proc_dointvec,
1da177e4
LT
1051 },
1052 {
1da177e4
LT
1053 .procname = "swappiness",
1054 .data = &vm_swappiness,
1055 .maxlen = sizeof(vm_swappiness),
1056 .mode = 0644,
6d456111 1057 .proc_handler = proc_dointvec_minmax,
1da177e4
LT
1058 .extra1 = &zero,
1059 .extra2 = &one_hundred,
1060 },
1061#ifdef CONFIG_HUGETLB_PAGE
06808b08 1062 {
1da177e4 1063 .procname = "nr_hugepages",
e5ff2159 1064 .data = NULL,
1da177e4
LT
1065 .maxlen = sizeof(unsigned long),
1066 .mode = 0644,
6d456111 1067 .proc_handler = hugetlb_sysctl_handler,
1da177e4
LT
1068 .extra1 = (void *)&hugetlb_zero,
1069 .extra2 = (void *)&hugetlb_infinity,
06808b08
LS
1070 },
1071#ifdef CONFIG_NUMA
1072 {
1073 .procname = "nr_hugepages_mempolicy",
1074 .data = NULL,
1075 .maxlen = sizeof(unsigned long),
1076 .mode = 0644,
1077 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1078 .extra1 = (void *)&hugetlb_zero,
1079 .extra2 = (void *)&hugetlb_infinity,
1080 },
1081#endif
1da177e4 1082 {
1da177e4
LT
1083 .procname = "hugetlb_shm_group",
1084 .data = &sysctl_hugetlb_shm_group,
1085 .maxlen = sizeof(gid_t),
1086 .mode = 0644,
6d456111 1087 .proc_handler = proc_dointvec,
1da177e4 1088 },
396faf03 1089 {
396faf03
MG
1090 .procname = "hugepages_treat_as_movable",
1091 .data = &hugepages_treat_as_movable,
1092 .maxlen = sizeof(int),
1093 .mode = 0644,
6d456111 1094 .proc_handler = hugetlb_treat_movable_handler,
396faf03 1095 },
d1c3fb1f 1096 {
d1c3fb1f 1097 .procname = "nr_overcommit_hugepages",
e5ff2159
AK
1098 .data = NULL,
1099 .maxlen = sizeof(unsigned long),
d1c3fb1f 1100 .mode = 0644,
6d456111 1101 .proc_handler = hugetlb_overcommit_handler,
e5ff2159
AK
1102 .extra1 = (void *)&hugetlb_zero,
1103 .extra2 = (void *)&hugetlb_infinity,
d1c3fb1f 1104 },
1da177e4
LT
1105#endif
1106 {
1da177e4
LT
1107 .procname = "lowmem_reserve_ratio",
1108 .data = &sysctl_lowmem_reserve_ratio,
1109 .maxlen = sizeof(sysctl_lowmem_reserve_ratio),
1110 .mode = 0644,
6d456111 1111 .proc_handler = lowmem_reserve_ratio_sysctl_handler,
1da177e4 1112 },
9d0243bc 1113 {
9d0243bc
AM
1114 .procname = "drop_caches",
1115 .data = &sysctl_drop_caches,
1116 .maxlen = sizeof(int),
1117 .mode = 0644,
1118 .proc_handler = drop_caches_sysctl_handler,
9d0243bc 1119 },
76ab0f53
MG
1120#ifdef CONFIG_COMPACTION
1121 {
1122 .procname = "compact_memory",
1123 .data = &sysctl_compact_memory,
1124 .maxlen = sizeof(int),
1125 .mode = 0200,
1126 .proc_handler = sysctl_compaction_handler,
1127 },
5e771905
MG
1128 {
1129 .procname = "extfrag_threshold",
1130 .data = &sysctl_extfrag_threshold,
1131 .maxlen = sizeof(int),
1132 .mode = 0644,
1133 .proc_handler = sysctl_extfrag_handler,
1134 .extra1 = &min_extfrag_threshold,
1135 .extra2 = &max_extfrag_threshold,
1136 },
1137
76ab0f53 1138#endif /* CONFIG_COMPACTION */
1da177e4 1139 {
1da177e4
LT
1140 .procname = "min_free_kbytes",
1141 .data = &min_free_kbytes,
1142 .maxlen = sizeof(min_free_kbytes),
1143 .mode = 0644,
6d456111 1144 .proc_handler = min_free_kbytes_sysctl_handler,
1da177e4
LT
1145 .extra1 = &zero,
1146 },
8ad4b1fb 1147 {
8ad4b1fb
RS
1148 .procname = "percpu_pagelist_fraction",
1149 .data = &percpu_pagelist_fraction,
1150 .maxlen = sizeof(percpu_pagelist_fraction),
1151 .mode = 0644,
6d456111 1152 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
8ad4b1fb
RS
1153 .extra1 = &min_percpu_pagelist_fract,
1154 },
1da177e4
LT
1155#ifdef CONFIG_MMU
1156 {
1da177e4
LT
1157 .procname = "max_map_count",
1158 .data = &sysctl_max_map_count,
1159 .maxlen = sizeof(sysctl_max_map_count),
1160 .mode = 0644,
3e26120c 1161 .proc_handler = proc_dointvec_minmax,
70da2340 1162 .extra1 = &zero,
1da177e4 1163 },
dd8632a1
PM
1164#else
1165 {
dd8632a1
PM
1166 .procname = "nr_trim_pages",
1167 .data = &sysctl_nr_trim_pages,
1168 .maxlen = sizeof(sysctl_nr_trim_pages),
1169 .mode = 0644,
6d456111 1170 .proc_handler = proc_dointvec_minmax,
dd8632a1
PM
1171 .extra1 = &zero,
1172 },
1da177e4
LT
1173#endif
1174 {
1da177e4
LT
1175 .procname = "laptop_mode",
1176 .data = &laptop_mode,
1177 .maxlen = sizeof(laptop_mode),
1178 .mode = 0644,
6d456111 1179 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
1180 },
1181 {
1da177e4
LT
1182 .procname = "block_dump",
1183 .data = &block_dump,
1184 .maxlen = sizeof(block_dump),
1185 .mode = 0644,
6d456111 1186 .proc_handler = proc_dointvec,
1da177e4
LT
1187 .extra1 = &zero,
1188 },
1189 {
1da177e4
LT
1190 .procname = "vfs_cache_pressure",
1191 .data = &sysctl_vfs_cache_pressure,
1192 .maxlen = sizeof(sysctl_vfs_cache_pressure),
1193 .mode = 0644,
6d456111 1194 .proc_handler = proc_dointvec,
1da177e4
LT
1195 .extra1 = &zero,
1196 },
1197#ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1198 {
1da177e4
LT
1199 .procname = "legacy_va_layout",
1200 .data = &sysctl_legacy_va_layout,
1201 .maxlen = sizeof(sysctl_legacy_va_layout),
1202 .mode = 0644,
6d456111 1203 .proc_handler = proc_dointvec,
1da177e4
LT
1204 .extra1 = &zero,
1205 },
1206#endif
1743660b
CL
1207#ifdef CONFIG_NUMA
1208 {
1743660b
CL
1209 .procname = "zone_reclaim_mode",
1210 .data = &zone_reclaim_mode,
1211 .maxlen = sizeof(zone_reclaim_mode),
1212 .mode = 0644,
6d456111 1213 .proc_handler = proc_dointvec,
c84db23c 1214 .extra1 = &zero,
1743660b 1215 },
9614634f 1216 {
9614634f
CL
1217 .procname = "min_unmapped_ratio",
1218 .data = &sysctl_min_unmapped_ratio,
1219 .maxlen = sizeof(sysctl_min_unmapped_ratio),
1220 .mode = 0644,
6d456111 1221 .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler,
9614634f
CL
1222 .extra1 = &zero,
1223 .extra2 = &one_hundred,
1224 },
0ff38490 1225 {
0ff38490
CL
1226 .procname = "min_slab_ratio",
1227 .data = &sysctl_min_slab_ratio,
1228 .maxlen = sizeof(sysctl_min_slab_ratio),
1229 .mode = 0644,
6d456111 1230 .proc_handler = sysctl_min_slab_ratio_sysctl_handler,
0ff38490
CL
1231 .extra1 = &zero,
1232 .extra2 = &one_hundred,
1233 },
e6e5494c 1234#endif
77461ab3
CL
1235#ifdef CONFIG_SMP
1236 {
77461ab3
CL
1237 .procname = "stat_interval",
1238 .data = &sysctl_stat_interval,
1239 .maxlen = sizeof(sysctl_stat_interval),
1240 .mode = 0644,
6d456111 1241 .proc_handler = proc_dointvec_jiffies,
77461ab3
CL
1242 },
1243#endif
6e141546 1244#ifdef CONFIG_MMU
ed032189 1245 {
ed032189 1246 .procname = "mmap_min_addr",
788084ab
EP
1247 .data = &dac_mmap_min_addr,
1248 .maxlen = sizeof(unsigned long),
ed032189 1249 .mode = 0644,
6d456111 1250 .proc_handler = mmap_min_addr_handler,
ed032189 1251 },
6e141546 1252#endif
f0c0b2b8
KH
1253#ifdef CONFIG_NUMA
1254 {
f0c0b2b8
KH
1255 .procname = "numa_zonelist_order",
1256 .data = &numa_zonelist_order,
1257 .maxlen = NUMA_ZONELIST_ORDER_LEN,
1258 .mode = 0644,
6d456111 1259 .proc_handler = numa_zonelist_order_handler,
f0c0b2b8
KH
1260 },
1261#endif
2b8232ce 1262#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
5c36e657 1263 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
e6e5494c 1264 {
e6e5494c
IM
1265 .procname = "vdso_enabled",
1266 .data = &vdso_enabled,
1267 .maxlen = sizeof(vdso_enabled),
1268 .mode = 0644,
6d456111 1269 .proc_handler = proc_dointvec,
e6e5494c
IM
1270 .extra1 = &zero,
1271 },
1da177e4 1272#endif
195cf453
BG
1273#ifdef CONFIG_HIGHMEM
1274 {
195cf453
BG
1275 .procname = "highmem_is_dirtyable",
1276 .data = &vm_highmem_is_dirtyable,
1277 .maxlen = sizeof(vm_highmem_is_dirtyable),
1278 .mode = 0644,
6d456111 1279 .proc_handler = proc_dointvec_minmax,
195cf453
BG
1280 .extra1 = &zero,
1281 .extra2 = &one,
1282 },
1283#endif
4be6f6bb 1284 {
4be6f6bb
PZ
1285 .procname = "scan_unevictable_pages",
1286 .data = &scan_unevictable_pages,
1287 .maxlen = sizeof(scan_unevictable_pages),
1288 .mode = 0644,
6d456111 1289 .proc_handler = scan_unevictable_handler,
4be6f6bb 1290 },
6a46079c
AK
1291#ifdef CONFIG_MEMORY_FAILURE
1292 {
6a46079c
AK
1293 .procname = "memory_failure_early_kill",
1294 .data = &sysctl_memory_failure_early_kill,
1295 .maxlen = sizeof(sysctl_memory_failure_early_kill),
1296 .mode = 0644,
6d456111 1297 .proc_handler = proc_dointvec_minmax,
6a46079c
AK
1298 .extra1 = &zero,
1299 .extra2 = &one,
1300 },
1301 {
6a46079c
AK
1302 .procname = "memory_failure_recovery",
1303 .data = &sysctl_memory_failure_recovery,
1304 .maxlen = sizeof(sysctl_memory_failure_recovery),
1305 .mode = 0644,
6d456111 1306 .proc_handler = proc_dointvec_minmax,
6a46079c
AK
1307 .extra1 = &zero,
1308 .extra2 = &one,
1309 },
1310#endif
1311
2be7fe07
AM
1312/*
1313 * NOTE: do not add new entries to this table unless you have read
1314 * Documentation/sysctl/ctl_unnumbered.txt
1315 */
6fce56ec 1316 { }
1da177e4
LT
1317};
1318
#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
/*
 * Child table hung under "binfmt_misc" in fs_table.  It holds only the
 * terminating sentinel, so the directory exists but has no entries of
 * its own from this file.
 */
static struct ctl_table binfmt_misc_table[] = {
	{ }	/* sentinel */
};
#endif
1324
d8217f07 1325static struct ctl_table fs_table[] = {
1da177e4 1326 {
1da177e4
LT
1327 .procname = "inode-nr",
1328 .data = &inodes_stat,
1329 .maxlen = 2*sizeof(int),
1330 .mode = 0444,
cffbc8aa 1331 .proc_handler = proc_nr_inodes,
1da177e4
LT
1332 },
1333 {
1da177e4
LT
1334 .procname = "inode-state",
1335 .data = &inodes_stat,
1336 .maxlen = 7*sizeof(int),
1337 .mode = 0444,
cffbc8aa 1338 .proc_handler = proc_nr_inodes,
1da177e4
LT
1339 },
1340 {
1da177e4
LT
1341 .procname = "file-nr",
1342 .data = &files_stat,
518de9b3 1343 .maxlen = sizeof(files_stat),
1da177e4 1344 .mode = 0444,
6d456111 1345 .proc_handler = proc_nr_files,
1da177e4
LT
1346 },
1347 {
1da177e4
LT
1348 .procname = "file-max",
1349 .data = &files_stat.max_files,
518de9b3 1350 .maxlen = sizeof(files_stat.max_files),
1da177e4 1351 .mode = 0644,
518de9b3 1352 .proc_handler = proc_doulongvec_minmax,
1da177e4 1353 },
9cfe015a 1354 {
9cfe015a
ED
1355 .procname = "nr_open",
1356 .data = &sysctl_nr_open,
1357 .maxlen = sizeof(int),
1358 .mode = 0644,
6d456111 1359 .proc_handler = proc_dointvec_minmax,
eceea0b3
AV
1360 .extra1 = &sysctl_nr_open_min,
1361 .extra2 = &sysctl_nr_open_max,
9cfe015a 1362 },
1da177e4 1363 {
1da177e4
LT
1364 .procname = "dentry-state",
1365 .data = &dentry_stat,
1366 .maxlen = 6*sizeof(int),
1367 .mode = 0444,
312d3ca8 1368 .proc_handler = proc_nr_dentry,
1da177e4
LT
1369 },
1370 {
1da177e4
LT
1371 .procname = "overflowuid",
1372 .data = &fs_overflowuid,
1373 .maxlen = sizeof(int),
1374 .mode = 0644,
6d456111 1375 .proc_handler = proc_dointvec_minmax,
1da177e4
LT
1376 .extra1 = &minolduid,
1377 .extra2 = &maxolduid,
1378 },
1379 {
1da177e4
LT
1380 .procname = "overflowgid",
1381 .data = &fs_overflowgid,
1382 .maxlen = sizeof(int),
1383 .mode = 0644,
6d456111 1384 .proc_handler = proc_dointvec_minmax,
1da177e4
LT
1385 .extra1 = &minolduid,
1386 .extra2 = &maxolduid,
1387 },
bfcd17a6 1388#ifdef CONFIG_FILE_LOCKING
1da177e4 1389 {
1da177e4
LT
1390 .procname = "leases-enable",
1391 .data = &leases_enable,
1392 .maxlen = sizeof(int),
1393 .mode = 0644,
6d456111 1394 .proc_handler = proc_dointvec,
1da177e4 1395 },
bfcd17a6 1396#endif
1da177e4
LT
1397#ifdef CONFIG_DNOTIFY
1398 {
1da177e4
LT
1399 .procname = "dir-notify-enable",
1400 .data = &dir_notify_enable,
1401 .maxlen = sizeof(int),
1402 .mode = 0644,
6d456111 1403 .proc_handler = proc_dointvec,
1da177e4
LT
1404 },
1405#endif
1406#ifdef CONFIG_MMU
bfcd17a6 1407#ifdef CONFIG_FILE_LOCKING
1da177e4 1408 {
1da177e4
LT
1409 .procname = "lease-break-time",
1410 .data = &lease_break_time,
1411 .maxlen = sizeof(int),
1412 .mode = 0644,
6d456111 1413 .proc_handler = proc_dointvec,
1da177e4 1414 },
bfcd17a6 1415#endif
ebf3f09c 1416#ifdef CONFIG_AIO
1da177e4 1417 {
1da177e4
LT
1418 .procname = "aio-nr",
1419 .data = &aio_nr,
1420 .maxlen = sizeof(aio_nr),
1421 .mode = 0444,
6d456111 1422 .proc_handler = proc_doulongvec_minmax,
1da177e4
LT
1423 },
1424 {
1da177e4
LT
1425 .procname = "aio-max-nr",
1426 .data = &aio_max_nr,
1427 .maxlen = sizeof(aio_max_nr),
1428 .mode = 0644,
6d456111 1429 .proc_handler = proc_doulongvec_minmax,
1da177e4 1430 },
ebf3f09c 1431#endif /* CONFIG_AIO */
2d9048e2 1432#ifdef CONFIG_INOTIFY_USER
0399cb08 1433 {
0399cb08
RL
1434 .procname = "inotify",
1435 .mode = 0555,
1436 .child = inotify_table,
1437 },
1438#endif
7ef9964e
DL
1439#ifdef CONFIG_EPOLL
1440 {
1441 .procname = "epoll",
1442 .mode = 0555,
1443 .child = epoll_table,
1444 },
1445#endif
1da177e4 1446#endif
d6e71144 1447 {
d6e71144
AC
1448 .procname = "suid_dumpable",
1449 .data = &suid_dumpable,
1450 .maxlen = sizeof(int),
1451 .mode = 0644,
6d456111 1452 .proc_handler = proc_dointvec_minmax,
8e654fba
MW
1453 .extra1 = &zero,
1454 .extra2 = &two,
d6e71144 1455 },
2abc26fc
EB
1456#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1457 {
2abc26fc
EB
1458 .procname = "binfmt_misc",
1459 .mode = 0555,
1460 .child = binfmt_misc_table,
1461 },
1462#endif
b492e95b 1463 {
ff9da691
JA
1464 .procname = "pipe-max-size",
1465 .data = &pipe_max_size,
b492e95b
JA
1466 .maxlen = sizeof(int),
1467 .mode = 0644,
ff9da691
JA
1468 .proc_handler = &pipe_proc_fn,
1469 .extra1 = &pipe_min_size,
b492e95b 1470 },
2be7fe07
AM
1471/*
1472 * NOTE: do not add new entries to this table unless you have read
1473 * Documentation/sysctl/ctl_unnumbered.txt
2be7fe07 1474 */
6fce56ec 1475 { }
1da177e4
LT
1476};
1477
d8217f07 1478static struct ctl_table debug_table[] = {
ab3c68ee
HC
1479#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
1480 defined(CONFIG_S390)
abd4f750 1481 {
abd4f750
MAS
1482 .procname = "exception-trace",
1483 .data = &show_unhandled_signals,
1484 .maxlen = sizeof(int),
1485 .mode = 0644,
1486 .proc_handler = proc_dointvec
1487 },
b2be84df
MH
1488#endif
1489#if defined(CONFIG_OPTPROBES)
1490 {
1491 .procname = "kprobes-optimization",
1492 .data = &sysctl_kprobes_optimization,
1493 .maxlen = sizeof(int),
1494 .mode = 0644,
1495 .proc_handler = proc_kprobes_optimization_handler,
1496 .extra1 = &zero,
1497 .extra2 = &one,
1498 },
abd4f750 1499#endif
6fce56ec 1500 { }
1da177e4
LT
1501};
1502
d8217f07 1503static struct ctl_table dev_table[] = {
6fce56ec 1504 { }
0eeca283 1505};
1da177e4 1506
330d57fb
AV
/*
 * Spinlock taken by the helpers in this file around header use/reference
 * counts and around changes to the header and root lists.
 */
static DEFINE_SPINLOCK(sysctl_lock);
1508
1509/* called under sysctl_lock */
1510static int use_table(struct ctl_table_header *p)
1511{
1512 if (unlikely(p->unregistering))
1513 return 0;
1514 p->used++;
1515 return 1;
1516}
1517
1518/* called under sysctl_lock */
1519static void unuse_table(struct ctl_table_header *p)
1520{
1521 if (!--p->used)
1522 if (unlikely(p->unregistering))
1523 complete(p->unregistering);
1524}
1525
1526/* called under sysctl_lock, will reacquire if has to wait */
1527static void start_unregistering(struct ctl_table_header *p)
1528{
1529 /*
1530 * if p->used is 0, nobody will ever touch that entry again;
1531 * we'll eliminate all paths to it before dropping sysctl_lock
1532 */
1533 if (unlikely(p->used)) {
1534 struct completion wait;
1535 init_completion(&wait);
1536 p->unregistering = &wait;
1537 spin_unlock(&sysctl_lock);
1538 wait_for_completion(&wait);
1539 spin_lock(&sysctl_lock);
f7e6ced4
AV
1540 } else {
1541 /* anything non-NULL; we'll never dereference it */
1542 p->unregistering = ERR_PTR(-EINVAL);
330d57fb
AV
1543 }
1544 /*
1545 * do not remove from the list until nobody holds it; walking the
1546 * list in do_sysctl() relies on that.
1547 */
1548 list_del_init(&p->ctl_entry);
1549}
1550
f7e6ced4
AV
1551void sysctl_head_get(struct ctl_table_header *head)
1552{
1553 spin_lock(&sysctl_lock);
1554 head->count++;
1555 spin_unlock(&sysctl_lock);
1556}
1557
1558void sysctl_head_put(struct ctl_table_header *head)
1559{
1560 spin_lock(&sysctl_lock);
1561 if (!--head->count)
1562 kfree(head);
1563 spin_unlock(&sysctl_lock);
1564}
1565
1566struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
1567{
1568 if (!head)
1569 BUG();
1570 spin_lock(&sysctl_lock);
1571 if (!use_table(head))
1572 head = ERR_PTR(-ENOENT);
1573 spin_unlock(&sysctl_lock);
1574 return head;
1575}
1576
805b5d5e
EB
1577void sysctl_head_finish(struct ctl_table_header *head)
1578{
1579 if (!head)
1580 return;
1581 spin_lock(&sysctl_lock);
1582 unuse_table(head);
1583 spin_unlock(&sysctl_lock);
1584}
1585
73455092
AV
1586static struct ctl_table_set *
1587lookup_header_set(struct ctl_table_root *root, struct nsproxy *namespaces)
1588{
1589 struct ctl_table_set *set = &root->default_set;
1590 if (root->lookup)
1591 set = root->lookup(root, namespaces);
1592 return set;
1593}
1594
e51b6ba0
EB
1595static struct list_head *
1596lookup_header_list(struct ctl_table_root *root, struct nsproxy *namespaces)
805b5d5e 1597{
73455092
AV
1598 struct ctl_table_set *set = lookup_header_set(root, namespaces);
1599 return &set->list;
e51b6ba0
EB
1600}
1601
1602struct ctl_table_header *__sysctl_head_next(struct nsproxy *namespaces,
1603 struct ctl_table_header *prev)
1604{
1605 struct ctl_table_root *root;
1606 struct list_head *header_list;
805b5d5e
EB
1607 struct ctl_table_header *head;
1608 struct list_head *tmp;
e51b6ba0 1609
805b5d5e
EB
1610 spin_lock(&sysctl_lock);
1611 if (prev) {
e51b6ba0 1612 head = prev;
805b5d5e
EB
1613 tmp = &prev->ctl_entry;
1614 unuse_table(prev);
1615 goto next;
1616 }
1617 tmp = &root_table_header.ctl_entry;
1618 for (;;) {
1619 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1620
1621 if (!use_table(head))
1622 goto next;
1623 spin_unlock(&sysctl_lock);
1624 return head;
1625 next:
e51b6ba0 1626 root = head->root;
805b5d5e 1627 tmp = tmp->next;
e51b6ba0
EB
1628 header_list = lookup_header_list(root, namespaces);
1629 if (tmp != header_list)
1630 continue;
1631
1632 do {
1633 root = list_entry(root->root_list.next,
1634 struct ctl_table_root, root_list);
1635 if (root == &sysctl_table_root)
1636 goto out;
1637 header_list = lookup_header_list(root, namespaces);
1638 } while (list_empty(header_list));
1639 tmp = header_list->next;
805b5d5e 1640 }
e51b6ba0 1641out:
805b5d5e
EB
1642 spin_unlock(&sysctl_lock);
1643 return NULL;
1644}
1645
e51b6ba0
EB
1646struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1647{
1648 return __sysctl_head_next(current->nsproxy, prev);
1649}
1650
1651void register_sysctl_root(struct ctl_table_root *root)
1652{
1653 spin_lock(&sysctl_lock);
1654 list_add_tail(&root->root_list, &sysctl_table_root.root_list);
1655 spin_unlock(&sysctl_lock);
1656}
1657
1da177e4 1658/*
1ff007eb 1659 * sysctl_perm does NOT grant the superuser all rights automatically, because
1da177e4
LT
1660 * some sysctl variables are readonly even to root.
1661 */
1662
1663static int test_perm(int mode, int op)
1664{
76aac0e9 1665 if (!current_euid())
1da177e4
LT
1666 mode >>= 6;
1667 else if (in_egroup_p(0))
1668 mode >>= 3;
e6305c43 1669 if ((op & ~mode & (MAY_READ|MAY_WRITE|MAY_EXEC)) == 0)
1da177e4
LT
1670 return 0;
1671 return -EACCES;
1672}
1673
d7321cd6 1674int sysctl_perm(struct ctl_table_root *root, struct ctl_table *table, int op)
1da177e4
LT
1675{
1676 int error;
d7321cd6
PE
1677 int mode;
1678
e6305c43 1679 error = security_sysctl(table, op & (MAY_READ | MAY_WRITE | MAY_EXEC));
1da177e4
LT
1680 if (error)
1681 return error;
d7321cd6
PE
1682
1683 if (root->permissions)
1684 mode = root->permissions(root, current->nsproxy, table);
1685 else
1686 mode = table->mode;
1687
1688 return test_perm(mode, op);
1da177e4
LT
1689}
1690
d912b0cc
EB
1691static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1692{
2315ffa0 1693 for (; table->procname; table++) {
d912b0cc
EB
1694 table->parent = parent;
1695 if (table->child)
1696 sysctl_set_parent(table, table->child);
1697 }
1698}
1699
1700static __init int sysctl_init(void)
1701{
1702 sysctl_set_parent(NULL, root_table);
88f458e4 1703#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
b3bd3de6 1704 sysctl_check_table(current->nsproxy, root_table);
88f458e4 1705#endif
d912b0cc
EB
1706 return 0;
1707}
1708
1709core_initcall(sysctl_init);
1710
bfbcf034
AV
1711static struct ctl_table *is_branch_in(struct ctl_table *branch,
1712 struct ctl_table *table)
ae7edecc
AV
1713{
1714 struct ctl_table *p;
1715 const char *s = branch->procname;
1716
1717 /* branch should have named subdirectory as its first element */
1718 if (!s || !branch->child)
bfbcf034 1719 return NULL;
ae7edecc
AV
1720
1721 /* ... and nothing else */
2315ffa0 1722 if (branch[1].procname)
bfbcf034 1723 return NULL;
ae7edecc
AV
1724
1725 /* table should contain subdirectory with the same name */
2315ffa0 1726 for (p = table; p->procname; p++) {
ae7edecc
AV
1727 if (!p->child)
1728 continue;
1729 if (p->procname && strcmp(p->procname, s) == 0)
bfbcf034 1730 return p;
ae7edecc 1731 }
bfbcf034 1732 return NULL;
ae7edecc
AV
1733}
1734
1735/* see if attaching q to p would be an improvement */
1736static void try_attach(struct ctl_table_header *p, struct ctl_table_header *q)
1737{
1738 struct ctl_table *to = p->ctl_table, *by = q->ctl_table;
bfbcf034 1739 struct ctl_table *next;
ae7edecc
AV
1740 int is_better = 0;
1741 int not_in_parent = !p->attached_by;
1742
bfbcf034 1743 while ((next = is_branch_in(by, to)) != NULL) {
ae7edecc
AV
1744 if (by == q->attached_by)
1745 is_better = 1;
1746 if (to == p->attached_by)
1747 not_in_parent = 1;
1748 by = by->child;
bfbcf034 1749 to = next->child;
ae7edecc
AV
1750 }
1751
1752 if (is_better && not_in_parent) {
1753 q->attached_by = by;
1754 q->attached_to = to;
1755 q->parent = p;
1756 }
1757}
1758
1da177e4 1759/**
e51b6ba0
EB
1760 * __register_sysctl_paths - register a sysctl hierarchy
1761 * @root: List of sysctl headers to register on
1762 * @namespaces: Data to compute which lists of sysctl entries are visible
29e796fd 1763 * @path: The path to the directory the sysctl table is in.
1da177e4 1764 * @table: the top-level table structure
1da177e4
LT
1765 *
1766 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
29e796fd 1767 * array. A completely 0 filled entry terminates the table.
1da177e4 1768 *
d8217f07 1769 * The members of the &struct ctl_table structure are used as follows:
1da177e4 1770 *
1da177e4
LT
1771 * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1772 * enter a sysctl file
1773 *
1774 * data - a pointer to data for use by proc_handler
1775 *
1776 * maxlen - the maximum size in bytes of the data
1777 *
1778 * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1779 *
1780 * child - a pointer to the child sysctl table if this entry is a directory, or
1781 * %NULL.
1782 *
1783 * proc_handler - the text handler routine (described below)
1784 *
1da177e4
LT
1785 * de - for internal use by the sysctl routines
1786 *
1787 * extra1, extra2 - extra pointers usable by the proc handler routines
1788 *
1789 * Leaf nodes in the sysctl tree will be represented by a single file
1790 * under /proc; non-leaf nodes will be represented by directories.
1791 *
1792 * sysctl(2) can automatically manage read and write requests through
1793 * the sysctl table. The data and maxlen fields of the ctl_table
1794 * struct enable minimal validation of the values being written to be
1795 * performed, and the mode field allows minimal authentication.
1796 *
1da177e4
LT
1797 * There must be a proc_handler routine for any terminal nodes
1798 * mirrored under /proc/sys (non-terminals are handled by a built-in
1799 * directory handler). Several default handlers are available to
1800 * cover common cases -
1801 *
1802 * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1803 * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(),
1804 * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1805 *
1806 * It is the handler's job to read the input buffer from user memory
1807 * and process it. The handler should return 0 on success.
1808 *
1809 * This routine returns %NULL on a failure to register, and a pointer
1810 * to the table header on success.
1811 */
e51b6ba0
EB
1812struct ctl_table_header *__register_sysctl_paths(
1813 struct ctl_table_root *root,
1814 struct nsproxy *namespaces,
1815 const struct ctl_path *path, struct ctl_table *table)
1da177e4 1816{
29e796fd
EB
1817 struct ctl_table_header *header;
1818 struct ctl_table *new, **prevp;
1819 unsigned int n, npath;
ae7edecc 1820 struct ctl_table_set *set;
29e796fd
EB
1821
1822 /* Count the path components */
2315ffa0 1823 for (npath = 0; path[npath].procname; ++npath)
29e796fd
EB
1824 ;
1825
1826 /*
1827 * For each path component, allocate a 2-element ctl_table array.
1828 * The first array element will be filled with the sysctl entry
2315ffa0 1829 * for this, the second will be the sentinel (procname == 0).
29e796fd
EB
1830 *
1831 * We allocate everything in one go so that we don't have to
1832 * worry about freeing additional memory in unregister_sysctl_table.
1833 */
1834 header = kzalloc(sizeof(struct ctl_table_header) +
1835 (2 * npath * sizeof(struct ctl_table)), GFP_KERNEL);
1836 if (!header)
1da177e4 1837 return NULL;
29e796fd
EB
1838
1839 new = (struct ctl_table *) (header + 1);
1840
1841 /* Now connect the dots */
1842 prevp = &header->ctl_table;
1843 for (n = 0; n < npath; ++n, ++path) {
1844 /* Copy the procname */
1845 new->procname = path->procname;
29e796fd
EB
1846 new->mode = 0555;
1847
1848 *prevp = new;
1849 prevp = &new->child;
1850
1851 new += 2;
1852 }
1853 *prevp = table;
23eb06de 1854 header->ctl_table_arg = table;
29e796fd
EB
1855
1856 INIT_LIST_HEAD(&header->ctl_entry);
1857 header->used = 0;
1858 header->unregistering = NULL;
e51b6ba0 1859 header->root = root;
29e796fd 1860 sysctl_set_parent(NULL, header->ctl_table);
f7e6ced4 1861 header->count = 1;
88f458e4 1862#ifdef CONFIG_SYSCTL_SYSCALL_CHECK
e51b6ba0 1863 if (sysctl_check_table(namespaces, header->ctl_table)) {
29e796fd 1864 kfree(header);
fc6cd25b
EB
1865 return NULL;
1866 }
88f458e4 1867#endif
330d57fb 1868 spin_lock(&sysctl_lock);
73455092 1869 header->set = lookup_header_set(root, namespaces);
ae7edecc
AV
1870 header->attached_by = header->ctl_table;
1871 header->attached_to = root_table;
1872 header->parent = &root_table_header;
1873 for (set = header->set; set; set = set->parent) {
1874 struct ctl_table_header *p;
1875 list_for_each_entry(p, &set->list, ctl_entry) {
1876 if (p->unregistering)
1877 continue;
1878 try_attach(p, header);
1879 }
1880 }
1881 header->parent->count++;
73455092 1882 list_add_tail(&header->ctl_entry, &header->set->list);
330d57fb 1883 spin_unlock(&sysctl_lock);
29e796fd
EB
1884
1885 return header;
1886}
1887
e51b6ba0
EB
1888/**
1889 * register_sysctl_table_path - register a sysctl table hierarchy
1890 * @path: The path to the directory the sysctl table is in.
1891 * @table: the top-level table structure
1892 *
1893 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1894 * array. A completely 0 filled entry terminates the table.
1895 *
1896 * See __register_sysctl_paths for more details.
1897 */
1898struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
1899 struct ctl_table *table)
1900{
1901 return __register_sysctl_paths(&sysctl_table_root, current->nsproxy,
1902 path, table);
1903}
1904
29e796fd
EB
1905/**
1906 * register_sysctl_table - register a sysctl table hierarchy
1907 * @table: the top-level table structure
1908 *
1909 * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1910 * array. A completely 0 filled entry terminates the table.
1911 *
1912 * See register_sysctl_paths for more details.
1913 */
1914struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
1915{
1916 static const struct ctl_path null_path[] = { {} };
1917
1918 return register_sysctl_paths(null_path, table);
1da177e4
LT
1919}
1920
1921/**
1922 * unregister_sysctl_table - unregister a sysctl table hierarchy
1923 * @header: the header returned from register_sysctl_table
1924 *
1925 * Unregisters the sysctl table and all children. proc entries may not
1926 * actually be removed until they are no longer used by anyone.
1927 */
1928void unregister_sysctl_table(struct ctl_table_header * header)
1929{
330d57fb 1930 might_sleep();
f1dad166
PE
1931
1932 if (header == NULL)
1933 return;
1934
330d57fb
AV
1935 spin_lock(&sysctl_lock);
1936 start_unregistering(header);
ae7edecc
AV
1937 if (!--header->parent->count) {
1938 WARN_ON(1);
1939 kfree(header->parent);
1940 }
f7e6ced4
AV
1941 if (!--header->count)
1942 kfree(header);
330d57fb 1943 spin_unlock(&sysctl_lock);
1da177e4
LT
1944}
1945
9043476f
AV
1946int sysctl_is_seen(struct ctl_table_header *p)
1947{
1948 struct ctl_table_set *set = p->set;
1949 int res;
1950 spin_lock(&sysctl_lock);
1951 if (p->unregistering)
1952 res = 0;
1953 else if (!set->is_seen)
1954 res = 1;
1955 else
1956 res = set->is_seen(set);
1957 spin_unlock(&sysctl_lock);
1958 return res;
1959}
1960
73455092
AV
1961void setup_sysctl_set(struct ctl_table_set *p,
1962 struct ctl_table_set *parent,
1963 int (*is_seen)(struct ctl_table_set *))
1964{
1965 INIT_LIST_HEAD(&p->list);
1966 p->parent = parent ? parent : &sysctl_table_root.default_set;
1967 p->is_seen = is_seen;
1968}
1969
b89a8171 1970#else /* !CONFIG_SYSCTL */
/* !CONFIG_SYSCTL stub: nothing is registered, always returns NULL. */
struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
{
	return NULL;
}
1975
29e796fd
EB
/* !CONFIG_SYSCTL stub: nothing is registered, always returns NULL. */
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
						struct ctl_table *table)
{
	return NULL;
}
1981
b89a8171
EB
/* !CONFIG_SYSCTL stub: there is nothing to unregister. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
1985
73455092
AV
/* !CONFIG_SYSCTL stub: table sets are not used, so this does nothing. */
void setup_sysctl_set(struct ctl_table_set *p,
	struct ctl_table_set *parent,
	int (*is_seen)(struct ctl_table_set *))
{
}
1991
f7e6ced4
AV
/* !CONFIG_SYSCTL stub: no headers exist, so there is nothing to release. */
void sysctl_head_put(struct ctl_table_header *head)
{
}
1995
b89a8171
EB
1996#endif /* CONFIG_SYSCTL */
1997
1da177e4
LT
1998/*
1999 * /proc/sys support
2000 */
2001
b89a8171 2002#ifdef CONFIG_PROC_SYSCTL
1da177e4 2003
b1ba4ddd 2004static int _proc_do_string(void* data, int maxlen, int write,
8d65af78 2005 void __user *buffer,
b1ba4ddd 2006 size_t *lenp, loff_t *ppos)
1da177e4
LT
2007{
2008 size_t len;
2009 char __user *p;
2010 char c;
8d060877
ON
2011
2012 if (!data || !maxlen || !*lenp) {
1da177e4
LT
2013 *lenp = 0;
2014 return 0;
2015 }
8d060877 2016
1da177e4
LT
2017 if (write) {
2018 len = 0;
2019 p = buffer;
2020 while (len < *lenp) {
2021 if (get_user(c, p++))
2022 return -EFAULT;
2023 if (c == 0 || c == '\n')
2024 break;
2025 len++;
2026 }
f5dd3d6f
SV
2027 if (len >= maxlen)
2028 len = maxlen-1;
2029 if(copy_from_user(data, buffer, len))
1da177e4 2030 return -EFAULT;
f5dd3d6f 2031 ((char *) data)[len] = 0;
1da177e4
LT
2032 *ppos += *lenp;
2033 } else {
f5dd3d6f
SV
2034 len = strlen(data);
2035 if (len > maxlen)
2036 len = maxlen;
8d060877
ON
2037
2038 if (*ppos > len) {
2039 *lenp = 0;
2040 return 0;
2041 }
2042
2043 data += *ppos;
2044 len -= *ppos;
2045
1da177e4
LT
2046 if (len > *lenp)
2047 len = *lenp;
2048 if (len)
f5dd3d6f 2049 if(copy_to_user(buffer, data, len))
1da177e4
LT
2050 return -EFAULT;
2051 if (len < *lenp) {
2052 if(put_user('\n', ((char __user *) buffer) + len))
2053 return -EFAULT;
2054 len++;
2055 }
2056 *lenp = len;
2057 *ppos += len;
2058 }
2059 return 0;
2060}
2061
f5dd3d6f
SV
2062/**
2063 * proc_dostring - read a string sysctl
2064 * @table: the sysctl table
2065 * @write: %TRUE if this is a write to the sysctl file
f5dd3d6f
SV
2066 * @buffer: the user buffer
2067 * @lenp: the size of the user buffer
2068 * @ppos: file position
2069 *
2070 * Reads/writes a string from/to the user buffer. If the kernel
2071 * buffer provided is not large enough to hold the string, the
2072 * string is truncated. The copied string is %NULL-terminated.
2073 * If the string is being read by the user process, it is copied
2074 * and a newline '\n' is added. It is truncated if the buffer is
2075 * not large enough.
2076 *
2077 * Returns 0 on success.
2078 */
8d65af78 2079int proc_dostring(struct ctl_table *table, int write,
f5dd3d6f
SV
2080 void __user *buffer, size_t *lenp, loff_t *ppos)
2081{
8d65af78 2082 return _proc_do_string(table->data, table->maxlen, write,
f5dd3d6f
SV
2083 buffer, lenp, ppos);
2084}
2085
00b7c339
AW
/*
 * Advance *@buf to where skip_spaces() stops and return how many
 * characters were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;
	char *end = skip_spaces(start);

	*buf = end;
	return end - start;
}
2094
9f977fb7
OP
/*
 * Step *@buf over a run of the character @v, decrementing *@size for
 * each character skipped.  Stops at the first different character or
 * when *@size reaches zero; the buffer is not read once *@size is zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		--*size;
		++*buf;
	}
}
2104
00b7c339
AW
2105#define TMPBUFLEN 22
2106/**
0fc377bd 2107 * proc_get_long - reads an ASCII formatted integer from a user buffer
00b7c339 2108 *
0fc377bd
RD
2109 * @buf: a kernel buffer
2110 * @size: size of the kernel buffer
2111 * @val: this is where the number will be stored
2112 * @neg: set to %TRUE if number is negative
2113 * @perm_tr: a vector which contains the allowed trailers
2114 * @perm_tr_len: size of the perm_tr vector
2115 * @tr: pointer to store the trailer character
00b7c339 2116 *
0fc377bd
RD
2117 * In case of success %0 is returned and @buf and @size are updated with
2118 * the amount of bytes read. If @tr is non-NULL and a trailing
2119 * character exists (size is non-zero after returning from this
2120 * function), @tr is updated with the trailing character.
00b7c339
AW
2121 */
2122static int proc_get_long(char **buf, size_t *size,
2123 unsigned long *val, bool *neg,
2124 const char *perm_tr, unsigned perm_tr_len, char *tr)
2125{
2126 int len;
2127 char *p, tmp[TMPBUFLEN];
2128
2129 if (!*size)
2130 return -EINVAL;
2131
2132 len = *size;
2133 if (len > TMPBUFLEN - 1)
2134 len = TMPBUFLEN - 1;
2135
2136 memcpy(tmp, *buf, len);
2137
2138 tmp[len] = 0;
2139 p = tmp;
2140 if (*p == '-' && *size > 1) {
2141 *neg = true;
2142 p++;
2143 } else
2144 *neg = false;
2145 if (!isdigit(*p))
2146 return -EINVAL;
2147
2148 *val = simple_strtoul(p, &p, 0);
2149
2150 len = p - tmp;
2151
2152 /* We don't know if the next char is whitespace thus we may accept
2153 * invalid integers (e.g. 1234...a) or two integers instead of one
2154 * (e.g. 123...1). So lets not allow such large numbers. */
2155 if (len == TMPBUFLEN - 1)
2156 return -EINVAL;
2157
2158 if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
2159 return -EINVAL;
1da177e4 2160
00b7c339
AW
2161 if (tr && (len < *size))
2162 *tr = *p;
2163
2164 *buf += len;
2165 *size -= len;
2166
2167 return 0;
2168}
2169
2170/**
0fc377bd 2171 * proc_put_long - converts an integer to a decimal ASCII formatted string
00b7c339 2172 *
0fc377bd
RD
2173 * @buf: the user buffer
2174 * @size: the size of the user buffer
2175 * @val: the integer to be converted
2176 * @neg: sign of the number, %TRUE for negative
00b7c339 2177 *
0fc377bd
RD
2178 * In case of success %0 is returned and @buf and @size are updated with
2179 * the amount of bytes written.
00b7c339
AW
2180 */
2181static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2182 bool neg)
2183{
2184 int len;
2185 char tmp[TMPBUFLEN], *p = tmp;
2186
2187 sprintf(p, "%s%lu", neg ? "-" : "", val);
2188 len = strlen(tmp);
2189 if (len > *size)
2190 len = *size;
2191 if (copy_to_user(*buf, tmp, len))
2192 return -EFAULT;
2193 *size -= len;
2194 *buf += len;
2195 return 0;
2196}
2197#undef TMPBUFLEN
2198
2199static int proc_put_char(void __user **buf, size_t *size, char c)
2200{
2201 if (*size) {
2202 char __user **buffer = (char __user **)buf;
2203 if (put_user(c, *buffer))
2204 return -EFAULT;
2205 (*size)--, (*buffer)++;
2206 *buf = *buffer;
2207 }
2208 return 0;
2209}
1da177e4 2210
/*
 * Default converter between the (sign, magnitude) pair used by the
 * parser/printer and an int sysctl value.
 *
 * @negp:  sign of the number (%TRUE for negative)
 * @lvalp: magnitude of the number
 * @valp:  the int being read from or written to
 * @write: non-zero to store (@negp, @lvalp) into @valp, zero to load
 *         @valp into (@negp, @lvalp)
 * @data:  unused
 *
 * On write, magnitudes that cannot be represented in an int are rejected
 * with -EINVAL instead of being silently truncated; @valp is left
 * untouched in that case.  Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		if (*negp) {
			/* INT_MIN has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			*valp = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			*valp = *lvalp;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2229
00b7c339
AW
/* Characters allowed to follow a number in whitespace-separated vectors. */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n'
};
2231
d8217f07 2232static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
8d65af78 2233 int write, void __user *buffer,
fcfbd547 2234 size_t *lenp, loff_t *ppos,
00b7c339 2235 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
1da177e4
LT
2236 int write, void *data),
2237 void *data)
2238{
00b7c339
AW
2239 int *i, vleft, first = 1, err = 0;
2240 unsigned long page = 0;
2241 size_t left;
2242 char *kbuf;
1da177e4 2243
00b7c339 2244 if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
1da177e4
LT
2245 *lenp = 0;
2246 return 0;
2247 }
2248
fcfbd547 2249 i = (int *) tbl_data;
1da177e4
LT
2250 vleft = table->maxlen / sizeof(*i);
2251 left = *lenp;
2252
2253 if (!conv)
2254 conv = do_proc_dointvec_conv;
2255
00b7c339
AW
2256 if (write) {
2257 if (left > PAGE_SIZE - 1)
2258 left = PAGE_SIZE - 1;
2259 page = __get_free_page(GFP_TEMPORARY);
2260 kbuf = (char *) page;
2261 if (!kbuf)
2262 return -ENOMEM;
2263 if (copy_from_user(kbuf, buffer, left)) {
2264 err = -EFAULT;
2265 goto free;
2266 }
2267 kbuf[left] = 0;
2268 }
2269
1da177e4 2270 for (; left && vleft--; i++, first=0) {
00b7c339
AW
2271 unsigned long lval;
2272 bool neg;
1da177e4 2273
00b7c339
AW
2274 if (write) {
2275 left -= proc_skip_spaces(&kbuf);
1da177e4 2276
563b0467
O
2277 if (!left)
2278 break;
00b7c339
AW
2279 err = proc_get_long(&kbuf, &left, &lval, &neg,
2280 proc_wspace_sep,
2281 sizeof(proc_wspace_sep), NULL);
2282 if (err)
1da177e4 2283 break;
00b7c339
AW
2284 if (conv(&neg, &lval, i, 1, data)) {
2285 err = -EINVAL;
1da177e4 2286 break;
00b7c339 2287 }
1da177e4 2288 } else {
00b7c339
AW
2289 if (conv(&neg, &lval, i, 0, data)) {
2290 err = -EINVAL;
2291 break;
2292 }
1da177e4 2293 if (!first)
00b7c339
AW
2294 err = proc_put_char(&buffer, &left, '\t');
2295 if (err)
2296 break;
2297 err = proc_put_long(&buffer, &left, lval, neg);
2298 if (err)
1da177e4 2299 break;
1da177e4
LT
2300 }
2301 }
2302
00b7c339
AW
2303 if (!write && !first && left && !err)
2304 err = proc_put_char(&buffer, &left, '\n');
563b0467 2305 if (write && !err && left)
00b7c339
AW
2306 left -= proc_skip_spaces(&kbuf);
2307free:
1da177e4 2308 if (write) {
00b7c339
AW
2309 free_page(page);
2310 if (first)
2311 return err ? : -EINVAL;
1da177e4 2312 }
1da177e4
LT
2313 *lenp -= left;
2314 *ppos += *lenp;
00b7c339 2315 return err;
1da177e4
LT
2316}
2317
8d65af78 2318static int do_proc_dointvec(struct ctl_table *table, int write,
fcfbd547 2319 void __user *buffer, size_t *lenp, loff_t *ppos,
00b7c339 2320 int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
fcfbd547
KK
2321 int write, void *data),
2322 void *data)
2323{
8d65af78 2324 return __do_proc_dointvec(table->data, table, write,
fcfbd547
KK
2325 buffer, lenp, ppos, conv, data);
2326}
2327
1da177e4
LT
2328/**
2329 * proc_dointvec - read a vector of integers
2330 * @table: the sysctl table
2331 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2332 * @buffer: the user buffer
2333 * @lenp: the size of the user buffer
2334 * @ppos: file position
2335 *
2336 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2337 * values from/to the user buffer, treated as an ASCII string.
2338 *
2339 * Returns 0 on success.
2340 */
8d65af78 2341int proc_dointvec(struct ctl_table *table, int write,
1da177e4
LT
2342 void __user *buffer, size_t *lenp, loff_t *ppos)
2343{
8d65af78 2344 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
2345 NULL,NULL);
2346}
2347
34f5a398 2348/*
25ddbb18
AK
2349 * Taint values can only be increased
2350 * This means we can safely use a temporary.
34f5a398 2351 */
8d65af78 2352static int proc_taint(struct ctl_table *table, int write,
34f5a398
TT
2353 void __user *buffer, size_t *lenp, loff_t *ppos)
2354{
25ddbb18
AK
2355 struct ctl_table t;
2356 unsigned long tmptaint = get_taint();
2357 int err;
34f5a398 2358
91fcd412 2359 if (write && !capable(CAP_SYS_ADMIN))
34f5a398
TT
2360 return -EPERM;
2361
25ddbb18
AK
2362 t = *table;
2363 t.data = &tmptaint;
8d65af78 2364 err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
25ddbb18
AK
2365 if (err < 0)
2366 return err;
2367
2368 if (write) {
2369 /*
2370 * Poor man's atomic or. Not worth adding a primitive
2371 * to everyone's atomic.h for this
2372 */
2373 int i;
2374 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2375 if ((tmptaint >> i) & 1)
2376 add_taint(i);
2377 }
2378 }
2379
2380 return err;
34f5a398
TT
2381}
2382
1da177e4
LT
/*
 * Bounds handed to do_proc_dointvec_minmax_conv(); a NULL pointer means
 * the corresponding bound is not enforced.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Converter for int sysctls with optional lower/upper bounds.
 *
 * @negp:  sign of the number (%TRUE for negative)
 * @lvalp: magnitude of the number
 * @valp:  the int being read from or written to
 * @write: non-zero to store (@negp, @lvalp) into @valp, zero to load
 * @data:  a struct do_proc_dointvec_minmax_conv_param
 *
 * On write the magnitude is first checked to fit in an int, so that a
 * huge input cannot wrap around into the permitted range, and only then
 * compared against the bounds.  @valp is written only when every check
 * passes.  Returns 0 on success, -EINVAL when the value is rejected.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val;

		if (*negp) {
			/* INT_MIN has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = true;
			/* Negate as unsigned so INT_MIN does not overflow. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2411
2412/**
2413 * proc_dointvec_minmax - read a vector of integers with min/max values
2414 * @table: the sysctl table
2415 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2416 * @buffer: the user buffer
2417 * @lenp: the size of the user buffer
2418 * @ppos: file position
2419 *
2420 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2421 * values from/to the user buffer, treated as an ASCII string.
2422 *
2423 * This routine will ensure the values are within the range specified by
2424 * table->extra1 (min) and table->extra2 (max).
2425 *
2426 * Returns 0 on success.
2427 */
8d65af78 2428int proc_dointvec_minmax(struct ctl_table *table, int write,
1da177e4
LT
2429 void __user *buffer, size_t *lenp, loff_t *ppos)
2430{
2431 struct do_proc_dointvec_minmax_conv_param param = {
2432 .min = (int *) table->extra1,
2433 .max = (int *) table->extra2,
2434 };
8d65af78 2435 return do_proc_dointvec(table, write, buffer, lenp, ppos,
1da177e4
LT
2436 do_proc_dointvec_minmax_conv, &param);
2437}
2438
d8217f07 2439static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
1da177e4
LT
2440 void __user *buffer,
2441 size_t *lenp, loff_t *ppos,
2442 unsigned long convmul,
2443 unsigned long convdiv)
2444{
00b7c339
AW
2445 unsigned long *i, *min, *max;
2446 int vleft, first = 1, err = 0;
2447 unsigned long page = 0;
2448 size_t left;
2449 char *kbuf;
2450
2451 if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
1da177e4
LT
2452 *lenp = 0;
2453 return 0;
2454 }
00b7c339 2455
fcfbd547 2456 i = (unsigned long *) data;
1da177e4
LT
2457 min = (unsigned long *) table->extra1;
2458 max = (unsigned long *) table->extra2;
2459 vleft = table->maxlen / sizeof(unsigned long);
2460 left = *lenp;
00b7c339
AW
2461
2462 if (write) {
2463 if (left > PAGE_SIZE - 1)
2464 left = PAGE_SIZE - 1;
2465 page = __get_free_page(GFP_TEMPORARY);
2466 kbuf = (char *) page;
2467 if (!kbuf)
2468 return -ENOMEM;
2469 if (copy_from_user(kbuf, buffer, left)) {
2470 err = -EFAULT;
2471 goto free;
2472 }
2473 kbuf[left] = 0;
2474 }
2475
27b3d80a 2476 for (; left && vleft--; i++, first = 0) {
00b7c339
AW
2477 unsigned long val;
2478
1da177e4 2479 if (write) {
00b7c339
AW
2480 bool neg;
2481
2482 left -= proc_skip_spaces(&kbuf);
2483
2484 err = proc_get_long(&kbuf, &left, &val, &neg,
2485 proc_wspace_sep,
2486 sizeof(proc_wspace_sep), NULL);
2487 if (err)
1da177e4
LT
2488 break;
2489 if (neg)
1da177e4
LT
2490 continue;
2491 if ((min && val < *min) || (max && val > *max))
2492 continue;
2493 *i = val;
2494 } else {
00b7c339 2495 val = convdiv * (*i) / convmul;
1da177e4 2496 if (!first)
00b7c339
AW
2497 err = proc_put_char(&buffer, &left, '\t');
2498 err = proc_put_long(&buffer, &left, val, false);
2499 if (err)
2500 break;
1da177e4
LT
2501 }
2502 }
2503
00b7c339
AW
2504 if (!write && !first && left && !err)
2505 err = proc_put_char(&buffer, &left, '\n');
2506 if (write && !err)
2507 left -= proc_skip_spaces(&kbuf);
2508free:
1da177e4 2509 if (write) {
00b7c339
AW
2510 free_page(page);
2511 if (first)
2512 return err ? : -EINVAL;
1da177e4 2513 }
1da177e4
LT
2514 *lenp -= left;
2515 *ppos += *lenp;
00b7c339 2516 return err;
1da177e4
LT
2517}
2518
d8217f07 2519static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
fcfbd547
KK
2520 void __user *buffer,
2521 size_t *lenp, loff_t *ppos,
2522 unsigned long convmul,
2523 unsigned long convdiv)
2524{
2525 return __do_proc_doulongvec_minmax(table->data, table, write,
8d65af78 2526 buffer, lenp, ppos, convmul, convdiv);
fcfbd547
KK
2527}
2528
1da177e4
LT
2529/**
2530 * proc_doulongvec_minmax - read a vector of long integers with min/max values
2531 * @table: the sysctl table
2532 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2533 * @buffer: the user buffer
2534 * @lenp: the size of the user buffer
2535 * @ppos: file position
2536 *
2537 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2538 * values from/to the user buffer, treated as an ASCII string.
2539 *
2540 * This routine will ensure the values are within the range specified by
2541 * table->extra1 (min) and table->extra2 (max).
2542 *
2543 * Returns 0 on success.
2544 */
8d65af78 2545int proc_doulongvec_minmax(struct ctl_table *table, int write,
1da177e4
LT
2546 void __user *buffer, size_t *lenp, loff_t *ppos)
2547{
8d65af78 2548 return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
1da177e4
LT
2549}
2550
2551/**
2552 * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2553 * @table: the sysctl table
2554 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2555 * @buffer: the user buffer
2556 * @lenp: the size of the user buffer
2557 * @ppos: file position
2558 *
2559 * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2560 * values from/to the user buffer, treated as an ASCII string. The values
2561 * are treated as milliseconds, and converted to jiffies when they are stored.
2562 *
2563 * This routine will ensure the values are within the range specified by
2564 * table->extra1 (min) and table->extra2 (max).
2565 *
2566 * Returns 0 on success.
2567 */
d8217f07 2568int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1da177e4
LT
2569 void __user *buffer,
2570 size_t *lenp, loff_t *ppos)
2571{
8d65af78 2572 return do_proc_doulongvec_minmax(table, write, buffer,
1da177e4
LT
2573 lenp, ppos, HZ, 1000l);
2574}
2575
2576
00b7c339 2577static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
2578 int *valp,
2579 int write, void *data)
2580{
2581 if (write) {
cba9f33d
BS
2582 if (*lvalp > LONG_MAX / HZ)
2583 return 1;
1da177e4
LT
2584 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2585 } else {
2586 int val = *valp;
2587 unsigned long lval;
2588 if (val < 0) {
00b7c339 2589 *negp = true;
1da177e4
LT
2590 lval = (unsigned long)-val;
2591 } else {
00b7c339 2592 *negp = false;
1da177e4
LT
2593 lval = (unsigned long)val;
2594 }
2595 *lvalp = lval / HZ;
2596 }
2597 return 0;
2598}
2599
00b7c339 2600static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
1da177e4
LT
2601 int *valp,
2602 int write, void *data)
2603{
2604 if (write) {
cba9f33d
BS
2605 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2606 return 1;
1da177e4
LT
2607 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2608 } else {
2609 int val = *valp;
2610 unsigned long lval;
2611 if (val < 0) {
00b7c339 2612 *negp = true;
1da177e4
LT
2613 lval = (unsigned long)-val;
2614 } else {
00b7c339 2615 *negp = false;
1da177e4
LT
2616 lval = (unsigned long)val;
2617 }
2618 *lvalp = jiffies_to_clock_t(lval);
2619 }
2620 return 0;
2621}
2622
/*
 * Converter between milliseconds (user side) and jiffies stored in an
 * int.  Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	int val;
	unsigned long magnitude;

	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
		return 0;
	}

	/* Read side: split the stored value into sign and magnitude. */
	val = *valp;
	if (val < 0) {
		*negp = true;
		magnitude = (unsigned long)-val;
	} else {
		*negp = false;
		magnitude = (unsigned long)val;
	}
	*lvalp = jiffies_to_msecs(magnitude);
	return 0;
}
2643
2644/**
2645 * proc_dointvec_jiffies - read a vector of integers as seconds
2646 * @table: the sysctl table
2647 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2648 * @buffer: the user buffer
2649 * @lenp: the size of the user buffer
2650 * @ppos: file position
2651 *
2652 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2653 * values from/to the user buffer, treated as an ASCII string.
2654 * The values read are assumed to be in seconds, and are converted into
2655 * jiffies.
2656 *
2657 * Returns 0 on success.
2658 */
8d65af78 2659int proc_dointvec_jiffies(struct ctl_table *table, int write,
1da177e4
LT
2660 void __user *buffer, size_t *lenp, loff_t *ppos)
2661{
8d65af78 2662 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
2663 do_proc_dointvec_jiffies_conv,NULL);
2664}
2665
2666/**
2667 * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2668 * @table: the sysctl table
2669 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2670 * @buffer: the user buffer
2671 * @lenp: the size of the user buffer
1e5d5331 2672 * @ppos: pointer to the file position
1da177e4
LT
2673 *
2674 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2675 * values from/to the user buffer, treated as an ASCII string.
2676 * The values read are assumed to be in 1/USER_HZ seconds, and
2677 * are converted into jiffies.
2678 *
2679 * Returns 0 on success.
2680 */
8d65af78 2681int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1da177e4
LT
2682 void __user *buffer, size_t *lenp, loff_t *ppos)
2683{
8d65af78 2684 return do_proc_dointvec(table,write,buffer,lenp,ppos,
1da177e4
LT
2685 do_proc_dointvec_userhz_jiffies_conv,NULL);
2686}
2687
2688/**
2689 * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2690 * @table: the sysctl table
2691 * @write: %TRUE if this is a write to the sysctl file
1da177e4
LT
2692 * @buffer: the user buffer
2693 * @lenp: the size of the user buffer
67be2dd1
MW
2694 * @ppos: file position
2695 * @ppos: the current position in the file
1da177e4
LT
2696 *
2697 * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2698 * values from/to the user buffer, treated as an ASCII string.
2699 * The values read are assumed to be in 1/1000 seconds, and
2700 * are converted into jiffies.
2701 *
2702 * Returns 0 on success.
2703 */
8d65af78 2704int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1da177e4
LT
2705 void __user *buffer, size_t *lenp, loff_t *ppos)
2706{
8d65af78 2707 return do_proc_dointvec(table, write, buffer, lenp, ppos,
1da177e4
LT
2708 do_proc_dointvec_ms_jiffies_conv, NULL);
2709}
2710
8d65af78 2711static int proc_do_cad_pid(struct ctl_table *table, int write,
9ec52099
CLG
2712 void __user *buffer, size_t *lenp, loff_t *ppos)
2713{
2714 struct pid *new_pid;
2715 pid_t tmp;
2716 int r;
2717
6c5f3e7b 2718 tmp = pid_vnr(cad_pid);
9ec52099 2719
8d65af78 2720 r = __do_proc_dointvec(&tmp, table, write, buffer,
9ec52099
CLG
2721 lenp, ppos, NULL, NULL);
2722 if (r || !write)
2723 return r;
2724
2725 new_pid = find_get_pid(tmp);
2726 if (!new_pid)
2727 return -ESRCH;
2728
2729 put_pid(xchg(&cad_pid, new_pid));
2730 return 0;
2731}
2732
9f977fb7
OP
2733/**
2734 * proc_do_large_bitmap - read/write from/to a large bitmap
2735 * @table: the sysctl table
2736 * @write: %TRUE if this is a write to the sysctl file
2737 * @buffer: the user buffer
2738 * @lenp: the size of the user buffer
2739 * @ppos: file position
2740 *
2741 * The bitmap is stored at table->data and the bitmap length (in bits)
2742 * in table->maxlen.
2743 *
2744 * We use a range comma separated format (e.g. 1,3-4,10-10) so that
2745 * large bitmaps may be represented in a compact manner. Writing into
2746 * the file will clear the bitmap then update it with the given input.
2747 *
2748 * Returns 0 on success.
2749 */
2750int proc_do_large_bitmap(struct ctl_table *table, int write,
2751 void __user *buffer, size_t *lenp, loff_t *ppos)
2752{
2753 int err = 0;
2754 bool first = 1;
2755 size_t left = *lenp;
2756 unsigned long bitmap_len = table->maxlen;
2757 unsigned long *bitmap = (unsigned long *) table->data;
2758 unsigned long *tmp_bitmap = NULL;
2759 char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
2760
2761 if (!bitmap_len || !left || (*ppos && !write)) {
2762 *lenp = 0;
2763 return 0;
2764 }
2765
2766 if (write) {
2767 unsigned long page = 0;
2768 char *kbuf;
2769
2770 if (left > PAGE_SIZE - 1)
2771 left = PAGE_SIZE - 1;
2772
2773 page = __get_free_page(GFP_TEMPORARY);
2774 kbuf = (char *) page;
2775 if (!kbuf)
2776 return -ENOMEM;
2777 if (copy_from_user(kbuf, buffer, left)) {
2778 free_page(page);
2779 return -EFAULT;
2780 }
2781 kbuf[left] = 0;
2782
2783 tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
2784 GFP_KERNEL);
2785 if (!tmp_bitmap) {
2786 free_page(page);
2787 return -ENOMEM;
2788 }
2789 proc_skip_char(&kbuf, &left, '\n');
2790 while (!err && left) {
2791 unsigned long val_a, val_b;
2792 bool neg;
2793
2794 err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
2795 sizeof(tr_a), &c);
2796 if (err)
2797 break;
2798 if (val_a >= bitmap_len || neg) {
2799 err = -EINVAL;
2800 break;
2801 }
2802
2803 val_b = val_a;
2804 if (left) {
2805 kbuf++;
2806 left--;
2807 }
2808
2809 if (c == '-') {
2810 err = proc_get_long(&kbuf, &left, &val_b,
2811 &neg, tr_b, sizeof(tr_b),
2812 &c);
2813 if (err)
2814 break;
2815 if (val_b >= bitmap_len || neg ||
2816 val_a > val_b) {
2817 err = -EINVAL;
2818 break;
2819 }
2820 if (left) {
2821 kbuf++;
2822 left--;
2823 }
2824 }
2825
2826 while (val_a <= val_b)
2827 set_bit(val_a++, tmp_bitmap);
2828
2829 first = 0;
2830 proc_skip_char(&kbuf, &left, '\n');
2831 }
2832 free_page(page);
2833 } else {
2834 unsigned long bit_a, bit_b = 0;
2835
2836 while (left) {
2837 bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
2838 if (bit_a >= bitmap_len)
2839 break;
2840 bit_b = find_next_zero_bit(bitmap, bitmap_len,
2841 bit_a + 1) - 1;
2842
2843 if (!first) {
2844 err = proc_put_char(&buffer, &left, ',');
2845 if (err)
2846 break;
2847 }
2848 err = proc_put_long(&buffer, &left, bit_a, false);
2849 if (err)
2850 break;
2851 if (bit_a != bit_b) {
2852 err = proc_put_char(&buffer, &left, '-');
2853 if (err)
2854 break;
2855 err = proc_put_long(&buffer, &left, bit_b, false);
2856 if (err)
2857 break;
2858 }
2859
2860 first = 0; bit_b++;
2861 }
2862 if (!err)
2863 err = proc_put_char(&buffer, &left, '\n');
2864 }
2865
2866 if (!err) {
2867 if (write) {
2868 if (*ppos)
2869 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
2870 else
2871 memcpy(bitmap, tmp_bitmap,
2872 BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
2873 }
2874 kfree(tmp_bitmap);
2875 *lenp -= left;
2876 *ppos += *lenp;
2877 return 0;
2878 } else {
2879 kfree(tmp_bitmap);
2880 return err;
2881 }
2882}
2883
1da177e4
LT
2884#else /* CONFIG_PROC_FS */
2885
8d65af78 2886int proc_dostring(struct ctl_table *table, int write,
1da177e4
LT
2887 void __user *buffer, size_t *lenp, loff_t *ppos)
2888{
2889 return -ENOSYS;
2890}
2891
8d65af78 2892int proc_dointvec(struct ctl_table *table, int write,
1da177e4 2893 void __user *buffer, size_t *lenp, loff_t *ppos)
1da177e4
LT
2894{
2895 return -ENOSYS;
2896}
2897
8d65af78 2898int proc_dointvec_minmax(struct ctl_table *table, int write,
1da177e4
LT
2899 void __user *buffer, size_t *lenp, loff_t *ppos)
2900{
2901 return -ENOSYS;
2902}
2903
8d65af78 2904int proc_dointvec_jiffies(struct ctl_table *table, int write,
1da177e4
LT
2905 void __user *buffer, size_t *lenp, loff_t *ppos)
2906{
2907 return -ENOSYS;
2908}
2909
8d65af78 2910int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
1da177e4
LT
2911 void __user *buffer, size_t *lenp, loff_t *ppos)
2912{
2913 return -ENOSYS;
2914}
2915
8d65af78 2916int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
1da177e4
LT
2917 void __user *buffer, size_t *lenp, loff_t *ppos)
2918{
2919 return -ENOSYS;
2920}
2921
8d65af78 2922int proc_doulongvec_minmax(struct ctl_table *table, int write,
1da177e4
LT
2923 void __user *buffer, size_t *lenp, loff_t *ppos)
2924{
2925 return -ENOSYS;
2926}
2927
d8217f07 2928int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
1da177e4
LT
2929 void __user *buffer,
2930 size_t *lenp, loff_t *ppos)
2931{
2932 return -ENOSYS;
2933}
2934
2935
2936#endif /* CONFIG_PROC_FS */
2937
1da177e4
LT
/*
 * The handlers above are defined twice (with and without
 * CONFIG_PROC_FS), so their exports are collected here once instead of
 * following each definition.
 */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(register_sysctl_paths);
EXPORT_SYMBOL(unregister_sysctl_table);