7f6c1a3b3485f17029c8a239e64eefc46fdb2852
[linux-2.6-block.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_BSD_PROCESS_ACCT
82 #include <linux/acct.h>
83 #endif
84 #ifdef CONFIG_RT_MUTEXES
85 #include <linux/rtmutex.h>
86 #endif
87 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
88 #include <linux/lockdep.h>
89 #endif
90 #ifdef CONFIG_CHR_DEV_SG
91 #include <scsi/sg.h>
92 #endif
93 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
94 #include <linux/stackleak.h>
95 #endif
96 #ifdef CONFIG_LOCKUP_DETECTOR
97 #include <linux/nmi.h>
98 #endif
99
100 #if defined(CONFIG_SYSCTL)
101
102 /* External variables not in a header file. */
103 extern int suid_dumpable;
104 #ifdef CONFIG_COREDUMP
105 extern int core_uses_pid;
106 extern char core_pattern[];
107 extern unsigned int core_pipe_limit;
108 #endif
109 extern int pid_max;
110 extern int pid_max_min, pid_max_max;
111 extern int percpu_pagelist_fraction;
112 extern int latencytop_enabled;
113 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
114 #ifndef CONFIG_MMU
115 extern int sysctl_nr_trim_pages;
116 #endif
117
/*
 * Constants used for minimum and maximum.
 *
 * The tables in this file hand range limits to their handlers by address
 * through .extra1/.extra2, so every limit lives in a named variable here.
 * Variables that are only referenced under some configurations are either
 * wrapped in the matching #ifdef or marked __maybe_unused.
 */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
#endif

static int __maybe_unused neg_one = -1;

static int zero;        /* no initialiser: static storage starts out as 0 */
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
static int __maybe_unused four = 4;
static unsigned long one_ul = 1;
static int one_hundred = 100;
static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
#ifdef CONFIG_PERF_EVENTS
static int six_hundred_forty_kb = 640 * 1024;
#endif

/* This is needed for the proc_doulongvec_minmax of vm_dirty_bytes. */
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;

/*
 * This is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID.
 * minolduid has no initialiser and is therefore 0.
 */
static int maxolduid = 65535;
static int minolduid;

/*
 * Backing storage for the "ngroups_max" and "cap_last_cap" entries of
 * kern_table, both of which are registered with mode 0444.
 */
static int ngroups_max = NGROUPS_MAX;
static const int cap_last_cap = CAP_LAST_CAP;
148
149 /*
150  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
151  * and hung_task_check_interval_secs
152  */
153 #ifdef CONFIG_DETECT_HUNG_TASK
154 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
155 #endif
156
157 #ifdef CONFIG_INOTIFY_USER
158 #include <linux/inotify.h>
159 #endif
160 #ifdef CONFIG_SPARC
161 #endif
162
163 #ifdef __hppa__
164 extern int pwrsw_enabled;
165 #endif
166
167 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
168 extern int unaligned_enabled;
169 #endif
170
171 #ifdef CONFIG_IA64
172 extern int unaligned_dump_stack;
173 #endif
174
175 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
176 extern int no_unaligned_warning;
177 #endif
178
179 #ifdef CONFIG_PROC_SYSCTL
180
/**
 * enum sysctl_writes_mode - how the file position is treated on sysctl writes
 *
 * @SYSCTL_WRITES_LEGACY: every write syscall has to carry the complete sysctl
 *      value. Repeated writes on one file descriptor each replace the value,
 *      whatever the file position is, and a non-zero initial position is
 *      accepted without a warning.
 * @SYSCTL_WRITES_WARN: behaves like the legacy mode, except that a warning is
 *      issued when the initial file position is not 0.
 * @SYSCTL_WRITES_STRICT: numeric entries may only be written at file
 *      position 0, with the whole value inside the buffer handed to the write
 *      syscall. String entries honour the file position, limited to the
 *      maximum length of the buffer: anything past the maximum length is
 *      ignored, and successive writes append to the buffer.
 *
 * The selected mode decides how the current file position influences each
 * update of a sysctl value made through the proc interface.
 */
enum sysctl_writes_mode {
        SYSCTL_WRITES_LEGACY = -1,
        SYSCTL_WRITES_WARN   =  0,
        SYSCTL_WRITES_STRICT =  1,
};

/*
 * Backing variable of the "sysctl_writes_strict" entry in kern_table;
 * it starts out in strict mode.
 */
static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
207
208 static int proc_do_cad_pid(struct ctl_table *table, int write,
209                   void __user *buffer, size_t *lenp, loff_t *ppos);
210 static int proc_taint(struct ctl_table *table, int write,
211                                void __user *buffer, size_t *lenp, loff_t *ppos);
212 #endif
213
214 #ifdef CONFIG_PRINTK
215 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
216                                 void __user *buffer, size_t *lenp, loff_t *ppos);
217 #endif
218
219 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
220                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #ifdef CONFIG_COREDUMP
222 static int proc_dostring_coredump(struct ctl_table *table, int write,
223                 void __user *buffer, size_t *lenp, loff_t *ppos);
224 #endif
225 static int proc_dopipe_max_size(struct ctl_table *table, int write,
226                 void __user *buffer, size_t *lenp, loff_t *ppos);
227
228 #ifdef CONFIG_MAGIC_SYSRQ
229 /* Note: sysrq code uses its own private copy */
230 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
231
232 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
233                                 void __user *buffer, size_t *lenp,
234                                 loff_t *ppos)
235 {
236         int error;
237
238         error = proc_dointvec(table, write, buffer, lenp, ppos);
239         if (error)
240                 return error;
241
242         if (write)
243                 sysrq_toggle_support(__sysrq_enabled);
244
245         return 0;
246 }
247
248 #endif
249
250 static struct ctl_table kern_table[];
251 static struct ctl_table vm_table[];
252 static struct ctl_table fs_table[];
253 static struct ctl_table debug_table[];
254 static struct ctl_table dev_table[];
255 extern struct ctl_table random_table[];
256 #ifdef CONFIG_EPOLL
257 extern struct ctl_table epoll_table[];
258 #endif
259
260 #ifdef CONFIG_FW_LOADER_USER_HELPER
261 extern struct ctl_table firmware_config_table[];
262 #endif
263
264 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
265 int sysctl_legacy_va_layout;
266 #endif
267
268 /* The default sysctl tables: */
269
270 static struct ctl_table sysctl_base_table[] = {
271         {
272                 .procname       = "kernel",
273                 .mode           = 0555,
274                 .child          = kern_table,
275         },
276         {
277                 .procname       = "vm",
278                 .mode           = 0555,
279                 .child          = vm_table,
280         },
281         {
282                 .procname       = "fs",
283                 .mode           = 0555,
284                 .child          = fs_table,
285         },
286         {
287                 .procname       = "debug",
288                 .mode           = 0555,
289                 .child          = debug_table,
290         },
291         {
292                 .procname       = "dev",
293                 .mode           = 0555,
294                 .child          = dev_table,
295         },
296         { }
297 };
298
#ifdef CONFIG_SCHED_DEBUG
/*
 * Range limits for the scheduler entries of kern_table, passed by address
 * through .extra1/.extra2. The granularity pair bounds both
 * "sched_min_granularity_ns" and "sched_latency_ns"; the wakeup pair bounds
 * "sched_wakeup_granularity_ns".
 */
static int min_sched_granularity_ns = 100000;           /* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
static int min_wakeup_granularity_ns;                   /* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
#ifdef CONFIG_SMP
/* Limits for "sched_tunable_scaling": first enum value up to the last one. */
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */

#ifdef CONFIG_COMPACTION
/*
 * NOTE(review): the user of these two is outside the part of the file
 * reviewed here; presumably they bound an extfrag threshold sysctl - confirm.
 * min_extfrag_threshold has no initialiser and is therefore 0.
 */
static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000;
#endif
314
315 static struct ctl_table kern_table[] = {
316         {
317                 .procname       = "sched_child_runs_first",
318                 .data           = &sysctl_sched_child_runs_first,
319                 .maxlen         = sizeof(unsigned int),
320                 .mode           = 0644,
321                 .proc_handler   = proc_dointvec,
322         },
323 #ifdef CONFIG_SCHED_DEBUG
324         {
325                 .procname       = "sched_min_granularity_ns",
326                 .data           = &sysctl_sched_min_granularity,
327                 .maxlen         = sizeof(unsigned int),
328                 .mode           = 0644,
329                 .proc_handler   = sched_proc_update_handler,
330                 .extra1         = &min_sched_granularity_ns,
331                 .extra2         = &max_sched_granularity_ns,
332         },
333         {
334                 .procname       = "sched_latency_ns",
335                 .data           = &sysctl_sched_latency,
336                 .maxlen         = sizeof(unsigned int),
337                 .mode           = 0644,
338                 .proc_handler   = sched_proc_update_handler,
339                 .extra1         = &min_sched_granularity_ns,
340                 .extra2         = &max_sched_granularity_ns,
341         },
342         {
343                 .procname       = "sched_wakeup_granularity_ns",
344                 .data           = &sysctl_sched_wakeup_granularity,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = sched_proc_update_handler,
348                 .extra1         = &min_wakeup_granularity_ns,
349                 .extra2         = &max_wakeup_granularity_ns,
350         },
351 #ifdef CONFIG_SMP
352         {
353                 .procname       = "sched_tunable_scaling",
354                 .data           = &sysctl_sched_tunable_scaling,
355                 .maxlen         = sizeof(enum sched_tunable_scaling),
356                 .mode           = 0644,
357                 .proc_handler   = sched_proc_update_handler,
358                 .extra1         = &min_sched_tunable_scaling,
359                 .extra2         = &max_sched_tunable_scaling,
360         },
361         {
362                 .procname       = "sched_migration_cost_ns",
363                 .data           = &sysctl_sched_migration_cost,
364                 .maxlen         = sizeof(unsigned int),
365                 .mode           = 0644,
366                 .proc_handler   = proc_dointvec,
367         },
368         {
369                 .procname       = "sched_nr_migrate",
370                 .data           = &sysctl_sched_nr_migrate,
371                 .maxlen         = sizeof(unsigned int),
372                 .mode           = 0644,
373                 .proc_handler   = proc_dointvec,
374         },
375 #ifdef CONFIG_SCHEDSTATS
376         {
377                 .procname       = "sched_schedstats",
378                 .data           = NULL,
379                 .maxlen         = sizeof(unsigned int),
380                 .mode           = 0644,
381                 .proc_handler   = sysctl_schedstats,
382                 .extra1         = &zero,
383                 .extra2         = &one,
384         },
385 #endif /* CONFIG_SCHEDSTATS */
386 #endif /* CONFIG_SMP */
387 #ifdef CONFIG_NUMA_BALANCING
388         {
389                 .procname       = "numa_balancing_scan_delay_ms",
390                 .data           = &sysctl_numa_balancing_scan_delay,
391                 .maxlen         = sizeof(unsigned int),
392                 .mode           = 0644,
393                 .proc_handler   = proc_dointvec,
394         },
395         {
396                 .procname       = "numa_balancing_scan_period_min_ms",
397                 .data           = &sysctl_numa_balancing_scan_period_min,
398                 .maxlen         = sizeof(unsigned int),
399                 .mode           = 0644,
400                 .proc_handler   = proc_dointvec,
401         },
402         {
403                 .procname       = "numa_balancing_scan_period_max_ms",
404                 .data           = &sysctl_numa_balancing_scan_period_max,
405                 .maxlen         = sizeof(unsigned int),
406                 .mode           = 0644,
407                 .proc_handler   = proc_dointvec,
408         },
409         {
410                 .procname       = "numa_balancing_scan_size_mb",
411                 .data           = &sysctl_numa_balancing_scan_size,
412                 .maxlen         = sizeof(unsigned int),
413                 .mode           = 0644,
414                 .proc_handler   = proc_dointvec_minmax,
415                 .extra1         = &one,
416         },
417         {
418                 .procname       = "numa_balancing",
419                 .data           = NULL, /* filled in by handler */
420                 .maxlen         = sizeof(unsigned int),
421                 .mode           = 0644,
422                 .proc_handler   = sysctl_numa_balancing,
423                 .extra1         = &zero,
424                 .extra2         = &one,
425         },
426 #endif /* CONFIG_NUMA_BALANCING */
427 #endif /* CONFIG_SCHED_DEBUG */
428         {
429                 .procname       = "sched_rt_period_us",
430                 .data           = &sysctl_sched_rt_period,
431                 .maxlen         = sizeof(unsigned int),
432                 .mode           = 0644,
433                 .proc_handler   = sched_rt_handler,
434         },
435         {
436                 .procname       = "sched_rt_runtime_us",
437                 .data           = &sysctl_sched_rt_runtime,
438                 .maxlen         = sizeof(int),
439                 .mode           = 0644,
440                 .proc_handler   = sched_rt_handler,
441         },
442         {
443                 .procname       = "sched_rr_timeslice_ms",
444                 .data           = &sysctl_sched_rr_timeslice,
445                 .maxlen         = sizeof(int),
446                 .mode           = 0644,
447                 .proc_handler   = sched_rr_handler,
448         },
449 #ifdef CONFIG_SCHED_AUTOGROUP
450         {
451                 .procname       = "sched_autogroup_enabled",
452                 .data           = &sysctl_sched_autogroup_enabled,
453                 .maxlen         = sizeof(unsigned int),
454                 .mode           = 0644,
455                 .proc_handler   = proc_dointvec_minmax,
456                 .extra1         = &zero,
457                 .extra2         = &one,
458         },
459 #endif
460 #ifdef CONFIG_CFS_BANDWIDTH
461         {
462                 .procname       = "sched_cfs_bandwidth_slice_us",
463                 .data           = &sysctl_sched_cfs_bandwidth_slice,
464                 .maxlen         = sizeof(unsigned int),
465                 .mode           = 0644,
466                 .proc_handler   = proc_dointvec_minmax,
467                 .extra1         = &one,
468         },
469 #endif
470 #ifdef CONFIG_PROVE_LOCKING
471         {
472                 .procname       = "prove_locking",
473                 .data           = &prove_locking,
474                 .maxlen         = sizeof(int),
475                 .mode           = 0644,
476                 .proc_handler   = proc_dointvec,
477         },
478 #endif
479 #ifdef CONFIG_LOCK_STAT
480         {
481                 .procname       = "lock_stat",
482                 .data           = &lock_stat,
483                 .maxlen         = sizeof(int),
484                 .mode           = 0644,
485                 .proc_handler   = proc_dointvec,
486         },
487 #endif
488         {
489                 .procname       = "panic",
490                 .data           = &panic_timeout,
491                 .maxlen         = sizeof(int),
492                 .mode           = 0644,
493                 .proc_handler   = proc_dointvec,
494         },
495 #ifdef CONFIG_COREDUMP
496         {
497                 .procname       = "core_uses_pid",
498                 .data           = &core_uses_pid,
499                 .maxlen         = sizeof(int),
500                 .mode           = 0644,
501                 .proc_handler   = proc_dointvec,
502         },
503         {
504                 .procname       = "core_pattern",
505                 .data           = core_pattern,
506                 .maxlen         = CORENAME_MAX_SIZE,
507                 .mode           = 0644,
508                 .proc_handler   = proc_dostring_coredump,
509         },
510         {
511                 .procname       = "core_pipe_limit",
512                 .data           = &core_pipe_limit,
513                 .maxlen         = sizeof(unsigned int),
514                 .mode           = 0644,
515                 .proc_handler   = proc_dointvec,
516         },
517 #endif
518 #ifdef CONFIG_PROC_SYSCTL
519         {
520                 .procname       = "tainted",
521                 .maxlen         = sizeof(long),
522                 .mode           = 0644,
523                 .proc_handler   = proc_taint,
524         },
525         {
526                 .procname       = "sysctl_writes_strict",
527                 .data           = &sysctl_writes_strict,
528                 .maxlen         = sizeof(int),
529                 .mode           = 0644,
530                 .proc_handler   = proc_dointvec_minmax,
531                 .extra1         = &neg_one,
532                 .extra2         = &one,
533         },
534 #endif
535 #ifdef CONFIG_LATENCYTOP
536         {
537                 .procname       = "latencytop",
538                 .data           = &latencytop_enabled,
539                 .maxlen         = sizeof(int),
540                 .mode           = 0644,
541                 .proc_handler   = sysctl_latencytop,
542         },
543 #endif
544 #ifdef CONFIG_BLK_DEV_INITRD
545         {
546                 .procname       = "real-root-dev",
547                 .data           = &real_root_dev,
548                 .maxlen         = sizeof(int),
549                 .mode           = 0644,
550                 .proc_handler   = proc_dointvec,
551         },
552 #endif
553         {
554                 .procname       = "print-fatal-signals",
555                 .data           = &print_fatal_signals,
556                 .maxlen         = sizeof(int),
557                 .mode           = 0644,
558                 .proc_handler   = proc_dointvec,
559         },
560 #ifdef CONFIG_SPARC
561         {
562                 .procname       = "reboot-cmd",
563                 .data           = reboot_command,
564                 .maxlen         = 256,
565                 .mode           = 0644,
566                 .proc_handler   = proc_dostring,
567         },
568         {
569                 .procname       = "stop-a",
570                 .data           = &stop_a_enabled,
571                 .maxlen         = sizeof (int),
572                 .mode           = 0644,
573                 .proc_handler   = proc_dointvec,
574         },
575         {
576                 .procname       = "scons-poweroff",
577                 .data           = &scons_pwroff,
578                 .maxlen         = sizeof (int),
579                 .mode           = 0644,
580                 .proc_handler   = proc_dointvec,
581         },
582 #endif
583 #ifdef CONFIG_SPARC64
584         {
585                 .procname       = "tsb-ratio",
586                 .data           = &sysctl_tsb_ratio,
587                 .maxlen         = sizeof (int),
588                 .mode           = 0644,
589                 .proc_handler   = proc_dointvec,
590         },
591 #endif
592 #ifdef __hppa__
593         {
594                 .procname       = "soft-power",
595                 .data           = &pwrsw_enabled,
596                 .maxlen         = sizeof (int),
597                 .mode           = 0644,
598                 .proc_handler   = proc_dointvec,
599         },
600 #endif
601 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
602         {
603                 .procname       = "unaligned-trap",
604                 .data           = &unaligned_enabled,
605                 .maxlen         = sizeof (int),
606                 .mode           = 0644,
607                 .proc_handler   = proc_dointvec,
608         },
609 #endif
610         {
611                 .procname       = "ctrl-alt-del",
612                 .data           = &C_A_D,
613                 .maxlen         = sizeof(int),
614                 .mode           = 0644,
615                 .proc_handler   = proc_dointvec,
616         },
617 #ifdef CONFIG_FUNCTION_TRACER
618         {
619                 .procname       = "ftrace_enabled",
620                 .data           = &ftrace_enabled,
621                 .maxlen         = sizeof(int),
622                 .mode           = 0644,
623                 .proc_handler   = ftrace_enable_sysctl,
624         },
625 #endif
626 #ifdef CONFIG_STACK_TRACER
627         {
628                 .procname       = "stack_tracer_enabled",
629                 .data           = &stack_tracer_enabled,
630                 .maxlen         = sizeof(int),
631                 .mode           = 0644,
632                 .proc_handler   = stack_trace_sysctl,
633         },
634 #endif
635 #ifdef CONFIG_TRACING
636         {
637                 .procname       = "ftrace_dump_on_oops",
638                 .data           = &ftrace_dump_on_oops,
639                 .maxlen         = sizeof(int),
640                 .mode           = 0644,
641                 .proc_handler   = proc_dointvec,
642         },
643         {
644                 .procname       = "traceoff_on_warning",
645                 .data           = &__disable_trace_on_warning,
646                 .maxlen         = sizeof(__disable_trace_on_warning),
647                 .mode           = 0644,
648                 .proc_handler   = proc_dointvec,
649         },
650         {
651                 .procname       = "tracepoint_printk",
652                 .data           = &tracepoint_printk,
653                 .maxlen         = sizeof(tracepoint_printk),
654                 .mode           = 0644,
655                 .proc_handler   = tracepoint_printk_sysctl,
656         },
657 #endif
658 #ifdef CONFIG_KEXEC_CORE
659         {
660                 .procname       = "kexec_load_disabled",
661                 .data           = &kexec_load_disabled,
662                 .maxlen         = sizeof(int),
663                 .mode           = 0644,
664                 /* only handle a transition from default "0" to "1" */
665                 .proc_handler   = proc_dointvec_minmax,
666                 .extra1         = &one,
667                 .extra2         = &one,
668         },
669 #endif
670 #ifdef CONFIG_MODULES
671         {
672                 .procname       = "modprobe",
673                 .data           = &modprobe_path,
674                 .maxlen         = KMOD_PATH_LEN,
675                 .mode           = 0644,
676                 .proc_handler   = proc_dostring,
677         },
678         {
679                 .procname       = "modules_disabled",
680                 .data           = &modules_disabled,
681                 .maxlen         = sizeof(int),
682                 .mode           = 0644,
683                 /* only handle a transition from default "0" to "1" */
684                 .proc_handler   = proc_dointvec_minmax,
685                 .extra1         = &one,
686                 .extra2         = &one,
687         },
688 #endif
689 #ifdef CONFIG_UEVENT_HELPER
690         {
691                 .procname       = "hotplug",
692                 .data           = &uevent_helper,
693                 .maxlen         = UEVENT_HELPER_PATH_LEN,
694                 .mode           = 0644,
695                 .proc_handler   = proc_dostring,
696         },
697 #endif
698 #ifdef CONFIG_CHR_DEV_SG
699         {
700                 .procname       = "sg-big-buff",
701                 .data           = &sg_big_buff,
702                 .maxlen         = sizeof (int),
703                 .mode           = 0444,
704                 .proc_handler   = proc_dointvec,
705         },
706 #endif
707 #ifdef CONFIG_BSD_PROCESS_ACCT
708         {
709                 .procname       = "acct",
710                 .data           = &acct_parm,
711                 .maxlen         = 3*sizeof(int),
712                 .mode           = 0644,
713                 .proc_handler   = proc_dointvec,
714         },
715 #endif
716 #ifdef CONFIG_MAGIC_SYSRQ
717         {
718                 .procname       = "sysrq",
719                 .data           = &__sysrq_enabled,
720                 .maxlen         = sizeof (int),
721                 .mode           = 0644,
722                 .proc_handler   = sysrq_sysctl_handler,
723         },
724 #endif
725 #ifdef CONFIG_PROC_SYSCTL
726         {
727                 .procname       = "cad_pid",
728                 .data           = NULL,
729                 .maxlen         = sizeof (int),
730                 .mode           = 0600,
731                 .proc_handler   = proc_do_cad_pid,
732         },
733 #endif
734         {
735                 .procname       = "threads-max",
736                 .data           = NULL,
737                 .maxlen         = sizeof(int),
738                 .mode           = 0644,
739                 .proc_handler   = sysctl_max_threads,
740         },
741         {
742                 .procname       = "random",
743                 .mode           = 0555,
744                 .child          = random_table,
745         },
746         {
747                 .procname       = "usermodehelper",
748                 .mode           = 0555,
749                 .child          = usermodehelper_table,
750         },
751 #ifdef CONFIG_FW_LOADER_USER_HELPER
752         {
753                 .procname       = "firmware_config",
754                 .mode           = 0555,
755                 .child          = firmware_config_table,
756         },
757 #endif
758         {
759                 .procname       = "overflowuid",
760                 .data           = &overflowuid,
761                 .maxlen         = sizeof(int),
762                 .mode           = 0644,
763                 .proc_handler   = proc_dointvec_minmax,
764                 .extra1         = &minolduid,
765                 .extra2         = &maxolduid,
766         },
767         {
768                 .procname       = "overflowgid",
769                 .data           = &overflowgid,
770                 .maxlen         = sizeof(int),
771                 .mode           = 0644,
772                 .proc_handler   = proc_dointvec_minmax,
773                 .extra1         = &minolduid,
774                 .extra2         = &maxolduid,
775         },
776 #ifdef CONFIG_S390
777 #ifdef CONFIG_MATHEMU
778         {
779                 .procname       = "ieee_emulation_warnings",
780                 .data           = &sysctl_ieee_emulation_warnings,
781                 .maxlen         = sizeof(int),
782                 .mode           = 0644,
783                 .proc_handler   = proc_dointvec,
784         },
785 #endif
786         {
787                 .procname       = "userprocess_debug",
788                 .data           = &show_unhandled_signals,
789                 .maxlen         = sizeof(int),
790                 .mode           = 0644,
791                 .proc_handler   = proc_dointvec,
792         },
793 #endif
794         {
795                 .procname       = "pid_max",
796                 .data           = &pid_max,
797                 .maxlen         = sizeof (int),
798                 .mode           = 0644,
799                 .proc_handler   = proc_dointvec_minmax,
800                 .extra1         = &pid_max_min,
801                 .extra2         = &pid_max_max,
802         },
803         {
804                 .procname       = "panic_on_oops",
805                 .data           = &panic_on_oops,
806                 .maxlen         = sizeof(int),
807                 .mode           = 0644,
808                 .proc_handler   = proc_dointvec,
809         },
810 #if defined CONFIG_PRINTK
811         {
812                 .procname       = "printk",
813                 .data           = &console_loglevel,
814                 .maxlen         = 4*sizeof(int),
815                 .mode           = 0644,
816                 .proc_handler   = proc_dointvec,
817         },
818         {
819                 .procname       = "printk_ratelimit",
820                 .data           = &printk_ratelimit_state.interval,
821                 .maxlen         = sizeof(int),
822                 .mode           = 0644,
823                 .proc_handler   = proc_dointvec_jiffies,
824         },
825         {
826                 .procname       = "printk_ratelimit_burst",
827                 .data           = &printk_ratelimit_state.burst,
828                 .maxlen         = sizeof(int),
829                 .mode           = 0644,
830                 .proc_handler   = proc_dointvec,
831         },
832         {
833                 .procname       = "printk_delay",
834                 .data           = &printk_delay_msec,
835                 .maxlen         = sizeof(int),
836                 .mode           = 0644,
837                 .proc_handler   = proc_dointvec_minmax,
838                 .extra1         = &zero,
839                 .extra2         = &ten_thousand,
840         },
841         {
842                 .procname       = "printk_devkmsg",
843                 .data           = devkmsg_log_str,
844                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
845                 .mode           = 0644,
846                 .proc_handler   = devkmsg_sysctl_set_loglvl,
847         },
848         {
849                 .procname       = "dmesg_restrict",
850                 .data           = &dmesg_restrict,
851                 .maxlen         = sizeof(int),
852                 .mode           = 0644,
853                 .proc_handler   = proc_dointvec_minmax_sysadmin,
854                 .extra1         = &zero,
855                 .extra2         = &one,
856         },
857         {
858                 .procname       = "kptr_restrict",
859                 .data           = &kptr_restrict,
860                 .maxlen         = sizeof(int),
861                 .mode           = 0644,
862                 .proc_handler   = proc_dointvec_minmax_sysadmin,
863                 .extra1         = &zero,
864                 .extra2         = &two,
865         },
866 #endif
867         {
868                 .procname       = "ngroups_max",
869                 .data           = &ngroups_max,
870                 .maxlen         = sizeof (int),
871                 .mode           = 0444,
872                 .proc_handler   = proc_dointvec,
873         },
874         {
875                 .procname       = "cap_last_cap",
876                 .data           = (void *)&cap_last_cap,
877                 .maxlen         = sizeof(int),
878                 .mode           = 0444,
879                 .proc_handler   = proc_dointvec,
880         },
881 #if defined(CONFIG_LOCKUP_DETECTOR)
882         {
883                 .procname       = "watchdog",
884                 .data           = &watchdog_user_enabled,
885                 .maxlen         = sizeof(int),
886                 .mode           = 0644,
887                 .proc_handler   = proc_watchdog,
888                 .extra1         = &zero,
889                 .extra2         = &one,
890         },
891         {
892                 .procname       = "watchdog_thresh",
893                 .data           = &watchdog_thresh,
894                 .maxlen         = sizeof(int),
895                 .mode           = 0644,
896                 .proc_handler   = proc_watchdog_thresh,
897                 .extra1         = &zero,
898                 .extra2         = &sixty,
899         },
900         {
901                 .procname       = "nmi_watchdog",
902                 .data           = &nmi_watchdog_user_enabled,
903                 .maxlen         = sizeof(int),
904                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
905                 .proc_handler   = proc_nmi_watchdog,
906                 .extra1         = &zero,
907                 .extra2         = &one,
908         },
909         {
910                 .procname       = "watchdog_cpumask",
911                 .data           = &watchdog_cpumask_bits,
912                 .maxlen         = NR_CPUS,
913                 .mode           = 0644,
914                 .proc_handler   = proc_watchdog_cpumask,
915         },
916 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
917         {
918                 .procname       = "soft_watchdog",
919                 .data           = &soft_watchdog_user_enabled,
920                 .maxlen         = sizeof(int),
921                 .mode           = 0644,
922                 .proc_handler   = proc_soft_watchdog,
923                 .extra1         = &zero,
924                 .extra2         = &one,
925         },
926         {
927                 .procname       = "softlockup_panic",
928                 .data           = &softlockup_panic,
929                 .maxlen         = sizeof(int),
930                 .mode           = 0644,
931                 .proc_handler   = proc_dointvec_minmax,
932                 .extra1         = &zero,
933                 .extra2         = &one,
934         },
935 #ifdef CONFIG_SMP
936         {
937                 .procname       = "softlockup_all_cpu_backtrace",
938                 .data           = &sysctl_softlockup_all_cpu_backtrace,
939                 .maxlen         = sizeof(int),
940                 .mode           = 0644,
941                 .proc_handler   = proc_dointvec_minmax,
942                 .extra1         = &zero,
943                 .extra2         = &one,
944         },
945 #endif /* CONFIG_SMP */
946 #endif
947 #ifdef CONFIG_HARDLOCKUP_DETECTOR
948         {
949                 .procname       = "hardlockup_panic",
950                 .data           = &hardlockup_panic,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec_minmax,
954                 .extra1         = &zero,
955                 .extra2         = &one,
956         },
957 #ifdef CONFIG_SMP
958         {
959                 .procname       = "hardlockup_all_cpu_backtrace",
960                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
961                 .maxlen         = sizeof(int),
962                 .mode           = 0644,
963                 .proc_handler   = proc_dointvec_minmax,
964                 .extra1         = &zero,
965                 .extra2         = &one,
966         },
967 #endif /* CONFIG_SMP */
968 #endif
969 #endif
970
971 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
972         {
973                 .procname       = "unknown_nmi_panic",
974                 .data           = &unknown_nmi_panic,
975                 .maxlen         = sizeof (int),
976                 .mode           = 0644,
977                 .proc_handler   = proc_dointvec,
978         },
979 #endif
980 #if defined(CONFIG_X86)
981         {
982                 .procname       = "panic_on_unrecovered_nmi",
983                 .data           = &panic_on_unrecovered_nmi,
984                 .maxlen         = sizeof(int),
985                 .mode           = 0644,
986                 .proc_handler   = proc_dointvec,
987         },
988         {
989                 .procname       = "panic_on_io_nmi",
990                 .data           = &panic_on_io_nmi,
991                 .maxlen         = sizeof(int),
992                 .mode           = 0644,
993                 .proc_handler   = proc_dointvec,
994         },
995 #ifdef CONFIG_DEBUG_STACKOVERFLOW
996         {
997                 .procname       = "panic_on_stackoverflow",
998                 .data           = &sysctl_panic_on_stackoverflow,
999                 .maxlen         = sizeof(int),
1000                 .mode           = 0644,
1001                 .proc_handler   = proc_dointvec,
1002         },
1003 #endif
1004         {
1005                 .procname       = "bootloader_type",
1006                 .data           = &bootloader_type,
1007                 .maxlen         = sizeof (int),
1008                 .mode           = 0444,
1009                 .proc_handler   = proc_dointvec,
1010         },
1011         {
1012                 .procname       = "bootloader_version",
1013                 .data           = &bootloader_version,
1014                 .maxlen         = sizeof (int),
1015                 .mode           = 0444,
1016                 .proc_handler   = proc_dointvec,
1017         },
1018         {
1019                 .procname       = "io_delay_type",
1020                 .data           = &io_delay_type,
1021                 .maxlen         = sizeof(int),
1022                 .mode           = 0644,
1023                 .proc_handler   = proc_dointvec,
1024         },
1025 #endif
1026 #if defined(CONFIG_MMU)
1027         {
1028                 .procname       = "randomize_va_space",
1029                 .data           = &randomize_va_space,
1030                 .maxlen         = sizeof(int),
1031                 .mode           = 0644,
1032                 .proc_handler   = proc_dointvec,
1033         },
1034 #endif
1035 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1036         {
1037                 .procname       = "spin_retry",
1038                 .data           = &spin_retry,
1039                 .maxlen         = sizeof (int),
1040                 .mode           = 0644,
1041                 .proc_handler   = proc_dointvec,
1042         },
1043 #endif
1044 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1045         {
1046                 .procname       = "acpi_video_flags",
1047                 .data           = &acpi_realmode_flags,
1048                 .maxlen         = sizeof (unsigned long),
1049                 .mode           = 0644,
1050                 .proc_handler   = proc_doulongvec_minmax,
1051         },
1052 #endif
1053 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1054         {
1055                 .procname       = "ignore-unaligned-usertrap",
1056                 .data           = &no_unaligned_warning,
1057                 .maxlen         = sizeof (int),
1058                 .mode           = 0644,
1059                 .proc_handler   = proc_dointvec,
1060         },
1061 #endif
1062 #ifdef CONFIG_IA64
1063         {
1064                 .procname       = "unaligned-dump-stack",
1065                 .data           = &unaligned_dump_stack,
1066                 .maxlen         = sizeof (int),
1067                 .mode           = 0644,
1068                 .proc_handler   = proc_dointvec,
1069         },
1070 #endif
1071 #ifdef CONFIG_DETECT_HUNG_TASK
1072         {
1073                 .procname       = "hung_task_panic",
1074                 .data           = &sysctl_hung_task_panic,
1075                 .maxlen         = sizeof(int),
1076                 .mode           = 0644,
1077                 .proc_handler   = proc_dointvec_minmax,
1078                 .extra1         = &zero,
1079                 .extra2         = &one,
1080         },
1081         {
1082                 .procname       = "hung_task_check_count",
1083                 .data           = &sysctl_hung_task_check_count,
1084                 .maxlen         = sizeof(int),
1085                 .mode           = 0644,
1086                 .proc_handler   = proc_dointvec_minmax,
1087                 .extra1         = &zero,
1088         },
1089         {
1090                 .procname       = "hung_task_timeout_secs",
1091                 .data           = &sysctl_hung_task_timeout_secs,
1092                 .maxlen         = sizeof(unsigned long),
1093                 .mode           = 0644,
1094                 .proc_handler   = proc_dohung_task_timeout_secs,
1095                 .extra2         = &hung_task_timeout_max,
1096         },
1097         {
1098                 .procname       = "hung_task_check_interval_secs",
1099                 .data           = &sysctl_hung_task_check_interval_secs,
1100                 .maxlen         = sizeof(unsigned long),
1101                 .mode           = 0644,
1102                 .proc_handler   = proc_dohung_task_timeout_secs,
1103                 .extra2         = &hung_task_timeout_max,
1104         },
1105         {
1106                 .procname       = "hung_task_warnings",
1107                 .data           = &sysctl_hung_task_warnings,
1108                 .maxlen         = sizeof(int),
1109                 .mode           = 0644,
1110                 .proc_handler   = proc_dointvec_minmax,
1111                 .extra1         = &neg_one,
1112         },
1113 #endif
1114 #ifdef CONFIG_RT_MUTEXES
1115         {
1116                 .procname       = "max_lock_depth",
1117                 .data           = &max_lock_depth,
1118                 .maxlen         = sizeof(int),
1119                 .mode           = 0644,
1120                 .proc_handler   = proc_dointvec,
1121         },
1122 #endif
1123         {
1124                 .procname       = "poweroff_cmd",
1125                 .data           = &poweroff_cmd,
1126                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1127                 .mode           = 0644,
1128                 .proc_handler   = proc_dostring,
1129         },
1130 #ifdef CONFIG_KEYS
1131         {
1132                 .procname       = "keys",
1133                 .mode           = 0555,
1134                 .child          = key_sysctls,
1135         },
1136 #endif
1137 #ifdef CONFIG_PERF_EVENTS
1138         /*
1139          * User-space scripts rely on the existence of this file
1140          * as a feature check for perf_events being enabled.
1141          *
1142          * So it's an ABI, do not remove!
1143          */
1144         {
1145                 .procname       = "perf_event_paranoid",
1146                 .data           = &sysctl_perf_event_paranoid,
1147                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1148                 .mode           = 0644,
1149                 .proc_handler   = proc_dointvec,
1150         },
1151         {
1152                 .procname       = "perf_event_mlock_kb",
1153                 .data           = &sysctl_perf_event_mlock,
1154                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1155                 .mode           = 0644,
1156                 .proc_handler   = proc_dointvec,
1157         },
1158         {
1159                 .procname       = "perf_event_max_sample_rate",
1160                 .data           = &sysctl_perf_event_sample_rate,
1161                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1162                 .mode           = 0644,
1163                 .proc_handler   = perf_proc_update_handler,
1164                 .extra1         = &one,
1165         },
1166         {
1167                 .procname       = "perf_cpu_time_max_percent",
1168                 .data           = &sysctl_perf_cpu_time_max_percent,
1169                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1170                 .mode           = 0644,
1171                 .proc_handler   = perf_cpu_time_max_percent_handler,
1172                 .extra1         = &zero,
1173                 .extra2         = &one_hundred,
1174         },
1175         {
1176                 .procname       = "perf_event_max_stack",
1177                 .data           = &sysctl_perf_event_max_stack,
1178                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1179                 .mode           = 0644,
1180                 .proc_handler   = perf_event_max_stack_handler,
1181                 .extra1         = &zero,
1182                 .extra2         = &six_hundred_forty_kb,
1183         },
1184         {
1185                 .procname       = "perf_event_max_contexts_per_stack",
1186                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1187                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1188                 .mode           = 0644,
1189                 .proc_handler   = perf_event_max_stack_handler,
1190                 .extra1         = &zero,
1191                 .extra2         = &one_thousand,
1192         },
1193 #endif
1194         {
1195                 .procname       = "panic_on_warn",
1196                 .data           = &panic_on_warn,
1197                 .maxlen         = sizeof(int),
1198                 .mode           = 0644,
1199                 .proc_handler   = proc_dointvec_minmax,
1200                 .extra1         = &zero,
1201                 .extra2         = &one,
1202         },
1203 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1204         {
1205                 .procname       = "timer_migration",
1206                 .data           = &sysctl_timer_migration,
1207                 .maxlen         = sizeof(unsigned int),
1208                 .mode           = 0644,
1209                 .proc_handler   = timer_migration_handler,
1210                 .extra1         = &zero,
1211                 .extra2         = &one,
1212         },
1213 #endif
1214 #ifdef CONFIG_BPF_SYSCALL
1215         {
1216                 .procname       = "unprivileged_bpf_disabled",
1217                 .data           = &sysctl_unprivileged_bpf_disabled,
1218                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1219                 .mode           = 0644,
1220                 /* only handle a transition from default "0" to "1" */
1221                 .proc_handler   = proc_dointvec_minmax,
1222                 .extra1         = &one,
1223                 .extra2         = &one,
1224         },
1225 #endif
1226 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1227         {
1228                 .procname       = "panic_on_rcu_stall",
1229                 .data           = &sysctl_panic_on_rcu_stall,
1230                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1231                 .mode           = 0644,
1232                 .proc_handler   = proc_dointvec_minmax,
1233                 .extra1         = &zero,
1234                 .extra2         = &one,
1235         },
1236 #endif
1237 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1238         {
1239                 .procname       = "stack_erasing",
1240                 .data           = NULL,
1241                 .maxlen         = sizeof(int),
1242                 .mode           = 0600,
1243                 .proc_handler   = stack_erasing_sysctl,
1244                 .extra1         = &zero,
1245                 .extra2         = &one,
1246         },
1247 #endif
1248         { }
1249 };
1250
1251 static struct ctl_table vm_table[] = {
1252         {
1253                 .procname       = "overcommit_memory",
1254                 .data           = &sysctl_overcommit_memory,
1255                 .maxlen         = sizeof(sysctl_overcommit_memory),
1256                 .mode           = 0644,
1257                 .proc_handler   = proc_dointvec_minmax,
1258                 .extra1         = &zero,
1259                 .extra2         = &two,
1260         },
1261         {
1262                 .procname       = "panic_on_oom",
1263                 .data           = &sysctl_panic_on_oom,
1264                 .maxlen         = sizeof(sysctl_panic_on_oom),
1265                 .mode           = 0644,
1266                 .proc_handler   = proc_dointvec_minmax,
1267                 .extra1         = &zero,
1268                 .extra2         = &two,
1269         },
1270         {
1271                 .procname       = "oom_kill_allocating_task",
1272                 .data           = &sysctl_oom_kill_allocating_task,
1273                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1274                 .mode           = 0644,
1275                 .proc_handler   = proc_dointvec,
1276         },
1277         {
1278                 .procname       = "oom_dump_tasks",
1279                 .data           = &sysctl_oom_dump_tasks,
1280                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1281                 .mode           = 0644,
1282                 .proc_handler   = proc_dointvec,
1283         },
1284         {
1285                 .procname       = "overcommit_ratio",
1286                 .data           = &sysctl_overcommit_ratio,
1287                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1288                 .mode           = 0644,
1289                 .proc_handler   = overcommit_ratio_handler,
1290         },
1291         {
1292                 .procname       = "overcommit_kbytes",
1293                 .data           = &sysctl_overcommit_kbytes,
1294                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1295                 .mode           = 0644,
1296                 .proc_handler   = overcommit_kbytes_handler,
1297         },
1298         {
1299                 .procname       = "page-cluster", 
1300                 .data           = &page_cluster,
1301                 .maxlen         = sizeof(int),
1302                 .mode           = 0644,
1303                 .proc_handler   = proc_dointvec_minmax,
1304                 .extra1         = &zero,
1305         },
1306         {
1307                 .procname       = "dirty_background_ratio",
1308                 .data           = &dirty_background_ratio,
1309                 .maxlen         = sizeof(dirty_background_ratio),
1310                 .mode           = 0644,
1311                 .proc_handler   = dirty_background_ratio_handler,
1312                 .extra1         = &zero,
1313                 .extra2         = &one_hundred,
1314         },
1315         {
1316                 .procname       = "dirty_background_bytes",
1317                 .data           = &dirty_background_bytes,
1318                 .maxlen         = sizeof(dirty_background_bytes),
1319                 .mode           = 0644,
1320                 .proc_handler   = dirty_background_bytes_handler,
1321                 .extra1         = &one_ul,
1322         },
1323         {
1324                 .procname       = "dirty_ratio",
1325                 .data           = &vm_dirty_ratio,
1326                 .maxlen         = sizeof(vm_dirty_ratio),
1327                 .mode           = 0644,
1328                 .proc_handler   = dirty_ratio_handler,
1329                 .extra1         = &zero,
1330                 .extra2         = &one_hundred,
1331         },
1332         {
1333                 .procname       = "dirty_bytes",
1334                 .data           = &vm_dirty_bytes,
1335                 .maxlen         = sizeof(vm_dirty_bytes),
1336                 .mode           = 0644,
1337                 .proc_handler   = dirty_bytes_handler,
1338                 .extra1         = &dirty_bytes_min,
1339         },
1340         {
1341                 .procname       = "dirty_writeback_centisecs",
1342                 .data           = &dirty_writeback_interval,
1343                 .maxlen         = sizeof(dirty_writeback_interval),
1344                 .mode           = 0644,
1345                 .proc_handler   = dirty_writeback_centisecs_handler,
1346         },
1347         {
1348                 .procname       = "dirty_expire_centisecs",
1349                 .data           = &dirty_expire_interval,
1350                 .maxlen         = sizeof(dirty_expire_interval),
1351                 .mode           = 0644,
1352                 .proc_handler   = proc_dointvec_minmax,
1353                 .extra1         = &zero,
1354         },
1355         {
1356                 .procname       = "dirtytime_expire_seconds",
1357                 .data           = &dirtytime_expire_interval,
1358                 .maxlen         = sizeof(dirtytime_expire_interval),
1359                 .mode           = 0644,
1360                 .proc_handler   = dirtytime_interval_handler,
1361                 .extra1         = &zero,
1362         },
1363         {
1364                 .procname       = "swappiness",
1365                 .data           = &vm_swappiness,
1366                 .maxlen         = sizeof(vm_swappiness),
1367                 .mode           = 0644,
1368                 .proc_handler   = proc_dointvec_minmax,
1369                 .extra1         = &zero,
1370                 .extra2         = &one_hundred,
1371         },
1372 #ifdef CONFIG_HUGETLB_PAGE
1373         {
1374                 .procname       = "nr_hugepages",
1375                 .data           = NULL,
1376                 .maxlen         = sizeof(unsigned long),
1377                 .mode           = 0644,
1378                 .proc_handler   = hugetlb_sysctl_handler,
1379         },
1380 #ifdef CONFIG_NUMA
1381         {
1382                 .procname       = "nr_hugepages_mempolicy",
1383                 .data           = NULL,
1384                 .maxlen         = sizeof(unsigned long),
1385                 .mode           = 0644,
1386                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1387         },
1388         {
1389                 .procname               = "numa_stat",
1390                 .data                   = &sysctl_vm_numa_stat,
1391                 .maxlen                 = sizeof(int),
1392                 .mode                   = 0644,
1393                 .proc_handler   = sysctl_vm_numa_stat_handler,
1394                 .extra1                 = &zero,
1395                 .extra2                 = &one,
1396         },
1397 #endif
1398          {
1399                 .procname       = "hugetlb_shm_group",
1400                 .data           = &sysctl_hugetlb_shm_group,
1401                 .maxlen         = sizeof(gid_t),
1402                 .mode           = 0644,
1403                 .proc_handler   = proc_dointvec,
1404          },
1405         {
1406                 .procname       = "nr_overcommit_hugepages",
1407                 .data           = NULL,
1408                 .maxlen         = sizeof(unsigned long),
1409                 .mode           = 0644,
1410                 .proc_handler   = hugetlb_overcommit_handler,
1411         },
1412 #endif
1413         {
1414                 .procname       = "lowmem_reserve_ratio",
1415                 .data           = &sysctl_lowmem_reserve_ratio,
1416                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1417                 .mode           = 0644,
1418                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1419         },
1420         {
1421                 .procname       = "drop_caches",
1422                 .data           = &sysctl_drop_caches,
1423                 .maxlen         = sizeof(int),
1424                 .mode           = 0644,
1425                 .proc_handler   = drop_caches_sysctl_handler,
1426                 .extra1         = &one,
1427                 .extra2         = &four,
1428         },
1429 #ifdef CONFIG_COMPACTION
1430         {
1431                 .procname       = "compact_memory",
1432                 .data           = &sysctl_compact_memory,
1433                 .maxlen         = sizeof(int),
1434                 .mode           = 0200,
1435                 .proc_handler   = sysctl_compaction_handler,
1436         },
1437         {
1438                 .procname       = "extfrag_threshold",
1439                 .data           = &sysctl_extfrag_threshold,
1440                 .maxlen         = sizeof(int),
1441                 .mode           = 0644,
1442                 .proc_handler   = sysctl_extfrag_handler,
1443                 .extra1         = &min_extfrag_threshold,
1444                 .extra2         = &max_extfrag_threshold,
1445         },
1446         {
1447                 .procname       = "compact_unevictable_allowed",
1448                 .data           = &sysctl_compact_unevictable_allowed,
1449                 .maxlen         = sizeof(int),
1450                 .mode           = 0644,
1451                 .proc_handler   = proc_dointvec,
1452                 .extra1         = &zero,
1453                 .extra2         = &one,
1454         },
1455
1456 #endif /* CONFIG_COMPACTION */
1457         {
1458                 .procname       = "min_free_kbytes",
1459                 .data           = &min_free_kbytes,
1460                 .maxlen         = sizeof(min_free_kbytes),
1461                 .mode           = 0644,
1462                 .proc_handler   = min_free_kbytes_sysctl_handler,
1463                 .extra1         = &zero,
1464         },
1465         {
1466                 .procname       = "watermark_boost_factor",
1467                 .data           = &watermark_boost_factor,
1468                 .maxlen         = sizeof(watermark_boost_factor),
1469                 .mode           = 0644,
1470                 .proc_handler   = watermark_boost_factor_sysctl_handler,
1471                 .extra1         = &zero,
1472         },
1473         {
1474                 .procname       = "watermark_scale_factor",
1475                 .data           = &watermark_scale_factor,
1476                 .maxlen         = sizeof(watermark_scale_factor),
1477                 .mode           = 0644,
1478                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1479                 .extra1         = &one,
1480                 .extra2         = &one_thousand,
1481         },
1482         {
1483                 .procname       = "percpu_pagelist_fraction",
1484                 .data           = &percpu_pagelist_fraction,
1485                 .maxlen         = sizeof(percpu_pagelist_fraction),
1486                 .mode           = 0644,
1487                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1488                 .extra1         = &zero,
1489         },
1490 #ifdef CONFIG_MMU
1491         {
1492                 .procname       = "max_map_count",
1493                 .data           = &sysctl_max_map_count,
1494                 .maxlen         = sizeof(sysctl_max_map_count),
1495                 .mode           = 0644,
1496                 .proc_handler   = proc_dointvec_minmax,
1497                 .extra1         = &zero,
1498         },
1499 #else
1500         {
1501                 .procname       = "nr_trim_pages",
1502                 .data           = &sysctl_nr_trim_pages,
1503                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1504                 .mode           = 0644,
1505                 .proc_handler   = proc_dointvec_minmax,
1506                 .extra1         = &zero,
1507         },
1508 #endif
1509         {
1510                 .procname       = "laptop_mode",
1511                 .data           = &laptop_mode,
1512                 .maxlen         = sizeof(laptop_mode),
1513                 .mode           = 0644,
1514                 .proc_handler   = proc_dointvec_jiffies,
1515         },
1516         {
1517                 .procname       = "block_dump",
1518                 .data           = &block_dump,
1519                 .maxlen         = sizeof(block_dump),
1520                 .mode           = 0644,
1521                 .proc_handler   = proc_dointvec,
1522                 .extra1         = &zero,
1523         },
1524         {
1525                 .procname       = "vfs_cache_pressure",
1526                 .data           = &sysctl_vfs_cache_pressure,
1527                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1528                 .mode           = 0644,
1529                 .proc_handler   = proc_dointvec,
1530                 .extra1         = &zero,
1531         },
1532 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1533         {
1534                 .procname       = "legacy_va_layout",
1535                 .data           = &sysctl_legacy_va_layout,
1536                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1537                 .mode           = 0644,
1538                 .proc_handler   = proc_dointvec,
1539                 .extra1         = &zero,
1540         },
1541 #endif
1542 #ifdef CONFIG_NUMA
1543         {
1544                 .procname       = "zone_reclaim_mode",
1545                 .data           = &node_reclaim_mode,
1546                 .maxlen         = sizeof(node_reclaim_mode),
1547                 .mode           = 0644,
1548                 .proc_handler   = proc_dointvec,
1549                 .extra1         = &zero,
1550         },
1551         {
1552                 .procname       = "min_unmapped_ratio",
1553                 .data           = &sysctl_min_unmapped_ratio,
1554                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1555                 .mode           = 0644,
1556                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1557                 .extra1         = &zero,
1558                 .extra2         = &one_hundred,
1559         },
1560         {
1561                 .procname       = "min_slab_ratio",
1562                 .data           = &sysctl_min_slab_ratio,
1563                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1564                 .mode           = 0644,
1565                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1566                 .extra1         = &zero,
1567                 .extra2         = &one_hundred,
1568         },
1569 #endif
1570 #ifdef CONFIG_SMP
1571         {
1572                 .procname       = "stat_interval",
1573                 .data           = &sysctl_stat_interval,
1574                 .maxlen         = sizeof(sysctl_stat_interval),
1575                 .mode           = 0644,
1576                 .proc_handler   = proc_dointvec_jiffies,
1577         },
1578         {
1579                 .procname       = "stat_refresh",
1580                 .data           = NULL,
1581                 .maxlen         = 0,
1582                 .mode           = 0600,
1583                 .proc_handler   = vmstat_refresh,
1584         },
1585 #endif
1586 #ifdef CONFIG_MMU
1587         {
1588                 .procname       = "mmap_min_addr",
1589                 .data           = &dac_mmap_min_addr,
1590                 .maxlen         = sizeof(unsigned long),
1591                 .mode           = 0644,
1592                 .proc_handler   = mmap_min_addr_handler,
1593         },
1594 #endif
1595 #ifdef CONFIG_NUMA
1596         {
1597                 .procname       = "numa_zonelist_order",
1598                 .data           = &numa_zonelist_order,
1599                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1600                 .mode           = 0644,
1601                 .proc_handler   = numa_zonelist_order_handler,
1602         },
1603 #endif
1604 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1605    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1606         {
1607                 .procname       = "vdso_enabled",
1608 #ifdef CONFIG_X86_32
1609                 .data           = &vdso32_enabled,
1610                 .maxlen         = sizeof(vdso32_enabled),
1611 #else
1612                 .data           = &vdso_enabled,
1613                 .maxlen         = sizeof(vdso_enabled),
1614 #endif
1615                 .mode           = 0644,
1616                 .proc_handler   = proc_dointvec,
1617                 .extra1         = &zero,
1618         },
1619 #endif
1620 #ifdef CONFIG_HIGHMEM
1621         {
1622                 .procname       = "highmem_is_dirtyable",
1623                 .data           = &vm_highmem_is_dirtyable,
1624                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1625                 .mode           = 0644,
1626                 .proc_handler   = proc_dointvec_minmax,
1627                 .extra1         = &zero,
1628                 .extra2         = &one,
1629         },
1630 #endif
1631 #ifdef CONFIG_MEMORY_FAILURE
1632         {
1633                 .procname       = "memory_failure_early_kill",
1634                 .data           = &sysctl_memory_failure_early_kill,
1635                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1636                 .mode           = 0644,
1637                 .proc_handler   = proc_dointvec_minmax,
1638                 .extra1         = &zero,
1639                 .extra2         = &one,
1640         },
1641         {
1642                 .procname       = "memory_failure_recovery",
1643                 .data           = &sysctl_memory_failure_recovery,
1644                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1645                 .mode           = 0644,
1646                 .proc_handler   = proc_dointvec_minmax,
1647                 .extra1         = &zero,
1648                 .extra2         = &one,
1649         },
1650 #endif
1651         {
1652                 .procname       = "user_reserve_kbytes",
1653                 .data           = &sysctl_user_reserve_kbytes,
1654                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1655                 .mode           = 0644,
1656                 .proc_handler   = proc_doulongvec_minmax,
1657         },
1658         {
1659                 .procname       = "admin_reserve_kbytes",
1660                 .data           = &sysctl_admin_reserve_kbytes,
1661                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1662                 .mode           = 0644,
1663                 .proc_handler   = proc_doulongvec_minmax,
1664         },
1665 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1666         {
1667                 .procname       = "mmap_rnd_bits",
1668                 .data           = &mmap_rnd_bits,
1669                 .maxlen         = sizeof(mmap_rnd_bits),
1670                 .mode           = 0600,
1671                 .proc_handler   = proc_dointvec_minmax,
1672                 .extra1         = (void *)&mmap_rnd_bits_min,
1673                 .extra2         = (void *)&mmap_rnd_bits_max,
1674         },
1675 #endif
1676 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1677         {
1678                 .procname       = "mmap_rnd_compat_bits",
1679                 .data           = &mmap_rnd_compat_bits,
1680                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1681                 .mode           = 0600,
1682                 .proc_handler   = proc_dointvec_minmax,
1683                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1684                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1685         },
1686 #endif
1687         { }
1688 };
1689
1690 static struct ctl_table fs_table[] = {
1691         {
1692                 .procname       = "inode-nr",
1693                 .data           = &inodes_stat,
1694                 .maxlen         = 2*sizeof(long),
1695                 .mode           = 0444,
1696                 .proc_handler   = proc_nr_inodes,
1697         },
1698         {
1699                 .procname       = "inode-state",
1700                 .data           = &inodes_stat,
1701                 .maxlen         = 7*sizeof(long),
1702                 .mode           = 0444,
1703                 .proc_handler   = proc_nr_inodes,
1704         },
1705         {
1706                 .procname       = "file-nr",
1707                 .data           = &files_stat,
1708                 .maxlen         = sizeof(files_stat),
1709                 .mode           = 0444,
1710                 .proc_handler   = proc_nr_files,
1711         },
1712         {
1713                 .procname       = "file-max",
1714                 .data           = &files_stat.max_files,
1715                 .maxlen         = sizeof(files_stat.max_files),
1716                 .mode           = 0644,
1717                 .proc_handler   = proc_doulongvec_minmax,
1718         },
1719         {
1720                 .procname       = "nr_open",
1721                 .data           = &sysctl_nr_open,
1722                 .maxlen         = sizeof(unsigned int),
1723                 .mode           = 0644,
1724                 .proc_handler   = proc_dointvec_minmax,
1725                 .extra1         = &sysctl_nr_open_min,
1726                 .extra2         = &sysctl_nr_open_max,
1727         },
1728         {
1729                 .procname       = "dentry-state",
1730                 .data           = &dentry_stat,
1731                 .maxlen         = 6*sizeof(long),
1732                 .mode           = 0444,
1733                 .proc_handler   = proc_nr_dentry,
1734         },
1735         {
1736                 .procname       = "overflowuid",
1737                 .data           = &fs_overflowuid,
1738                 .maxlen         = sizeof(int),
1739                 .mode           = 0644,
1740                 .proc_handler   = proc_dointvec_minmax,
1741                 .extra1         = &minolduid,
1742                 .extra2         = &maxolduid,
1743         },
1744         {
1745                 .procname       = "overflowgid",
1746                 .data           = &fs_overflowgid,
1747                 .maxlen         = sizeof(int),
1748                 .mode           = 0644,
1749                 .proc_handler   = proc_dointvec_minmax,
1750                 .extra1         = &minolduid,
1751                 .extra2         = &maxolduid,
1752         },
1753 #ifdef CONFIG_FILE_LOCKING
1754         {
1755                 .procname       = "leases-enable",
1756                 .data           = &leases_enable,
1757                 .maxlen         = sizeof(int),
1758                 .mode           = 0644,
1759                 .proc_handler   = proc_dointvec,
1760         },
1761 #endif
1762 #ifdef CONFIG_DNOTIFY
1763         {
1764                 .procname       = "dir-notify-enable",
1765                 .data           = &dir_notify_enable,
1766                 .maxlen         = sizeof(int),
1767                 .mode           = 0644,
1768                 .proc_handler   = proc_dointvec,
1769         },
1770 #endif
1771 #ifdef CONFIG_MMU
1772 #ifdef CONFIG_FILE_LOCKING
1773         {
1774                 .procname       = "lease-break-time",
1775                 .data           = &lease_break_time,
1776                 .maxlen         = sizeof(int),
1777                 .mode           = 0644,
1778                 .proc_handler   = proc_dointvec,
1779         },
1780 #endif
1781 #ifdef CONFIG_AIO
1782         {
1783                 .procname       = "aio-nr",
1784                 .data           = &aio_nr,
1785                 .maxlen         = sizeof(aio_nr),
1786                 .mode           = 0444,
1787                 .proc_handler   = proc_doulongvec_minmax,
1788         },
1789         {
1790                 .procname       = "aio-max-nr",
1791                 .data           = &aio_max_nr,
1792                 .maxlen         = sizeof(aio_max_nr),
1793                 .mode           = 0644,
1794                 .proc_handler   = proc_doulongvec_minmax,
1795         },
1796 #endif /* CONFIG_AIO */
1797 #ifdef CONFIG_INOTIFY_USER
1798         {
1799                 .procname       = "inotify",
1800                 .mode           = 0555,
1801                 .child          = inotify_table,
1802         },
1803 #endif  
1804 #ifdef CONFIG_EPOLL
1805         {
1806                 .procname       = "epoll",
1807                 .mode           = 0555,
1808                 .child          = epoll_table,
1809         },
1810 #endif
1811 #endif
1812         {
1813                 .procname       = "protected_symlinks",
1814                 .data           = &sysctl_protected_symlinks,
1815                 .maxlen         = sizeof(int),
1816                 .mode           = 0600,
1817                 .proc_handler   = proc_dointvec_minmax,
1818                 .extra1         = &zero,
1819                 .extra2         = &one,
1820         },
1821         {
1822                 .procname       = "protected_hardlinks",
1823                 .data           = &sysctl_protected_hardlinks,
1824                 .maxlen         = sizeof(int),
1825                 .mode           = 0600,
1826                 .proc_handler   = proc_dointvec_minmax,
1827                 .extra1         = &zero,
1828                 .extra2         = &one,
1829         },
1830         {
1831                 .procname       = "protected_fifos",
1832                 .data           = &sysctl_protected_fifos,
1833                 .maxlen         = sizeof(int),
1834                 .mode           = 0600,
1835                 .proc_handler   = proc_dointvec_minmax,
1836                 .extra1         = &zero,
1837                 .extra2         = &two,
1838         },
1839         {
1840                 .procname       = "protected_regular",
1841                 .data           = &sysctl_protected_regular,
1842                 .maxlen         = sizeof(int),
1843                 .mode           = 0600,
1844                 .proc_handler   = proc_dointvec_minmax,
1845                 .extra1         = &zero,
1846                 .extra2         = &two,
1847         },
1848         {
1849                 .procname       = "suid_dumpable",
1850                 .data           = &suid_dumpable,
1851                 .maxlen         = sizeof(int),
1852                 .mode           = 0644,
1853                 .proc_handler   = proc_dointvec_minmax_coredump,
1854                 .extra1         = &zero,
1855                 .extra2         = &two,
1856         },
1857 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1858         {
1859                 .procname       = "binfmt_misc",
1860                 .mode           = 0555,
1861                 .child          = sysctl_mount_point,
1862         },
1863 #endif
1864         {
1865                 .procname       = "pipe-max-size",
1866                 .data           = &pipe_max_size,
1867                 .maxlen         = sizeof(pipe_max_size),
1868                 .mode           = 0644,
1869                 .proc_handler   = proc_dopipe_max_size,
1870         },
1871         {
1872                 .procname       = "pipe-user-pages-hard",
1873                 .data           = &pipe_user_pages_hard,
1874                 .maxlen         = sizeof(pipe_user_pages_hard),
1875                 .mode           = 0644,
1876                 .proc_handler   = proc_doulongvec_minmax,
1877         },
1878         {
1879                 .procname       = "pipe-user-pages-soft",
1880                 .data           = &pipe_user_pages_soft,
1881                 .maxlen         = sizeof(pipe_user_pages_soft),
1882                 .mode           = 0644,
1883                 .proc_handler   = proc_doulongvec_minmax,
1884         },
1885         {
1886                 .procname       = "mount-max",
1887                 .data           = &sysctl_mount_max,
1888                 .maxlen         = sizeof(unsigned int),
1889                 .mode           = 0644,
1890                 .proc_handler   = proc_dointvec_minmax,
1891                 .extra1         = &one,
1892         },
1893         { }
1894 };
1895
1896 static struct ctl_table debug_table[] = {
1897 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1898         {
1899                 .procname       = "exception-trace",
1900                 .data           = &show_unhandled_signals,
1901                 .maxlen         = sizeof(int),
1902                 .mode           = 0644,
1903                 .proc_handler   = proc_dointvec
1904         },
1905 #endif
1906 #if defined(CONFIG_OPTPROBES)
1907         {
1908                 .procname       = "kprobes-optimization",
1909                 .data           = &sysctl_kprobes_optimization,
1910                 .maxlen         = sizeof(int),
1911                 .mode           = 0644,
1912                 .proc_handler   = proc_kprobes_optimization_handler,
1913                 .extra1         = &zero,
1914                 .extra2         = &one,
1915         },
1916 #endif
1917         { }
1918 };
1919
1920 static struct ctl_table dev_table[] = {
1921         { }
1922 };
1923
1924 int __init sysctl_init(void)
1925 {
1926         struct ctl_table_header *hdr;
1927
1928         hdr = register_sysctl_table(sysctl_base_table);
1929         kmemleak_not_leak(hdr);
1930         return 0;
1931 }
1932
1933 #endif /* CONFIG_SYSCTL */
1934
1935 /*
1936  * /proc/sys support
1937  */
1938
1939 #ifdef CONFIG_PROC_SYSCTL
1940
1941 static int _proc_do_string(char *data, int maxlen, int write,
1942                            char __user *buffer,
1943                            size_t *lenp, loff_t *ppos)
1944 {
1945         size_t len;
1946         char __user *p;
1947         char c;
1948
1949         if (!data || !maxlen || !*lenp) {
1950                 *lenp = 0;
1951                 return 0;
1952         }
1953
1954         if (write) {
1955                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1956                         /* Only continue writes not past the end of buffer. */
1957                         len = strlen(data);
1958                         if (len > maxlen - 1)
1959                                 len = maxlen - 1;
1960
1961                         if (*ppos > len)
1962                                 return 0;
1963                         len = *ppos;
1964                 } else {
1965                         /* Start writing from beginning of buffer. */
1966                         len = 0;
1967                 }
1968
1969                 *ppos += *lenp;
1970                 p = buffer;
1971                 while ((p - buffer) < *lenp && len < maxlen - 1) {
1972                         if (get_user(c, p++))
1973                                 return -EFAULT;
1974                         if (c == 0 || c == '\n')
1975                                 break;
1976                         data[len++] = c;
1977                 }
1978                 data[len] = 0;
1979         } else {
1980                 len = strlen(data);
1981                 if (len > maxlen)
1982                         len = maxlen;
1983
1984                 if (*ppos > len) {
1985                         *lenp = 0;
1986                         return 0;
1987                 }
1988
1989                 data += *ppos;
1990                 len  -= *ppos;
1991
1992                 if (len > *lenp)
1993                         len = *lenp;
1994                 if (len)
1995                         if (copy_to_user(buffer, data, len))
1996                                 return -EFAULT;
1997                 if (len < *lenp) {
1998                         if (put_user('\n', buffer + len))
1999                                 return -EFAULT;
2000                         len++;
2001                 }
2002                 *lenp = len;
2003                 *ppos += len;
2004         }
2005         return 0;
2006 }
2007
2008 static void warn_sysctl_write(struct ctl_table *table)
2009 {
2010         pr_warn_once("%s wrote to %s when file position was not 0!\n"
2011                 "This will not be supported in the future. To silence this\n"
2012                 "warning, set kernel.sysctl_writes_strict = -1\n",
2013                 current->comm, table->procname);
2014 }
2015
2016 /**
2017  * proc_first_pos_non_zero_ignore - check if first position is allowed
2018  * @ppos: file position
2019  * @table: the sysctl table
2020  *
2021  * Returns true if the first position is non-zero and the sysctl_writes_strict
2022  * mode indicates this is not allowed for numeric input types. String proc
2023  * handlers can ignore the return value.
2024  */
2025 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2026                                            struct ctl_table *table)
2027 {
2028         if (!*ppos)
2029                 return false;
2030
2031         switch (sysctl_writes_strict) {
2032         case SYSCTL_WRITES_STRICT:
2033                 return true;
2034         case SYSCTL_WRITES_WARN:
2035                 warn_sysctl_write(table);
2036                 return false;
2037         default:
2038                 return false;
2039         }
2040 }
2041
2042 /**
2043  * proc_dostring - read a string sysctl
2044  * @table: the sysctl table
2045  * @write: %TRUE if this is a write to the sysctl file
2046  * @buffer: the user buffer
2047  * @lenp: the size of the user buffer
2048  * @ppos: file position
2049  *
2050  * Reads/writes a string from/to the user buffer. If the kernel
2051  * buffer provided is not large enough to hold the string, the
2052  * string is truncated. The copied string is %NULL-terminated.
2053  * If the string is being read by the user process, it is copied
2054  * and a newline '\n' is added. It is truncated if the buffer is
2055  * not large enough.
2056  *
2057  * Returns 0 on success.
2058  */
2059 int proc_dostring(struct ctl_table *table, int write,
2060                   void __user *buffer, size_t *lenp, loff_t *ppos)
2061 {
2062         if (write)
2063                 proc_first_pos_non_zero_ignore(ppos, table);
2064
2065         return _proc_do_string((char *)(table->data), table->maxlen, write,
2066                                (char __user *)buffer, lenp, ppos);
2067 }
2068
/*
 * Advance *@buf to the position returned by skip_spaces() and return how
 * many bytes were stepped over.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);
	return *buf - start;
}
2077
/*
 * Step *@buf over a run of the character @v, consuming at most *@size
 * bytes; *@size is reduced by the number of bytes skipped.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size != 0 && **buf == v) {
		--*size;
		++*buf;
	}
}
2087
#define TMPBUFLEN 22
/**
 * proc_get_long - parse an ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the magnitude of the number will be stored
 * @neg: set to true if the number carries a leading '-'
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. On success 0 is
 * returned and @buf and @size are advanced past the bytes consumed. If @tr
 * is non-NULL and input remains after the number, @tr receives the
 * character following it. -EINVAL is returned for empty input, input that
 * does not start with a digit (after an optional '-'), a number that fills
 * the whole temporary buffer, or a trailer that is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	char tmp[TMPBUFLEN];
	char *cur = tmp;
	int len;

	if (*size == 0)
		return -EINVAL;

	/* Work on a bounded, NUL-terminated private copy of the input. */
	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;
	memcpy(tmp, *buf, len);
	tmp[len] = 0;

	/* A lone '-' is not treated as a sign. */
	*neg = (*cur == '-' && *size > 1);
	if (*neg)
		cur++;

	if (!isdigit(*cur))
		return -EINVAL;

	*val = simple_strtoul(cur, &cur, 0);
	len = cur - tmp;

	/*
	 * When the number runs to the very end of the temporary buffer the
	 * character after it is unknown, so a malformed value (e.g.
	 * 1234...a) or two adjacent values (e.g. 123...1) could slip
	 * through. Refuse numbers that long.
	 */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size) {
		/* Input remains: validate and report the trailer. */
		if (perm_tr_len && !memchr(perm_tr, *cur, perm_tr_len))
			return -EINVAL;
		if (tr)
			*tr = *cur;
	}

	*buf += len;
	*size -= len;

	return 0;
}
2152
2153 /**
2154  * proc_put_long - converts an integer to a decimal ASCII formatted string
2155  *
2156  * @buf: the user buffer
2157  * @size: the size of the user buffer
2158  * @val: the integer to be converted
2159  * @neg: sign of the number, %TRUE for negative
2160  *
2161  * In case of success %0 is returned and @buf and @size are updated with
2162  * the amount of bytes written.
2163  */
2164 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2165                           bool neg)
2166 {
2167         int len;
2168         char tmp[TMPBUFLEN], *p = tmp;
2169
2170         sprintf(p, "%s%lu", neg ? "-" : "", val);
2171         len = strlen(tmp);
2172         if (len > *size)
2173                 len = *size;
2174         if (copy_to_user(*buf, tmp, len))
2175                 return -EFAULT;
2176         *size -= len;
2177         *buf += len;
2178         return 0;
2179 }
2180 #undef TMPBUFLEN
2181
2182 static int proc_put_char(void __user **buf, size_t *size, char c)
2183 {
2184         if (*size) {
2185                 char __user **buffer = (char __user **)buf;
2186                 if (put_user(c, *buffer))
2187                         return -EFAULT;
2188                 (*size)--, (*buffer)++;
2189                 *buf = *buffer;
2190         }
2191         return 0;
2192 }
2193
/*
 * Default conversion between the (sign, magnitude) pair used by the parser
 * and a plain int.
 *
 * Write (@write != 0): *@valp receives the value described by *@negp and
 * *@lvalp; -EINVAL is returned when the magnitude exceeds INT_MAX (or
 * INT_MAX + 1 for a negative value).
 * Read (@write == 0): *@valp is split into *@negp and *@lvalp.
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (!write) {
		const int cur = *valp;
		const bool is_neg = cur < 0;

		*negp = is_neg;
		*lvalp = is_neg ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	if (*negp) {
		/* The most negative int has magnitude INT_MAX + 1. */
		if (*lvalp > (unsigned long) INT_MAX + 1)
			return -EINVAL;
		*valp = -*lvalp;
		return 0;
	}

	if (*lvalp > (unsigned long) INT_MAX)
		return -EINVAL;
	*valp = *lvalp;
	return 0;
}
2220
/*
 * Default conversion between the parser's unsigned long and an
 * unsigned int.
 *
 * Write (@write != 0): *@valp receives *@lvalp, or -EINVAL is returned when
 * the value does not fit into an unsigned int.
 * Read (@write == 0): *@lvalp receives *@valp.
 *
 * @data is unused. Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;
	return 0;
}
2235
/*
 * Characters accepted as a trailer after a number. This is a plain array
 * without a terminating NUL: users pass sizeof() as the element count.
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
2237
2238 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2239                   int write, void __user *buffer,
2240                   size_t *lenp, loff_t *ppos,
2241                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2242                               int write, void *data),
2243                   void *data)
2244 {
2245         int *i, vleft, first = 1, err = 0;
2246         size_t left;
2247         char *kbuf = NULL, *p;
2248         
2249         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2250                 *lenp = 0;
2251                 return 0;
2252         }
2253         
2254         i = (int *) tbl_data;
2255         vleft = table->maxlen / sizeof(*i);
2256         left = *lenp;
2257
2258         if (!conv)
2259                 conv = do_proc_dointvec_conv;
2260
2261         if (write) {
2262                 if (proc_first_pos_non_zero_ignore(ppos, table))
2263                         goto out;
2264
2265                 if (left > PAGE_SIZE - 1)
2266                         left = PAGE_SIZE - 1;
2267                 p = kbuf = memdup_user_nul(buffer, left);
2268                 if (IS_ERR(kbuf))
2269                         return PTR_ERR(kbuf);
2270         }
2271
2272         for (; left && vleft--; i++, first=0) {
2273                 unsigned long lval;
2274                 bool neg;
2275
2276                 if (write) {
2277                         left -= proc_skip_spaces(&p);
2278
2279                         if (!left)
2280                                 break;
2281                         err = proc_get_long(&p, &left, &lval, &neg,
2282                                              proc_wspace_sep,
2283                                              sizeof(proc_wspace_sep), NULL);
2284                         if (err)
2285                                 break;
2286                         if (conv(&neg, &lval, i, 1, data)) {
2287                                 err = -EINVAL;
2288                                 break;
2289                         }
2290                 } else {
2291                         if (conv(&neg, &lval, i, 0, data)) {
2292                                 err = -EINVAL;
2293                                 break;
2294                         }
2295                         if (!first)
2296                                 err = proc_put_char(&buffer, &left, '\t');
2297                         if (err)
2298                                 break;
2299                         err = proc_put_long(&buffer, &left, lval, neg);
2300                         if (err)
2301                                 break;
2302                 }
2303         }
2304
2305         if (!write && !first && left && !err)
2306                 err = proc_put_char(&buffer, &left, '\n');
2307         if (write && !err && left)
2308                 left -= proc_skip_spaces(&p);
2309         if (write) {
2310                 kfree(kbuf);
2311                 if (first)
2312                         return err ? : -EINVAL;
2313         }
2314         *lenp -= left;
2315 out:
2316         *ppos += *lenp;
2317         return err;
2318 }
2319
2320 static int do_proc_dointvec(struct ctl_table *table, int write,
2321                   void __user *buffer, size_t *lenp, loff_t *ppos,
2322                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2323                               int write, void *data),
2324                   void *data)
2325 {
2326         return __do_proc_dointvec(table->data, table, write,
2327                         buffer, lenp, ppos, conv, data);
2328 }
2329
2330 static int do_proc_douintvec_w(unsigned int *tbl_data,
2331                                struct ctl_table *table,
2332                                void __user *buffer,
2333                                size_t *lenp, loff_t *ppos,
2334                                int (*conv)(unsigned long *lvalp,
2335                                            unsigned int *valp,
2336                                            int write, void *data),
2337                                void *data)
2338 {
2339         unsigned long lval;
2340         int err = 0;
2341         size_t left;
2342         bool neg;
2343         char *kbuf = NULL, *p;
2344
2345         left = *lenp;
2346
2347         if (proc_first_pos_non_zero_ignore(ppos, table))
2348                 goto bail_early;
2349
2350         if (left > PAGE_SIZE - 1)
2351                 left = PAGE_SIZE - 1;
2352
2353         p = kbuf = memdup_user_nul(buffer, left);
2354         if (IS_ERR(kbuf))
2355                 return -EINVAL;
2356
2357         left -= proc_skip_spaces(&p);
2358         if (!left) {
2359                 err = -EINVAL;
2360                 goto out_free;
2361         }
2362
2363         err = proc_get_long(&p, &left, &lval, &neg,
2364                              proc_wspace_sep,
2365                              sizeof(proc_wspace_sep), NULL);
2366         if (err || neg) {
2367                 err = -EINVAL;
2368                 goto out_free;
2369         }
2370
2371         if (conv(&lval, tbl_data, 1, data)) {
2372                 err = -EINVAL;
2373                 goto out_free;
2374         }
2375
2376         if (!err && left)
2377                 left -= proc_skip_spaces(&p);
2378
2379 out_free:
2380         kfree(kbuf);
2381         if (err)
2382                 return -EINVAL;
2383
2384         return 0;
2385
2386         /* This is in keeping with old __do_proc_dointvec() */
2387 bail_early:
2388         *ppos += *lenp;
2389         return err;
2390 }
2391
2392 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2393                                size_t *lenp, loff_t *ppos,
2394                                int (*conv)(unsigned long *lvalp,
2395                                            unsigned int *valp,
2396                                            int write, void *data),
2397                                void *data)
2398 {
2399         unsigned long lval;
2400         int err = 0;
2401         size_t left;
2402
2403         left = *lenp;
2404
2405         if (conv(&lval, tbl_data, 0, data)) {
2406                 err = -EINVAL;
2407                 goto out;
2408         }
2409
2410         err = proc_put_long(&buffer, &left, lval, false);
2411         if (err || !left)
2412                 goto out;
2413
2414         err = proc_put_char(&buffer, &left, '\n');
2415
2416 out:
2417         *lenp -= left;
2418         *ppos += *lenp;
2419
2420         return err;
2421 }
2422
2423 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2424                                int write, void __user *buffer,
2425                                size_t *lenp, loff_t *ppos,
2426                                int (*conv)(unsigned long *lvalp,
2427                                            unsigned int *valp,
2428                                            int write, void *data),
2429                                void *data)
2430 {
2431         unsigned int *i, vleft;
2432
2433         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2434                 *lenp = 0;
2435                 return 0;
2436         }
2437
2438         i = (unsigned int *) tbl_data;
2439         vleft = table->maxlen / sizeof(*i);
2440
2441         /*
2442          * Arrays are not supported, keep this simple. *Do not* add
2443          * support for them.
2444          */
2445         if (vleft != 1) {
2446                 *lenp = 0;
2447                 return -EINVAL;
2448         }
2449
2450         if (!conv)
2451                 conv = do_proc_douintvec_conv;
2452
2453         if (write)
2454                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2455                                            conv, data);
2456         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2457 }
2458
2459 static int do_proc_douintvec(struct ctl_table *table, int write,
2460                              void __user *buffer, size_t *lenp, loff_t *ppos,
2461                              int (*conv)(unsigned long *lvalp,
2462                                          unsigned int *valp,
2463                                          int write, void *data),
2464                              void *data)
2465 {
2466         return __do_proc_douintvec(table->data, table, write,
2467                                    buffer, lenp, ppos, conv, data);
2468 }
2469
2470 /**
2471  * proc_dointvec - read a vector of integers
2472  * @table: the sysctl table
2473  * @write: %TRUE if this is a write to the sysctl file
2474  * @buffer: the user buffer
2475  * @lenp: the size of the user buffer
2476  * @ppos: file position
2477  *
2478  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2479  * values from/to the user buffer, treated as an ASCII string. 
2480  *
2481  * Returns 0 on success.
2482  */
2483 int proc_dointvec(struct ctl_table *table, int write,
2484                      void __user *buffer, size_t *lenp, loff_t *ppos)
2485 {
2486         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2487 }
2488
2489 /**
2490  * proc_douintvec - read a vector of unsigned integers
2491  * @table: the sysctl table
2492  * @write: %TRUE if this is a write to the sysctl file
2493  * @buffer: the user buffer
2494  * @lenp: the size of the user buffer
2495  * @ppos: file position
2496  *
2497  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2498  * values from/to the user buffer, treated as an ASCII string.
2499  *
2500  * Returns 0 on success.
2501  */
2502 int proc_douintvec(struct ctl_table *table, int write,
2503                      void __user *buffer, size_t *lenp, loff_t *ppos)
2504 {
2505         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2506                                  do_proc_douintvec_conv, NULL);
2507 }
2508
2509 /*
2510  * Taint values can only be increased
2511  * This means we can safely use a temporary.
2512  */
2513 static int proc_taint(struct ctl_table *table, int write,
2514                                void __user *buffer, size_t *lenp, loff_t *ppos)
2515 {
2516         struct ctl_table t;
2517         unsigned long tmptaint = get_taint();
2518         int err;
2519
2520         if (write && !capable(CAP_SYS_ADMIN))
2521                 return -EPERM;
2522
2523         t = *table;
2524         t.data = &tmptaint;
2525         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2526         if (err < 0)
2527                 return err;
2528
2529         if (write) {
2530                 /*
2531                  * Poor man's atomic or. Not worth adding a primitive
2532                  * to everyone's atomic.h for this
2533                  */
2534                 int i;
2535                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2536                         if ((tmptaint >> i) & 1)
2537                                 add_taint(i, LOCKDEP_STILL_OK);
2538                 }
2539         }
2540
2541         return err;
2542 }
2543
2544 #ifdef CONFIG_PRINTK
2545 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
2546                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2547 {
2548         if (write && !capable(CAP_SYS_ADMIN))
2549                 return -EPERM;
2550
2551         return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2552 }
2553 #endif
2554
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * Conversion callback used by proc_dointvec_minmax().
 *
 * @negp:  sign of the value (in on write, out on read)
 * @lvalp: magnitude of the value (in on write, out on read)
 * @valp:  the kernel integer being read or written
 * @write: non-zero when a user-supplied value is being stored
 * @data:  struct do_proc_dointvec_minmax_conv_param with optional limits
 *
 * On write, the sign/magnitude pair is turned into an int, checked
 * against the optional minimum and maximum, and stored in *@valp only if
 * it passes.  A magnitude that cannot be represented as an int is
 * rejected up front; otherwise the narrowing conversion would silently
 * wrap and a huge input could land inside the permitted range.
 *
 * On read, *@valp is split into sign and magnitude.
 *
 * Returns 0 on success, -EINVAL when a written value does not fit in an
 * int or violates the limits.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;

	if (write) {
		int val;

		if (*negp) {
			/* INT_MIN has magnitude INT_MAX + 1. */
			if (*lvalp > (unsigned long) INT_MAX + 1)
				return -EINVAL;
			val = -*lvalp;
		} else {
			if (*lvalp > (unsigned long) INT_MAX)
				return -EINVAL;
			val = *lvalp;
		}

		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;

		if (val < 0) {
			*negp = true;
			/* Negate in unsigned arithmetic so INT_MIN is safe. */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = false;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
2592
2593 /**
2594  * proc_dointvec_minmax - read a vector of integers with min/max values
2595  * @table: the sysctl table
2596  * @write: %TRUE if this is a write to the sysctl file
2597  * @buffer: the user buffer
2598  * @lenp: the size of the user buffer
2599  * @ppos: file position
2600  *
2601  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2602  * values from/to the user buffer, treated as an ASCII string.
2603  *
2604  * This routine will ensure the values are within the range specified by
2605  * table->extra1 (min) and table->extra2 (max).
2606  *
2607  * Returns 0 on success or -EINVAL on write when the range check fails.
2608  */
2609 int proc_dointvec_minmax(struct ctl_table *table, int write,
2610                   void __user *buffer, size_t *lenp, loff_t *ppos)
2611 {
2612         struct do_proc_dointvec_minmax_conv_param param = {
2613                 .min = (int *) table->extra1,
2614                 .max = (int *) table->extra2,
2615         };
2616         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2617                                 do_proc_dointvec_minmax_conv, &param);
2618 }
2619
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the optional lower and upper bound used by the conversion
 * callback of proc_douintvec_minmax().  A NULL pointer disables the
 * corresponding check.
 */
struct do_proc_douintvec_minmax_conv_param {
	unsigned int *min;
	unsigned int *max;
};

/*
 * Conversion callback used by proc_douintvec_minmax().
 *
 * On read (@write == 0) the kernel value *@valp is widened into *@lvalp;
 * @data is not consulted.
 *
 * On write, *@lvalp must fit in an unsigned int (-EINVAL otherwise) and
 * lie within the optional limits in @data (-ERANGE otherwise) before it
 * is stored in *@valp.
 *
 * Returns 0 on success.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
					 unsigned int *valp,
					 int write, void *data)
{
	struct do_proc_douintvec_minmax_conv_param *limits = data;
	unsigned int candidate;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	candidate = *lvalp;

	if (limits->min && candidate < *limits->min)
		return -ERANGE;
	if (limits->max && candidate > *limits->max)
		return -ERANGE;

	*valp = candidate;
	return 0;
}
2658
2659 /**
2660  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2661  * @table: the sysctl table
2662  * @write: %TRUE if this is a write to the sysctl file
2663  * @buffer: the user buffer
2664  * @lenp: the size of the user buffer
2665  * @ppos: file position
2666  *
2667  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2668  * values from/to the user buffer, treated as an ASCII string. Negative
2669  * strings are not allowed.
2670  *
2671  * This routine will ensure the values are within the range specified by
2672  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2673  * check for UINT_MAX to avoid having to support wrap around uses from
2674  * userspace.
2675  *
2676  * Returns 0 on success or -ERANGE on write when the range check fails.
2677  */
2678 int proc_douintvec_minmax(struct ctl_table *table, int write,
2679                           void __user *buffer, size_t *lenp, loff_t *ppos)
2680 {
2681         struct do_proc_douintvec_minmax_conv_param param = {
2682                 .min = (unsigned int *) table->extra1,
2683                 .max = (unsigned int *) table->extra2,
2684         };
2685         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2686                                  do_proc_douintvec_minmax_conv, &param);
2687 }
2688
/*
 * Conversion callback for the pipe-max-size sysctl.
 *
 * On read the stored value is passed through unchanged.  On write the
 * requested size is run through round_pipe_size(); a result of 0 is
 * treated as invalid and yields -EINVAL, anything else is stored.
 * @data is unused.
 */
static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
					unsigned int *valp,
					int write, void *data)
{
	unsigned int rounded;

	if (!write) {
		*lvalp = (unsigned long) *valp;
		return 0;
	}

	rounded = round_pipe_size(*lvalp);
	if (!rounded)
		return -EINVAL;

	*valp = rounded;
	return 0;
}
2708
2709 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2710                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2711 {
2712         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2713                                  do_proc_dopipe_max_size_conv, NULL);
2714 }
2715
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern is neither
 * an absolute path ('/') nor a pipe handler ('|').  Only logs a warning;
 * no setting is changed.  Compiles to nothing without CONFIG_COREDUMP.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
	if (suid_dumpable != SUID_DUMP_ROOT)
		return;

	if (core_pattern[0] == '/' || core_pattern[0] == '|')
		return;

	printk(KERN_WARNING
"Unsafe core_pattern used with fs.suid_dumpable=2.\n"
"Pipe handler or fully qualified core dump path required.\n"
"Set kernel.core_pattern before fs.suid_dumpable.\n"
	);
#endif
}
2729
2730 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2731                 void __user *buffer, size_t *lenp, loff_t *ppos)
2732 {
2733         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2734         if (!error)
2735                 validate_coredump_safety();
2736         return error;
2737 }
2738
2739 #ifdef CONFIG_COREDUMP
2740 static int proc_dostring_coredump(struct ctl_table *table, int write,
2741                   void __user *buffer, size_t *lenp, loff_t *ppos)
2742 {
2743         int error = proc_dostring(table, write, buffer, lenp, ppos);
2744         if (!error)
2745                 validate_coredump_safety();
2746         return error;
2747 }
2748 #endif
2749
2750 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2751                                      void __user *buffer,
2752                                      size_t *lenp, loff_t *ppos,
2753                                      unsigned long convmul,
2754                                      unsigned long convdiv)
2755 {
2756         unsigned long *i, *min, *max;
2757         int vleft, first = 1, err = 0;
2758         size_t left;
2759         char *kbuf = NULL, *p;
2760
2761         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2762                 *lenp = 0;
2763                 return 0;
2764         }
2765
2766         i = (unsigned long *) data;
2767         min = (unsigned long *) table->extra1;
2768         max = (unsigned long *) table->extra2;
2769         vleft = table->maxlen / sizeof(unsigned long);
2770         left = *lenp;
2771
2772         if (write) {
2773                 if (proc_first_pos_non_zero_ignore(ppos, table))
2774                         goto out;
2775
2776                 if (left > PAGE_SIZE - 1)
2777                         left = PAGE_SIZE - 1;
2778                 p = kbuf = memdup_user_nul(buffer, left);
2779                 if (IS_ERR(kbuf))
2780                         return PTR_ERR(kbuf);
2781         }
2782
2783         for (; left && vleft--; i++, first = 0) {
2784                 unsigned long val;
2785
2786                 if (write) {
2787                         bool neg;
2788
2789                         left -= proc_skip_spaces(&p);
2790                         if (!left)
2791                                 break;
2792
2793                         err = proc_get_long(&p, &left, &val, &neg,
2794                                              proc_wspace_sep,
2795                                              sizeof(proc_wspace_sep), NULL);
2796                         if (err)
2797                                 break;
2798                         if (neg)
2799                                 continue;
2800                         val = convmul * val / convdiv;
2801                         if ((min && val < *min) || (max && val > *max))
2802                                 continue;
2803                         *i = val;
2804                 } else {
2805                         val = convdiv * (*i) / convmul;
2806                         if (!first) {
2807                                 err = proc_put_char(&buffer, &left, '\t');
2808                                 if (err)
2809                                         break;
2810                         }
2811                         err = proc_put_long(&buffer, &left, val, false);
2812                         if (err)
2813                                 break;
2814                 }
2815         }
2816
2817         if (!write && !first && left && !err)
2818                 err = proc_put_char(&buffer, &left, '\n');
2819         if (write && !err)
2820                 left -= proc_skip_spaces(&p);
2821         if (write) {
2822                 kfree(kbuf);
2823                 if (first)
2824                         return err ? : -EINVAL;
2825         }
2826         *lenp -= left;
2827 out:
2828         *ppos += *lenp;
2829         return err;
2830 }
2831
2832 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2833                                      void __user *buffer,
2834                                      size_t *lenp, loff_t *ppos,
2835                                      unsigned long convmul,
2836                                      unsigned long convdiv)
2837 {
2838         return __do_proc_doulongvec_minmax(table->data, table, write,
2839                         buffer, lenp, ppos, convmul, convdiv);
2840 }
2841
2842 /**
2843  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2844  * @table: the sysctl table
2845  * @write: %TRUE if this is a write to the sysctl file
2846  * @buffer: the user buffer
2847  * @lenp: the size of the user buffer
2848  * @ppos: file position
2849  *
2850  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2851  * values from/to the user buffer, treated as an ASCII string.
2852  *
2853  * This routine will ensure the values are within the range specified by
2854  * table->extra1 (min) and table->extra2 (max).
2855  *
2856  * Returns 0 on success.
2857  */
2858 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2859                            void __user *buffer, size_t *lenp, loff_t *ppos)
2860 {
2861     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2862 }
2863
2864 /**
2865  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2866  * @table: the sysctl table
2867  * @write: %TRUE if this is a write to the sysctl file
2868  * @buffer: the user buffer
2869  * @lenp: the size of the user buffer
2870  * @ppos: file position
2871  *
2872  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2873  * values from/to the user buffer, treated as an ASCII string. The values
2874  * are treated as milliseconds, and converted to jiffies when they are stored.
2875  *
2876  * This routine will ensure the values are within the range specified by
2877  * table->extra1 (min) and table->extra2 (max).
2878  *
2879  * Returns 0 on success.
2880  */
2881 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2882                                       void __user *buffer,
2883                                       size_t *lenp, loff_t *ppos)
2884 {
2885     return do_proc_doulongvec_minmax(table, write, buffer,
2886                                      lenp, ppos, HZ, 1000l);
2887 }
2888
2889
2890 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2891                                          int *valp,
2892                                          int write, void *data)
2893 {
2894         if (write) {
2895                 if (*lvalp > INT_MAX / HZ)
2896                         return 1;
2897                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2898         } else {
2899                 int val = *valp;
2900                 unsigned long lval;
2901                 if (val < 0) {
2902                         *negp = true;
2903                         lval = -(unsigned long)val;
2904                 } else {
2905                         *negp = false;
2906                         lval = (unsigned long)val;
2907                 }
2908                 *lvalp = lval / HZ;
2909         }
2910         return 0;
2911 }
2912
2913 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2914                                                 int *valp,
2915                                                 int write, void *data)
2916 {
2917         if (write) {
2918                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2919                         return 1;
2920                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2921         } else {
2922                 int val = *valp;
2923                 unsigned long lval;
2924                 if (val < 0) {
2925                         *negp = true;
2926                         lval = -(unsigned long)val;
2927                 } else {
2928                         *negp = false;
2929                         lval = (unsigned long)val;
2930                 }
2931                 *lvalp = jiffies_to_clock_t(lval);
2932         }
2933         return 0;
2934 }
2935
/*
 * Conversion callback between milliseconds (user side) and jiffies
 * (kernel side).
 *
 * On write the signed input is passed through msecs_to_jiffies(); the
 * result is rejected when it exceeds INT_MAX and stored otherwise.  On
 * read *@valp is split into sign and magnitude and the magnitude is
 * passed through jiffies_to_msecs().  @data is unused.
 *
 * Returns 0 on success and 1 when a written value is too large.
 */
static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	unsigned long magnitude;
	int cur;

	if (write) {
		unsigned long msecs = *negp ? -*lvalp : *lvalp;
		unsigned long jif = msecs_to_jiffies(msecs);

		if (jif > INT_MAX)
			return 1;

		*valp = (int)jif;
		return 0;
	}

	cur = *valp;
	*negp = cur < 0;
	/* Negate in unsigned arithmetic so INT_MIN is safe. */
	magnitude = *negp ? -(unsigned long)cur : (unsigned long)cur;
	*lvalp = jiffies_to_msecs(magnitude);

	return 0;
}
2960
2961 /**
2962  * proc_dointvec_jiffies - read a vector of integers as seconds
2963  * @table: the sysctl table
2964  * @write: %TRUE if this is a write to the sysctl file
2965  * @buffer: the user buffer
2966  * @lenp: the size of the user buffer
2967  * @ppos: file position
2968  *
2969  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2970  * values from/to the user buffer, treated as an ASCII string. 
2971  * The values read are assumed to be in seconds, and are converted into
2972  * jiffies.
2973  *
2974  * Returns 0 on success.
2975  */
2976 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2977                           void __user *buffer, size_t *lenp, loff_t *ppos)
2978 {
2979     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2980                             do_proc_dointvec_jiffies_conv,NULL);
2981 }
2982
2983 /**
2984  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2985  * @table: the sysctl table
2986  * @write: %TRUE if this is a write to the sysctl file
2987  * @buffer: the user buffer
2988  * @lenp: the size of the user buffer
2989  * @ppos: pointer to the file position
2990  *
2991  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2992  * values from/to the user buffer, treated as an ASCII string. 
2993  * The values read are assumed to be in 1/USER_HZ seconds, and 
2994  * are converted into jiffies.
2995  *
2996  * Returns 0 on success.
2997  */
2998 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2999                                  void __user *buffer, size_t *lenp, loff_t *ppos)
3000 {
3001     return do_proc_dointvec(table,write,buffer,lenp,ppos,
3002                             do_proc_dointvec_userhz_jiffies_conv,NULL);
3003 }
3004
3005 /**
3006  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
3007  * @table: the sysctl table
3008  * @write: %TRUE if this is a write to the sysctl file
3009  * @buffer: the user buffer
3010  * @lenp: the size of the user buffer
3011  * @ppos: file position
3012  * @ppos: the current position in the file
3013  *
3014  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3015  * values from/to the user buffer, treated as an ASCII string. 
3016  * The values read are assumed to be in 1/1000 seconds, and 
3017  * are converted into jiffies.
3018  *
3019  * Returns 0 on success.
3020  */
3021 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3022                              void __user *buffer, size_t *lenp, loff_t *ppos)
3023 {
3024         return do_proc_dointvec(table, write, buffer, lenp, ppos,
3025                                 do_proc_dointvec_ms_jiffies_conv, NULL);
3026 }
3027
3028 static int proc_do_cad_pid(struct ctl_table *table, int write,
3029                            void __user *buffer, size_t *lenp, loff_t *ppos)
3030 {
3031         struct pid *new_pid;
3032         pid_t tmp;
3033         int r;
3034
3035         tmp = pid_vnr(cad_pid);
3036
3037         r = __do_proc_dointvec(&tmp, table, write, buffer,
3038                                lenp, ppos, NULL, NULL);
3039         if (r || !write)
3040                 return r;
3041
3042         new_pid = find_get_pid(tmp);
3043         if (!new_pid)
3044                 return -ESRCH;
3045
3046         put_pid(xchg(&cad_pid, new_pid));
3047         return 0;
3048 }
3049
3050 /**
3051  * proc_do_large_bitmap - read/write from/to a large bitmap
3052  * @table: the sysctl table
3053  * @write: %TRUE if this is a write to the sysctl file
3054  * @buffer: the user buffer
3055  * @lenp: the size of the user buffer
3056  * @ppos: file position
3057  *
3058  * The bitmap is stored at table->data and the bitmap length (in bits)
3059  * in table->maxlen.
3060  *
3061  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3062  * large bitmaps may be represented in a compact manner. Writing into
3063  * the file will clear the bitmap then update it with the given input.
3064  *
3065  * Returns 0 on success.
3066  */
3067 int proc_do_large_bitmap(struct ctl_table *table, int write,
3068                          void __user *buffer, size_t *lenp, loff_t *ppos)
3069 {
3070         int err = 0;
3071         bool first = 1;
3072         size_t left = *lenp;
3073         unsigned long bitmap_len = table->maxlen;
3074         unsigned long *bitmap = *(unsigned long **) table->data;
3075         unsigned long *tmp_bitmap = NULL;
3076         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3077
3078         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3079                 *lenp = 0;
3080                 return 0;
3081         }
3082
3083         if (write) {
3084                 char *kbuf, *p;
3085
3086                 if (left > PAGE_SIZE - 1)
3087                         left = PAGE_SIZE - 1;
3088
3089                 p = kbuf = memdup_user_nul(buffer, left);
3090                 if (IS_ERR(kbuf))
3091                         return PTR_ERR(kbuf);
3092
3093                 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3094                                      sizeof(unsigned long),
3095                                      GFP_KERNEL);
3096                 if (!tmp_bitmap) {
3097                         kfree(kbuf);
3098                         return -ENOMEM;
3099                 }
3100                 proc_skip_char(&p, &left, '\n');
3101                 while (!err && left) {
3102                         unsigned long val_a, val_b;
3103                         bool neg;
3104
3105                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3106                                              sizeof(tr_a), &c);
3107                         if (err)
3108                                 break;
3109                         if (val_a >= bitmap_len || neg) {
3110                                 err = -EINVAL;
3111                                 break;
3112                         }
3113
3114                         val_b = val_a;
3115                         if (left) {
3116                                 p++;
3117                                 left--;
3118                         }
3119
3120                         if (c == '-') {
3121                                 err = proc_get_long(&p, &left, &val_b,
3122                                                      &neg, tr_b, sizeof(tr_b),
3123                                                      &c);
3124                                 if (err)
3125                                         break;
3126                                 if (val_b >= bitmap_len || neg ||
3127                                     val_a > val_b) {
3128                                         err = -EINVAL;
3129                                         break;
3130                                 }
3131                                 if (left) {
3132                                         p++;
3133                                         left--;
3134                                 }
3135                         }
3136
3137                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3138                         first = 0;
3139                         proc_skip_char(&p, &left, '\n');
3140                 }
3141                 kfree(kbuf);
3142         } else {
3143                 unsigned long bit_a, bit_b = 0;
3144
3145                 while (left) {
3146                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3147                         if (bit_a >= bitmap_len)
3148                                 break;
3149                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3150                                                    bit_a + 1) - 1;
3151
3152                         if (!first) {
3153                                 err = proc_put_char(&buffer, &left, ',');
3154                                 if (err)
3155                                         break;
3156                         }
3157                         err = proc_put_long(&buffer, &left, bit_a, false);
3158                         if (err)
3159                                 break;
3160                         if (bit_a != bit_b) {
3161                                 err = proc_put_char(&buffer, &left, '-');
3162                                 if (err)
3163                                         break;
3164                                 err = proc_put_long(&buffer, &left, bit_b, false);
3165                                 if (err)
3166                                         break;
3167                         }
3168
3169                         first = 0; bit_b++;
3170                 }
3171                 if (!err)
3172                         err = proc_put_char(&buffer, &left, '\n');
3173         }
3174
3175         if (!err) {
3176                 if (write) {
3177                         if (*ppos)
3178                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3179                         else
3180                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3181                 }
3182                 *lenp -= left;
3183                 *ppos += *lenp;
3184         }
3185
3186         kfree(tmp_bitmap);
3187         return err;
3188 }
3189
3190 #else /* CONFIG_PROC_SYSCTL */
3191
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3192 int proc_dostring(struct ctl_table *table, int write,
3193                   void __user *buffer, size_t *lenp, loff_t *ppos)
3194 {
3195         return -ENOSYS;
3196 }
3197
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3198 int proc_dointvec(struct ctl_table *table, int write,
3199                   void __user *buffer, size_t *lenp, loff_t *ppos)
3200 {
3201         return -ENOSYS;
3202 }
3203
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3204 int proc_douintvec(struct ctl_table *table, int write,
3205                   void __user *buffer, size_t *lenp, loff_t *ppos)
3206 {
3207         return -ENOSYS;
3208 }
3209
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3210 int proc_dointvec_minmax(struct ctl_table *table, int write,
3211                     void __user *buffer, size_t *lenp, loff_t *ppos)
3212 {
3213         return -ENOSYS;
3214 }
3215
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3216 int proc_douintvec_minmax(struct ctl_table *table, int write,
3217                           void __user *buffer, size_t *lenp, loff_t *ppos)
3218 {
3219         return -ENOSYS;
3220 }
3221
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3222 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3223                     void __user *buffer, size_t *lenp, loff_t *ppos)
3224 {
3225         return -ENOSYS;
3226 }
3227
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3228 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3229                     void __user *buffer, size_t *lenp, loff_t *ppos)
3230 {
3231         return -ENOSYS;
3232 }
3233
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3234 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3235                              void __user *buffer, size_t *lenp, loff_t *ppos)
3236 {
3237         return -ENOSYS;
3238 }
3239
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3240 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3241                     void __user *buffer, size_t *lenp, loff_t *ppos)
3242 {
3243         return -ENOSYS;
3244 }
3245
/*
 * Stub from the #else branch of CONFIG_PROC_SYSCTL: the handler does
 * nothing and always fails with -ENOSYS.
 */
3246 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3247                                       void __user *buffer,
3248                                       size_t *lenp, loff_t *ppos)
3249 {
3250     return -ENOSYS;
3251 }
3252
3253
3254 #endif /* CONFIG_PROC_SYSCTL */
3255
3256 /*
3257  * No sense putting this after each symbol definition, twice,
3258  * exception granted :-)
 *
 * "Twice" because each handler listed here has two definitions in this
 * file: the real one and the -ENOSYS stub in the #else branch of
 * CONFIG_PROC_SYSCTL.  Exporting them once, after the #endif, covers
 * both configurations.  proc_douintvec_minmax is the only one exported
 * with EXPORT_SYMBOL_GPL.
3259  */
3260 EXPORT_SYMBOL(proc_dointvec);
3261 EXPORT_SYMBOL(proc_douintvec);
3262 EXPORT_SYMBOL(proc_dointvec_jiffies);
3263 EXPORT_SYMBOL(proc_dointvec_minmax);
3264 EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
3265 EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
3266 EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
3267 EXPORT_SYMBOL(proc_dostring);
3268 EXPORT_SYMBOL(proc_doulongvec_minmax);
3269 EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);