drm/omap: dsi: Fix missing of_platform_depopulate()
[sfrench/cifs-2.6.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/aio.h>
23 #include <linux/mm.h>
24 #include <linux/swap.h>
25 #include <linux/slab.h>
26 #include <linux/sysctl.h>
27 #include <linux/bitmap.h>
28 #include <linux/signal.h>
29 #include <linux/printk.h>
30 #include <linux/proc_fs.h>
31 #include <linux/security.h>
32 #include <linux/ctype.h>
33 #include <linux/kmemleak.h>
34 #include <linux/fs.h>
35 #include <linux/init.h>
36 #include <linux/kernel.h>
37 #include <linux/kobject.h>
38 #include <linux/net.h>
39 #include <linux/sysrq.h>
40 #include <linux/highuid.h>
41 #include <linux/writeback.h>
42 #include <linux/ratelimit.h>
43 #include <linux/compaction.h>
44 #include <linux/hugetlb.h>
45 #include <linux/initrd.h>
46 #include <linux/key.h>
47 #include <linux/times.h>
48 #include <linux/limits.h>
49 #include <linux/dcache.h>
50 #include <linux/dnotify.h>
51 #include <linux/syscalls.h>
52 #include <linux/vmstat.h>
53 #include <linux/nfs_fs.h>
54 #include <linux/acpi.h>
55 #include <linux/reboot.h>
56 #include <linux/ftrace.h>
57 #include <linux/perf_event.h>
58 #include <linux/kprobes.h>
59 #include <linux/pipe_fs_i.h>
60 #include <linux/oom.h>
61 #include <linux/kmod.h>
62 #include <linux/capability.h>
63 #include <linux/binfmts.h>
64 #include <linux/sched/sysctl.h>
65 #include <linux/sched/coredump.h>
66 #include <linux/kexec.h>
67 #include <linux/bpf.h>
68 #include <linux/mount.h>
69
70 #include <linux/uaccess.h>
71 #include <asm/processor.h>
72
73 #ifdef CONFIG_X86
74 #include <asm/nmi.h>
75 #include <asm/stacktrace.h>
76 #include <asm/io.h>
77 #endif
78 #ifdef CONFIG_SPARC
79 #include <asm/setup.h>
80 #endif
81 #ifdef CONFIG_BSD_PROCESS_ACCT
82 #include <linux/acct.h>
83 #endif
84 #ifdef CONFIG_RT_MUTEXES
85 #include <linux/rtmutex.h>
86 #endif
87 #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT)
88 #include <linux/lockdep.h>
89 #endif
90 #ifdef CONFIG_CHR_DEV_SG
91 #include <scsi/sg.h>
92 #endif
93 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
94 #include <linux/stackleak.h>
95 #endif
96 #ifdef CONFIG_LOCKUP_DETECTOR
97 #include <linux/nmi.h>
98 #endif
99
100 #if defined(CONFIG_SYSCTL)
101
102 /* External variables not in a header file. */
103 extern int suid_dumpable;
104 #ifdef CONFIG_COREDUMP
105 extern int core_uses_pid;               /* .data of the "core_uses_pid" entry */
106 extern char core_pattern[];             /* .data of the "core_pattern" entry */
107 extern unsigned int core_pipe_limit;    /* .data of the "core_pipe_limit" entry */
108 #endif
109 extern int pid_max;                     /* .data of the "pid_max" entry */
110 extern int pid_max_min, pid_max_max;    /* .extra1/.extra2 of the "pid_max" entry */
111 extern int percpu_pagelist_fraction;
112 extern int latencytop_enabled;          /* .data of the "latencytop" entry */
113 extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
114 #ifndef CONFIG_MMU
115 extern int sysctl_nr_trim_pages;
116 #endif
117
118 /*
 * Constants used for minimum and maximum: the table entries below take
 * their addresses in .extra1 (lower bound) and .extra2 (upper bound).
 */
119 #ifdef CONFIG_LOCKUP_DETECTOR
120 static int sixty = 60;
121 #endif
122
123 static int __maybe_unused neg_one = -1;
124
125 static int zero;        /* no initializer: static storage, so the value is 0 */
126 static int __maybe_unused one = 1;
127 static int __maybe_unused two = 2;
128 static int __maybe_unused four = 4;
129 static unsigned long one_ul = 1;
130 static int one_hundred = 100;
131 static int one_thousand = 1000;
132 #ifdef CONFIG_PRINTK
133 static int ten_thousand = 10000;        /* upper bound of "printk_delay" */
134 #endif
135 #ifdef CONFIG_PERF_EVENTS
136 static int six_hundred_forty_kb = 640 * 1024;
137 #endif
138
139 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
140 static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
141
142 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
143 static int maxolduid = 65535;
144 static int minolduid;   /* no initializer: static storage, so the value is 0 */
145
/* Backing storage for the read-only (0444) "ngroups_max" and "cap_last_cap" entries. */
146 static int ngroups_max = NGROUPS_MAX;
147 static const int cap_last_cap = CAP_LAST_CAP;
148
149 /*
150  * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
151  * and hung_task_check_interval_secs
152  */
153 #ifdef CONFIG_DETECT_HUNG_TASK
154 static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
155 #endif
156
157 #ifdef CONFIG_INOTIFY_USER
158 #include <linux/inotify.h>
159 #endif
/* NOTE(review): the CONFIG_SPARC conditional below is empty and guards nothing. */
160 #ifdef CONFIG_SPARC
161 #endif
162
163 #ifdef __hppa__
164 extern int pwrsw_enabled;               /* .data of the "soft-power" entry */
165 #endif
166
167 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
168 extern int unaligned_enabled;           /* .data of the "unaligned-trap" entry */
169 #endif
170
171 #ifdef CONFIG_IA64
172 extern int unaligned_dump_stack;
173 #endif
174
175 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
176 extern int no_unaligned_warning;
177 #endif
178
179 #ifdef CONFIG_PROC_SYSCTL
180
181 /**
182  * enum sysctl_writes_mode - supported sysctl write modes
183  *
184  * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
185  *      to be written, and multiple writes on the same sysctl file descriptor
186  *      will rewrite the sysctl value, regardless of file position. No warning
187  *      is issued when the initial position is not 0.
188  * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
189  *      not 0.
190  * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
191  *      file position 0 and the value must be fully contained in the buffer
192  *      sent to the write syscall. If dealing with strings respect the file
193  *      position, but restrict this to the max length of the buffer, anything
194  *      past the max length will be ignored. Multiple writes will append
195  *      to the buffer.
196  *
197  * These write modes control how current file position affects the behavior of
198  * updating sysctl values through the proc interface on each write.
199  */
200 enum sysctl_writes_mode {
201         SYSCTL_WRITES_LEGACY            = -1,
202         SYSCTL_WRITES_WARN              = 0,
203         SYSCTL_WRITES_STRICT            = 1,
204 };
205
/*
 * Currently selected write mode, defaulting to strict.  It is the .data of
 * the "sysctl_writes_strict" entry, which bounds it to [neg_one, one], i.e.
 * the three enum values above.
 */
206 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
207
/* Forward declarations: .proc_handler of the "cad_pid" and "tainted" entries. */
208 static int proc_do_cad_pid(struct ctl_table *table, int write,
209                   void __user *buffer, size_t *lenp, loff_t *ppos);
210 static int proc_taint(struct ctl_table *table, int write,
211                                void __user *buffer, size_t *lenp, loff_t *ppos);
212 #endif
213
/*
 * Forward declarations of static handlers so that the tables below can take
 * their addresses in .proc_handler.  All share the same parameter list as
 * the proc_do* handlers used directly in the tables.
 */
214 #ifdef CONFIG_PRINTK
/* Used by the "dmesg_restrict" and "kptr_restrict" entries. */
215 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
216                                 void __user *buffer, size_t *lenp, loff_t *ppos);
217 #endif
218
219 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
220                 void __user *buffer, size_t *lenp, loff_t *ppos);
221 #ifdef CONFIG_COREDUMP
/* Used by the "core_pattern" entry. */
222 static int proc_dostring_coredump(struct ctl_table *table, int write,
223                 void __user *buffer, size_t *lenp, loff_t *ppos);
224 #endif
225 static int proc_dopipe_max_size(struct ctl_table *table, int write,
226                 void __user *buffer, size_t *lenp, loff_t *ppos);
227
228 #ifdef CONFIG_MAGIC_SYSRQ
229 /* Note: sysrq code uses its own private copy */
230 static int __sysrq_enabled = CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE;
231
/*
 * sysrq_sysctl_handler - .proc_handler of the "sysrq" entry in kern_table
 *
 * The integer parsing/formatting is delegated to proc_dointvec(); the
 * "sysrq" entry points .data at __sysrq_enabled.  If proc_dointvec()
 * reports an error, that error is returned unchanged and nothing else is
 * done.  On a successful write the current __sysrq_enabled is handed to
 * sysrq_toggle_support().  Reads have no further side effect here.
 *
 * Returns 0 on success, or the non-zero value returned by proc_dointvec().
 */
232 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
233                                 void __user *buffer, size_t *lenp,
234                                 loff_t *ppos)
235 {
236         int error;
237
238         error = proc_dointvec(table, write, buffer, lenp, ppos);
239         if (error)
240                 return error;
241
242         if (write)      /* only propagate the value when it may have changed */
243                 sysrq_toggle_support(__sysrq_enabled);
244
245         return 0;
246 }
247
248 #endif
249
/*
 * Tentative definitions of the per-subtree tables, so sysctl_base_table
 * below can reference them through .child before their initializers appear.
 */
250 static struct ctl_table kern_table[];
251 static struct ctl_table vm_table[];
252 static struct ctl_table fs_table[];
253 static struct ctl_table debug_table[];
254 static struct ctl_table dev_table[];
/* Tables defined outside this file. */
255 extern struct ctl_table random_table[];         /* child of the "random" entry */
256 #ifdef CONFIG_EPOLL
257 extern struct ctl_table epoll_table[];
258 #endif
259
260 #ifdef CONFIG_FW_LOADER_USER_HELPER
261 extern struct ctl_table firmware_config_table[];        /* child of "firmware_config" */
262 #endif
263
264 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
/* Defined here with external linkage; zero-initialized (no initializer). */
265 int sysctl_legacy_va_layout;
266 #endif
267
268 /* The default sysctl tables: */
269
/*
 * sysctl_base_table - root of the default tables
 *
 * Each entry names one subtree via .procname ("kernel", "vm", "fs",
 * "debug", "dev"), gives it mode 0555 and links the table holding its
 * children through .child.  None of these entries sets .data or
 * .proc_handler.  The trailing empty initializer terminates the array.
 */
270 static struct ctl_table sysctl_base_table[] = {
271         {
272                 .procname       = "kernel",
273                 .mode           = 0555,
274                 .child          = kern_table,
275         },
276         {
277                 .procname       = "vm",
278                 .mode           = 0555,
279                 .child          = vm_table,
280         },
281         {
282                 .procname       = "fs",
283                 .mode           = 0555,
284                 .child          = fs_table,
285         },
286         {
287                 .procname       = "debug",
288                 .mode           = 0555,
289                 .child          = debug_table,
290         },
291         {
292                 .procname       = "dev",
293                 .mode           = 0555,
294                 .child          = dev_table,
295         },
296         { }     /* terminator */
297 };
298
/*
 * Bounds referenced through .extra1/.extra2 by the scheduler entries of
 * kern_table: the *_sched_granularity_ns pair bounds both
 * "sched_min_granularity_ns" and "sched_latency_ns", the
 * *_wakeup_granularity_ns pair bounds "sched_wakeup_granularity_ns", and
 * the *_sched_tunable_scaling pair bounds "sched_tunable_scaling".
 */
299 #ifdef CONFIG_SCHED_DEBUG
300 static int min_sched_granularity_ns = 100000;           /* 100 usecs */
301 static int max_sched_granularity_ns = NSEC_PER_SEC;     /* 1 second */
302 static int min_wakeup_granularity_ns;                   /* 0 usecs */
303 static int max_wakeup_granularity_ns = NSEC_PER_SEC;    /* 1 second */
304 #ifdef CONFIG_SMP
305 static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
306 static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
307 #endif /* CONFIG_SMP */
308 #endif /* CONFIG_SCHED_DEBUG */
309
310 #ifdef CONFIG_COMPACTION
/* NOTE(review): presumably the bounds of an extfrag-threshold entry in vm_table — not visible here, confirm. */
311 static int min_extfrag_threshold;
312 static int max_extfrag_threshold = 1000;
313 #endif
314
315 static struct ctl_table kern_table[] = {
316         {
317                 .procname       = "sched_child_runs_first",
318                 .data           = &sysctl_sched_child_runs_first,
319                 .maxlen         = sizeof(unsigned int),
320                 .mode           = 0644,
321                 .proc_handler   = proc_dointvec,
322         },
323 #ifdef CONFIG_SCHED_DEBUG
324         {
325                 .procname       = "sched_min_granularity_ns",
326                 .data           = &sysctl_sched_min_granularity,
327                 .maxlen         = sizeof(unsigned int),
328                 .mode           = 0644,
329                 .proc_handler   = sched_proc_update_handler,
330                 .extra1         = &min_sched_granularity_ns,
331                 .extra2         = &max_sched_granularity_ns,
332         },
333         {
334                 .procname       = "sched_latency_ns",
335                 .data           = &sysctl_sched_latency,
336                 .maxlen         = sizeof(unsigned int),
337                 .mode           = 0644,
338                 .proc_handler   = sched_proc_update_handler,
339                 .extra1         = &min_sched_granularity_ns,
340                 .extra2         = &max_sched_granularity_ns,
341         },
342         {
343                 .procname       = "sched_wakeup_granularity_ns",
344                 .data           = &sysctl_sched_wakeup_granularity,
345                 .maxlen         = sizeof(unsigned int),
346                 .mode           = 0644,
347                 .proc_handler   = sched_proc_update_handler,
348                 .extra1         = &min_wakeup_granularity_ns,
349                 .extra2         = &max_wakeup_granularity_ns,
350         },
351 #ifdef CONFIG_SMP
352         {
353                 .procname       = "sched_tunable_scaling",
354                 .data           = &sysctl_sched_tunable_scaling,
355                 .maxlen         = sizeof(enum sched_tunable_scaling),
356                 .mode           = 0644,
357                 .proc_handler   = sched_proc_update_handler,
358                 .extra1         = &min_sched_tunable_scaling,
359                 .extra2         = &max_sched_tunable_scaling,
360         },
361         {
362                 .procname       = "sched_migration_cost_ns",
363                 .data           = &sysctl_sched_migration_cost,
364                 .maxlen         = sizeof(unsigned int),
365                 .mode           = 0644,
366                 .proc_handler   = proc_dointvec,
367         },
368         {
369                 .procname       = "sched_nr_migrate",
370                 .data           = &sysctl_sched_nr_migrate,
371                 .maxlen         = sizeof(unsigned int),
372                 .mode           = 0644,
373                 .proc_handler   = proc_dointvec,
374         },
375 #ifdef CONFIG_SCHEDSTATS
376         {
377                 .procname       = "sched_schedstats",
378                 .data           = NULL,
379                 .maxlen         = sizeof(unsigned int),
380                 .mode           = 0644,
381                 .proc_handler   = sysctl_schedstats,
382                 .extra1         = &zero,
383                 .extra2         = &one,
384         },
385 #endif /* CONFIG_SCHEDSTATS */
386 #endif /* CONFIG_SMP */
387 #ifdef CONFIG_NUMA_BALANCING
388         {
389                 .procname       = "numa_balancing_scan_delay_ms",
390                 .data           = &sysctl_numa_balancing_scan_delay,
391                 .maxlen         = sizeof(unsigned int),
392                 .mode           = 0644,
393                 .proc_handler   = proc_dointvec,
394         },
395         {
396                 .procname       = "numa_balancing_scan_period_min_ms",
397                 .data           = &sysctl_numa_balancing_scan_period_min,
398                 .maxlen         = sizeof(unsigned int),
399                 .mode           = 0644,
400                 .proc_handler   = proc_dointvec,
401         },
402         {
403                 .procname       = "numa_balancing_scan_period_max_ms",
404                 .data           = &sysctl_numa_balancing_scan_period_max,
405                 .maxlen         = sizeof(unsigned int),
406                 .mode           = 0644,
407                 .proc_handler   = proc_dointvec,
408         },
409         {
410                 .procname       = "numa_balancing_scan_size_mb",
411                 .data           = &sysctl_numa_balancing_scan_size,
412                 .maxlen         = sizeof(unsigned int),
413                 .mode           = 0644,
414                 .proc_handler   = proc_dointvec_minmax,
415                 .extra1         = &one,
416         },
417         {
418                 .procname       = "numa_balancing",
419                 .data           = NULL, /* filled in by handler */
420                 .maxlen         = sizeof(unsigned int),
421                 .mode           = 0644,
422                 .proc_handler   = sysctl_numa_balancing,
423                 .extra1         = &zero,
424                 .extra2         = &one,
425         },
426 #endif /* CONFIG_NUMA_BALANCING */
427 #endif /* CONFIG_SCHED_DEBUG */
428         {
429                 .procname       = "sched_rt_period_us",
430                 .data           = &sysctl_sched_rt_period,
431                 .maxlen         = sizeof(unsigned int),
432                 .mode           = 0644,
433                 .proc_handler   = sched_rt_handler,
434         },
435         {
436                 .procname       = "sched_rt_runtime_us",
437                 .data           = &sysctl_sched_rt_runtime,
438                 .maxlen         = sizeof(int),
439                 .mode           = 0644,
440                 .proc_handler   = sched_rt_handler,
441         },
442         {
443                 .procname       = "sched_rr_timeslice_ms",
444                 .data           = &sysctl_sched_rr_timeslice,
445                 .maxlen         = sizeof(int),
446                 .mode           = 0644,
447                 .proc_handler   = sched_rr_handler,
448         },
449 #ifdef CONFIG_SCHED_AUTOGROUP
450         {
451                 .procname       = "sched_autogroup_enabled",
452                 .data           = &sysctl_sched_autogroup_enabled,
453                 .maxlen         = sizeof(unsigned int),
454                 .mode           = 0644,
455                 .proc_handler   = proc_dointvec_minmax,
456                 .extra1         = &zero,
457                 .extra2         = &one,
458         },
459 #endif
460 #ifdef CONFIG_CFS_BANDWIDTH
461         {
462                 .procname       = "sched_cfs_bandwidth_slice_us",
463                 .data           = &sysctl_sched_cfs_bandwidth_slice,
464                 .maxlen         = sizeof(unsigned int),
465                 .mode           = 0644,
466                 .proc_handler   = proc_dointvec_minmax,
467                 .extra1         = &one,
468         },
469 #endif
470 #ifdef CONFIG_PROVE_LOCKING
471         {
472                 .procname       = "prove_locking",
473                 .data           = &prove_locking,
474                 .maxlen         = sizeof(int),
475                 .mode           = 0644,
476                 .proc_handler   = proc_dointvec,
477         },
478 #endif
479 #ifdef CONFIG_LOCK_STAT
480         {
481                 .procname       = "lock_stat",
482                 .data           = &lock_stat,
483                 .maxlen         = sizeof(int),
484                 .mode           = 0644,
485                 .proc_handler   = proc_dointvec,
486         },
487 #endif
488         {
489                 .procname       = "panic",
490                 .data           = &panic_timeout,
491                 .maxlen         = sizeof(int),
492                 .mode           = 0644,
493                 .proc_handler   = proc_dointvec,
494         },
495 #ifdef CONFIG_COREDUMP
496         {
497                 .procname       = "core_uses_pid",
498                 .data           = &core_uses_pid,
499                 .maxlen         = sizeof(int),
500                 .mode           = 0644,
501                 .proc_handler   = proc_dointvec,
502         },
503         {
504                 .procname       = "core_pattern",
505                 .data           = core_pattern,
506                 .maxlen         = CORENAME_MAX_SIZE,
507                 .mode           = 0644,
508                 .proc_handler   = proc_dostring_coredump,
509         },
510         {
511                 .procname       = "core_pipe_limit",
512                 .data           = &core_pipe_limit,
513                 .maxlen         = sizeof(unsigned int),
514                 .mode           = 0644,
515                 .proc_handler   = proc_dointvec,
516         },
517 #endif
518 #ifdef CONFIG_PROC_SYSCTL
519         {
520                 .procname       = "tainted",
521                 .maxlen         = sizeof(long),
522                 .mode           = 0644,
523                 .proc_handler   = proc_taint,
524         },
525         {
526                 .procname       = "sysctl_writes_strict",
527                 .data           = &sysctl_writes_strict,
528                 .maxlen         = sizeof(int),
529                 .mode           = 0644,
530                 .proc_handler   = proc_dointvec_minmax,
531                 .extra1         = &neg_one,
532                 .extra2         = &one,
533         },
534 #endif
535 #ifdef CONFIG_LATENCYTOP
536         {
537                 .procname       = "latencytop",
538                 .data           = &latencytop_enabled,
539                 .maxlen         = sizeof(int),
540                 .mode           = 0644,
541                 .proc_handler   = sysctl_latencytop,
542         },
543 #endif
544 #ifdef CONFIG_BLK_DEV_INITRD
545         {
546                 .procname       = "real-root-dev",
547                 .data           = &real_root_dev,
548                 .maxlen         = sizeof(int),
549                 .mode           = 0644,
550                 .proc_handler   = proc_dointvec,
551         },
552 #endif
553         {
554                 .procname       = "print-fatal-signals",
555                 .data           = &print_fatal_signals,
556                 .maxlen         = sizeof(int),
557                 .mode           = 0644,
558                 .proc_handler   = proc_dointvec,
559         },
560 #ifdef CONFIG_SPARC
561         {
562                 .procname       = "reboot-cmd",
563                 .data           = reboot_command,
564                 .maxlen         = 256,
565                 .mode           = 0644,
566                 .proc_handler   = proc_dostring,
567         },
568         {
569                 .procname       = "stop-a",
570                 .data           = &stop_a_enabled,
571                 .maxlen         = sizeof (int),
572                 .mode           = 0644,
573                 .proc_handler   = proc_dointvec,
574         },
575         {
576                 .procname       = "scons-poweroff",
577                 .data           = &scons_pwroff,
578                 .maxlen         = sizeof (int),
579                 .mode           = 0644,
580                 .proc_handler   = proc_dointvec,
581         },
582 #endif
583 #ifdef CONFIG_SPARC64
584         {
585                 .procname       = "tsb-ratio",
586                 .data           = &sysctl_tsb_ratio,
587                 .maxlen         = sizeof (int),
588                 .mode           = 0644,
589                 .proc_handler   = proc_dointvec,
590         },
591 #endif
592 #ifdef __hppa__
593         {
594                 .procname       = "soft-power",
595                 .data           = &pwrsw_enabled,
596                 .maxlen         = sizeof (int),
597                 .mode           = 0644,
598                 .proc_handler   = proc_dointvec,
599         },
600 #endif
601 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
602         {
603                 .procname       = "unaligned-trap",
604                 .data           = &unaligned_enabled,
605                 .maxlen         = sizeof (int),
606                 .mode           = 0644,
607                 .proc_handler   = proc_dointvec,
608         },
609 #endif
610         {
611                 .procname       = "ctrl-alt-del",
612                 .data           = &C_A_D,
613                 .maxlen         = sizeof(int),
614                 .mode           = 0644,
615                 .proc_handler   = proc_dointvec,
616         },
617 #ifdef CONFIG_FUNCTION_TRACER
618         {
619                 .procname       = "ftrace_enabled",
620                 .data           = &ftrace_enabled,
621                 .maxlen         = sizeof(int),
622                 .mode           = 0644,
623                 .proc_handler   = ftrace_enable_sysctl,
624         },
625 #endif
626 #ifdef CONFIG_STACK_TRACER
627         {
628                 .procname       = "stack_tracer_enabled",
629                 .data           = &stack_tracer_enabled,
630                 .maxlen         = sizeof(int),
631                 .mode           = 0644,
632                 .proc_handler   = stack_trace_sysctl,
633         },
634 #endif
635 #ifdef CONFIG_TRACING
636         {
637                 .procname       = "ftrace_dump_on_oops",
638                 .data           = &ftrace_dump_on_oops,
639                 .maxlen         = sizeof(int),
640                 .mode           = 0644,
641                 .proc_handler   = proc_dointvec,
642         },
643         {
644                 .procname       = "traceoff_on_warning",
645                 .data           = &__disable_trace_on_warning,
646                 .maxlen         = sizeof(__disable_trace_on_warning),
647                 .mode           = 0644,
648                 .proc_handler   = proc_dointvec,
649         },
650         {
651                 .procname       = "tracepoint_printk",
652                 .data           = &tracepoint_printk,
653                 .maxlen         = sizeof(tracepoint_printk),
654                 .mode           = 0644,
655                 .proc_handler   = tracepoint_printk_sysctl,
656         },
657 #endif
658 #ifdef CONFIG_KEXEC_CORE
659         {
660                 .procname       = "kexec_load_disabled",
661                 .data           = &kexec_load_disabled,
662                 .maxlen         = sizeof(int),
663                 .mode           = 0644,
664                 /* only handle a transition from default "0" to "1" */
665                 .proc_handler   = proc_dointvec_minmax,
666                 .extra1         = &one,
667                 .extra2         = &one,
668         },
669 #endif
670 #ifdef CONFIG_MODULES
671         {
672                 .procname       = "modprobe",
673                 .data           = &modprobe_path,
674                 .maxlen         = KMOD_PATH_LEN,
675                 .mode           = 0644,
676                 .proc_handler   = proc_dostring,
677         },
678         {
679                 .procname       = "modules_disabled",
680                 .data           = &modules_disabled,
681                 .maxlen         = sizeof(int),
682                 .mode           = 0644,
683                 /* only handle a transition from default "0" to "1" */
684                 .proc_handler   = proc_dointvec_minmax,
685                 .extra1         = &one,
686                 .extra2         = &one,
687         },
688 #endif
689 #ifdef CONFIG_UEVENT_HELPER
690         {
691                 .procname       = "hotplug",
692                 .data           = &uevent_helper,
693                 .maxlen         = UEVENT_HELPER_PATH_LEN,
694                 .mode           = 0644,
695                 .proc_handler   = proc_dostring,
696         },
697 #endif
698 #ifdef CONFIG_CHR_DEV_SG
699         {
700                 .procname       = "sg-big-buff",
701                 .data           = &sg_big_buff,
702                 .maxlen         = sizeof (int),
703                 .mode           = 0444,
704                 .proc_handler   = proc_dointvec,
705         },
706 #endif
707 #ifdef CONFIG_BSD_PROCESS_ACCT
708         {
709                 .procname       = "acct",
710                 .data           = &acct_parm,
711                 .maxlen         = 3*sizeof(int),
712                 .mode           = 0644,
713                 .proc_handler   = proc_dointvec,
714         },
715 #endif
716 #ifdef CONFIG_MAGIC_SYSRQ
717         {
718                 .procname       = "sysrq",
719                 .data           = &__sysrq_enabled,
720                 .maxlen         = sizeof (int),
721                 .mode           = 0644,
722                 .proc_handler   = sysrq_sysctl_handler,
723         },
724 #endif
725 #ifdef CONFIG_PROC_SYSCTL
726         {
727                 .procname       = "cad_pid",
728                 .data           = NULL,
729                 .maxlen         = sizeof (int),
730                 .mode           = 0600,
731                 .proc_handler   = proc_do_cad_pid,
732         },
733 #endif
734         {
735                 .procname       = "threads-max",
736                 .data           = NULL,
737                 .maxlen         = sizeof(int),
738                 .mode           = 0644,
739                 .proc_handler   = sysctl_max_threads,
740         },
741         {
742                 .procname       = "random",
743                 .mode           = 0555,
744                 .child          = random_table,
745         },
746         {
747                 .procname       = "usermodehelper",
748                 .mode           = 0555,
749                 .child          = usermodehelper_table,
750         },
751 #ifdef CONFIG_FW_LOADER_USER_HELPER
752         {
753                 .procname       = "firmware_config",
754                 .mode           = 0555,
755                 .child          = firmware_config_table,
756         },
757 #endif
758         {
759                 .procname       = "overflowuid",
760                 .data           = &overflowuid,
761                 .maxlen         = sizeof(int),
762                 .mode           = 0644,
763                 .proc_handler   = proc_dointvec_minmax,
764                 .extra1         = &minolduid,
765                 .extra2         = &maxolduid,
766         },
767         {
768                 .procname       = "overflowgid",
769                 .data           = &overflowgid,
770                 .maxlen         = sizeof(int),
771                 .mode           = 0644,
772                 .proc_handler   = proc_dointvec_minmax,
773                 .extra1         = &minolduid,
774                 .extra2         = &maxolduid,
775         },
776 #ifdef CONFIG_S390
777 #ifdef CONFIG_MATHEMU
778         {
779                 .procname       = "ieee_emulation_warnings",
780                 .data           = &sysctl_ieee_emulation_warnings,
781                 .maxlen         = sizeof(int),
782                 .mode           = 0644,
783                 .proc_handler   = proc_dointvec,
784         },
785 #endif
786         {
787                 .procname       = "userprocess_debug",
788                 .data           = &show_unhandled_signals,
789                 .maxlen         = sizeof(int),
790                 .mode           = 0644,
791                 .proc_handler   = proc_dointvec,
792         },
793 #endif
794         {
795                 .procname       = "pid_max",
796                 .data           = &pid_max,
797                 .maxlen         = sizeof (int),
798                 .mode           = 0644,
799                 .proc_handler   = proc_dointvec_minmax,
800                 .extra1         = &pid_max_min,
801                 .extra2         = &pid_max_max,
802         },
803         {
804                 .procname       = "panic_on_oops",
805                 .data           = &panic_on_oops,
806                 .maxlen         = sizeof(int),
807                 .mode           = 0644,
808                 .proc_handler   = proc_dointvec,
809         },
810 #if defined CONFIG_PRINTK
811         {
812                 .procname       = "printk",
813                 .data           = &console_loglevel,
814                 .maxlen         = 4*sizeof(int),
815                 .mode           = 0644,
816                 .proc_handler   = proc_dointvec,
817         },
818         {
819                 .procname       = "printk_ratelimit",
820                 .data           = &printk_ratelimit_state.interval,
821                 .maxlen         = sizeof(int),
822                 .mode           = 0644,
823                 .proc_handler   = proc_dointvec_jiffies,
824         },
825         {
826                 .procname       = "printk_ratelimit_burst",
827                 .data           = &printk_ratelimit_state.burst,
828                 .maxlen         = sizeof(int),
829                 .mode           = 0644,
830                 .proc_handler   = proc_dointvec,
831         },
832         {
833                 .procname       = "printk_delay",
834                 .data           = &printk_delay_msec,
835                 .maxlen         = sizeof(int),
836                 .mode           = 0644,
837                 .proc_handler   = proc_dointvec_minmax,
838                 .extra1         = &zero,
839                 .extra2         = &ten_thousand,
840         },
841         {
842                 .procname       = "printk_devkmsg",
843                 .data           = devkmsg_log_str,
844                 .maxlen         = DEVKMSG_STR_MAX_SIZE,
845                 .mode           = 0644,
846                 .proc_handler   = devkmsg_sysctl_set_loglvl,
847         },
848         {
849                 .procname       = "dmesg_restrict",
850                 .data           = &dmesg_restrict,
851                 .maxlen         = sizeof(int),
852                 .mode           = 0644,
853                 .proc_handler   = proc_dointvec_minmax_sysadmin,
854                 .extra1         = &zero,
855                 .extra2         = &one,
856         },
857         {
858                 .procname       = "kptr_restrict",
859                 .data           = &kptr_restrict,
860                 .maxlen         = sizeof(int),
861                 .mode           = 0644,
862                 .proc_handler   = proc_dointvec_minmax_sysadmin,
863                 .extra1         = &zero,
864                 .extra2         = &two,
865         },
866 #endif
867         {
868                 .procname       = "ngroups_max",
869                 .data           = &ngroups_max,
870                 .maxlen         = sizeof (int),
871                 .mode           = 0444,
872                 .proc_handler   = proc_dointvec,
873         },
874         {
875                 .procname       = "cap_last_cap",
876                 .data           = (void *)&cap_last_cap,
877                 .maxlen         = sizeof(int),
878                 .mode           = 0444,
879                 .proc_handler   = proc_dointvec,
880         },
881 #if defined(CONFIG_LOCKUP_DETECTOR)
882         {
883                 .procname       = "watchdog",
884                 .data           = &watchdog_user_enabled,
885                 .maxlen         = sizeof(int),
886                 .mode           = 0644,
887                 .proc_handler   = proc_watchdog,
888                 .extra1         = &zero,
889                 .extra2         = &one,
890         },
891         {
892                 .procname       = "watchdog_thresh",
893                 .data           = &watchdog_thresh,
894                 .maxlen         = sizeof(int),
895                 .mode           = 0644,
896                 .proc_handler   = proc_watchdog_thresh,
897                 .extra1         = &zero,
898                 .extra2         = &sixty,
899         },
900         {
901                 .procname       = "nmi_watchdog",
902                 .data           = &nmi_watchdog_user_enabled,
903                 .maxlen         = sizeof(int),
904                 .mode           = NMI_WATCHDOG_SYSCTL_PERM,
905                 .proc_handler   = proc_nmi_watchdog,
906                 .extra1         = &zero,
907                 .extra2         = &one,
908         },
909         {
910                 .procname       = "watchdog_cpumask",
911                 .data           = &watchdog_cpumask_bits,
912                 .maxlen         = NR_CPUS,
913                 .mode           = 0644,
914                 .proc_handler   = proc_watchdog_cpumask,
915         },
916 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
917         {
918                 .procname       = "soft_watchdog",
919                 .data           = &soft_watchdog_user_enabled,
920                 .maxlen         = sizeof(int),
921                 .mode           = 0644,
922                 .proc_handler   = proc_soft_watchdog,
923                 .extra1         = &zero,
924                 .extra2         = &one,
925         },
926         {
927                 .procname       = "softlockup_panic",
928                 .data           = &softlockup_panic,
929                 .maxlen         = sizeof(int),
930                 .mode           = 0644,
931                 .proc_handler   = proc_dointvec_minmax,
932                 .extra1         = &zero,
933                 .extra2         = &one,
934         },
935 #ifdef CONFIG_SMP
936         {
937                 .procname       = "softlockup_all_cpu_backtrace",
938                 .data           = &sysctl_softlockup_all_cpu_backtrace,
939                 .maxlen         = sizeof(int),
940                 .mode           = 0644,
941                 .proc_handler   = proc_dointvec_minmax,
942                 .extra1         = &zero,
943                 .extra2         = &one,
944         },
945 #endif /* CONFIG_SMP */
946 #endif
947 #ifdef CONFIG_HARDLOCKUP_DETECTOR
948         {
949                 .procname       = "hardlockup_panic",
950                 .data           = &hardlockup_panic,
951                 .maxlen         = sizeof(int),
952                 .mode           = 0644,
953                 .proc_handler   = proc_dointvec_minmax,
954                 .extra1         = &zero,
955                 .extra2         = &one,
956         },
957 #ifdef CONFIG_SMP
958         {
959                 .procname       = "hardlockup_all_cpu_backtrace",
960                 .data           = &sysctl_hardlockup_all_cpu_backtrace,
961                 .maxlen         = sizeof(int),
962                 .mode           = 0644,
963                 .proc_handler   = proc_dointvec_minmax,
964                 .extra1         = &zero,
965                 .extra2         = &one,
966         },
967 #endif /* CONFIG_SMP */
968 #endif
969 #endif
970
971 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
972         {
973                 .procname       = "unknown_nmi_panic",
974                 .data           = &unknown_nmi_panic,
975                 .maxlen         = sizeof (int),
976                 .mode           = 0644,
977                 .proc_handler   = proc_dointvec,
978         },
979 #endif
980 #if defined(CONFIG_X86)
981         {
982                 .procname       = "panic_on_unrecovered_nmi",
983                 .data           = &panic_on_unrecovered_nmi,
984                 .maxlen         = sizeof(int),
985                 .mode           = 0644,
986                 .proc_handler   = proc_dointvec,
987         },
988         {
989                 .procname       = "panic_on_io_nmi",
990                 .data           = &panic_on_io_nmi,
991                 .maxlen         = sizeof(int),
992                 .mode           = 0644,
993                 .proc_handler   = proc_dointvec,
994         },
995 #ifdef CONFIG_DEBUG_STACKOVERFLOW
996         {
997                 .procname       = "panic_on_stackoverflow",
998                 .data           = &sysctl_panic_on_stackoverflow,
999                 .maxlen         = sizeof(int),
1000                 .mode           = 0644,
1001                 .proc_handler   = proc_dointvec,
1002         },
1003 #endif
1004         {
1005                 .procname       = "bootloader_type",
1006                 .data           = &bootloader_type,
1007                 .maxlen         = sizeof (int),
1008                 .mode           = 0444,
1009                 .proc_handler   = proc_dointvec,
1010         },
1011         {
1012                 .procname       = "bootloader_version",
1013                 .data           = &bootloader_version,
1014                 .maxlen         = sizeof (int),
1015                 .mode           = 0444,
1016                 .proc_handler   = proc_dointvec,
1017         },
1018         {
1019                 .procname       = "io_delay_type",
1020                 .data           = &io_delay_type,
1021                 .maxlen         = sizeof(int),
1022                 .mode           = 0644,
1023                 .proc_handler   = proc_dointvec,
1024         },
1025 #endif
1026 #if defined(CONFIG_MMU)
1027         {
1028                 .procname       = "randomize_va_space",
1029                 .data           = &randomize_va_space,
1030                 .maxlen         = sizeof(int),
1031                 .mode           = 0644,
1032                 .proc_handler   = proc_dointvec,
1033         },
1034 #endif
1035 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
1036         {
1037                 .procname       = "spin_retry",
1038                 .data           = &spin_retry,
1039                 .maxlen         = sizeof (int),
1040                 .mode           = 0644,
1041                 .proc_handler   = proc_dointvec,
1042         },
1043 #endif
1044 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
1045         {
1046                 .procname       = "acpi_video_flags",
1047                 .data           = &acpi_realmode_flags,
1048                 .maxlen         = sizeof (unsigned long),
1049                 .mode           = 0644,
1050                 .proc_handler   = proc_doulongvec_minmax,
1051         },
1052 #endif
1053 #ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
1054         {
1055                 .procname       = "ignore-unaligned-usertrap",
1056                 .data           = &no_unaligned_warning,
1057                 .maxlen         = sizeof (int),
1058                 .mode           = 0644,
1059                 .proc_handler   = proc_dointvec,
1060         },
1061 #endif
1062 #ifdef CONFIG_IA64
1063         {
1064                 .procname       = "unaligned-dump-stack",
1065                 .data           = &unaligned_dump_stack,
1066                 .maxlen         = sizeof (int),
1067                 .mode           = 0644,
1068                 .proc_handler   = proc_dointvec,
1069         },
1070 #endif
1071 #ifdef CONFIG_DETECT_HUNG_TASK
1072         {
1073                 .procname       = "hung_task_panic",
1074                 .data           = &sysctl_hung_task_panic,
1075                 .maxlen         = sizeof(int),
1076                 .mode           = 0644,
1077                 .proc_handler   = proc_dointvec_minmax,
1078                 .extra1         = &zero,
1079                 .extra2         = &one,
1080         },
1081         {
1082                 .procname       = "hung_task_check_count",
1083                 .data           = &sysctl_hung_task_check_count,
1084                 .maxlen         = sizeof(int),
1085                 .mode           = 0644,
1086                 .proc_handler   = proc_dointvec_minmax,
1087                 .extra1         = &zero,
1088         },
1089         {
1090                 .procname       = "hung_task_timeout_secs",
1091                 .data           = &sysctl_hung_task_timeout_secs,
1092                 .maxlen         = sizeof(unsigned long),
1093                 .mode           = 0644,
1094                 .proc_handler   = proc_dohung_task_timeout_secs,
1095                 .extra2         = &hung_task_timeout_max,
1096         },
1097         {
1098                 .procname       = "hung_task_check_interval_secs",
1099                 .data           = &sysctl_hung_task_check_interval_secs,
1100                 .maxlen         = sizeof(unsigned long),
1101                 .mode           = 0644,
1102                 .proc_handler   = proc_dohung_task_timeout_secs,
1103                 .extra2         = &hung_task_timeout_max,
1104         },
1105         {
1106                 .procname       = "hung_task_warnings",
1107                 .data           = &sysctl_hung_task_warnings,
1108                 .maxlen         = sizeof(int),
1109                 .mode           = 0644,
1110                 .proc_handler   = proc_dointvec_minmax,
1111                 .extra1         = &neg_one,
1112         },
1113 #endif
1114 #ifdef CONFIG_RT_MUTEXES
1115         {
1116                 .procname       = "max_lock_depth",
1117                 .data           = &max_lock_depth,
1118                 .maxlen         = sizeof(int),
1119                 .mode           = 0644,
1120                 .proc_handler   = proc_dointvec,
1121         },
1122 #endif
1123         {
1124                 .procname       = "poweroff_cmd",
1125                 .data           = &poweroff_cmd,
1126                 .maxlen         = POWEROFF_CMD_PATH_LEN,
1127                 .mode           = 0644,
1128                 .proc_handler   = proc_dostring,
1129         },
1130 #ifdef CONFIG_KEYS
1131         {
1132                 .procname       = "keys",
1133                 .mode           = 0555,
1134                 .child          = key_sysctls,
1135         },
1136 #endif
1137 #ifdef CONFIG_PERF_EVENTS
1138         /*
1139          * User-space scripts rely on the existence of this file
1140          * as a feature check for perf_events being enabled.
1141          *
1142          * So it's an ABI, do not remove!
1143          */
1144         {
1145                 .procname       = "perf_event_paranoid",
1146                 .data           = &sysctl_perf_event_paranoid,
1147                 .maxlen         = sizeof(sysctl_perf_event_paranoid),
1148                 .mode           = 0644,
1149                 .proc_handler   = proc_dointvec,
1150         },
1151         {
1152                 .procname       = "perf_event_mlock_kb",
1153                 .data           = &sysctl_perf_event_mlock,
1154                 .maxlen         = sizeof(sysctl_perf_event_mlock),
1155                 .mode           = 0644,
1156                 .proc_handler   = proc_dointvec,
1157         },
1158         {
1159                 .procname       = "perf_event_max_sample_rate",
1160                 .data           = &sysctl_perf_event_sample_rate,
1161                 .maxlen         = sizeof(sysctl_perf_event_sample_rate),
1162                 .mode           = 0644,
1163                 .proc_handler   = perf_proc_update_handler,
1164                 .extra1         = &one,
1165         },
1166         {
1167                 .procname       = "perf_cpu_time_max_percent",
1168                 .data           = &sysctl_perf_cpu_time_max_percent,
1169                 .maxlen         = sizeof(sysctl_perf_cpu_time_max_percent),
1170                 .mode           = 0644,
1171                 .proc_handler   = perf_cpu_time_max_percent_handler,
1172                 .extra1         = &zero,
1173                 .extra2         = &one_hundred,
1174         },
1175         {
1176                 .procname       = "perf_event_max_stack",
1177                 .data           = &sysctl_perf_event_max_stack,
1178                 .maxlen         = sizeof(sysctl_perf_event_max_stack),
1179                 .mode           = 0644,
1180                 .proc_handler   = perf_event_max_stack_handler,
1181                 .extra1         = &zero,
1182                 .extra2         = &six_hundred_forty_kb,
1183         },
1184         {
1185                 .procname       = "perf_event_max_contexts_per_stack",
1186                 .data           = &sysctl_perf_event_max_contexts_per_stack,
1187                 .maxlen         = sizeof(sysctl_perf_event_max_contexts_per_stack),
1188                 .mode           = 0644,
1189                 .proc_handler   = perf_event_max_stack_handler,
1190                 .extra1         = &zero,
1191                 .extra2         = &one_thousand,
1192         },
1193 #endif
1194         {
1195                 .procname       = "panic_on_warn",
1196                 .data           = &panic_on_warn,
1197                 .maxlen         = sizeof(int),
1198                 .mode           = 0644,
1199                 .proc_handler   = proc_dointvec_minmax,
1200                 .extra1         = &zero,
1201                 .extra2         = &one,
1202         },
1203 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1204         {
1205                 .procname       = "timer_migration",
1206                 .data           = &sysctl_timer_migration,
1207                 .maxlen         = sizeof(unsigned int),
1208                 .mode           = 0644,
1209                 .proc_handler   = timer_migration_handler,
1210                 .extra1         = &zero,
1211                 .extra2         = &one,
1212         },
1213 #endif
1214 #ifdef CONFIG_BPF_SYSCALL
1215         {
1216                 .procname       = "unprivileged_bpf_disabled",
1217                 .data           = &sysctl_unprivileged_bpf_disabled,
1218                 .maxlen         = sizeof(sysctl_unprivileged_bpf_disabled),
1219                 .mode           = 0644,
1220                 /* only handle a transition from default "0" to "1" */
1221                 .proc_handler   = proc_dointvec_minmax,
1222                 .extra1         = &one,
1223                 .extra2         = &one,
1224         },
1225 #endif
1226 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
1227         {
1228                 .procname       = "panic_on_rcu_stall",
1229                 .data           = &sysctl_panic_on_rcu_stall,
1230                 .maxlen         = sizeof(sysctl_panic_on_rcu_stall),
1231                 .mode           = 0644,
1232                 .proc_handler   = proc_dointvec_minmax,
1233                 .extra1         = &zero,
1234                 .extra2         = &one,
1235         },
1236 #endif
1237 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
1238         {
1239                 .procname       = "stack_erasing",
1240                 .data           = NULL,
1241                 .maxlen         = sizeof(int),
1242                 .mode           = 0600,
1243                 .proc_handler   = stack_erasing_sysctl,
1244                 .extra1         = &zero,
1245                 .extra2         = &one,
1246         },
1247 #endif
1248         { }
1249 };
1250
1251 static struct ctl_table vm_table[] = {
1252         {
1253                 .procname       = "overcommit_memory",
1254                 .data           = &sysctl_overcommit_memory,
1255                 .maxlen         = sizeof(sysctl_overcommit_memory),
1256                 .mode           = 0644,
1257                 .proc_handler   = proc_dointvec_minmax,
1258                 .extra1         = &zero,
1259                 .extra2         = &two,
1260         },
1261         {
1262                 .procname       = "panic_on_oom",
1263                 .data           = &sysctl_panic_on_oom,
1264                 .maxlen         = sizeof(sysctl_panic_on_oom),
1265                 .mode           = 0644,
1266                 .proc_handler   = proc_dointvec_minmax,
1267                 .extra1         = &zero,
1268                 .extra2         = &two,
1269         },
1270         {
1271                 .procname       = "oom_kill_allocating_task",
1272                 .data           = &sysctl_oom_kill_allocating_task,
1273                 .maxlen         = sizeof(sysctl_oom_kill_allocating_task),
1274                 .mode           = 0644,
1275                 .proc_handler   = proc_dointvec,
1276         },
1277         {
1278                 .procname       = "oom_dump_tasks",
1279                 .data           = &sysctl_oom_dump_tasks,
1280                 .maxlen         = sizeof(sysctl_oom_dump_tasks),
1281                 .mode           = 0644,
1282                 .proc_handler   = proc_dointvec,
1283         },
1284         {
1285                 .procname       = "overcommit_ratio",
1286                 .data           = &sysctl_overcommit_ratio,
1287                 .maxlen         = sizeof(sysctl_overcommit_ratio),
1288                 .mode           = 0644,
1289                 .proc_handler   = overcommit_ratio_handler,
1290         },
1291         {
1292                 .procname       = "overcommit_kbytes",
1293                 .data           = &sysctl_overcommit_kbytes,
1294                 .maxlen         = sizeof(sysctl_overcommit_kbytes),
1295                 .mode           = 0644,
1296                 .proc_handler   = overcommit_kbytes_handler,
1297         },
1298         {
1299                 .procname       = "page-cluster", 
1300                 .data           = &page_cluster,
1301                 .maxlen         = sizeof(int),
1302                 .mode           = 0644,
1303                 .proc_handler   = proc_dointvec_minmax,
1304                 .extra1         = &zero,
1305         },
1306         {
1307                 .procname       = "dirty_background_ratio",
1308                 .data           = &dirty_background_ratio,
1309                 .maxlen         = sizeof(dirty_background_ratio),
1310                 .mode           = 0644,
1311                 .proc_handler   = dirty_background_ratio_handler,
1312                 .extra1         = &zero,
1313                 .extra2         = &one_hundred,
1314         },
1315         {
1316                 .procname       = "dirty_background_bytes",
1317                 .data           = &dirty_background_bytes,
1318                 .maxlen         = sizeof(dirty_background_bytes),
1319                 .mode           = 0644,
1320                 .proc_handler   = dirty_background_bytes_handler,
1321                 .extra1         = &one_ul,
1322         },
1323         {
1324                 .procname       = "dirty_ratio",
1325                 .data           = &vm_dirty_ratio,
1326                 .maxlen         = sizeof(vm_dirty_ratio),
1327                 .mode           = 0644,
1328                 .proc_handler   = dirty_ratio_handler,
1329                 .extra1         = &zero,
1330                 .extra2         = &one_hundred,
1331         },
1332         {
1333                 .procname       = "dirty_bytes",
1334                 .data           = &vm_dirty_bytes,
1335                 .maxlen         = sizeof(vm_dirty_bytes),
1336                 .mode           = 0644,
1337                 .proc_handler   = dirty_bytes_handler,
1338                 .extra1         = &dirty_bytes_min,
1339         },
1340         {
1341                 .procname       = "dirty_writeback_centisecs",
1342                 .data           = &dirty_writeback_interval,
1343                 .maxlen         = sizeof(dirty_writeback_interval),
1344                 .mode           = 0644,
1345                 .proc_handler   = dirty_writeback_centisecs_handler,
1346         },
1347         {
1348                 .procname       = "dirty_expire_centisecs",
1349                 .data           = &dirty_expire_interval,
1350                 .maxlen         = sizeof(dirty_expire_interval),
1351                 .mode           = 0644,
1352                 .proc_handler   = proc_dointvec_minmax,
1353                 .extra1         = &zero,
1354         },
1355         {
1356                 .procname       = "dirtytime_expire_seconds",
1357                 .data           = &dirtytime_expire_interval,
1358                 .maxlen         = sizeof(dirtytime_expire_interval),
1359                 .mode           = 0644,
1360                 .proc_handler   = dirtytime_interval_handler,
1361                 .extra1         = &zero,
1362         },
1363         {
1364                 .procname       = "swappiness",
1365                 .data           = &vm_swappiness,
1366                 .maxlen         = sizeof(vm_swappiness),
1367                 .mode           = 0644,
1368                 .proc_handler   = proc_dointvec_minmax,
1369                 .extra1         = &zero,
1370                 .extra2         = &one_hundred,
1371         },
1372 #ifdef CONFIG_HUGETLB_PAGE
1373         {
1374                 .procname       = "nr_hugepages",
1375                 .data           = NULL,
1376                 .maxlen         = sizeof(unsigned long),
1377                 .mode           = 0644,
1378                 .proc_handler   = hugetlb_sysctl_handler,
1379         },
1380 #ifdef CONFIG_NUMA
1381         {
1382                 .procname       = "nr_hugepages_mempolicy",
1383                 .data           = NULL,
1384                 .maxlen         = sizeof(unsigned long),
1385                 .mode           = 0644,
1386                 .proc_handler   = &hugetlb_mempolicy_sysctl_handler,
1387         },
1388         {
1389                 .procname               = "numa_stat",
1390                 .data                   = &sysctl_vm_numa_stat,
1391                 .maxlen                 = sizeof(int),
1392                 .mode                   = 0644,
1393                 .proc_handler   = sysctl_vm_numa_stat_handler,
1394                 .extra1                 = &zero,
1395                 .extra2                 = &one,
1396         },
1397 #endif
1398          {
1399                 .procname       = "hugetlb_shm_group",
1400                 .data           = &sysctl_hugetlb_shm_group,
1401                 .maxlen         = sizeof(gid_t),
1402                 .mode           = 0644,
1403                 .proc_handler   = proc_dointvec,
1404          },
1405         {
1406                 .procname       = "nr_overcommit_hugepages",
1407                 .data           = NULL,
1408                 .maxlen         = sizeof(unsigned long),
1409                 .mode           = 0644,
1410                 .proc_handler   = hugetlb_overcommit_handler,
1411         },
1412 #endif
1413         {
1414                 .procname       = "lowmem_reserve_ratio",
1415                 .data           = &sysctl_lowmem_reserve_ratio,
1416                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
1417                 .mode           = 0644,
1418                 .proc_handler   = lowmem_reserve_ratio_sysctl_handler,
1419         },
1420         {
1421                 .procname       = "drop_caches",
1422                 .data           = &sysctl_drop_caches,
1423                 .maxlen         = sizeof(int),
1424                 .mode           = 0644,
1425                 .proc_handler   = drop_caches_sysctl_handler,
1426                 .extra1         = &one,
1427                 .extra2         = &four,
1428         },
1429 #ifdef CONFIG_COMPACTION
1430         {
1431                 .procname       = "compact_memory",
1432                 .data           = &sysctl_compact_memory,
1433                 .maxlen         = sizeof(int),
1434                 .mode           = 0200,
1435                 .proc_handler   = sysctl_compaction_handler,
1436         },
1437         {
1438                 .procname       = "extfrag_threshold",
1439                 .data           = &sysctl_extfrag_threshold,
1440                 .maxlen         = sizeof(int),
1441                 .mode           = 0644,
1442                 .proc_handler   = sysctl_extfrag_handler,
1443                 .extra1         = &min_extfrag_threshold,
1444                 .extra2         = &max_extfrag_threshold,
1445         },
1446         {
1447                 .procname       = "compact_unevictable_allowed",
1448                 .data           = &sysctl_compact_unevictable_allowed,
1449                 .maxlen         = sizeof(int),
1450                 .mode           = 0644,
1451                 .proc_handler   = proc_dointvec,
1452                 .extra1         = &zero,
1453                 .extra2         = &one,
1454         },
1455
1456 #endif /* CONFIG_COMPACTION */
1457         {
1458                 .procname       = "min_free_kbytes",
1459                 .data           = &min_free_kbytes,
1460                 .maxlen         = sizeof(min_free_kbytes),
1461                 .mode           = 0644,
1462                 .proc_handler   = min_free_kbytes_sysctl_handler,
1463                 .extra1         = &zero,
1464         },
1465         {
1466                 .procname       = "watermark_scale_factor",
1467                 .data           = &watermark_scale_factor,
1468                 .maxlen         = sizeof(watermark_scale_factor),
1469                 .mode           = 0644,
1470                 .proc_handler   = watermark_scale_factor_sysctl_handler,
1471                 .extra1         = &one,
1472                 .extra2         = &one_thousand,
1473         },
1474         {
1475                 .procname       = "percpu_pagelist_fraction",
1476                 .data           = &percpu_pagelist_fraction,
1477                 .maxlen         = sizeof(percpu_pagelist_fraction),
1478                 .mode           = 0644,
1479                 .proc_handler   = percpu_pagelist_fraction_sysctl_handler,
1480                 .extra1         = &zero,
1481         },
1482 #ifdef CONFIG_MMU
1483         {
1484                 .procname       = "max_map_count",
1485                 .data           = &sysctl_max_map_count,
1486                 .maxlen         = sizeof(sysctl_max_map_count),
1487                 .mode           = 0644,
1488                 .proc_handler   = proc_dointvec_minmax,
1489                 .extra1         = &zero,
1490         },
1491 #else
1492         {
1493                 .procname       = "nr_trim_pages",
1494                 .data           = &sysctl_nr_trim_pages,
1495                 .maxlen         = sizeof(sysctl_nr_trim_pages),
1496                 .mode           = 0644,
1497                 .proc_handler   = proc_dointvec_minmax,
1498                 .extra1         = &zero,
1499         },
1500 #endif
1501         {
1502                 .procname       = "laptop_mode",
1503                 .data           = &laptop_mode,
1504                 .maxlen         = sizeof(laptop_mode),
1505                 .mode           = 0644,
1506                 .proc_handler   = proc_dointvec_jiffies,
1507         },
1508         {
1509                 .procname       = "block_dump",
1510                 .data           = &block_dump,
1511                 .maxlen         = sizeof(block_dump),
1512                 .mode           = 0644,
1513                 .proc_handler   = proc_dointvec,
1514                 .extra1         = &zero,
1515         },
1516         {
1517                 .procname       = "vfs_cache_pressure",
1518                 .data           = &sysctl_vfs_cache_pressure,
1519                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
1520                 .mode           = 0644,
1521                 .proc_handler   = proc_dointvec,
1522                 .extra1         = &zero,
1523         },
1524 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
1525         {
1526                 .procname       = "legacy_va_layout",
1527                 .data           = &sysctl_legacy_va_layout,
1528                 .maxlen         = sizeof(sysctl_legacy_va_layout),
1529                 .mode           = 0644,
1530                 .proc_handler   = proc_dointvec,
1531                 .extra1         = &zero,
1532         },
1533 #endif
1534 #ifdef CONFIG_NUMA
1535         {
1536                 .procname       = "zone_reclaim_mode",
1537                 .data           = &node_reclaim_mode,
1538                 .maxlen         = sizeof(node_reclaim_mode),
1539                 .mode           = 0644,
1540                 .proc_handler   = proc_dointvec,
1541                 .extra1         = &zero,
1542         },
1543         {
1544                 .procname       = "min_unmapped_ratio",
1545                 .data           = &sysctl_min_unmapped_ratio,
1546                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
1547                 .mode           = 0644,
1548                 .proc_handler   = sysctl_min_unmapped_ratio_sysctl_handler,
1549                 .extra1         = &zero,
1550                 .extra2         = &one_hundred,
1551         },
1552         {
1553                 .procname       = "min_slab_ratio",
1554                 .data           = &sysctl_min_slab_ratio,
1555                 .maxlen         = sizeof(sysctl_min_slab_ratio),
1556                 .mode           = 0644,
1557                 .proc_handler   = sysctl_min_slab_ratio_sysctl_handler,
1558                 .extra1         = &zero,
1559                 .extra2         = &one_hundred,
1560         },
1561 #endif
1562 #ifdef CONFIG_SMP
1563         {
1564                 .procname       = "stat_interval",
1565                 .data           = &sysctl_stat_interval,
1566                 .maxlen         = sizeof(sysctl_stat_interval),
1567                 .mode           = 0644,
1568                 .proc_handler   = proc_dointvec_jiffies,
1569         },
1570         {
1571                 .procname       = "stat_refresh",
1572                 .data           = NULL,
1573                 .maxlen         = 0,
1574                 .mode           = 0600,
1575                 .proc_handler   = vmstat_refresh,
1576         },
1577 #endif
1578 #ifdef CONFIG_MMU
1579         {
1580                 .procname       = "mmap_min_addr",
1581                 .data           = &dac_mmap_min_addr,
1582                 .maxlen         = sizeof(unsigned long),
1583                 .mode           = 0644,
1584                 .proc_handler   = mmap_min_addr_handler,
1585         },
1586 #endif
1587 #ifdef CONFIG_NUMA
1588         {
1589                 .procname       = "numa_zonelist_order",
1590                 .data           = &numa_zonelist_order,
1591                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1592                 .mode           = 0644,
1593                 .proc_handler   = numa_zonelist_order_handler,
1594         },
1595 #endif
1596 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1597    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1598         {
1599                 .procname       = "vdso_enabled",
1600 #ifdef CONFIG_X86_32
1601                 .data           = &vdso32_enabled,
1602                 .maxlen         = sizeof(vdso32_enabled),
1603 #else
1604                 .data           = &vdso_enabled,
1605                 .maxlen         = sizeof(vdso_enabled),
1606 #endif
1607                 .mode           = 0644,
1608                 .proc_handler   = proc_dointvec,
1609                 .extra1         = &zero,
1610         },
1611 #endif
1612 #ifdef CONFIG_HIGHMEM
1613         {
1614                 .procname       = "highmem_is_dirtyable",
1615                 .data           = &vm_highmem_is_dirtyable,
1616                 .maxlen         = sizeof(vm_highmem_is_dirtyable),
1617                 .mode           = 0644,
1618                 .proc_handler   = proc_dointvec_minmax,
1619                 .extra1         = &zero,
1620                 .extra2         = &one,
1621         },
1622 #endif
1623 #ifdef CONFIG_MEMORY_FAILURE
1624         {
1625                 .procname       = "memory_failure_early_kill",
1626                 .data           = &sysctl_memory_failure_early_kill,
1627                 .maxlen         = sizeof(sysctl_memory_failure_early_kill),
1628                 .mode           = 0644,
1629                 .proc_handler   = proc_dointvec_minmax,
1630                 .extra1         = &zero,
1631                 .extra2         = &one,
1632         },
1633         {
1634                 .procname       = "memory_failure_recovery",
1635                 .data           = &sysctl_memory_failure_recovery,
1636                 .maxlen         = sizeof(sysctl_memory_failure_recovery),
1637                 .mode           = 0644,
1638                 .proc_handler   = proc_dointvec_minmax,
1639                 .extra1         = &zero,
1640                 .extra2         = &one,
1641         },
1642 #endif
1643         {
1644                 .procname       = "user_reserve_kbytes",
1645                 .data           = &sysctl_user_reserve_kbytes,
1646                 .maxlen         = sizeof(sysctl_user_reserve_kbytes),
1647                 .mode           = 0644,
1648                 .proc_handler   = proc_doulongvec_minmax,
1649         },
1650         {
1651                 .procname       = "admin_reserve_kbytes",
1652                 .data           = &sysctl_admin_reserve_kbytes,
1653                 .maxlen         = sizeof(sysctl_admin_reserve_kbytes),
1654                 .mode           = 0644,
1655                 .proc_handler   = proc_doulongvec_minmax,
1656         },
1657 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
1658         {
1659                 .procname       = "mmap_rnd_bits",
1660                 .data           = &mmap_rnd_bits,
1661                 .maxlen         = sizeof(mmap_rnd_bits),
1662                 .mode           = 0600,
1663                 .proc_handler   = proc_dointvec_minmax,
1664                 .extra1         = (void *)&mmap_rnd_bits_min,
1665                 .extra2         = (void *)&mmap_rnd_bits_max,
1666         },
1667 #endif
1668 #ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
1669         {
1670                 .procname       = "mmap_rnd_compat_bits",
1671                 .data           = &mmap_rnd_compat_bits,
1672                 .maxlen         = sizeof(mmap_rnd_compat_bits),
1673                 .mode           = 0600,
1674                 .proc_handler   = proc_dointvec_minmax,
1675                 .extra1         = (void *)&mmap_rnd_compat_bits_min,
1676                 .extra2         = (void *)&mmap_rnd_compat_bits_max,
1677         },
1678 #endif
1679         { }
1680 };
1681
1682 static struct ctl_table fs_table[] = {
1683         {
1684                 .procname       = "inode-nr",
1685                 .data           = &inodes_stat,
1686                 .maxlen         = 2*sizeof(long),
1687                 .mode           = 0444,
1688                 .proc_handler   = proc_nr_inodes,
1689         },
1690         {
1691                 .procname       = "inode-state",
1692                 .data           = &inodes_stat,
1693                 .maxlen         = 7*sizeof(long),
1694                 .mode           = 0444,
1695                 .proc_handler   = proc_nr_inodes,
1696         },
1697         {
1698                 .procname       = "file-nr",
1699                 .data           = &files_stat,
1700                 .maxlen         = sizeof(files_stat),
1701                 .mode           = 0444,
1702                 .proc_handler   = proc_nr_files,
1703         },
1704         {
1705                 .procname       = "file-max",
1706                 .data           = &files_stat.max_files,
1707                 .maxlen         = sizeof(files_stat.max_files),
1708                 .mode           = 0644,
1709                 .proc_handler   = proc_doulongvec_minmax,
1710         },
1711         {
1712                 .procname       = "nr_open",
1713                 .data           = &sysctl_nr_open,
1714                 .maxlen         = sizeof(unsigned int),
1715                 .mode           = 0644,
1716                 .proc_handler   = proc_dointvec_minmax,
1717                 .extra1         = &sysctl_nr_open_min,
1718                 .extra2         = &sysctl_nr_open_max,
1719         },
1720         {
1721                 .procname       = "dentry-state",
1722                 .data           = &dentry_stat,
1723                 .maxlen         = 6*sizeof(long),
1724                 .mode           = 0444,
1725                 .proc_handler   = proc_nr_dentry,
1726         },
1727         {
1728                 .procname       = "overflowuid",
1729                 .data           = &fs_overflowuid,
1730                 .maxlen         = sizeof(int),
1731                 .mode           = 0644,
1732                 .proc_handler   = proc_dointvec_minmax,
1733                 .extra1         = &minolduid,
1734                 .extra2         = &maxolduid,
1735         },
1736         {
1737                 .procname       = "overflowgid",
1738                 .data           = &fs_overflowgid,
1739                 .maxlen         = sizeof(int),
1740                 .mode           = 0644,
1741                 .proc_handler   = proc_dointvec_minmax,
1742                 .extra1         = &minolduid,
1743                 .extra2         = &maxolduid,
1744         },
1745 #ifdef CONFIG_FILE_LOCKING
1746         {
1747                 .procname       = "leases-enable",
1748                 .data           = &leases_enable,
1749                 .maxlen         = sizeof(int),
1750                 .mode           = 0644,
1751                 .proc_handler   = proc_dointvec,
1752         },
1753 #endif
1754 #ifdef CONFIG_DNOTIFY
1755         {
1756                 .procname       = "dir-notify-enable",
1757                 .data           = &dir_notify_enable,
1758                 .maxlen         = sizeof(int),
1759                 .mode           = 0644,
1760                 .proc_handler   = proc_dointvec,
1761         },
1762 #endif
1763 #ifdef CONFIG_MMU
1764 #ifdef CONFIG_FILE_LOCKING
1765         {
1766                 .procname       = "lease-break-time",
1767                 .data           = &lease_break_time,
1768                 .maxlen         = sizeof(int),
1769                 .mode           = 0644,
1770                 .proc_handler   = proc_dointvec,
1771         },
1772 #endif
1773 #ifdef CONFIG_AIO
1774         {
1775                 .procname       = "aio-nr",
1776                 .data           = &aio_nr,
1777                 .maxlen         = sizeof(aio_nr),
1778                 .mode           = 0444,
1779                 .proc_handler   = proc_doulongvec_minmax,
1780         },
1781         {
1782                 .procname       = "aio-max-nr",
1783                 .data           = &aio_max_nr,
1784                 .maxlen         = sizeof(aio_max_nr),
1785                 .mode           = 0644,
1786                 .proc_handler   = proc_doulongvec_minmax,
1787         },
1788 #endif /* CONFIG_AIO */
1789 #ifdef CONFIG_INOTIFY_USER
1790         {
1791                 .procname       = "inotify",
1792                 .mode           = 0555,
1793                 .child          = inotify_table,
1794         },
1795 #endif  
1796 #ifdef CONFIG_EPOLL
1797         {
1798                 .procname       = "epoll",
1799                 .mode           = 0555,
1800                 .child          = epoll_table,
1801         },
1802 #endif
1803 #endif
1804         {
1805                 .procname       = "protected_symlinks",
1806                 .data           = &sysctl_protected_symlinks,
1807                 .maxlen         = sizeof(int),
1808                 .mode           = 0600,
1809                 .proc_handler   = proc_dointvec_minmax,
1810                 .extra1         = &zero,
1811                 .extra2         = &one,
1812         },
1813         {
1814                 .procname       = "protected_hardlinks",
1815                 .data           = &sysctl_protected_hardlinks,
1816                 .maxlen         = sizeof(int),
1817                 .mode           = 0600,
1818                 .proc_handler   = proc_dointvec_minmax,
1819                 .extra1         = &zero,
1820                 .extra2         = &one,
1821         },
1822         {
1823                 .procname       = "protected_fifos",
1824                 .data           = &sysctl_protected_fifos,
1825                 .maxlen         = sizeof(int),
1826                 .mode           = 0600,
1827                 .proc_handler   = proc_dointvec_minmax,
1828                 .extra1         = &zero,
1829                 .extra2         = &two,
1830         },
1831         {
1832                 .procname       = "protected_regular",
1833                 .data           = &sysctl_protected_regular,
1834                 .maxlen         = sizeof(int),
1835                 .mode           = 0600,
1836                 .proc_handler   = proc_dointvec_minmax,
1837                 .extra1         = &zero,
1838                 .extra2         = &two,
1839         },
1840         {
1841                 .procname       = "suid_dumpable",
1842                 .data           = &suid_dumpable,
1843                 .maxlen         = sizeof(int),
1844                 .mode           = 0644,
1845                 .proc_handler   = proc_dointvec_minmax_coredump,
1846                 .extra1         = &zero,
1847                 .extra2         = &two,
1848         },
1849 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1850         {
1851                 .procname       = "binfmt_misc",
1852                 .mode           = 0555,
1853                 .child          = sysctl_mount_point,
1854         },
1855 #endif
1856         {
1857                 .procname       = "pipe-max-size",
1858                 .data           = &pipe_max_size,
1859                 .maxlen         = sizeof(pipe_max_size),
1860                 .mode           = 0644,
1861                 .proc_handler   = proc_dopipe_max_size,
1862         },
1863         {
1864                 .procname       = "pipe-user-pages-hard",
1865                 .data           = &pipe_user_pages_hard,
1866                 .maxlen         = sizeof(pipe_user_pages_hard),
1867                 .mode           = 0644,
1868                 .proc_handler   = proc_doulongvec_minmax,
1869         },
1870         {
1871                 .procname       = "pipe-user-pages-soft",
1872                 .data           = &pipe_user_pages_soft,
1873                 .maxlen         = sizeof(pipe_user_pages_soft),
1874                 .mode           = 0644,
1875                 .proc_handler   = proc_doulongvec_minmax,
1876         },
1877         {
1878                 .procname       = "mount-max",
1879                 .data           = &sysctl_mount_max,
1880                 .maxlen         = sizeof(unsigned int),
1881                 .mode           = 0644,
1882                 .proc_handler   = proc_dointvec_minmax,
1883                 .extra1         = &one,
1884         },
1885         { }
1886 };
1887
1888 static struct ctl_table debug_table[] = {
1889 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
1890         {
1891                 .procname       = "exception-trace",
1892                 .data           = &show_unhandled_signals,
1893                 .maxlen         = sizeof(int),
1894                 .mode           = 0644,
1895                 .proc_handler   = proc_dointvec
1896         },
1897 #endif
1898 #if defined(CONFIG_OPTPROBES)
1899         {
1900                 .procname       = "kprobes-optimization",
1901                 .data           = &sysctl_kprobes_optimization,
1902                 .maxlen         = sizeof(int),
1903                 .mode           = 0644,
1904                 .proc_handler   = proc_kprobes_optimization_handler,
1905                 .extra1         = &zero,
1906                 .extra2         = &one,
1907         },
1908 #endif
1909         { }
1910 };
1911
1912 static struct ctl_table dev_table[] = {
1913         { }
1914 };
1915
1916 int __init sysctl_init(void)
1917 {
1918         struct ctl_table_header *hdr;
1919
1920         hdr = register_sysctl_table(sysctl_base_table);
1921         kmemleak_not_leak(hdr);
1922         return 0;
1923 }
1924
1925 #endif /* CONFIG_SYSCTL */
1926
1927 /*
1928  * /proc/sys support
1929  */
1930
1931 #ifdef CONFIG_PROC_SYSCTL
1932
1933 static int _proc_do_string(char *data, int maxlen, int write,
1934                            char __user *buffer,
1935                            size_t *lenp, loff_t *ppos)
1936 {
1937         size_t len;
1938         char __user *p;
1939         char c;
1940
1941         if (!data || !maxlen || !*lenp) {
1942                 *lenp = 0;
1943                 return 0;
1944         }
1945
1946         if (write) {
1947                 if (sysctl_writes_strict == SYSCTL_WRITES_STRICT) {
1948                         /* Only continue writes not past the end of buffer. */
1949                         len = strlen(data);
1950                         if (len > maxlen - 1)
1951                                 len = maxlen - 1;
1952
1953                         if (*ppos > len)
1954                                 return 0;
1955                         len = *ppos;
1956                 } else {
1957                         /* Start writing from beginning of buffer. */
1958                         len = 0;
1959                 }
1960
1961                 *ppos += *lenp;
1962                 p = buffer;
1963                 while ((p - buffer) < *lenp && len < maxlen - 1) {
1964                         if (get_user(c, p++))
1965                                 return -EFAULT;
1966                         if (c == 0 || c == '\n')
1967                                 break;
1968                         data[len++] = c;
1969                 }
1970                 data[len] = 0;
1971         } else {
1972                 len = strlen(data);
1973                 if (len > maxlen)
1974                         len = maxlen;
1975
1976                 if (*ppos > len) {
1977                         *lenp = 0;
1978                         return 0;
1979                 }
1980
1981                 data += *ppos;
1982                 len  -= *ppos;
1983
1984                 if (len > *lenp)
1985                         len = *lenp;
1986                 if (len)
1987                         if (copy_to_user(buffer, data, len))
1988                                 return -EFAULT;
1989                 if (len < *lenp) {
1990                         if (put_user('\n', buffer + len))
1991                                 return -EFAULT;
1992                         len++;
1993                 }
1994                 *lenp = len;
1995                 *ppos += len;
1996         }
1997         return 0;
1998 }
1999
2000 static void warn_sysctl_write(struct ctl_table *table)
2001 {
2002         pr_warn_once("%s wrote to %s when file position was not 0!\n"
2003                 "This will not be supported in the future. To silence this\n"
2004                 "warning, set kernel.sysctl_writes_strict = -1\n",
2005                 current->comm, table->procname);
2006 }
2007
2008 /**
2009  * proc_first_pos_non_zero_ignore - check if first position is allowed
2010  * @ppos: file position
2011  * @table: the sysctl table
2012  *
2013  * Returns true if the first position is non-zero and the sysctl_writes_strict
2014  * mode indicates this is not allowed for numeric input types. String proc
2015  * handlers can ignore the return value.
2016  */
2017 static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
2018                                            struct ctl_table *table)
2019 {
2020         if (!*ppos)
2021                 return false;
2022
2023         switch (sysctl_writes_strict) {
2024         case SYSCTL_WRITES_STRICT:
2025                 return true;
2026         case SYSCTL_WRITES_WARN:
2027                 warn_sysctl_write(table);
2028                 return false;
2029         default:
2030                 return false;
2031         }
2032 }
2033
2034 /**
2035  * proc_dostring - read a string sysctl
2036  * @table: the sysctl table
2037  * @write: %TRUE if this is a write to the sysctl file
2038  * @buffer: the user buffer
2039  * @lenp: the size of the user buffer
2040  * @ppos: file position
2041  *
2042  * Reads/writes a string from/to the user buffer. If the kernel
2043  * buffer provided is not large enough to hold the string, the
2044  * string is truncated. The copied string is %NULL-terminated.
2045  * If the string is being read by the user process, it is copied
2046  * and a newline '\n' is added. It is truncated if the buffer is
2047  * not large enough.
2048  *
2049  * Returns 0 on success.
2050  */
2051 int proc_dostring(struct ctl_table *table, int write,
2052                   void __user *buffer, size_t *lenp, loff_t *ppos)
2053 {
2054         if (write)
2055                 proc_first_pos_non_zero_ignore(ppos, table);
2056
2057         return _proc_do_string((char *)(table->data), table->maxlen, write,
2058                                (char __user *)buffer, lenp, ppos);
2059 }
2060
/*
 * proc_skip_spaces - advance *@buf past whatever skip_spaces() skips.
 *
 * Returns the number of characters skipped.
 */
static size_t proc_skip_spaces(char **buf)
{
	char *start = *buf;

	*buf = skip_spaces(start);

	return *buf - start;
}
2069
/*
 * proc_skip_char - skip a leading run of the character @v.
 *
 * Advances *@buf and decrements *@size for every leading byte equal to
 * @v, stopping at the first different byte or when *@size reaches zero.
 */
static void proc_skip_char(char **buf, size_t *size, const char v)
{
	while (*size && **buf == v) {
		(*buf)++;
		(*size)--;
	}
}
2079
#define TMPBUFLEN 22
/**
 * proc_get_long - parse one ASCII formatted integer from a kernel buffer
 *
 * @buf: a kernel buffer
 * @size: size of the kernel buffer
 * @val: this is where the number will be stored
 * @neg: set to %TRUE if number is negative
 * @perm_tr: a vector which contains the allowed trailers
 * @perm_tr_len: size of the perm_tr vector
 * @tr: pointer to store the trailer character
 *
 * At most TMPBUFLEN - 1 bytes of @buf are examined. A number whose text
 * fills that whole window is rejected, because the character following
 * it cannot be inspected.
 *
 * In case of success %0 is returned and @buf and @size are updated with
 * the amount of bytes read. If @tr is non-NULL and a trailing
 * character exists (size is non-zero after returning from this
 * function), @tr is updated with the trailing character.
 * Returns -EINVAL on empty input, a missing digit, an over-long number
 * or a trailer that is not listed in @perm_tr.
 */
static int proc_get_long(char **buf, size_t *size,
			  unsigned long *val, bool *neg,
			  const char *perm_tr, unsigned perm_tr_len, char *tr)
{
	/*
	 * Keep the length in a size_t: storing *size in an int would turn a
	 * size above INT_MAX negative, skip the clamp below and hand a huge
	 * length to memcpy() on the stack buffer.
	 */
	size_t len;
	char *p, tmp[TMPBUFLEN];

	if (!*size)
		return -EINVAL;

	len = *size;
	if (len > TMPBUFLEN - 1)
		len = TMPBUFLEN - 1;

	memcpy(tmp, *buf, len);

	tmp[len] = 0;
	p = tmp;
	/* A lone '-' is not a sign; it falls through to the digit check. */
	if (*p == '-' && *size > 1) {
		*neg = true;
		p++;
	} else
		*neg = false;
	if (!isdigit(*p))
		return -EINVAL;

	/* NOTE(review): simple_strtoul() presumably does not report overflow — confirm. */
	*val = simple_strtoul(p, &p, 0);

	len = p - tmp;

	/* We don't know if the next char is whitespace thus we may accept
	 * invalid integers (e.g. 1234...a) or two integers instead of one
	 * (e.g. 123...1). So lets not allow such large numbers. */
	if (len == TMPBUFLEN - 1)
		return -EINVAL;

	if (len < *size && perm_tr_len && !memchr(perm_tr, *p, perm_tr_len))
		return -EINVAL;

	if (tr && (len < *size))
		*tr = *p;

	*buf += len;
	*size -= len;

	return 0;
}
2144
2145 /**
2146  * proc_put_long - converts an integer to a decimal ASCII formatted string
2147  *
2148  * @buf: the user buffer
2149  * @size: the size of the user buffer
2150  * @val: the integer to be converted
2151  * @neg: sign of the number, %TRUE for negative
2152  *
2153  * In case of success %0 is returned and @buf and @size are updated with
2154  * the amount of bytes written.
2155  */
2156 static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
2157                           bool neg)
2158 {
2159         int len;
2160         char tmp[TMPBUFLEN], *p = tmp;
2161
2162         sprintf(p, "%s%lu", neg ? "-" : "", val);
2163         len = strlen(tmp);
2164         if (len > *size)
2165                 len = *size;
2166         if (copy_to_user(*buf, tmp, len))
2167                 return -EFAULT;
2168         *size -= len;
2169         *buf += len;
2170         return 0;
2171 }
2172 #undef TMPBUFLEN
2173
2174 static int proc_put_char(void __user **buf, size_t *size, char c)
2175 {
2176         if (*size) {
2177                 char __user **buffer = (char __user **)buf;
2178                 if (put_user(c, *buffer))
2179                         return -EFAULT;
2180                 (*size)--, (*buffer)++;
2181                 *buf = *buffer;
2182         }
2183         return 0;
2184 }
2185
/*
 * do_proc_dointvec_conv - convert between an int and a sign/magnitude pair.
 *
 * Write (@write non-zero): combine *@negp and *@lvalp into *@valp.
 * Magnitudes above INT_MAX (or above INT_MAX + 1 for negative numbers)
 * are rejected with -EINVAL and *@valp is left unchanged.
 *
 * Read (@write zero): split *@valp into *@negp and *@lvalp.
 *
 * @data is not used. Returns 0 on success.
 */
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	unsigned long limit;
	int cur;

	if (!write) {
		cur = *valp;
		*negp = cur < 0;
		/* Negate in unsigned arithmetic so INT_MIN is handled. */
		*lvalp = *negp ? -(unsigned long)cur : (unsigned long)cur;
		return 0;
	}

	limit = (unsigned long) INT_MAX;
	if (*negp)
		limit += 1;
	if (*lvalp > limit)
		return -EINVAL;

	*valp = *negp ? -*lvalp : *lvalp;

	return 0;
}
2212
/*
 * do_proc_douintvec_conv - convert between an unsigned int and an
 * unsigned long.
 *
 * Write (@write non-zero): store *@lvalp in *@valp, or return -EINVAL
 * without storing when the value exceeds UINT_MAX.
 * Read (@write zero): widen *@valp into *@lvalp.
 *
 * @data is not used. Returns 0 on success.
 */
static int do_proc_douintvec_conv(unsigned long *lvalp,
				  unsigned int *valp,
				  int write, void *data)
{
	if (!write) {
		*lvalp = (unsigned long)*valp;
		return 0;
	}

	if (*lvalp > UINT_MAX)
		return -EINVAL;

	*valp = *lvalp;

	return 0;
}
2227
/*
 * Characters accepted as the trailer after a parsed number. This is a
 * plain three-byte array, not a string: it has no NUL terminator and is
 * always used together with sizeof().
 */
static const char proc_wspace_sep[] = {
	' ',
	'\t',
	'\n',
};
2229
2230 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
2231                   int write, void __user *buffer,
2232                   size_t *lenp, loff_t *ppos,
2233                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2234                               int write, void *data),
2235                   void *data)
2236 {
2237         int *i, vleft, first = 1, err = 0;
2238         size_t left;
2239         char *kbuf = NULL, *p;
2240         
2241         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2242                 *lenp = 0;
2243                 return 0;
2244         }
2245         
2246         i = (int *) tbl_data;
2247         vleft = table->maxlen / sizeof(*i);
2248         left = *lenp;
2249
2250         if (!conv)
2251                 conv = do_proc_dointvec_conv;
2252
2253         if (write) {
2254                 if (proc_first_pos_non_zero_ignore(ppos, table))
2255                         goto out;
2256
2257                 if (left > PAGE_SIZE - 1)
2258                         left = PAGE_SIZE - 1;
2259                 p = kbuf = memdup_user_nul(buffer, left);
2260                 if (IS_ERR(kbuf))
2261                         return PTR_ERR(kbuf);
2262         }
2263
2264         for (; left && vleft--; i++, first=0) {
2265                 unsigned long lval;
2266                 bool neg;
2267
2268                 if (write) {
2269                         left -= proc_skip_spaces(&p);
2270
2271                         if (!left)
2272                                 break;
2273                         err = proc_get_long(&p, &left, &lval, &neg,
2274                                              proc_wspace_sep,
2275                                              sizeof(proc_wspace_sep), NULL);
2276                         if (err)
2277                                 break;
2278                         if (conv(&neg, &lval, i, 1, data)) {
2279                                 err = -EINVAL;
2280                                 break;
2281                         }
2282                 } else {
2283                         if (conv(&neg, &lval, i, 0, data)) {
2284                                 err = -EINVAL;
2285                                 break;
2286                         }
2287                         if (!first)
2288                                 err = proc_put_char(&buffer, &left, '\t');
2289                         if (err)
2290                                 break;
2291                         err = proc_put_long(&buffer, &left, lval, neg);
2292                         if (err)
2293                                 break;
2294                 }
2295         }
2296
2297         if (!write && !first && left && !err)
2298                 err = proc_put_char(&buffer, &left, '\n');
2299         if (write && !err && left)
2300                 left -= proc_skip_spaces(&p);
2301         if (write) {
2302                 kfree(kbuf);
2303                 if (first)
2304                         return err ? : -EINVAL;
2305         }
2306         *lenp -= left;
2307 out:
2308         *ppos += *lenp;
2309         return err;
2310 }
2311
2312 static int do_proc_dointvec(struct ctl_table *table, int write,
2313                   void __user *buffer, size_t *lenp, loff_t *ppos,
2314                   int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
2315                               int write, void *data),
2316                   void *data)
2317 {
2318         return __do_proc_dointvec(table->data, table, write,
2319                         buffer, lenp, ppos, conv, data);
2320 }
2321
2322 static int do_proc_douintvec_w(unsigned int *tbl_data,
2323                                struct ctl_table *table,
2324                                void __user *buffer,
2325                                size_t *lenp, loff_t *ppos,
2326                                int (*conv)(unsigned long *lvalp,
2327                                            unsigned int *valp,
2328                                            int write, void *data),
2329                                void *data)
2330 {
2331         unsigned long lval;
2332         int err = 0;
2333         size_t left;
2334         bool neg;
2335         char *kbuf = NULL, *p;
2336
2337         left = *lenp;
2338
2339         if (proc_first_pos_non_zero_ignore(ppos, table))
2340                 goto bail_early;
2341
2342         if (left > PAGE_SIZE - 1)
2343                 left = PAGE_SIZE - 1;
2344
2345         p = kbuf = memdup_user_nul(buffer, left);
2346         if (IS_ERR(kbuf))
2347                 return -EINVAL;
2348
2349         left -= proc_skip_spaces(&p);
2350         if (!left) {
2351                 err = -EINVAL;
2352                 goto out_free;
2353         }
2354
2355         err = proc_get_long(&p, &left, &lval, &neg,
2356                              proc_wspace_sep,
2357                              sizeof(proc_wspace_sep), NULL);
2358         if (err || neg) {
2359                 err = -EINVAL;
2360                 goto out_free;
2361         }
2362
2363         if (conv(&lval, tbl_data, 1, data)) {
2364                 err = -EINVAL;
2365                 goto out_free;
2366         }
2367
2368         if (!err && left)
2369                 left -= proc_skip_spaces(&p);
2370
2371 out_free:
2372         kfree(kbuf);
2373         if (err)
2374                 return -EINVAL;
2375
2376         return 0;
2377
2378         /* This is in keeping with old __do_proc_dointvec() */
2379 bail_early:
2380         *ppos += *lenp;
2381         return err;
2382 }
2383
2384 static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
2385                                size_t *lenp, loff_t *ppos,
2386                                int (*conv)(unsigned long *lvalp,
2387                                            unsigned int *valp,
2388                                            int write, void *data),
2389                                void *data)
2390 {
2391         unsigned long lval;
2392         int err = 0;
2393         size_t left;
2394
2395         left = *lenp;
2396
2397         if (conv(&lval, tbl_data, 0, data)) {
2398                 err = -EINVAL;
2399                 goto out;
2400         }
2401
2402         err = proc_put_long(&buffer, &left, lval, false);
2403         if (err || !left)
2404                 goto out;
2405
2406         err = proc_put_char(&buffer, &left, '\n');
2407
2408 out:
2409         *lenp -= left;
2410         *ppos += *lenp;
2411
2412         return err;
2413 }
2414
2415 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
2416                                int write, void __user *buffer,
2417                                size_t *lenp, loff_t *ppos,
2418                                int (*conv)(unsigned long *lvalp,
2419                                            unsigned int *valp,
2420                                            int write, void *data),
2421                                void *data)
2422 {
2423         unsigned int *i, vleft;
2424
2425         if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
2426                 *lenp = 0;
2427                 return 0;
2428         }
2429
2430         i = (unsigned int *) tbl_data;
2431         vleft = table->maxlen / sizeof(*i);
2432
2433         /*
2434          * Arrays are not supported, keep this simple. *Do not* add
2435          * support for them.
2436          */
2437         if (vleft != 1) {
2438                 *lenp = 0;
2439                 return -EINVAL;
2440         }
2441
2442         if (!conv)
2443                 conv = do_proc_douintvec_conv;
2444
2445         if (write)
2446                 return do_proc_douintvec_w(i, table, buffer, lenp, ppos,
2447                                            conv, data);
2448         return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data);
2449 }
2450
2451 static int do_proc_douintvec(struct ctl_table *table, int write,
2452                              void __user *buffer, size_t *lenp, loff_t *ppos,
2453                              int (*conv)(unsigned long *lvalp,
2454                                          unsigned int *valp,
2455                                          int write, void *data),
2456                              void *data)
2457 {
2458         return __do_proc_douintvec(table->data, table, write,
2459                                    buffer, lenp, ppos, conv, data);
2460 }
2461
2462 /**
2463  * proc_dointvec - read a vector of integers
2464  * @table: the sysctl table
2465  * @write: %TRUE if this is a write to the sysctl file
2466  * @buffer: the user buffer
2467  * @lenp: the size of the user buffer
2468  * @ppos: file position
2469  *
2470  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2471  * values from/to the user buffer, treated as an ASCII string. 
2472  *
2473  * Returns 0 on success.
2474  */
2475 int proc_dointvec(struct ctl_table *table, int write,
2476                      void __user *buffer, size_t *lenp, loff_t *ppos)
2477 {
2478         return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
2479 }
2480
2481 /**
2482  * proc_douintvec - read a vector of unsigned integers
2483  * @table: the sysctl table
2484  * @write: %TRUE if this is a write to the sysctl file
2485  * @buffer: the user buffer
2486  * @lenp: the size of the user buffer
2487  * @ppos: file position
2488  *
2489  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2490  * values from/to the user buffer, treated as an ASCII string.
2491  *
2492  * Returns 0 on success.
2493  */
2494 int proc_douintvec(struct ctl_table *table, int write,
2495                      void __user *buffer, size_t *lenp, loff_t *ppos)
2496 {
2497         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2498                                  do_proc_douintvec_conv, NULL);
2499 }
2500
2501 /*
2502  * Taint values can only be increased
2503  * This means we can safely use a temporary.
2504  */
2505 static int proc_taint(struct ctl_table *table, int write,
2506                                void __user *buffer, size_t *lenp, loff_t *ppos)
2507 {
2508         struct ctl_table t;
2509         unsigned long tmptaint = get_taint();
2510         int err;
2511
2512         if (write && !capable(CAP_SYS_ADMIN))
2513                 return -EPERM;
2514
2515         t = *table;
2516         t.data = &tmptaint;
2517         err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos);
2518         if (err < 0)
2519                 return err;
2520
2521         if (write) {
2522                 /*
2523                  * Poor man's atomic or. Not worth adding a primitive
2524                  * to everyone's atomic.h for this
2525                  */
2526                 int i;
2527                 for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
2528                         if ((tmptaint >> i) & 1)
2529                                 add_taint(i, LOCKDEP_STILL_OK);
2530                 }
2531         }
2532
2533         return err;
2534 }
2535
#ifdef CONFIG_PRINTK
/*
 * proc_dointvec_minmax() variant whose write path is restricted to
 * callers holding CAP_SYS_ADMIN; reads are passed straight through.
 * Returns -EPERM for an unprivileged write.
 */
static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
                                void __user *buffer, size_t *lenp, loff_t *ppos)
{
        if (!write || capable(CAP_SYS_ADMIN))
                return proc_dointvec_minmax(table, write, buffer, lenp, ppos);

        return -EPERM;
}
#endif
2546
/**
 * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * The do_proc_dointvec_minmax_conv_param structure provides the
 * minimum and maximum values for doing range checking for those sysctl
 * parameters that use the proc_dointvec_minmax() handler.
 */
struct do_proc_dointvec_minmax_conv_param {
        int *min;
        int *max;
};

/*
 * Conversion callback for proc_dointvec_minmax().
 *
 * @negp:  sign of the value (input on write, output on read)
 * @lvalp: magnitude of the value (input on write, output on read)
 * @valp:  the kernel integer being read or written
 * @write: non-zero when user input is being stored into *valp
 * @data:  struct do_proc_dointvec_minmax_conv_param with optional bounds
 *
 * On write the sign/magnitude pair is first checked against the range of
 * an int, so that an oversized magnitude cannot wrap around when it is
 * narrowed and then slip through the min/max comparison.  The resulting
 * value is compared against *min and *max (each bound is skipped when its
 * pointer is NULL) and *valp is only updated once every check passed.
 *
 * On read *valp is split into a sign flag and an unsigned magnitude.
 *
 * Returns 0 on success, -EINVAL when a written value does not fit an int
 * or lies outside the permitted range.
 */
static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
                                        int *valp,
                                        int write, void *data)
{
        struct do_proc_dointvec_minmax_conv_param *param = data;

        if (write) {
                int val;

                if (*negp) {
                        /* The most negative int has magnitude INT_MAX + 1. */
                        if (*lvalp > (unsigned long) INT_MAX + 1)
                                return -EINVAL;
                        if (*lvalp == (unsigned long) INT_MAX + 1)
                                val = -INT_MAX - 1;
                        else
                                val = -(int) *lvalp;
                } else {
                        if (*lvalp > (unsigned long) INT_MAX)
                                return -EINVAL;
                        val = (int) *lvalp;
                }

                if ((param->min && *param->min > val) ||
                    (param->max && *param->max < val))
                        return -EINVAL;
                *valp = val;
        } else {
                int val = *valp;

                if (val < 0) {
                        *negp = true;
                        /* Negate as unsigned so INT_MIN is handled too. */
                        *lvalp = -(unsigned long)val;
                } else {
                        *negp = false;
                        *lvalp = (unsigned long)val;
                }
        }
        return 0;
}
2584
2585 /**
2586  * proc_dointvec_minmax - read a vector of integers with min/max values
2587  * @table: the sysctl table
2588  * @write: %TRUE if this is a write to the sysctl file
2589  * @buffer: the user buffer
2590  * @lenp: the size of the user buffer
2591  * @ppos: file position
2592  *
2593  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2594  * values from/to the user buffer, treated as an ASCII string.
2595  *
2596  * This routine will ensure the values are within the range specified by
2597  * table->extra1 (min) and table->extra2 (max).
2598  *
2599  * Returns 0 on success or -EINVAL on write when the range check fails.
2600  */
2601 int proc_dointvec_minmax(struct ctl_table *table, int write,
2602                   void __user *buffer, size_t *lenp, loff_t *ppos)
2603 {
2604         struct do_proc_dointvec_minmax_conv_param param = {
2605                 .min = (int *) table->extra1,
2606                 .max = (int *) table->extra2,
2607         };
2608         return do_proc_dointvec(table, write, buffer, lenp, ppos,
2609                                 do_proc_dointvec_minmax_conv, &param);
2610 }
2611
/**
 * struct do_proc_douintvec_minmax_conv_param - proc_douintvec_minmax() range checking structure
 * @min: pointer to minimum allowable value
 * @max: pointer to maximum allowable value
 *
 * Carries the optional lower and upper bounds used for range checking by
 * sysctl parameters that go through the proc_douintvec_minmax() handler.
 */
struct do_proc_douintvec_minmax_conv_param {
        unsigned int *min;
        unsigned int *max;
};

/*
 * Conversion callback for proc_douintvec_minmax().
 *
 * Write: *lvalp must fit an unsigned int (-EINVAL otherwise) and must lie
 * within the bounds in @data, where a NULL bound is not checked (-ERANGE
 * otherwise); only then is it stored in *valp.
 * Read: *valp is widened into *lvalp.
 *
 * Returns 0 on success.
 */
static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
                                         unsigned int *valp,
                                         int write, void *data)
{
        struct do_proc_douintvec_minmax_conv_param *bounds = data;
        unsigned int candidate;

        if (!write) {
                *lvalp = (unsigned long) *valp;
                return 0;
        }

        if (*lvalp > UINT_MAX)
                return -EINVAL;
        candidate = *lvalp;

        if (bounds->min && candidate < *bounds->min)
                return -ERANGE;
        if (bounds->max && candidate > *bounds->max)
                return -ERANGE;

        *valp = candidate;
        return 0;
}
2650
2651 /**
2652  * proc_douintvec_minmax - read a vector of unsigned ints with min/max values
2653  * @table: the sysctl table
2654  * @write: %TRUE if this is a write to the sysctl file
2655  * @buffer: the user buffer
2656  * @lenp: the size of the user buffer
2657  * @ppos: file position
2658  *
2659  * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
2660  * values from/to the user buffer, treated as an ASCII string. Negative
2661  * strings are not allowed.
2662  *
2663  * This routine will ensure the values are within the range specified by
2664  * table->extra1 (min) and table->extra2 (max). There is a final sanity
2665  * check for UINT_MAX to avoid having to support wrap around uses from
2666  * userspace.
2667  *
2668  * Returns 0 on success or -ERANGE on write when the range check fails.
2669  */
2670 int proc_douintvec_minmax(struct ctl_table *table, int write,
2671                           void __user *buffer, size_t *lenp, loff_t *ppos)
2672 {
2673         struct do_proc_douintvec_minmax_conv_param param = {
2674                 .min = (unsigned int *) table->extra1,
2675                 .max = (unsigned int *) table->extra2,
2676         };
2677         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2678                                  do_proc_douintvec_minmax_conv, &param);
2679 }
2680
2681 static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
2682                                         unsigned int *valp,
2683                                         int write, void *data)
2684 {
2685         if (write) {
2686                 unsigned int val;
2687
2688                 val = round_pipe_size(*lvalp);
2689                 if (val == 0)
2690                         return -EINVAL;
2691
2692                 *valp = val;
2693         } else {
2694                 unsigned int val = *valp;
2695                 *lvalp = (unsigned long) val;
2696         }
2697
2698         return 0;
2699 }
2700
2701 static int proc_dopipe_max_size(struct ctl_table *table, int write,
2702                                 void __user *buffer, size_t *lenp, loff_t *ppos)
2703 {
2704         return do_proc_douintvec(table, write, buffer, lenp, ppos,
2705                                  do_proc_dopipe_max_size_conv, NULL);
2706 }
2707
/*
 * Warn when suid_dumpable is SUID_DUMP_ROOT while core_pattern starts
 * with neither '/' nor '|'.  Does nothing unless CONFIG_COREDUMP is set.
 */
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
        if (suid_dumpable != SUID_DUMP_ROOT)
                return;

        /* An absolute path or a pipe handler is considered safe. */
        if (core_pattern[0] == '/' || core_pattern[0] == '|')
                return;

        printk(KERN_WARNING
               "Unsafe core_pattern used with fs.suid_dumpable=2.\n"
               "Pipe handler or fully qualified core dump path required.\n"
               "Set kernel.core_pattern before fs.suid_dumpable.\n");
#endif
}
2721
2722 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
2723                 void __user *buffer, size_t *lenp, loff_t *ppos)
2724 {
2725         int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
2726         if (!error)
2727                 validate_coredump_safety();
2728         return error;
2729 }
2730
#ifdef CONFIG_COREDUMP
/*
 * proc_dostring() followed, when it succeeds, by a re-check of the core
 * dump configuration via validate_coredump_safety().
 */
static int proc_dostring_coredump(struct ctl_table *table, int write,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
{
        int ret = proc_dostring(table, write, buffer, lenp, ppos);

        if (ret)
                return ret;

        validate_coredump_safety();
        return 0;
}
#endif
2741
2742 static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
2743                                      void __user *buffer,
2744                                      size_t *lenp, loff_t *ppos,
2745                                      unsigned long convmul,
2746                                      unsigned long convdiv)
2747 {
2748         unsigned long *i, *min, *max;
2749         int vleft, first = 1, err = 0;
2750         size_t left;
2751         char *kbuf = NULL, *p;
2752
2753         if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
2754                 *lenp = 0;
2755                 return 0;
2756         }
2757
2758         i = (unsigned long *) data;
2759         min = (unsigned long *) table->extra1;
2760         max = (unsigned long *) table->extra2;
2761         vleft = table->maxlen / sizeof(unsigned long);
2762         left = *lenp;
2763
2764         if (write) {
2765                 if (proc_first_pos_non_zero_ignore(ppos, table))
2766                         goto out;
2767
2768                 if (left > PAGE_SIZE - 1)
2769                         left = PAGE_SIZE - 1;
2770                 p = kbuf = memdup_user_nul(buffer, left);
2771                 if (IS_ERR(kbuf))
2772                         return PTR_ERR(kbuf);
2773         }
2774
2775         for (; left && vleft--; i++, first = 0) {
2776                 unsigned long val;
2777
2778                 if (write) {
2779                         bool neg;
2780
2781                         left -= proc_skip_spaces(&p);
2782
2783                         err = proc_get_long(&p, &left, &val, &neg,
2784                                              proc_wspace_sep,
2785                                              sizeof(proc_wspace_sep), NULL);
2786                         if (err)
2787                                 break;
2788                         if (neg)
2789                                 continue;
2790                         val = convmul * val / convdiv;
2791                         if ((min && val < *min) || (max && val > *max))
2792                                 continue;
2793                         *i = val;
2794                 } else {
2795                         val = convdiv * (*i) / convmul;
2796                         if (!first) {
2797                                 err = proc_put_char(&buffer, &left, '\t');
2798                                 if (err)
2799                                         break;
2800                         }
2801                         err = proc_put_long(&buffer, &left, val, false);
2802                         if (err)
2803                                 break;
2804                 }
2805         }
2806
2807         if (!write && !first && left && !err)
2808                 err = proc_put_char(&buffer, &left, '\n');
2809         if (write && !err)
2810                 left -= proc_skip_spaces(&p);
2811         if (write) {
2812                 kfree(kbuf);
2813                 if (first)
2814                         return err ? : -EINVAL;
2815         }
2816         *lenp -= left;
2817 out:
2818         *ppos += *lenp;
2819         return err;
2820 }
2821
2822 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
2823                                      void __user *buffer,
2824                                      size_t *lenp, loff_t *ppos,
2825                                      unsigned long convmul,
2826                                      unsigned long convdiv)
2827 {
2828         return __do_proc_doulongvec_minmax(table->data, table, write,
2829                         buffer, lenp, ppos, convmul, convdiv);
2830 }
2831
2832 /**
2833  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2834  * @table: the sysctl table
2835  * @write: %TRUE if this is a write to the sysctl file
2836  * @buffer: the user buffer
2837  * @lenp: the size of the user buffer
2838  * @ppos: file position
2839  *
2840  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2841  * values from/to the user buffer, treated as an ASCII string.
2842  *
2843  * This routine will ensure the values are within the range specified by
2844  * table->extra1 (min) and table->extra2 (max).
2845  *
2846  * Returns 0 on success.
2847  */
2848 int proc_doulongvec_minmax(struct ctl_table *table, int write,
2849                            void __user *buffer, size_t *lenp, loff_t *ppos)
2850 {
2851     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
2852 }
2853
2854 /**
2855  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2856  * @table: the sysctl table
2857  * @write: %TRUE if this is a write to the sysctl file
2858  * @buffer: the user buffer
2859  * @lenp: the size of the user buffer
2860  * @ppos: file position
2861  *
2862  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2863  * values from/to the user buffer, treated as an ASCII string. The values
2864  * are treated as milliseconds, and converted to jiffies when they are stored.
2865  *
2866  * This routine will ensure the values are within the range specified by
2867  * table->extra1 (min) and table->extra2 (max).
2868  *
2869  * Returns 0 on success.
2870  */
2871 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
2872                                       void __user *buffer,
2873                                       size_t *lenp, loff_t *ppos)
2874 {
2875     return do_proc_doulongvec_minmax(table, write, buffer,
2876                                      lenp, ppos, HZ, 1000l);
2877 }
2878
2879
2880 static int do_proc_dointvec_jiffies_conv(bool *negp, unsigned long *lvalp,
2881                                          int *valp,
2882                                          int write, void *data)
2883 {
2884         if (write) {
2885                 if (*lvalp > INT_MAX / HZ)
2886                         return 1;
2887                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2888         } else {
2889                 int val = *valp;
2890                 unsigned long lval;
2891                 if (val < 0) {
2892                         *negp = true;
2893                         lval = -(unsigned long)val;
2894                 } else {
2895                         *negp = false;
2896                         lval = (unsigned long)val;
2897                 }
2898                 *lvalp = lval / HZ;
2899         }
2900         return 0;
2901 }
2902
2903 static int do_proc_dointvec_userhz_jiffies_conv(bool *negp, unsigned long *lvalp,
2904                                                 int *valp,
2905                                                 int write, void *data)
2906 {
2907         if (write) {
2908                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2909                         return 1;
2910                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2911         } else {
2912                 int val = *valp;
2913                 unsigned long lval;
2914                 if (val < 0) {
2915                         *negp = true;
2916                         lval = -(unsigned long)val;
2917                 } else {
2918                         *negp = false;
2919                         lval = (unsigned long)val;
2920                 }
2921                 *lvalp = jiffies_to_clock_t(lval);
2922         }
2923         return 0;
2924 }
2925
2926 static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
2927                                             int *valp,
2928                                             int write, void *data)
2929 {
2930         if (write) {
2931                 unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
2932
2933                 if (jif > INT_MAX)
2934                         return 1;
2935                 *valp = (int)jif;
2936         } else {
2937                 int val = *valp;
2938                 unsigned long lval;
2939                 if (val < 0) {
2940                         *negp = true;
2941                         lval = -(unsigned long)val;
2942                 } else {
2943                         *negp = false;
2944                         lval = (unsigned long)val;
2945                 }
2946                 *lvalp = jiffies_to_msecs(lval);
2947         }
2948         return 0;
2949 }
2950
2951 /**
2952  * proc_dointvec_jiffies - read a vector of integers as seconds
2953  * @table: the sysctl table
2954  * @write: %TRUE if this is a write to the sysctl file
2955  * @buffer: the user buffer
2956  * @lenp: the size of the user buffer
2957  * @ppos: file position
2958  *
2959  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2960  * values from/to the user buffer, treated as an ASCII string. 
2961  * The values read are assumed to be in seconds, and are converted into
2962  * jiffies.
2963  *
2964  * Returns 0 on success.
2965  */
2966 int proc_dointvec_jiffies(struct ctl_table *table, int write,
2967                           void __user *buffer, size_t *lenp, loff_t *ppos)
2968 {
2969     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2970                             do_proc_dointvec_jiffies_conv,NULL);
2971 }
2972
2973 /**
2974  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2975  * @table: the sysctl table
2976  * @write: %TRUE if this is a write to the sysctl file
2977  * @buffer: the user buffer
2978  * @lenp: the size of the user buffer
2979  * @ppos: pointer to the file position
2980  *
2981  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2982  * values from/to the user buffer, treated as an ASCII string. 
2983  * The values read are assumed to be in 1/USER_HZ seconds, and 
2984  * are converted into jiffies.
2985  *
2986  * Returns 0 on success.
2987  */
2988 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
2989                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2990 {
2991     return do_proc_dointvec(table,write,buffer,lenp,ppos,
2992                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2993 }
2994
2995 /**
2996  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2997  * @table: the sysctl table
2998  * @write: %TRUE if this is a write to the sysctl file
2999  * @buffer: the user buffer
3000  * @lenp: the size of the user buffer
3001  * @ppos: file position
3002  * @ppos: the current position in the file
3003  *
3004  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
3005  * values from/to the user buffer, treated as an ASCII string. 
3006  * The values read are assumed to be in 1/1000 seconds, and 
3007  * are converted into jiffies.
3008  *
3009  * Returns 0 on success.
3010  */
3011 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3012                              void __user *buffer, size_t *lenp, loff_t *ppos)
3013 {
3014         return do_proc_dointvec(table, write, buffer, lenp, ppos,
3015                                 do_proc_dointvec_ms_jiffies_conv, NULL);
3016 }
3017
3018 static int proc_do_cad_pid(struct ctl_table *table, int write,
3019                            void __user *buffer, size_t *lenp, loff_t *ppos)
3020 {
3021         struct pid *new_pid;
3022         pid_t tmp;
3023         int r;
3024
3025         tmp = pid_vnr(cad_pid);
3026
3027         r = __do_proc_dointvec(&tmp, table, write, buffer,
3028                                lenp, ppos, NULL, NULL);
3029         if (r || !write)
3030                 return r;
3031
3032         new_pid = find_get_pid(tmp);
3033         if (!new_pid)
3034                 return -ESRCH;
3035
3036         put_pid(xchg(&cad_pid, new_pid));
3037         return 0;
3038 }
3039
3040 /**
3041  * proc_do_large_bitmap - read/write from/to a large bitmap
3042  * @table: the sysctl table
3043  * @write: %TRUE if this is a write to the sysctl file
3044  * @buffer: the user buffer
3045  * @lenp: the size of the user buffer
3046  * @ppos: file position
3047  *
3048  * The bitmap is stored at table->data and the bitmap length (in bits)
3049  * in table->maxlen.
3050  *
3051  * We use a range comma separated format (e.g. 1,3-4,10-10) so that
3052  * large bitmaps may be represented in a compact manner. Writing into
3053  * the file will clear the bitmap then update it with the given input.
3054  *
3055  * Returns 0 on success.
3056  */
3057 int proc_do_large_bitmap(struct ctl_table *table, int write,
3058                          void __user *buffer, size_t *lenp, loff_t *ppos)
3059 {
3060         int err = 0;
3061         bool first = 1;
3062         size_t left = *lenp;
3063         unsigned long bitmap_len = table->maxlen;
3064         unsigned long *bitmap = *(unsigned long **) table->data;
3065         unsigned long *tmp_bitmap = NULL;
3066         char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
3067
3068         if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
3069                 *lenp = 0;
3070                 return 0;
3071         }
3072
3073         if (write) {
3074                 char *kbuf, *p;
3075
3076                 if (left > PAGE_SIZE - 1)
3077                         left = PAGE_SIZE - 1;
3078
3079                 p = kbuf = memdup_user_nul(buffer, left);
3080                 if (IS_ERR(kbuf))
3081                         return PTR_ERR(kbuf);
3082
3083                 tmp_bitmap = kcalloc(BITS_TO_LONGS(bitmap_len),
3084                                      sizeof(unsigned long),
3085                                      GFP_KERNEL);
3086                 if (!tmp_bitmap) {
3087                         kfree(kbuf);
3088                         return -ENOMEM;
3089                 }
3090                 proc_skip_char(&p, &left, '\n');
3091                 while (!err && left) {
3092                         unsigned long val_a, val_b;
3093                         bool neg;
3094
3095                         err = proc_get_long(&p, &left, &val_a, &neg, tr_a,
3096                                              sizeof(tr_a), &c);
3097                         if (err)
3098                                 break;
3099                         if (val_a >= bitmap_len || neg) {
3100                                 err = -EINVAL;
3101                                 break;
3102                         }
3103
3104                         val_b = val_a;
3105                         if (left) {
3106                                 p++;
3107                                 left--;
3108                         }
3109
3110                         if (c == '-') {
3111                                 err = proc_get_long(&p, &left, &val_b,
3112                                                      &neg, tr_b, sizeof(tr_b),
3113                                                      &c);
3114                                 if (err)
3115                                         break;
3116                                 if (val_b >= bitmap_len || neg ||
3117                                     val_a > val_b) {
3118                                         err = -EINVAL;
3119                                         break;
3120                                 }
3121                                 if (left) {
3122                                         p++;
3123                                         left--;
3124                                 }
3125                         }
3126
3127                         bitmap_set(tmp_bitmap, val_a, val_b - val_a + 1);
3128                         first = 0;
3129                         proc_skip_char(&p, &left, '\n');
3130                 }
3131                 kfree(kbuf);
3132         } else {
3133                 unsigned long bit_a, bit_b = 0;
3134
3135                 while (left) {
3136                         bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
3137                         if (bit_a >= bitmap_len)
3138                                 break;
3139                         bit_b = find_next_zero_bit(bitmap, bitmap_len,
3140                                                    bit_a + 1) - 1;
3141
3142                         if (!first) {
3143                                 err = proc_put_char(&buffer, &left, ',');
3144                                 if (err)
3145                                         break;
3146                         }
3147                         err = proc_put_long(&buffer, &left, bit_a, false);
3148                         if (err)
3149                                 break;
3150                         if (bit_a != bit_b) {
3151                                 err = proc_put_char(&buffer, &left, '-');
3152                                 if (err)
3153                                         break;
3154                                 err = proc_put_long(&buffer, &left, bit_b, false);
3155                                 if (err)
3156                                         break;
3157                         }
3158
3159                         first = 0; bit_b++;
3160                 }
3161                 if (!err)
3162                         err = proc_put_char(&buffer, &left, '\n');
3163         }
3164
3165         if (!err) {
3166                 if (write) {
3167                         if (*ppos)
3168                                 bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
3169                         else
3170                                 bitmap_copy(bitmap, tmp_bitmap, bitmap_len);
3171                 }
3172                 *lenp -= left;
3173                 *ppos += *lenp;
3174         }
3175
3176         kfree(tmp_bitmap);
3177         return err;
3178 }
3179
3180 #else /* CONFIG_PROC_SYSCTL */
3181
3182 int proc_dostring(struct ctl_table *table, int write,
3183                   void __user *buffer, size_t *lenp, loff_t *ppos)
3184 {
3185         return -ENOSYS;
3186 }
3187
3188 int proc_dointvec(struct ctl_table *table, int write,
3189                   void __user *buffer, size_t *lenp, loff_t *ppos)
3190 {
3191         return -ENOSYS;
3192 }
3193
3194 int proc_douintvec(struct ctl_table *table, int write,
3195                   void __user *buffer, size_t *lenp, loff_t *ppos)
3196 {
3197         return -ENOSYS;
3198 }
3199
3200 int proc_dointvec_minmax(struct ctl_table *table, int write,
3201                     void __user *buffer, size_t *lenp, loff_t *ppos)
3202 {
3203         return -ENOSYS;
3204 }
3205
3206 int proc_douintvec_minmax(struct ctl_table *table, int write,
3207                           void __user *buffer, size_t *lenp, loff_t *ppos)
3208 {
3209         return -ENOSYS;
3210 }
3211
3212 int proc_dointvec_jiffies(struct ctl_table *table, int write,
3213                     void __user *buffer, size_t *lenp, loff_t *ppos)
3214 {
3215         return -ENOSYS;
3216 }
3217
3218 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
3219                     void __user *buffer, size_t *lenp, loff_t *ppos)
3220 {
3221         return -ENOSYS;
3222 }
3223
3224 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
3225                              void __user *buffer, size_t *lenp, loff_t *ppos)
3226 {
3227         return -ENOSYS;
3228 }
3229
3230 int proc_doulongvec_minmax(struct ctl_table *table, int write,
3231                     void __user *buffer, size_t *lenp, loff_t *ppos)
3232 {
3233         return -ENOSYS;
3234 }
3235
3236 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
3237                                       void __user *buffer,
3238                                       size_t *lenp, loff_t *ppos)
3239 {
3240     return -ENOSYS;
3241 }
3242
3243
3244 #endif /* CONFIG_PROC_SYSCTL */
3245
/*
 * Each exported handler is defined twice in this file (the real
 * implementation and the -ENOSYS stub), so the exports are collected
 * here once instead of following every definition.
 */
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);