* 'proc' of git://git.kernel.org/pub/scm/linux/kernel/git/adobriyan/proc: (35 commits)
proc: remove fs/proc/proc_misc.c
proc: move /proc/vmcore creation to fs/proc/vmcore.c
proc: move pagecount stuff to fs/proc/page.c
proc: move all /proc/kcore stuff to fs/proc/kcore.c
proc: move /proc/schedstat boilerplate to kernel/sched_stats.h
proc: move /proc/modules boilerplate to kernel/module.c
proc: move /proc/diskstats boilerplate to block/genhd.c
proc: move /proc/zoneinfo boilerplate to mm/vmstat.c
proc: move /proc/vmstat boilerplate to mm/vmstat.c
proc: move /proc/pagetypeinfo boilerplate to mm/vmstat.c
proc: move /proc/buddyinfo boilerplate to mm/vmstat.c
proc: move /proc/vmallocinfo to mm/vmalloc.c
proc: move /proc/slabinfo boilerplate to mm/slub.c, mm/slab.c
proc: move /proc/slab_allocators boilerplate to mm/slab.c
proc: move /proc/interrupts boilerplate code to fs/proc/interrupts.c
proc: move /proc/stat to fs/proc/stat.c
proc: move rest of /proc/partitions code to block/genhd.c
proc: move /proc/cpuinfo code to fs/proc/cpuinfo.c
proc: move /proc/devices code to fs/proc/devices.c
proc: move rest of /proc/locks to fs/locks.c
...
--- /dev/null
- extern int arch_report_meminfo(char *page);
+#ifndef _ASM_X86_PGTABLE_H
+#define _ASM_X86_PGTABLE_H
+
+#define FIRST_USER_ADDRESS 0
+
+#define _PAGE_BIT_PRESENT 0 /* is present */
+#define _PAGE_BIT_RW 1 /* writeable */
+#define _PAGE_BIT_USER 2 /* userspace addressable */
+#define _PAGE_BIT_PWT 3 /* page write through */
+#define _PAGE_BIT_PCD 4 /* page cache disabled */
+#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
+#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
+#define _PAGE_BIT_FILE 6
+#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
+#define _PAGE_BIT_PAT 7 /* on 4KB pages */
+#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
+#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
+#define _PAGE_BIT_UNUSED3 11
+#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
+#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
+#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
+#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
+
+#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
+#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
+#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
+#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
+#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
+#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
+#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
+#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
+#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
+#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
+#define __HAVE_ARCH_PTE_SPECIAL
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
+#else
+#define _PAGE_NX (_AT(pteval_t, 0))
+#endif
+
+/* If _PAGE_PRESENT is clear, we use these: */
+#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping,
+ * saved PTE; unset:swap */
+#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE;
+ pte_present gives true */
+
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
+ _PAGE_DIRTY)
+
+/* Set of bits not changed in pte_modify */
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
+ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
+
+#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
+#define _PAGE_CACHE_WB (0)
+#define _PAGE_CACHE_WC (_PAGE_PWT)
+#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
+#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
+
+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_NX)
+
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
+ _PAGE_USER | _PAGE_ACCESSED)
+#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED)
+#define PAGE_COPY PAGE_COPY_NOEXEC
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_NX)
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
+ _PAGE_ACCESSED)
+
+#define __PAGE_KERNEL_EXEC \
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL)
+#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
+
+#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
+#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
+#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
+#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
+#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
+#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
+
+#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
+#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
+
+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
+#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
+#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
+#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
+#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
+#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
+#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
+
+#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
+#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
+#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
+#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
+
+/* xwr */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_EXEC
+#define __P101 PAGE_READONLY_EXEC
+#define __P110 PAGE_COPY_EXEC
+#define __P111 PAGE_COPY_EXEC
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_EXEC
+#define __S101 PAGE_READONLY_EXEC
+#define __S110 PAGE_SHARED_EXEC
+#define __S111 PAGE_SHARED_EXEC
+
+/*
+ * early identity mapping pte attrib macros.
+ */
+#ifdef CONFIG_X86_64
+#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
+#else
+/*
+ * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
+ * bits are combined, this will alow user to access the high address mapped
+ * VDSO in the presence of CONFIG_COMPAT_VDSO
+ */
+#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
+#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
+#endif
+
+#ifndef __ASSEMBLY__
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
+extern spinlock_t pgd_lock;
+extern struct list_head pgd_list;
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_dirty(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_DIRTY;
+}
+
+static inline int pte_young(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_ACCESSED;
+}
+
+static inline int pte_write(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_RW;
+}
+
+static inline int pte_file(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_FILE;
+}
+
+static inline int pte_huge(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_PSE;
+}
+
+static inline int pte_global(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_GLOBAL;
+}
+
+static inline int pte_exec(pte_t pte)
+{
+ return !(pte_flags(pte) & _PAGE_NX);
+}
+
+static inline int pte_special(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_SPECIAL;
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+ return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
+#define pte_page(pte) pfn_to_page(pte_pfn(pte))
+
+static inline int pmd_large(pmd_t pte)
+{
+ return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ (_PAGE_PSE | _PAGE_PRESENT);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_NX);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_PSE);
+}
+
+static inline pte_t pte_clrhuge(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_PSE);
+}
+
+static inline pte_t pte_mkglobal(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_GLOBAL);
+}
+
+static inline pte_t pte_clrglobal(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_GLOBAL);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+extern pteval_t __supported_pte_mask;
+
+static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
+{
+ return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
+}
+
+static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
+{
+ return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pteval_t val = pte_val(pte);
+
+ /*
+ * Chop off the NX bit (if present), and add the NX portion of
+ * the newprot (if present):
+ */
+ val &= _PAGE_CHG_MASK;
+ val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
+
+ return __pte(val);
+}
+
+/* mprotect needs to preserve PAT bits when updating vm_page_prot */
+#define pgprot_modify pgprot_modify
+static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+{
+ pgprotval_t preservebits = pgprot_val(oldprot) & _PAGE_CHG_MASK;
+ pgprotval_t addbits = pgprot_val(newprot);
+ return __pgprot(preservebits | addbits);
+}
+
+#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
+
+#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
+
+#ifndef __ASSEMBLY__
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+struct file;
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot);
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t *vma_prot);
+#endif
+
+/* Install a pte for a particular vaddr in kernel space. */
+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
+
+#ifdef CONFIG_X86_32
+extern void native_pagetable_setup_start(pgd_t *base);
+extern void native_pagetable_setup_done(pgd_t *base);
+#else
+static inline void native_pagetable_setup_start(pgd_t *base) {}
+static inline void native_pagetable_setup_done(pgd_t *base) {}
+#endif
+
++struct seq_file;
++extern void arch_report_meminfo(struct seq_file *m);
+
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt.h>
+#else /* !CONFIG_PARAVIRT */
+#define set_pte(ptep, pte) native_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
+
+#define set_pte_present(mm, addr, ptep, pte) \
+ native_set_pte_present(mm, addr, ptep, pte)
+#define set_pte_atomic(ptep, pte) \
+ native_set_pte_atomic(ptep, pte)
+
+#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
+#define pgd_clear(pgd) native_pgd_clear(pgd)
+#endif
+
+#ifndef set_pud
+# define set_pud(pudp, pud) native_set_pud(pudp, pud)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pud_clear(pud) native_pud_clear(pud)
+#endif
+
+#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd) native_pmd_clear(pmd)
+
+#define pte_update(mm, addr, ptep) do { } while (0)
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
+
+static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
+{
+ native_pagetable_setup_start(base);
+}
+
+static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
+{
+ native_pagetable_setup_done(base);
+}
+#endif /* CONFIG_PARAVIRT */
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_X86_32
+# include "pgtable_32.h"
+#else
+# include "pgtable_64.h"
+#endif
+
+/*
+ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
+ *
+ * this macro returns the index of the entry in the pgd page which would
+ * control the given virtual address
+ */
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+
+/*
+ * pgd_offset() returns a (pgd_t *)
+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
+ */
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+/*
+ * a shortcut which implies the use of the kernel's pgd, instead
+ * of a process's
+ */
+#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
+
+
+#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
+#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
+
+#ifndef __ASSEMBLY__
+
+enum {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_NUM
+};
+
+#ifdef CONFIG_PROC_FS
+extern void update_page_count(int level, unsigned long pages);
+#else
+static inline void update_page_count(int level, unsigned long pages) { }
+#endif
+
+/*
+ * Helper function that returns the kernel pagetable entry controlling
+ * the virtual address 'address'. NULL means no pagetable entry present.
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
+ * as a pte too.
+ */
+extern pte_t *lookup_address(unsigned long address, unsigned int *level);
+
+/* local pte updates need not use xchg for locking */
+static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
+{
+ pte_t res = *ptep;
+
+ /* Pure native function needs no input for mm, addr */
+ native_pte_clear(NULL, 0, ptep);
+ return res;
+}
+
+static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep , pte_t pte)
+{
+ native_set_pte(ptep, pte);
+}
+
+#ifndef CONFIG_PARAVIRT
+/*
+ * Rules for using pte_update - it must be called after any PTE update which
+ * has not been done using the set_pte / clear_pte interfaces. It is used by
+ * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
+ * updates should either be sets, clears, or set_pte_atomic for P->P
+ * transitions, which means this hook should only be called for user PTEs.
+ * This hook implies a P->P protection or access change has taken place, which
+ * requires a subsequent TLB flush. The notification can optionally be delayed
+ * until the TLB flush event by using the pte_update_defer form of the
+ * interface, but care must be taken to assure that the flush happens while
+ * still holding the same page table lock so that the shadow and primary pages
+ * do not become out of sync on SMP.
+ */
+#define pte_update(mm, addr, ptep) do { } while (0)
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
+#endif
+
+/*
+ * We only update the dirty/accessed state if we set
+ * the dirty bit by hand in the kernel, since the hardware
+ * will do the accessed bit for us, and we don't want to
+ * race with other CPU's that might be updating the dirty
+ * bit at the same time.
+ */
+struct vm_area_struct;
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep,
+ pte_t entry, int dirty);
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+extern int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+extern int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep);
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t pte = native_ptep_get_and_clear(ptep);
+ pte_update(mm, addr, ptep);
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep,
+ int full)
+{
+ pte_t pte;
+ if (full) {
+ /*
+ * Full address destruction in progress; paravirt does not
+ * care about updates and native needs no locking
+ */
+ pte = native_local_ptep_get_and_clear(ptep);
+ } else {
+ pte = ptep_get_and_clear(mm, addr, ptep);
+ }
+ return pte;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
+ pte_update(mm, addr, ptep);
+}
+
+/*
+ * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
+ *
+ * dst - pointer to pgd range anwhere on a pgd page
+ * src - ""
+ * count - the number of pgds to copy.
+ *
+ * dst and src can be on the same page, but the range must not overlap,
+ * and must not cross a page boundary.
+ */
+static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+{
+ memcpy(dst, src, count * sizeof(pgd_t));
+}
+
+
+#include <asm-generic/pgtable.h>
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_X86_PGTABLE_H */
cmd &= ~LOCK_NB;
unlock = (cmd == LOCK_UN);
- if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3))
+ if (!unlock && !(cmd & LOCK_MAND) &&
+ !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
goto out_putf;
error = flock_make_lock(filp, &lock, cmd);
EXPORT_SYMBOL_GPL(vfs_cancel_lock);
#ifdef CONFIG_PROC_FS
+ #include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void lock_get_status(struct seq_file *f, struct file_lock *fl,
unlock_kernel();
}
- struct seq_operations locks_seq_operations = {
+ static const struct seq_operations locks_seq_operations = {
.start = locks_start,
.next = locks_next,
.stop = locks_stop,
.show = locks_show,
};
+
+ static int locks_open(struct inode *inode, struct file *filp)
+ {
+ return seq_open(filp, &locks_seq_operations);
+ }
+
+ static const struct file_operations proc_locks_operations = {
+ .open = locks_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ };
+
+ static int __init proc_locks_init(void)
+ {
+ proc_create("locks", 0, NULL, &proc_locks_operations);
+ return 0;
+ }
+ module_init(proc_locks_init);
#endif
/**
/*
* /proc/sys support
*/
-
+ #include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
* sysctl entries that are not writeable,
* are _NOT_ writeable, capabilities or not.
*/
- struct ctl_table_header *head = grab_header(inode);
- struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+ struct ctl_table_header *head;
+ struct ctl_table *table;
int error;
+ /* Executable files are not allowed under /proc/sys/ */
+ if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
+ return -EACCES;
+
+ head = grab_header(inode);
if (IS_ERR(head))
return PTR_ERR(head);
+ table = PROC_I(inode)->sysctl_entry;
if (!table) /* global root - r-xr-xr-x */
error = mask & MAY_WRITE ? -EACCES : 0;
else /* Use the permissions on the sysctl table entry */
static const struct file_operations proc_sys_dir_file_operations = {
.readdir = proc_sys_readdir,
+ .llseek = generic_file_llseek,
};
static const struct inode_operations proc_sys_inode_operations = {
.d_compare = proc_sys_compare,
};
- int proc_sys_init(void)
+ int __init proc_sys_init(void)
{
struct proc_dir_entry *proc_sys_root;
#define MAY_ACCESS 16
#define MAY_OPEN 32
-#define FMODE_READ 1
-#define FMODE_WRITE 2
+#define FMODE_READ ((__force fmode_t)1)
+#define FMODE_WRITE ((__force fmode_t)2)
/* Internal kernel extensions */
-#define FMODE_LSEEK 4
-#define FMODE_PREAD 8
+#define FMODE_LSEEK ((__force fmode_t)4)
+#define FMODE_PREAD ((__force fmode_t)8)
#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */
/* File is being opened for execution. Primary users of this flag are
distributed filesystems that can use it to achieve correct ETXTBUSY
behavior for cross-node execution/opening_for_writing of files */
-#define FMODE_EXEC 16
+#define FMODE_EXEC ((__force fmode_t)16)
+
+#define FMODE_NDELAY ((__force fmode_t)32)
+#define FMODE_EXCL ((__force fmode_t)64)
+#define FMODE_WRITE_IOCTL ((__force fmode_t)128)
+#define FMODE_NDELAY_NOW ((__force fmode_t)256)
#define RW_MASK 1
#define RWA_MASK 2
/*
* Superblock flags that can be altered by MS_REMOUNT
*/
-#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK)
+#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
/*
* Old magic mount flag and mask
const struct file_operations *f_op;
atomic_long_t f_count;
unsigned int f_flags;
- mode_t f_mode;
+ fmode_t f_mode;
loff_t f_pos;
struct fown_struct f_owner;
unsigned int f_uid, f_gid;
extern int lease_modify(struct file_lock **, int);
extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
- extern struct seq_operations locks_seq_operations;
#else /* !CONFIG_FILE_LOCKING */
#define fcntl_getlk(a, b) ({ -EINVAL; })
#define fcntl_setlk(a, b, c, d) ({ -EACCES; })
char s_id[32]; /* Informational name */
void *s_fs_info; /* Filesystem private info */
+ fmode_t s_mode;
/*
* The next field is for VFS *only*. No filesystems have any business
* to have different dirent layouts depending on the binary type.
*/
typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned);
-
-struct block_device_operations {
- int (*open) (struct inode *, struct file *);
- int (*release) (struct inode *, struct file *);
- int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
- long (*unlocked_ioctl) (struct file *, unsigned, unsigned long);
- long (*compat_ioctl) (struct file *, unsigned, unsigned long);
- int (*direct_access) (struct block_device *, sector_t,
- void **, unsigned long *);
- int (*media_changed) (struct gendisk *);
- int (*revalidate_disk) (struct gendisk *);
- int (*getgeo)(struct block_device *, struct hd_geometry *);
- struct module *owner;
-};
+struct block_device_operations;
/* These macros are for out of kernel modules to test that
* the kernel supports the unlocked_ioctl and compat_ioctl
struct vfsmount *mnt);
extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb);
int __put_super_and_need_restart(struct super_block *sb);
-void unnamed_dev_init(void);
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
#define fops_get(fops) \
extern void bd_set_size(struct block_device *, loff_t size);
extern void bd_forget(struct inode *inode);
extern void bdput(struct block_device *);
-extern struct block_device *open_by_devnum(dev_t, unsigned);
+extern struct block_device *open_by_devnum(dev_t, fmode_t);
#else
static inline void bd_forget(struct inode *inode) {}
#endif
extern const struct file_operations def_fifo_fops;
#ifdef CONFIG_BLOCK
extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
-extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long);
-extern int blkdev_driver_ioctl(struct inode *inode, struct file *file,
- struct gendisk *disk, unsigned cmd,
- unsigned long arg);
+extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-extern int blkdev_get(struct block_device *, mode_t, unsigned);
-extern int blkdev_put(struct block_device *);
+extern int blkdev_get(struct block_device *, fmode_t);
+extern int blkdev_put(struct block_device *, fmode_t);
extern int bd_claim(struct block_device *, void *);
extern void bd_release(struct block_device *);
#ifdef CONFIG_SYSFS
extern const char *__bdevname(dev_t, char *buffer);
extern const char *bdevname(struct block_device *bdev, char *buffer);
extern struct block_device *lookup_bdev(const char *);
-extern struct block_device *open_bdev_excl(const char *, int, void *);
-extern void close_bdev_excl(struct block_device *);
+extern struct block_device *open_bdev_exclusive(const char *, fmode_t, void *);
+extern void close_bdev_exclusive(struct block_device *, fmode_t);
extern void blkdev_show(struct seq_file *,off_t);
+
#else
#define BLKDEV_MAJOR_HASH_SIZE 0
#endif
extern int generic_permission(struct inode *, int,
int (*check_acl)(struct inode *, int));
+static inline bool execute_ok(struct inode *inode)
+{
+ return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
+}
+
extern int get_write_access(struct inode *);
extern int deny_write_access(struct file *);
static inline void put_write_access(struct inode * inode)
extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
-extern void hrtick_resched(void);
extern void sched_show_task(struct task_struct *p);
};
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
- #ifdef CONFIG_SCHEDSTATS
- extern const struct file_operations proc_schedstat_operations;
- #endif /* CONFIG_SCHEDSTATS */
-
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info {
spinlock_t lock;
int latency_record_count;
struct latency_record latency_record[LT_SAVECOUNT];
#endif
+ /*
+ * time slack values; these are used to round up poll() and
+ * select() etc timeout values. These are in nanoseconds.
+ */
+ unsigned long timer_slack_ns;
+ unsigned long default_timer_slack_ns;
};
/*
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_shares_ratelimit;
+extern unsigned int sysctl_sched_shares_thresh;
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
#include <linux/moduleloader.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
+ #include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/elf.h>
+ #include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/string.h>
#include <linux/mutex.h>
#include <linux/unwind.h>
+#include <linux/rculist.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
#include <linux/license.h>
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
/* List of modules, protected by module_mutex or preempt_disable
- * (add/delete uses stop_machine). */
+ * (delete uses stop_machine/add uses RCU list operations). */
static DEFINE_MUTEX(module_mutex);
static LIST_HEAD(modules);
return 0;
}
+/* Find a module section, or NULL. */
+static void *section_addr(Elf_Ehdr *hdr, Elf_Shdr *shdrs,
+ const char *secstrings, const char *name)
+{
+ /* Section 0 has sh_addr 0. */
+ return (void *)shdrs[find_sec(hdr, shdrs, secstrings, name)].sh_addr;
+}
+
+/* Find a module section, or NULL. Fill in number of "objects" in section. */
+static void *section_objs(Elf_Ehdr *hdr,
+ Elf_Shdr *sechdrs,
+ const char *secstrings,
+ const char *name,
+ size_t object_size,
+ unsigned int *num)
+{
+ unsigned int sec = find_sec(hdr, sechdrs, secstrings, name);
+
+ /* Section 0 has sh_addr 0 and sh_size 0. */
+ *num = sechdrs[sec].sh_size / object_size;
+ return (void *)sechdrs[sec].sh_addr;
+}
+
/* Provided by the linker */
extern const struct kernel_symbol __start___ksymtab[];
extern const struct kernel_symbol __stop___ksymtab[];
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
return true;
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
struct symsearch arr[] = {
{ mod->syms, mod->syms + mod->num_syms, mod->crcs,
NOT_GPL_ONLY, false },
mod_sysfs_fini(mod);
}
-/*
- * link the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __link_module(void *_mod)
-{
- struct module *mod = _mod;
- list_add(&mod->list, &modules);
- return 0;
-}
-
/*
* unlink the module with the whole machine is stopped with interrupts off
* - this defends against kallsyms not taking locks
}
#endif /* CONFIG_KALLSYMS */
-#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
-static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
+static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
{
- struct mod_debug *debug_info;
- unsigned long pos, end;
- unsigned int num_verbose;
-
- pos = sechdrs[verboseindex].sh_addr;
- num_verbose = sechdrs[verboseindex].sh_size /
- sizeof(struct mod_debug);
- end = pos + (num_verbose * sizeof(struct mod_debug));
+#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
+ unsigned int i;
- for (; pos < end; pos += sizeof(struct mod_debug)) {
- debug_info = (struct mod_debug *)pos;
- register_dynamic_debug_module(debug_info->modname,
- debug_info->type, debug_info->logical_modname,
- debug_info->flag_names, debug_info->hash,
- debug_info->hash2);
+ for (i = 0; i < num; i++) {
+ register_dynamic_debug_module(debug[i].modname,
+ debug[i].type,
+ debug[i].logical_modname,
+ debug[i].flag_names,
+ debug[i].hash, debug[i].hash2);
}
-}
-#else
-static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
- unsigned int verboseindex)
-{
-}
#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
+}
static void *module_alloc_update_bounds(unsigned long size)
{
unsigned int i;
unsigned int symindex = 0;
unsigned int strindex = 0;
- unsigned int setupindex;
- unsigned int exindex;
- unsigned int exportindex;
- unsigned int modindex;
- unsigned int obsparmindex;
- unsigned int infoindex;
- unsigned int gplindex;
- unsigned int crcindex;
- unsigned int gplcrcindex;
- unsigned int versindex;
- unsigned int pcpuindex;
- unsigned int gplfutureindex;
- unsigned int gplfuturecrcindex;
+ unsigned int modindex, versindex, infoindex, pcpuindex;
unsigned int unwindex = 0;
-#ifdef CONFIG_UNUSED_SYMBOLS
- unsigned int unusedindex;
- unsigned int unusedcrcindex;
- unsigned int unusedgplindex;
- unsigned int unusedgplcrcindex;
-#endif
- unsigned int markersindex;
- unsigned int markersstringsindex;
- unsigned int verboseindex;
- unsigned int tracepointsindex;
- unsigned int tracepointsstringsindex;
- unsigned int mcountindex;
+ unsigned int num_kp, num_mcount;
+ struct kernel_param *kp;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
- void *mseg;
- struct exception_table_entry *extable;
+ unsigned long *mseg;
mm_segment_t old_fs;
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
err = -ENOEXEC;
goto free_hdr;
}
+ /* This is temporary: point mod into copy of data. */
mod = (void *)sechdrs[modindex].sh_addr;
if (symindex == 0) {
goto free_hdr;
}
- /* Optional sections */
- exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
- gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
- gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
- crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
- gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
- gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
-#ifdef CONFIG_UNUSED_SYMBOLS
- unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
- unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
- unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
- unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
-#endif
- setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
- exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
- obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
if (err < 0)
goto cleanup;
- /* Set up EXPORTed & EXPORT_GPLed symbols (section 0 is 0 length) */
- mod->num_syms = sechdrs[exportindex].sh_size / sizeof(*mod->syms);
- mod->syms = (void *)sechdrs[exportindex].sh_addr;
- if (crcindex)
- mod->crcs = (void *)sechdrs[crcindex].sh_addr;
- mod->num_gpl_syms = sechdrs[gplindex].sh_size / sizeof(*mod->gpl_syms);
- mod->gpl_syms = (void *)sechdrs[gplindex].sh_addr;
- if (gplcrcindex)
- mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
- mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
- sizeof(*mod->gpl_future_syms);
- mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
- if (gplfuturecrcindex)
- mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+ /* Now we've got everything in the final locations, we can
+ * find optional sections. */
+ kp = section_objs(hdr, sechdrs, secstrings, "__param", sizeof(*kp),
+ &num_kp);
+ mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
+ sizeof(*mod->syms), &mod->num_syms);
+ mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
+ mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
+ sizeof(*mod->gpl_syms),
+ &mod->num_gpl_syms);
+ mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
+ mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
+ "__ksymtab_gpl_future",
+ sizeof(*mod->gpl_future_syms),
+ &mod->num_gpl_future_syms);
+ mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
+ "__kcrctab_gpl_future");
#ifdef CONFIG_UNUSED_SYMBOLS
- mod->num_unused_syms = sechdrs[unusedindex].sh_size /
- sizeof(*mod->unused_syms);
- mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
- sizeof(*mod->unused_gpl_syms);
- mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
- if (unusedcrcindex)
- mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
- mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
- if (unusedgplcrcindex)
- mod->unused_gpl_crcs
- = (void *)sechdrs[unusedgplcrcindex].sh_addr;
+ mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
+ "__ksymtab_unused",
+ sizeof(*mod->unused_syms),
+ &mod->num_unused_syms);
+ mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
+ "__kcrctab_unused");
+ mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
+ "__ksymtab_unused_gpl",
+ sizeof(*mod->unused_gpl_syms),
+ &mod->num_unused_gpl_syms);
+ mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
+ "__kcrctab_unused_gpl");
+#endif
+
+#ifdef CONFIG_MARKERS
+ mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers",
+ sizeof(*mod->markers), &mod->num_markers);
+#endif
+#ifdef CONFIG_TRACEPOINTS
+ mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
+ "__tracepoints",
+ sizeof(*mod->tracepoints),
+ &mod->num_tracepoints);
#endif
#ifdef CONFIG_MODVERSIONS
- if ((mod->num_syms && !crcindex)
- || (mod->num_gpl_syms && !gplcrcindex)
- || (mod->num_gpl_future_syms && !gplfuturecrcindex)
+ if ((mod->num_syms && !mod->crcs)
+ || (mod->num_gpl_syms && !mod->gpl_crcs)
+ || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
#ifdef CONFIG_UNUSED_SYMBOLS
- || (mod->num_unused_syms && !unusedcrcindex)
- || (mod->num_unused_gpl_syms && !unusedgplcrcindex)
+ || (mod->num_unused_syms && !mod->unused_crcs)
+ || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
#endif
) {
printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
goto cleanup;
}
#endif
- markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
- markersstringsindex = find_sec(hdr, sechdrs, secstrings,
- "__markers_strings");
- verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
- tracepointsindex = find_sec(hdr, sechdrs, secstrings, "__tracepoints");
- tracepointsstringsindex = find_sec(hdr, sechdrs, secstrings,
- "__tracepoints_strings");
-
- mcountindex = find_sec(hdr, sechdrs, secstrings,
- "__mcount_loc");
/* Now do relocations. */
for (i = 1; i < hdr->e_shnum; i++) {
if (err < 0)
goto cleanup;
}
-#ifdef CONFIG_MARKERS
- mod->markers = (void *)sechdrs[markersindex].sh_addr;
- mod->num_markers =
- sechdrs[markersindex].sh_size / sizeof(*mod->markers);
-#endif
-#ifdef CONFIG_TRACEPOINTS
- mod->tracepoints = (void *)sechdrs[tracepointsindex].sh_addr;
- mod->num_tracepoints =
- sechdrs[tracepointsindex].sh_size / sizeof(*mod->tracepoints);
-#endif
-
/* Find duplicate symbols */
err = verify_export_symbols(mod);
-
if (err < 0)
goto cleanup;
/* Set up and sort exception table */
- mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
- mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
- sort_extable(extable, extable + mod->num_exentries);
+ mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
+ sizeof(*mod->extable), &mod->num_exentries);
+ sort_extable(mod->extable, mod->extable + mod->num_exentries);
/* Finally, copy percpu area over. */
percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
if (!mod->taints) {
+ struct mod_debug *debug;
+ unsigned int num_debug;
+
#ifdef CONFIG_MARKERS
marker_update_probe_range(mod->markers,
mod->markers + mod->num_markers);
#endif
- dynamic_printk_setup(sechdrs, verboseindex);
+ debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
+ sizeof(*debug), &num_debug);
+ dynamic_printk_setup(debug, num_debug);
+
#ifdef CONFIG_TRACEPOINTS
tracepoint_update_probe_range(mod->tracepoints,
mod->tracepoints + mod->num_tracepoints);
}
/* sechdrs[0].sh_size is always zero */
- mseg = (void *)sechdrs[mcountindex].sh_addr;
- ftrace_init_module(mseg, mseg + sechdrs[mcountindex].sh_size);
+ mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
+ sizeof(*mseg), &num_mcount);
+ ftrace_init_module(mseg, mseg + num_mcount);
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
set_fs(old_fs);
mod->args = args;
- if (obsparmindex)
+ if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
mod->name);
/* Now sew it into the lists so we can get lockdep and oops
- * info during argument parsing. Noone should access us, since
- * strong_try_module_get() will fail. */
- stop_machine(__link_module, mod, NULL);
-
- /* Size of section 0 is 0, so this works well if no params */
- err = parse_args(mod->name, mod->args,
- (struct kernel_param *)
- sechdrs[setupindex].sh_addr,
- sechdrs[setupindex].sh_size
- / sizeof(struct kernel_param),
- NULL);
+ * info during argument parsing. Noone should access us, since
+ * strong_try_module_get() will fail.
+ * lockdep/oops can run asynchronous, so use the RCU list insertion
+ * function to insert in a way safe to concurrent readers.
+ * The mutex protects against concurrent writers.
+ */
+ list_add_rcu(&mod->list, &modules);
+
+ err = parse_args(mod->name, mod->args, kp, num_kp, NULL);
if (err < 0)
goto unlink;
- err = mod_sysfs_setup(mod,
- (struct kernel_param *)
- sechdrs[setupindex].sh_addr,
- sechdrs[setupindex].sh_size
- / sizeof(struct kernel_param));
+ err = mod_sysfs_setup(mod, kp, num_kp);
if (err < 0)
goto unlink;
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
const char *ret = NULL;
preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_init, mod->init_size)
|| within(addr, mod->module_core, mod->core_size)) {
if (modname)
struct module *mod;
preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_init, mod->init_size) ||
within(addr, mod->module_core, mod->core_size)) {
const char *sym;
struct module *mod;
preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_init, mod->init_size) ||
within(addr, mod->module_core, mod->core_size)) {
const char *sym;
struct module *mod;
preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (symnum < mod->num_symtab) {
*value = mod->symtab[symnum].st_value;
*type = mod->symtab[symnum].st_info;
ret = mod_find_symname(mod, colon+1);
*colon = ':';
} else {
- list_for_each_entry(mod, &modules, list)
+ list_for_each_entry_rcu(mod, &modules, list)
if ((ret = mod_find_symname(mod, name)) != 0)
break;
}
}
#endif /* CONFIG_KALLSYMS */
- /* Called by the /proc file system to return a list of modules. */
- static void *m_start(struct seq_file *m, loff_t *pos)
- {
- mutex_lock(&module_mutex);
- return seq_list_start(&modules, *pos);
- }
-
- static void *m_next(struct seq_file *m, void *p, loff_t *pos)
- {
- return seq_list_next(p, &modules, pos);
- }
-
- static void m_stop(struct seq_file *m, void *p)
- {
- mutex_unlock(&module_mutex);
- }
-
static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
return buf;
}
+ #ifdef CONFIG_PROC_FS
+ /* Called by the /proc file system to return a list of modules. */
+ static void *m_start(struct seq_file *m, loff_t *pos)
+ {
+ mutex_lock(&module_mutex);
+ return seq_list_start(&modules, *pos);
+ }
+
+ static void *m_next(struct seq_file *m, void *p, loff_t *pos)
+ {
+ return seq_list_next(p, &modules, pos);
+ }
+
+ static void m_stop(struct seq_file *m, void *p)
+ {
+ mutex_unlock(&module_mutex);
+ }
+
static int m_show(struct seq_file *m, void *p)
{
struct module *mod = list_entry(p, struct module, list);
Where refcount is a number or -, and deps is a comma-separated list
of depends or -.
*/
- const struct seq_operations modules_op = {
+ static const struct seq_operations modules_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = m_show
};
+ static int modules_open(struct inode *inode, struct file *file)
+ {
+ return seq_open(file, &modules_op);
+ }
+
+ static const struct file_operations proc_modules_operations = {
+ .open = modules_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+ };
+
+ static int __init proc_modules_init(void)
+ {
+ proc_create("modules", 0, NULL, &proc_modules_operations);
+ return 0;
+ }
+ module_init(proc_modules_init);
+ #endif
+
/* Given an address, look for it in the module exception tables. */
const struct exception_table_entry *search_module_extables(unsigned long addr)
{
struct module *mod;
preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (mod->num_exentries == 0)
continue;
preempt_disable();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_core, mod->core_size)) {
preempt_enable();
return 1;
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
- list_for_each_entry(mod, &modules, list)
+ list_for_each_entry_rcu(mod, &modules, list)
if (within(addr, mod->module_init, mod->init_text_size)
|| within(addr, mod->module_core, mod->core_text_size))
return mod;
char buf[8];
printk("Modules linked in:");
- list_for_each_entry(mod, &modules, list)
+ /* Most callers should already have preempt disabled, but make sure */
+ preempt_disable();
+ list_for_each_entry_rcu(mod, &modules, list)
printk(" %s%s", mod->name, module_flags(mod, buf));
+ preempt_enable();
if (last_unloaded_module[0])
printk(" [last unloaded: %s]", last_unloaded_module);
printk("\n");
#include <linux/cpuset.h>
#include <linux/percpu.h>
#include <linux/kthread.h>
+ #include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sysctl.h>
#include <linux/syscalls.h>
now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
- hrtimer_start(&rt_b->rt_period_timer,
- rt_b->rt_period_timer.expires,
- HRTIMER_MODE_ABS);
+ hrtimer_start_expires(&rt_b->rt_period_timer,
+ HRTIMER_MODE_ABS);
}
spin_unlock(&rt_b->rt_runtime_lock);
}
*/
unsigned int sysctl_sched_shares_ratelimit = 250000;
+/*
+ * Inject some fuzzyness into changing the per-cpu group shares
+ * this avoids remote rq-locks at the expense of fairness.
+ * default: 4
+ */
+unsigned int sysctl_sched_shares_thresh = 4;
+
/*
* period over which we measure -rt task cpu usage in us.
* default: 1s
struct hrtimer *timer = &rq->hrtick_timer;
ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
- timer->expires = time;
+ hrtimer_set_expires(timer, time);
if (rq == this_rq()) {
hrtimer_restart(timer);
* Calculate and set the cpu's group shares.
*/
static void
-__update_group_shares_cpu(struct task_group *tg, int cpu,
- unsigned long sd_shares, unsigned long sd_rq_weight)
+update_group_shares_cpu(struct task_group *tg, int cpu,
+ unsigned long sd_shares, unsigned long sd_rq_weight)
{
int boost = 0;
unsigned long shares;
*
*/
shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+ shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
- /*
- * record the actual number of shares, not the boosted amount.
- */
- tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
- tg->cfs_rq[cpu]->rq_weight = rq_weight;
+ if (abs(shares - tg->se[cpu]->load.weight) >
+ sysctl_sched_shares_thresh) {
+ struct rq *rq = cpu_rq(cpu);
+ unsigned long flags;
- if (shares < MIN_SHARES)
- shares = MIN_SHARES;
- else if (shares > MAX_SHARES)
- shares = MAX_SHARES;
+ spin_lock_irqsave(&rq->lock, flags);
+ /*
+ * record the actual number of shares, not the boosted amount.
+ */
+ tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
+ tg->cfs_rq[cpu]->rq_weight = rq_weight;
- __set_se_shares(tg->se[cpu], shares);
+ __set_se_shares(tg->se[cpu], shares);
+ spin_unlock_irqrestore(&rq->lock, flags);
+ }
}
/*
if (!rq_weight)
rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
- for_each_cpu_mask(i, sd->span) {
- struct rq *rq = cpu_rq(i);
- unsigned long flags;
-
- spin_lock_irqsave(&rq->lock, flags);
- __update_group_shares_cpu(tg, i, shares, rq_weight);
- spin_unlock_irqrestore(&rq->lock, flags);
- }
+ for_each_cpu_mask(i, sd->span)
+ update_group_shares_cpu(tg, i, shares, rq_weight);
return 0;
}
if (sched_feat(HRTICK))
hrtick_clear(rq);
- /*
- * Do the rq-clock update outside the rq lock:
- */
- local_irq_disable();
+ spin_lock_irq(&rq->lock);
update_rq_clock(rq);
- spin_lock(&rq->lock);
clear_tsk_need_resched(prev);
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
static int show_schedstat(struct seq_file *seq, void *v)
{
int cpu;
- int mask_len = NR_CPUS/32 * 9;
+ int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
char *mask_str = kmalloc(mask_len, GFP_KERNEL);
if (mask_str == NULL)
return res;
}
- const struct file_operations proc_schedstat_operations = {
+ static const struct file_operations proc_schedstat_operations = {
.open = schedstat_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
+ static int __init proc_schedstat_init(void)
+ {
+ proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
+ return 0;
+ }
+ module_init(proc_schedstat_init);
+
/*
* Expects runqueue lock to be held for atomicity of update
*/