Merge branch 'linus' into irq/threaded

author Ingo Molnar <mingo@elte.hu>

Sun, 5 Apr 2009 23:41:22 +0000 (01:41 +0200)

committer Ingo Molnar <mingo@elte.hu>

Sun, 5 Apr 2009 23:41:22 +0000 (01:41 +0200)
author Ingo Molnar <mingo@elte.hu>
Sun, 5 Apr 2009 23:41:22 +0000 (01:41 +0200)
committer Ingo Molnar <mingo@elte.hu>
Sun, 5 Apr 2009 23:41:22 +0000 (01:41 +0200)
diff --combined include/linux/hardirq.h

index 2dfaadbdb2ac04bf95fe4ca45a95569cf5294d44,faa1cf848bcd38e5c59725c3c0bb3e7bfdf86f63..45257475623cad94c90304d82b4b5cd68bc498d8
--- 1/include/linux/hardirq.h
--- 2/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@@ -15,55 -15,61 +15,61 @@@
    * - bits 0-7 are the preemption count (max preemption depth: 256)
    * - bits 8-15 are the softirq count (max # of softirqs: 256)
    *
-  * The hardirq count can be overridden per architecture, the default is:
+  * The hardirq count can in theory reach the same as NR_IRQS.
+  * In reality, the number of nested IRQS is limited to the stack
+  * size as well. For archs with over 1000 IRQS it is not practical
+  * to expect that they will all nest. We give a max of 10 bits for
+  * hardirq nesting. An arch may choose to give less than 10 bits.
+  * m68k expects it to be 8.
    *
-  * - bits 16-27 are the hardirq count (max # of hardirqs: 4096)
-  * - ( bit 28 is the PREEMPT_ACTIVE flag. )
+  * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
+  * - bit 26 is the NMI_MASK
+  * - bit 28 is the PREEMPT_ACTIVE flag
    *
    * PREEMPT_MASK: 0x000000ff
    * SOFTIRQ_MASK: 0x0000ff00
-  * HARDIRQ_MASK: 0x0fff0000
+  * HARDIRQ_MASK: 0x03ff0000
+  *     NMI_MASK: 0x04000000
    */
   #define PREEMPT_BITS  8
   #define SOFTIRQ_BITS  8
+ #define NMI_BITS      1
   
- #ifndef HARDIRQ_BITS
- #define HARDIRQ_BITS  12
+ #define MAX_HARDIRQ_BITS 10
   
- #ifndef MAX_HARDIRQS_PER_CPU
- #define MAX_HARDIRQS_PER_CPU NR_IRQS
+ #ifndef HARDIRQ_BITS
+ # define HARDIRQ_BITS MAX_HARDIRQ_BITS
   #endif
   
- /*
-  * The hardirq mask has to be large enough to have space for potentially
-  * all IRQ sources in the system nesting on a single CPU.
-  */
- #if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU
- # error HARDIRQ_BITS is too low!
- #endif
+ #if HARDIRQ_BITS > MAX_HARDIRQ_BITS
+ #error HARDIRQ_BITS too high!
   #endif
   
   #define PREEMPT_SHIFT 0
   #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
   #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+ #define NMI_SHIFT     (HARDIRQ_SHIFT + HARDIRQ_BITS)
   
   #define __IRQ_MASK(x) ((1UL << (x))-1)
   
   #define PREEMPT_MASK  (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
   #define SOFTIRQ_MASK  (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
   #define HARDIRQ_MASK  (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+ #define NMI_MASK      (__IRQ_MASK(NMI_BITS)     << NMI_SHIFT)
   
   #define PREEMPT_OFFSET        (1UL << PREEMPT_SHIFT)
   #define SOFTIRQ_OFFSET        (1UL << SOFTIRQ_SHIFT)
   #define HARDIRQ_OFFSET        (1UL << HARDIRQ_SHIFT)
+ #define NMI_OFFSET    (1UL << NMI_SHIFT)
   
- #if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS))
+ #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
   #error PREEMPT_ACTIVE is too low!
   #endif
   
   #define hardirq_count()       (preempt_count() & HARDIRQ_MASK)
   #define softirq_count()       (preempt_count() & SOFTIRQ_MASK)
- #define irq_count()   (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK))
+ #define irq_count()   (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+                                | NMI_MASK))
   
   /*
    * Are we doing bottom half or hardware interrupt processing?
@@@ -73,6 -79,11 +79,11 @@@
   #define in_softirq()          (softirq_count())
   #define in_interrupt()                (irq_count())
   
+ /*
+  * Are we in NMI context?
+  */
+ #define in_nmi()      (preempt_count() & NMI_MASK)
+ 
   #if defined(CONFIG_PREEMPT)
   # define PREEMPT_INATOMIC_BASE kernel_locked()
   # define PREEMPT_CHECK_OFFSET 1
@@@ -105,7 -116,7 +116,7 @@@
   # define IRQ_EXIT_OFFSET HARDIRQ_OFFSET
   #endif
   
- -#ifdef CONFIG_SMP
+ +#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
   extern void synchronize_irq(unsigned int irq);
   #else
   # define synchronize_irq(irq) barrier()
@@@ -164,20 -175,24 +175,24 @@@ extern void irq_enter(void)
    */
   extern void irq_exit(void);
   
- #define nmi_enter()                           \
-       do {                                    \
-               ftrace_nmi_enter();             \
-               lockdep_off();                  \
-               rcu_nmi_enter();                \
-               __irq_enter();                  \
+ #define nmi_enter()                                           \
+       do {                                                    \
+               ftrace_nmi_enter();                             \
+               BUG_ON(in_nmi());                               \
+               add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+               lockdep_off();                                  \
+               rcu_nmi_enter();                                \
+               trace_hardirq_enter();                          \
         } while (0)
   
- #define nmi_exit()                            \
-       do {                                    \
-               __irq_exit();                   \
-               rcu_nmi_exit();                 \
-               lockdep_on();                   \
-               ftrace_nmi_exit();              \
+ #define nmi_exit()                                            \
+       do {                                                    \
+               trace_hardirq_exit();                           \
+               rcu_nmi_exit();                                 \
+               lockdep_on();                                   \
+               BUG_ON(!in_nmi());                              \
+               sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \
+               ftrace_nmi_exit();                              \
         } while (0)
   
   #endif /* LINUX_HARDIRQ_H */
diff --combined include/linux/interrupt.h

index 143192f48bf3d67ce75e144a2be92d1dfcc6088d,ce2c07d99fc3a54934d140214cd46a0f7542937b..675727fb4b479741fb12b804bc850fbc59919f05
--- 1/include/linux/interrupt.h
--- 2/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@@ -59,18 -59,6 +59,18 @@@
   #define IRQF_NOBALANCING      0x00000800
   #define IRQF_IRQPOLL          0x00001000
   
+ +/*
+ + * Bits used by threaded handlers:
+ + * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run
+ + * IRQTF_DIED      - handler thread died
+ + * IRQTF_WARNED    - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed
+ + */
+ +enum {
+ +      IRQTF_RUNTHREAD,
+ +      IRQTF_DIED,
+ +      IRQTF_WARNED,
+ +};
+ +
   typedef irqreturn_t (*irq_handler_t)(int, void *);
   
   /**
@@@ -83,9 -71,6 +83,9 @@@
    * @next:     pointer to the next irqaction for shared interrupts
    * @irq:      interrupt number
    * @dir:      pointer to the proc/irq/NN/name entry
+ + * @thread_fn:        interrupt handler function for threaded interrupts
+ + * @thread:   thread pointer for threaded interrupts
+ + * @thread_flags:     flags related to @thread
    */
   struct irqaction {
         irq_handler_t handler;
@@@ -96,68 -81,18 +96,68 @@@
         struct irqaction *next;
         int irq;
         struct proc_dir_entry *dir;
+ +      irq_handler_t thread_fn;
+ +      struct task_struct *thread;
+ +      unsigned long thread_flags;
   };
   
   extern irqreturn_t no_action(int cpl, void *dev_id);
- -extern int __must_check request_irq(unsigned int, irq_handler_t handler,
- -                     unsigned long, const char *, void *);
+ +
+ +#ifdef CONFIG_GENERIC_HARDIRQS
+ +extern int __must_check
+ +request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ +                   irq_handler_t thread_fn,
+ +                   unsigned long flags, const char *name, void *dev);
+ +
+ +static inline int __must_check
+ +request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+ +          const char *name, void *dev)
+ +{
+ +      return request_threaded_irq(irq, handler, NULL, flags, name, dev);
+ +}
+ +
+ +extern void exit_irq_thread(void);
+ +#else
+ +
+ +extern int __must_check
+ +request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
+ +          const char *name, void *dev);
+ +
+ +/*
+ + * Special function to avoid ifdeffery in kernel/irq/devres.c which
+ + * gets magically built by GENERIC_HARDIRQS=n architectures (sparc,
+ + * m68k). I really love these $@%#!* obvious Makefile references:
+ + * ../../../kernel/irq/devres.o
+ + */
+ +static inline int __must_check
+ +request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ +                   irq_handler_t thread_fn,
+ +                   unsigned long flags, const char *name, void *dev)
+ +{
+ +      return request_irq(irq, handler, flags, name, dev);
+ +}
+ +
+ +static inline void exit_irq_thread(void) { }
+ +#endif
+ +
   extern void free_irq(unsigned int, void *);
   
   struct device;
   
- -extern int __must_check devm_request_irq(struct device *dev, unsigned int irq,
- -                          irq_handler_t handler, unsigned long irqflags,
- -                          const char *devname, void *dev_id);
+ +extern int __must_check
+ +devm_request_threaded_irq(struct device *dev, unsigned int irq,
+ +                        irq_handler_t handler, irq_handler_t thread_fn,
+ +                        unsigned long irqflags, const char *devname,
+ +                        void *dev_id);
+ +
+ +static inline int __must_check
+ +devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler,
+ +               unsigned long irqflags, const char *devname, void *dev_id)
+ +{
+ +      return devm_request_threaded_irq(dev, irq, handler, NULL, irqflags,
+ +                                       devname, dev_id);
+ +}
+ +
   extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
   
   /*
@@@ -182,6 -117,15 +182,15 @@@ extern void disable_irq_nosync(unsigne
   extern void disable_irq(unsigned int irq);
   extern void enable_irq(unsigned int irq);
   
+ /* The following three functions are for core kernel use only. */
+ extern void suspend_device_irqs(void);
+ extern void resume_device_irqs(void);
+ #ifdef CONFIG_PM_SLEEP
+ extern int check_wakeup_irqs(void);
+ #else
+ static inline int check_wakeup_irqs(void) { return 0; }
+ #endif
+ 
   #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
   
   extern cpumask_var_t irq_default_affinity;
@@@ -334,6 -278,11 +343,11 @@@ enu
         NR_SOFTIRQS
   };
   
+ /* map softirq index to softirq name. update 'softirq_to_name' in
+  * kernel/softirq.c when adding a new softirq.
+  */
+ extern char *softirq_to_name[NR_SOFTIRQS];
+ 
   /* softirq mask and active fields moved to irq_cpustat_t in
    * asm/hardirq.h to get better cache usage.  KAO
    */
diff --combined include/linux/irq.h

index 8b1cf063021044bc9da0151a85956d5ff455d7b1,974890b3c52fbfe0048df3cf7d736798caf873b6..ca507c9426b00972af3254d180837d7bdb05b471
--- 1/include/linux/irq.h
--- 2/include/linux/irq.h
+++ b/include/linux/irq.h
@@@ -17,10 -17,11 +17,12 @@@
   #include <linux/cache.h>
   #include <linux/spinlock.h>
   #include <linux/cpumask.h>
+ #include <linux/gfp.h>
   #include <linux/irqreturn.h>
   #include <linux/irqnr.h>
   #include <linux/errno.h>
+ #include <linux/topology.h>
+ +#include <linux/wait.h>
   
   #include <asm/irq.h>
   #include <asm/ptrace.h>
@@@ -66,6 -67,7 +68,7 @@@ typedef       void (*irq_flow_handler_t)(unsi
   #define IRQ_SPURIOUS_DISABLED 0x00800000      /* IRQ was disabled by the spurious trap */
   #define IRQ_MOVE_PCNTXT               0x01000000      /* IRQ migration from process context */
   #define IRQ_AFFINITY_SET      0x02000000      /* IRQ affinity was set from userspace*/
+ #define IRQ_SUSPENDED         0x04000000      /* IRQ has gone through suspend sequence */
   
   #ifdef CONFIG_IRQ_PER_CPU
   # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
@@@ -156,8 -158,6 +159,8 @@@ struct irq_2_iommu
    * @affinity:         IRQ affinity on SMP
    * @cpu:              cpu index useful for balancing
    * @pending_mask:     pending rebalanced interrupts
+ + * @threads_active:   number of irqaction threads currently running
+ + * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers
    * @dir:              /proc/irq/ procfs entry
    * @name:             flow handler name for /proc/interrupts output
    */
@@@ -189,8 -189,6 +192,8 @@@ struct irq_desc 
         cpumask_var_t           pending_mask;
   #endif
   #endif
+ +      atomic_t                threads_active;
+ +      wait_queue_head_t       wait_for_threads;
   #ifdef CONFIG_PROC_FS
         struct proc_dir_entry   *dir;
   #endif
diff --combined include/linux/sched.h

index 38b77b0f56e53887e58c8eeb73965f9659e12847,b94f3541f67be00802c28f22fa5b56f9e8c91e55..c96140210d1c4286cdd249e936e07e103689cbd3
--- 1/include/linux/sched.h
--- 2/include/linux/sched.h
+++ b/include/linux/sched.h
@@@ -68,7 -68,7 +68,7 @@@ struct sched_param 
   #include <linux/smp.h>
   #include <linux/sem.h>
   #include <linux/signal.h>
- #include <linux/fs_struct.h>
+ #include <linux/path.h>
   #include <linux/compiler.h>
   #include <linux/completion.h>
   #include <linux/pid.h>
@@@ -97,6 -97,7 +97,7 @@@ struct futex_pi_state
   struct robust_list_head;
   struct bio;
   struct bts_tracer;
+ struct fs_struct;
   
   /*
    * List of flags we want to share for kernel threads,
@@@ -137,6 -138,8 +138,8 @@@ extern unsigned long nr_uninterruptible
   extern unsigned long nr_active(void);
   extern unsigned long nr_iowait(void);
   
+ extern unsigned long get_parent_ip(unsigned long addr);
+ 
   struct seq_file;
   struct cfs_rq;
   struct task_group;
@@@ -331,7 -334,9 +334,9 @@@ extern signed long schedule_timeout(sig
   extern signed long schedule_timeout_interruptible(signed long timeout);
   extern signed long schedule_timeout_killable(signed long timeout);
   extern signed long schedule_timeout_uninterruptible(signed long timeout);
+ asmlinkage void __schedule(void);
   asmlinkage void schedule(void);
+ extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
   
   struct nsproxy;
   struct user_namespace;
@@@ -389,8 -394,15 +394,15 @@@ extern void arch_unmap_area_topdown(str
                 (mm)->hiwater_vm = (mm)->total_vm;      \
   } while (0)
   
- #define get_mm_hiwater_rss(mm)        max((mm)->hiwater_rss, get_mm_rss(mm))
- #define get_mm_hiwater_vm(mm) max((mm)->hiwater_vm, (mm)->total_vm)
+ static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
+ {
+       return max(mm->hiwater_rss, get_mm_rss(mm));
+ }
+ 
+ static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
+ {
+       return max(mm->hiwater_vm, mm->total_vm);
+ }
   
   extern void set_dumpable(struct mm_struct *mm, int value);
   extern int get_dumpable(struct mm_struct *mm);
@@@ -538,25 -550,8 +550,8 @@@ struct signal_struct 
   
         struct list_head cpu_timers[3];
   
-       /* job control IDs */
- 
-       /*
-        * pgrp and session fields are deprecated.
-        * use the task_session_Xnr and task_pgrp_Xnr routines below
-        */
- 
-       union {
-               pid_t pgrp __deprecated;
-               pid_t __pgrp;
-       };
- 
         struct pid *tty_old_pgrp;
   
-       union {
-               pid_t session __deprecated;
-               pid_t __session;
-       };
- 
         /* boolean value for session group leader */
         int leader;
   
@@@ -998,6 -993,7 +993,7 @@@ struct sched_class 
                               struct rq *busiest, struct sched_domain *sd,
                               enum cpu_idle_type idle);
         void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+       int (*needs_post_schedule) (struct rq *this_rq);
         void (*post_schedule) (struct rq *this_rq);
         void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
   
@@@ -1052,6 -1048,10 +1048,10 @@@ struct sched_entity 
         u64                     last_wakeup;
         u64                     avg_overlap;
   
+       u64                     start_runtime;
+       u64                     avg_wakeup;
+       u64                     nr_migrations;
+ 
   #ifdef CONFIG_SCHEDSTATS
         u64                     wait_start;
         u64                     wait_max;
@@@ -1067,7 -1067,6 +1067,6 @@@
         u64                     exec_max;
         u64                     slice_max;
   
-       u64                     nr_migrations;
         u64                     nr_migrations_cold;
         u64                     nr_failed_migrations_affine;
         u64                     nr_failed_migrations_running;
@@@ -1164,6 -1163,7 +1163,7 @@@ struct task_struct 
   #endif
   
         struct list_head tasks;
+       struct plist_node pushable_tasks;
   
         struct mm_struct *mm, *active_mm;
   
@@@ -1175,6 -1175,8 +1175,8 @@@
         /* ??? */
         unsigned int personality;
         unsigned did_exec:1;
+       unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
+                                * execve */
         pid_t pid;
         pid_t tgid;
   
@@@ -1292,11 -1294,6 +1294,11 @@@
   /* Protection of (de-)allocation: mm, files, fs, tty, keyrings */
         spinlock_t alloc_lock;
   
+ +#ifdef CONFIG_GENERIC_HARDIRQS
+ +      /* IRQ handler threads */
+ +      struct irqaction *irqaction;
+ +#endif
+ +
         /* Protection of the PI data structures: */
         spinlock_t pi_lock;
   
@@@ -1332,6 -1329,7 +1334,7 @@@
         int lockdep_depth;
         unsigned int lockdep_recursion;
         struct held_lock held_locks[MAX_LOCK_DEPTH];
+       gfp_t lockdep_reclaim_gfp;
   #endif
   
   /* journalling filesystem info */
@@@ -1409,6 -1407,8 +1412,8 @@@
         int curr_ret_stack;
         /* Stack of return addresses for return function tracing */
         struct ftrace_ret_stack *ret_stack;
+       /* time stamp for last schedule */
+       unsigned long long ftrace_timestamp;
         /*
          * Number of functions that haven't been traced
          * because of depth overrun.
@@@ -1457,16 -1457,6 +1462,6 @@@ static inline int rt_task(struct task_s
         return rt_prio(p->prio);
   }
   
- static inline void set_task_session(struct task_struct *tsk, pid_t session)
- {
-       tsk->signal->__session = session;
- }
- 
- static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
- {
-       tsk->signal->__pgrp = pgrp;
- }
- 
   static inline struct pid *task_pid(struct task_struct *task)
   {
         return task->pids[PIDTYPE_PID].pid;
@@@ -1477,6 -1467,11 +1472,11 @@@ static inline struct pid *task_tgid(str
         return task->group_leader->pids[PIDTYPE_PID].pid;
   }
   
+ /*
+  * Without tasklist or rcu lock it is not safe to dereference
+  * the result of task_pgrp/task_session even if task == current,
+  * we can race with another thread doing sys_setsid/sys_setpgid.
+  */
   static inline struct pid *task_pgrp(struct task_struct *task)
   {
         return task->group_leader->pids[PIDTYPE_PGID].pid;
@@@ -1502,17 -1497,23 +1502,23 @@@ struct pid_namespace
    *
    * see also pid_nr() etc in include/linux/pid.h
    */
+ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
+                       struct pid_namespace *ns);
   
   static inline pid_t task_pid_nr(struct task_struct *tsk)
   {
         return tsk->pid;
   }
   
- pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
+ static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
+                                       struct pid_namespace *ns)
+ {
+       return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
+ }
   
   static inline pid_t task_pid_vnr(struct task_struct *tsk)
   {
-       return pid_vnr(task_pid(tsk));
+       return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
   }
   
   
@@@ -1529,31 -1530,34 +1535,34 @@@ static inline pid_t task_tgid_vnr(struc
   }
   
   
- static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+ static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
+                                       struct pid_namespace *ns)
   {
-       return tsk->signal->__pgrp;
+       return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
   }
   
- pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
- 
   static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
   {
-       return pid_vnr(task_pgrp(tsk));
+       return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
   }
   
   
- static inline pid_t task_session_nr(struct task_struct *tsk)
+ static inline pid_t task_session_nr_ns(struct task_struct *tsk,
+                                       struct pid_namespace *ns)
   {
-       return tsk->signal->__session;
+       return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
   }
   
- pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
- 
   static inline pid_t task_session_vnr(struct task_struct *tsk)
   {
-       return pid_vnr(task_session(tsk));
+       return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
   }
   
+ /* obsolete, do not use */
+ static inline pid_t task_pgrp_nr(struct task_struct *tsk)
+ {
+       return task_pgrp_nr_ns(tsk, &init_pid_ns);
+ }
   
   /**
    * pid_alive - check that a task structure is not stale
@@@ -1677,6 -1681,16 +1686,16 @@@ static inline int set_cpus_allowed(stru
         return set_cpus_allowed_ptr(p, &new_mask);
   }
   
+ /*
+  * Architectures can set this to 1 if they have specified
+  * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+  * but then during bootup it turns out that sched_clock()
+  * is reliable after all:
+  */
+ #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+ extern int sched_clock_stable;
+ #endif
+ 
   extern unsigned long long sched_clock(void);
   
   extern void sched_clock_init(void);
@@@ -1953,7 -1967,8 +1972,8 @@@ extern void mm_release(struct task_stru
   /* Allocate a new mm structure and copy contents from tsk->mm */
   extern struct mm_struct *dup_mm(struct task_struct *tsk);
   
- extern int  copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *);
+ extern int copy_thread(unsigned long, unsigned long, unsigned long,
+                       struct task_struct *, struct pt_regs *);
   extern void flush_thread(void);
   extern void exit_thread(void);
   
@@@ -2038,6 -2053,11 +2058,11 @@@ static inline int thread_group_empty(st
   #define delay_group_leader(p) \
                 (thread_group_leader(p) && !thread_group_empty(p))
   
+ static inline int task_detached(struct task_struct *p)
+ {
+       return p->exit_signal == -1;
+ }
+ 
   /*
    * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
    * subscriptions and synchronises with wait4().  Also used in procfs.  Also
diff --combined kernel/exit.c

index ca0b3488c4a9550351f0ba679644719ba28299c3,6686ed1e4aa3aedd25a613d3ad7282798392f224..789b8862fe3b3fd1381e0cc45705cbf56db2da97
--- 1/kernel/exit.c
--- 2/kernel/exit.c
+++ b/kernel/exit.c
@@@ -46,6 -46,7 +46,7 @@@
   #include <linux/blkdev.h>
   #include <linux/task_io_accounting_ops.h>
   #include <linux/tracehook.h>
+ #include <linux/fs_struct.h>
   #include <linux/init_task.h>
   #include <trace/sched.h>
   
@@@ -61,11 -62,6 +62,6 @@@ DEFINE_TRACE(sched_process_wait)
   
   static void exit_mm(struct task_struct * tsk);
   
- static inline int task_detached(struct task_struct *p)
- {
-       return p->exit_signal == -1;
- }
- 
   static void __unhash_process(struct task_struct *p)
   {
         nr_threads--;
@@@ -362,16 -358,12 +358,12 @@@ static void reparent_to_kthreadd(void
   void __set_special_pids(struct pid *pid)
   {
         struct task_struct *curr = current->group_leader;
-       pid_t nr = pid_nr(pid);
   
-       if (task_session(curr) != pid) {
+       if (task_session(curr) != pid)
                 change_pid(curr, PIDTYPE_SID, pid);
-               set_task_session(curr, nr);
-       }
-       if (task_pgrp(curr) != pid) {
+ 
+       if (task_pgrp(curr) != pid)
                 change_pid(curr, PIDTYPE_PGID, pid);
-               set_task_pgrp(curr, nr);
-       }
   }
   
   static void set_special_pids(struct pid *pid)
@@@ -429,7 -421,6 +421,6 @@@ EXPORT_SYMBOL(disallow_signal)
   void daemonize(const char *name, ...)
   {
         va_list args;
-       struct fs_struct *fs;
         sigset_t blocked;
   
         va_start(args, name);
@@@ -462,11 -453,7 +453,7 @@@
   
         /* Become as one with the init task */
   
-       exit_fs(current);       /* current->fs->count--; */
-       fs = init_task.fs;
-       current->fs = fs;
-       atomic_inc(&fs->count);
- 
+       daemonize_fs_struct();
         exit_files(current);
         current->files = init_task.files;
         atomic_inc(&current->files->count);
@@@ -565,30 -552,6 +552,6 @@@ void exit_files(struct task_struct *tsk
         }
   }
   
- void put_fs_struct(struct fs_struct *fs)
- {
-       /* No need to hold fs->lock if we are killing it */
-       if (atomic_dec_and_test(&fs->count)) {
-               path_put(&fs->root);
-               path_put(&fs->pwd);
-               kmem_cache_free(fs_cachep, fs);
-       }
- }
- 
- void exit_fs(struct task_struct *tsk)
- {
-       struct fs_struct * fs = tsk->fs;
- 
-       if (fs) {
-               task_lock(tsk);
-               tsk->fs = NULL;
-               task_unlock(tsk);
-               put_fs_struct(fs);
-       }
- }
- 
- EXPORT_SYMBOL_GPL(exit_fs);
- 
   #ifdef CONFIG_MM_OWNER
   /*
    * Task p is exiting and it owned mm, lets find a new owner for it
@@@ -731,119 -694,6 +694,6 @@@ static void exit_mm(struct task_struct 
         mmput(mm);
   }
   
- /*
-  * Return nonzero if @parent's children should reap themselves.
-  *
-  * Called with write_lock_irq(&tasklist_lock) held.
-  */
- static int ignoring_children(struct task_struct *parent)
- {
-       int ret;
-       struct sighand_struct *psig = parent->sighand;
-       unsigned long flags;
-       spin_lock_irqsave(&psig->siglock, flags);
-       ret = (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN ||
-              (psig->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDWAIT));
-       spin_unlock_irqrestore(&psig->siglock, flags);
-       return ret;
- }
- 
- /*
-  * Detach all tasks we were using ptrace on.
-  * Any that need to be release_task'd are put on the @dead list.
-  *
-  * Called with write_lock(&tasklist_lock) held.
-  */
- static void ptrace_exit(struct task_struct *parent, struct list_head *dead)
- {
-       struct task_struct *p, *n;
-       int ign = -1;
- 
-       list_for_each_entry_safe(p, n, &parent->ptraced, ptrace_entry) {
-               __ptrace_unlink(p);
- 
-               if (p->exit_state != EXIT_ZOMBIE)
-                       continue;
- 
-               /*
-                * If it's a zombie, our attachedness prevented normal
-                * parent notification or self-reaping.  Do notification
-                * now if it would have happened earlier.  If it should
-                * reap itself, add it to the @dead list.  We can't call
-                * release_task() here because we already hold tasklist_lock.
-                *
-                * If it's our own child, there is no notification to do.
-                * But if our normal children self-reap, then this child
-                * was prevented by ptrace and we must reap it now.
-                */
-               if (!task_detached(p) && thread_group_empty(p)) {
-                       if (!same_thread_group(p->real_parent, parent))
-                               do_notify_parent(p, p->exit_signal);
-                       else {
-                               if (ign < 0)
-                                       ign = ignoring_children(parent);
-                               if (ign)
-                                       p->exit_signal = -1;
-                       }
-               }
- 
-               if (task_detached(p)) {
-                       /*
-                        * Mark it as in the process of being reaped.
-                        */
-                       p->exit_state = EXIT_DEAD;
-                       list_add(&p->ptrace_entry, dead);
-               }
-       }
- }
- 
- /*
-  * Finish up exit-time ptrace cleanup.
-  *
-  * Called without locks.
-  */
- static void ptrace_exit_finish(struct task_struct *parent,
-                              struct list_head *dead)
- {
-       struct task_struct *p, *n;
- 
-       BUG_ON(!list_empty(&parent->ptraced));
- 
-       list_for_each_entry_safe(p, n, dead, ptrace_entry) {
-               list_del_init(&p->ptrace_entry);
-               release_task(p);
-       }
- }
- 
- static void reparent_thread(struct task_struct *p, struct task_struct *father)
- {
-       if (p->pdeath_signal)
-               /* We already hold the tasklist_lock here.  */
-               group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
- 
-       list_move_tail(&p->sibling, &p->real_parent->children);
- 
-       /* If this is a threaded reparent there is no need to
-        * notify anyone anything has happened.
-        */
-       if (same_thread_group(p->real_parent, father))
-               return;
- 
-       /* We don't want people slaying init.  */
-       if (!task_detached(p))
-               p->exit_signal = SIGCHLD;
- 
-       /* If we'd notified the old parent about this child's death,
-        * also notify the new parent.
-        */
-       if (!ptrace_reparented(p) &&
-           p->exit_state == EXIT_ZOMBIE &&
-           !task_detached(p) && thread_group_empty(p))
-               do_notify_parent(p, p->exit_signal);
- 
-       kill_orphaned_pgrp(p, father);
- }
- 
   /*
    * When we die, we re-parent all our children.
    * Try to give them to another thread in our thread
@@@ -883,17 -733,51 +733,51 @@@ static struct task_struct *find_new_rea
         return pid_ns->child_reaper;
   }
   
+ /*
+  * Any children that need to be release_task'd are put on the @dead list.
+  */
+ static void reparent_thread(struct task_struct *father, struct task_struct *p,
+                               struct list_head *dead)
+ {
+       if (p->pdeath_signal)
+               group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
+ 
+       list_move_tail(&p->sibling, &p->real_parent->children);
+ 
+       if (task_detached(p))
+               return;
+       /*
+        * If this is a threaded reparent there is no need to
+        * notify anyone anything has happened.
+        */
+       if (same_thread_group(p->real_parent, father))
+               return;
+ 
+       /* We don't want people slaying init.  */
+       p->exit_signal = SIGCHLD;
+ 
+       /* If it has exited notify the new parent about this child's death. */
+       if (!p->ptrace &&
+           p->exit_state == EXIT_ZOMBIE && thread_group_empty(p)) {
+               do_notify_parent(p, p->exit_signal);
+               if (task_detached(p)) {
+                       p->exit_state = EXIT_DEAD;
+                       list_move_tail(&p->sibling, dead);
+               }
+       }
+ 
+       kill_orphaned_pgrp(p, father);
+ }
+ 
   static void forget_original_parent(struct task_struct *father)
   {
         struct task_struct *p, *n, *reaper;
-       LIST_HEAD(ptrace_dead);
+       LIST_HEAD(dead_children);
+ 
+       exit_ptrace(father);
   
         write_lock_irq(&tasklist_lock);
         reaper = find_new_reaper(father);
-       /*
-        * First clean up ptrace if we were using it.
-        */
-       ptrace_exit(father, &ptrace_dead);
   
         list_for_each_entry_safe(p, n, &father->children, sibling) {
                 p->real_parent = reaper;
@@@ -901,13 -785,16 +785,16 @@@
                         BUG_ON(p->ptrace);
                         p->parent = p->real_parent;
                 }
-               reparent_thread(p, father);
+               reparent_thread(father, p, &dead_children);
         }
- 
         write_unlock_irq(&tasklist_lock);
+ 
         BUG_ON(!list_empty(&father->children));
   
-       ptrace_exit_finish(father, &ptrace_dead);
+       list_for_each_entry_safe(p, n, &dead_children, sibling) {
+               list_del_init(&p->sibling);
+               release_task(p);
+       }
   }
   
   /*
@@@ -1037,8 -924,6 +924,8 @@@ NORET_TYPE void do_exit(long code
                 schedule();
         }
   
+ +      exit_irq_thread();
+ +
         exit_signals(tsk);  /* sets PF_EXITING */
         /*
          * tsk->flags are checked in the futex code to protect against
@@@ -1419,6 -1304,18 +1306,18 @@@ static int wait_task_zombie(struct task
         return retval;
   }
   
+ /*
+  * Pick the exit-code slot that describes a stopped @p for a waiter.
+  *
+  * For a ptrace wait this is &p->exit_code, provided @p is stopped or
+  * traced.  Otherwise it is &p->signal->group_exit_code, provided
+  * SIGNAL_STOP_STOPPED is set in p->signal->flags.  Returns NULL when
+  * the relevant condition does not hold.
+  */
+ static int *task_stopped_code(struct task_struct *p, bool ptrace)
+ {
+       if (!ptrace)
+               return (p->signal->flags & SIGNAL_STOP_STOPPED) ?
+                       &p->signal->group_exit_code : NULL;
+ 
+       return task_is_stopped_or_traced(p) ? &p->exit_code : NULL;
+ }
+ 
   /*
    * Handle sys_wait4 work for one task in state TASK_STOPPED.  We hold
    * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
@@@ -1429,7 -1326,7 +1328,7 @@@ static int wait_task_stopped(int ptrace
                              int options, struct siginfo __user *infop,
                              int __user *stat_addr, struct rusage __user *ru)
   {
-       int retval, exit_code, why;
+       int retval, exit_code, *p_code, why;
         uid_t uid = 0; /* unneeded, required by compiler */
         pid_t pid;
   
@@@ -1439,22 -1336,16 +1338,16 @@@
         exit_code = 0;
         spin_lock_irq(&p->sighand->siglock);
   
-       if (unlikely(!task_is_stopped_or_traced(p)))
-               goto unlock_sig;
- 
-       if (!ptrace && p->signal->group_stop_count > 0)
-               /*
-                * A group stop is in progress and this is the group leader.
-                * We won't report until all threads have stopped.
-                */
+       p_code = task_stopped_code(p, ptrace);
+       if (unlikely(!p_code))
                 goto unlock_sig;
   
-       exit_code = p->exit_code;
+       exit_code = *p_code;
         if (!exit_code)
                 goto unlock_sig;
   
         if (!unlikely(options & WNOWAIT))
-               p->exit_code = 0;
+               *p_code = 0;
   
         /* don't need the RCU readlock here as we're holding a spinlock */
         uid = __task_cred(p)->uid;
@@@ -1610,7 -1501,7 +1503,7 @@@ static int wait_consider_task(struct ta
          */
         *notask_error = 0;
   
-       if (task_is_stopped_or_traced(p))
+       if (task_stopped_code(p, ptrace))
                 return wait_task_stopped(ptrace, p, options,
                                          infop, stat_addr, ru);
   
@@@ -1814,7 -1705,7 +1707,7 @@@ SYSCALL_DEFINE4(wait4, pid_t, upid, in
                 pid = find_get_pid(-upid);
         } else if (upid == 0) {
                 type = PIDTYPE_PGID;
-               pid = get_pid(task_pgrp(current));
+               pid = get_task_pid(current, PIDTYPE_PGID);
         } else /* upid > 0 */ {
                 type = PIDTYPE_PID;
                 pid = find_get_pid(upid);
diff --combined kernel/irq/handle.c

index 38b49a9e508a77be0670f48b2f970c0b01df0f0a,343acecae629ff37c32013daede12da10b21f7fb..d82142be8dd2c8fddc340058ff9cd73373f6a31b
--- 1/kernel/irq/handle.c
--- 2/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@@ -17,6 -17,7 +17,7 @@@
   #include <linux/kernel_stat.h>
   #include <linux/rculist.h>
   #include <linux/hash.h>
+ #include <trace/irq.h>
   #include <linux/bootmem.h>
   
   #include "internals.h"
@@@ -338,15 -339,9 +339,18 @@@ irqreturn_t no_action(int cpl, void *de
         return IRQ_NONE;
   }
   
+ +/*
+ + * Complain once per action when its primary handler asked for the irq
+ + * thread to be woken although no thread function is set up.
+ + *
+ + * IRQTF_WARNED in action->thread_flags latches the first report, so
+ + * later occurrences for the same action stay silent.
+ + */
+ +static void warn_no_thread(unsigned int irq, struct irqaction *action)
+ +{
+ +      if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+ +              return;
+ +
+ +      /* Terminate the message so the next printk starts on a fresh line. */
+ +      printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+ +             "but no thread function available.\n", irq, action->name);
+ +}
+ +
+ DEFINE_TRACE(irq_handler_entry);
+ DEFINE_TRACE(irq_handler_exit);
+ 
   /**
    * handle_IRQ_event - irq action chain handler
    * @irq:      the interrupt number
@@@ -365,48 -360,11 +369,50 @@@ irqreturn_t handle_IRQ_event(unsigned i
                 local_irq_enable_in_hardirq();
   
         do {
+               trace_irq_handler_entry(irq, action);
                 ret = action->handler(irq, action->dev_id);
- -              if (ret == IRQ_HANDLED)
+               trace_irq_handler_exit(irq, action, ret);
+ +
+ +              switch (ret) {
+ +              case IRQ_WAKE_THREAD:
+ +                      /*
+ +                       * Set result to handled so the spurious check
+ +                       * does not trigger.
+ +                       */
+ +                      ret = IRQ_HANDLED;
+ +
+ +                      /*
+ +                       * Catch drivers which return WAKE_THREAD but
+ +                       * did not set up a thread function
+ +                       */
+ +                      if (unlikely(!action->thread_fn)) {
+ +                              warn_no_thread(irq, action);
+ +                              break;
+ +                      }
+ +
+ +                      /*
+ +                       * Wake up the handler thread for this
+ +                       * action. In case the thread crashed and was
+ +                       * killed we just pretend that we handled the
+ +                       * interrupt. The hardirq handler above has
+ +                       * disabled the device interrupt, so no irq
+ +                       * storm is lurking.
+ +                       */
+ +                      if (likely(!test_bit(IRQTF_DIED,
+ +                                           &action->thread_flags))) {
+ +                              set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+ +                              wake_up_process(action->thread);
+ +                      }
+ +
+ +                      /* Fall through to add to randomness */
+ +              case IRQ_HANDLED:
                         status |= action->flags;
+ +                      break;
+ +
+ +              default:
+ +                      break;
+ +              }
+ +
                 retval |= ret;
                 action = action->next;
         } while (action);
diff --combined kernel/irq/manage.c

index a3eb7baf1e46f2c735edb4cc44e0386cfbc4989e,1516ab77355c928bd1e1f5c8b6759e53d3b0c7e3..7e2e7dd4cd2f70e5d619f893161225e230bf487c
--- 1/kernel/irq/manage.c
--- 2/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@@ -8,15 -8,16 +8,15 @@@
    */
   
   #include <linux/irq.h>
+ +#include <linux/kthread.h>
   #include <linux/module.h>
   #include <linux/random.h>
   #include <linux/interrupt.h>
   #include <linux/slab.h>
+ +#include <linux/sched.h>
   
   #include "internals.h"
   
- -#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
- -cpumask_var_t irq_default_affinity;
- -
   /**
    *    synchronize_irq - wait for pending IRQ handlers (on other CPUs)
    *    @irq: interrupt number to wait for
@@@ -52,18 -53,9 +52,18 @@@ void synchronize_irq(unsigned int irq
   
                 /* Oops, that failed? */
         } while (status & IRQ_INPROGRESS);
+ +
+ +      /*
+ +       * We made sure that no hardirq handler is running. Now verify
+ +       * that no threaded handlers are active.
+ +       */
+ +      wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
   }
   EXPORT_SYMBOL(synchronize_irq);
   
+ +#ifdef CONFIG_SMP
+ +cpumask_var_t irq_default_affinity;
+ +
   /**
    *    irq_can_set_affinity - Check if the affinity of a given irq can be set
    *    @irq:           Interrupt to check
@@@ -80,18 -72,6 +80,18 @@@ int irq_can_set_affinity(unsigned int i
         return 1;
   }
   
+ +/*
+ + * Apply @cpumask to the handler thread of every action chained on
+ + * @desc; actions that have no thread are skipped.
+ + *
+ + * NOTE(review): the callers visible in this file invoke this while
+ + * holding desc->lock with interrupts off - confirm that
+ + * set_cpus_allowed_ptr() is safe to call in that context.
+ + */
+ +static void
+ +irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+ +{
+ +      struct irqaction *act;
+ +
+ +      for (act = desc->action; act; act = act->next) {
+ +              if (!act->thread)
+ +                      continue;
+ +              set_cpus_allowed_ptr(act->thread, cpumask);
+ +      }
+ +}
+ +
   /**
    *    irq_set_affinity - Set the irq affinity of a given irq
    *    @irq:           Interrupt to set affinity
@@@ -120,7 -100,6 +120,7 @@@ int irq_set_affinity(unsigned int irq, 
         cpumask_copy(desc->affinity, cpumask);
         desc->chip->set_affinity(irq, cpumask);
   #endif
+ +      irq_set_thread_affinity(desc, cpumask);
         desc->status |= IRQ_AFFINITY_SET;
         spin_unlock_irqrestore(&desc->lock, flags);
         return 0;
@@@ -171,8 -150,6 +171,8 @@@ int irq_select_affinity_usr(unsigned in
   
         spin_lock_irqsave(&desc->lock, flags);
         ret = setup_affinity(irq, desc);
+ +      if (!ret)
+ +              irq_set_thread_affinity(desc, desc->affinity);
         spin_unlock_irqrestore(&desc->lock, flags);
   
         return ret;
@@@ -185,6 -162,20 +185,20 @@@ static inline int setup_affinity(unsign
   }
   #endif
   
+ /*
+  * Nested disable of @irq.  Every call bumps desc->depth; only the call
+  * that finds the depth at zero marks @desc IRQ_DISABLED and invokes the
+  * chip's disable hook.
+  *
+  * With @suspend set, a descriptor that has no action or whose first
+  * action carries IRQF_TIMER is left untouched; any other descriptor is
+  * additionally flagged IRQ_SUSPENDED.
+  */
+ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+ {
+       if (suspend) {
+               struct irqaction *act = desc->action;
+ 
+               if (act == NULL || (act->flags & IRQF_TIMER) != 0)
+                       return;
+               desc->status |= IRQ_SUSPENDED;
+       }
+ 
+       /* Already disabled by an earlier caller: just count the nesting. */
+       if (desc->depth++ != 0)
+               return;
+ 
+       desc->status |= IRQ_DISABLED;
+       desc->chip->disable(irq);
+ }
+ 
   /**
    *    disable_irq_nosync - disable an irq without waiting
    *    @irq: Interrupt to disable
@@@ -205,10 -196,7 +219,7 @@@ void disable_irq_nosync(unsigned int ir
                 return;
   
         spin_lock_irqsave(&desc->lock, flags);
-       if (!desc->depth++) {
-               desc->status |= IRQ_DISABLED;
-               desc->chip->disable(irq);
-       }
+       __disable_irq(desc, irq, false);
         spin_unlock_irqrestore(&desc->lock, flags);
   }
   EXPORT_SYMBOL(disable_irq_nosync);
@@@ -238,15 -226,21 +249,21 @@@ void disable_irq(unsigned int irq
   }
   EXPORT_SYMBOL(disable_irq);
   
- static void __enable_irq(struct irq_desc *desc, unsigned int irq)
+ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume)
   {
+       if (resume)
+               desc->status &= ~IRQ_SUSPENDED;
+ 
         switch (desc->depth) {
         case 0:
+  err_out:
                 WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
                 break;
         case 1: {
                 unsigned int status = desc->status & ~IRQ_DISABLED;
   
+               if (desc->status & IRQ_SUSPENDED)
+                       goto err_out;
                 /* Prevent probing on this irq: */
                 desc->status = status | IRQ_NOPROBE;
                 check_irq_resend(desc, irq);
@@@ -276,7 -270,7 +293,7 @@@ void enable_irq(unsigned int irq
                 return;
   
         spin_lock_irqsave(&desc->lock, flags);
-       __enable_irq(desc, irq);
+       __enable_irq(desc, irq, false);
         spin_unlock_irqrestore(&desc->lock, flags);
   }
   EXPORT_SYMBOL(enable_irq);
@@@ -407,90 -401,6 +424,90 @@@ int __irq_set_trigger(struct irq_desc *
         return ret;
   }
   
+ +/*
+ + * Sleep until the irq thread has work to do or is told to stop.
+ + *
+ + * Returns 0 after consuming IRQTF_RUNTHREAD from action->thread_flags,
+ + * or -1 once kthread_should_stop() reports true.
+ + */
+ +static int irq_wait_for_interrupt(struct irqaction *action)
+ +{
+ +      for (;;) {
+ +              if (kthread_should_stop())
+ +                      return -1;
+ +
+ +              /* Go INTERRUPTIBLE before testing the flag. */
+ +              set_current_state(TASK_INTERRUPTIBLE);
+ +
+ +              if (!test_and_clear_bit(IRQTF_RUNTHREAD,
+ +                                      &action->thread_flags)) {
+ +                      schedule();
+ +                      continue;
+ +              }
+ +
+ +              __set_current_state(TASK_RUNNING);
+ +              return 0;
+ +      }
+ +}
+ +
+ +/*
+ + * Interrupt handler thread
+ + *
+ + * @data is the struct irqaction this thread serves.  The thread switches
+ + * itself to SCHED_FIFO at priority MAX_USER_RT_PRIO/2, records the action
+ + * in current->irqaction and then, for every wakeup reported by
+ + * irq_wait_for_interrupt(), runs action->thread_fn() - unless the irq is
+ + * marked IRQ_DISABLED, in which case it only sets IRQ_PENDING.
+ + *
+ + * desc->threads_active counts handlers in flight; when it drops back to
+ + * zero any waiter on desc->wait_for_threads is woken.
+ + *
+ + * Returns 0 once irq_wait_for_interrupt() returns non-zero.
+ + */
+ +static int irq_thread(void *data)
+ +{
+ +      struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+ +      struct irqaction *action = data;
+ +      struct irq_desc *desc = irq_to_desc(action->irq);
+ +      int wake;
+ +
+ +      sched_setscheduler(current, SCHED_FIFO, &param);
+ +      current->irqaction = action;
+ +
+ +      while (!irq_wait_for_interrupt(action)) {
+ +
+ +              atomic_inc(&desc->threads_active);
+ +
+ +              spin_lock_irq(&desc->lock);
+ +              if (unlikely(desc->status & IRQ_DISABLED)) {
+ +                      /*
+ +                       * CHECKME: We might need a dedicated
+ +                       * IRQ_THREAD_PENDING flag here, which
+ +                       * retriggers the thread in check_irq_resend()
+ +                       * but AFAICT IRQ_PENDING should be fine as it
+ +                       * retriggers the interrupt itself --- tglx
+ +                       */
+ +                      desc->status |= IRQ_PENDING;
+ +                      spin_unlock_irq(&desc->lock);
+ +              } else {
+ +                      spin_unlock_irq(&desc->lock);
+ +
+ +                      action->thread_fn(action->irq, action->dev_id);
+ +              }
+ +
+ +              wake = atomic_dec_and_test(&desc->threads_active);
+ +
+ +              if (wake && waitqueue_active(&desc->wait_for_threads))
+ +                      wake_up(&desc->wait_for_threads);
+ +      }
+ +
+ +      /*
+ +       * Clear irqaction. Otherwise exit_irq_thread() would make
+ +       * a fuss about an active irq thread going into nirvana.
+ +       */
+ +      current->irqaction = NULL;
+ +      return 0;
+ +}
+ +
+ +/*
+ + * Called from do_exit()
+ + *
+ + * A task that still has current->irqaction set is an irq handler thread
+ + * that is exiting without having left irq_thread() through its normal
+ + * return path.  Report it and mark the action as dead.
+ + */
+ +void exit_irq_thread(void)
+ +{
+ +      struct task_struct *tsk = current;
+ +
+ +      if (!tsk->irqaction)
+ +              return;
+ +
+ +      printk(KERN_ERR
+ +             "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+ +             tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+ +
+ +      /*
+ +       * Set the THREAD DIED flag to prevent further wakeups of the
+ +       * soon to be gone threaded handler.  The IRQTF_* bits live in
+ +       * ->thread_flags, which is the word every IRQTF_DIED test
+ +       * reads; ->flags holds the IRQF_* request flags.
+ +       */
+ +      set_bit(IRQTF_DIED, &tsk->irqaction->thread_flags);
+ +}
+ +
   /*
    * Internal function to register an irqaction - typically used to
    * allocate special interrupts that are part of the architecture.
@@@ -526,26 -436,6 +543,26 @@@ __setup_irq(unsigned int irq, struct ir
                 rand_initialize_irq(irq);
         }
   
+ +      /*
+ +       * Threaded handler ?
+ +       */
+ +      if (new->thread_fn) {
+ +              struct task_struct *t;
+ +
+ +              t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+ +                                 new->name);
+ +              if (IS_ERR(t))
+ +                      return PTR_ERR(t);
+ +              /*
+ +               * We keep the reference to the task struct even if
+ +               * the thread dies to avoid that the interrupt code
+ +               * references an already freed task_struct.
+ +               */
+ +              get_task_struct(t);
+ +              new->thread = t;
+ +              wake_up_process(t);
+ +      }
+ +
         /*
          * The following block of code has to be executed atomically
          */
@@@ -583,15 -473,15 +600,15 @@@
         if (!shared) {
                 irq_chip_set_defaults(desc->chip);
   
+ +              init_waitqueue_head(&desc->wait_for_threads);
+ +
                 /* Setup the type (level, edge polarity) if configured: */
                 if (new->flags & IRQF_TRIGGER_MASK) {
                         ret = __irq_set_trigger(desc, irq,
                                         new->flags & IRQF_TRIGGER_MASK);
   
- -                      if (ret) {
- -                              spin_unlock_irqrestore(&desc->lock, flags);
- -                              return ret;
- -                      }
+ +                      if (ret)
+ +                              goto out_thread;
                 } else
                         compat_irq_chip_set_default_handler(desc);
   #if defined(CONFIG_IRQ_PER_CPU)
@@@ -638,7 -528,7 +655,7 @@@
          */
         if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) {
                 desc->status &= ~IRQ_SPURIOUS_DISABLED;
-               __enable_irq(desc, irq);
+               __enable_irq(desc, irq, false);
         }
   
         spin_unlock_irqrestore(&desc->lock, flags);
@@@ -659,19 -549,8 +676,19 @@@ mismatch
                 dump_stack();
         }
   #endif
+ +      ret = -EBUSY;
+ +
+ +out_thread:
         spin_unlock_irqrestore(&desc->lock, flags);
- -      return -EBUSY;
+ +      if (new->thread) {
+ +              struct task_struct *t = new->thread;
+ +
+ +              new->thread = NULL;
+ +              if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+ +                      kthread_stop(t);
+ +              put_task_struct(t);
+ +      }
+ +      return ret;
   }
   
   /**
@@@ -697,7 -576,6 +714,7 @@@ static struct irqaction *__free_irq(uns
   {
         struct irq_desc *desc = irq_to_desc(irq);
         struct irqaction *action, **action_ptr;
+ +      struct task_struct *irqthread;
         unsigned long flags;
   
         WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@@ -744,10 -622,6 +761,10 @@@
                 else
                         desc->chip->disable(irq);
         }
+ +
+ +      irqthread = action->thread;
+ +      action->thread = NULL;
+ +
         spin_unlock_irqrestore(&desc->lock, flags);
   
         unregister_handler_proc(irq, action);
@@@ -755,12 -629,6 +772,12 @@@
         /* Make sure it's not being used on another CPU: */
         synchronize_irq(irq);
   
+ +      if (irqthread) {
+ +              if (!test_bit(IRQTF_DIED, &action->thread_flags))
+ +                      kthread_stop(irqthread);
+ +              put_task_struct(irqthread);
+ +      }
+ +
   #ifdef CONFIG_DEBUG_SHIRQ
         /*
          * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@@ -813,12 -681,9 +830,12 @@@ void free_irq(unsigned int irq, void *d
   EXPORT_SYMBOL(free_irq);
   
   /**
- - *    request_irq - allocate an interrupt line
+ + *    request_threaded_irq - allocate an interrupt line
    *    @irq: Interrupt line to allocate
- - *    @handler: Function to be called when the IRQ occurs
+ + *    @handler: Function to be called when the IRQ occurs.
+ + *              Primary handler for threaded interrupts
+ + *    @thread_fn: Function called from the irq handler thread
+ + *                If NULL, no irq thread is created
    *    @irqflags: Interrupt type flags
    *    @devname: An ascii name for the claiming device
    *    @dev_id: A cookie passed back to the handler function
@@@ -830,15 -695,6 +847,15 @@@
    *    raises, you must take care both to initialise your hardware
    *    and to set up the interrupt handler in the right order.
    *
+ + *    If you want to set up a threaded irq handler for your device
+ + *    then you need to supply @handler and @thread_fn. @handler is
+ + *    still called in hard interrupt context and has to check
+ + *    whether the interrupt originates from the device. If yes it
+ + *    needs to disable the interrupt on the device and return
+ + *    IRQ_WAKE_THREAD which will wake up the handler thread and run
+ + *    @thread_fn. This split handler design is necessary to support
+ + *    shared interrupts.
+ + *
    *    Dev_id must be globally unique. Normally the address of the
    *    device data structure is used as the cookie. Since the handler
    *    receives this value it makes sense to use it.
@@@ -854,9 -710,8 +871,9 @@@
    *    IRQF_TRIGGER_*          Specify active edge(s) or level
    *
    */
- -int request_irq(unsigned int irq, irq_handler_t handler,
- -              unsigned long irqflags, const char *devname, void *dev_id)
+ +int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+ +                       irq_handler_t thread_fn, unsigned long irqflags,
+ +                       const char *devname, void *dev_id)
   {
         struct irqaction *action;
         struct irq_desc *desc;
@@@ -904,7 -759,6 +921,7 @@@
                 return -ENOMEM;
   
         action->handler = handler;
+ +      action->thread_fn = thread_fn;
         action->flags = irqflags;
         action->name = devname;
         action->dev_id = dev_id;
@@@ -934,4 -788,4 +951,4 @@@
   #endif
         return retval;
   }
- -EXPORT_SYMBOL(request_irq);
+ +EXPORT_SYMBOL(request_threaded_irq);
author	Ingo Molnar <mingo@elte.hu>
	Sun, 5 Apr 2009 23:41:22 +0000 (01:41 +0200)
committer	Ingo Molnar <mingo@elte.hu>
	Sun, 5 Apr 2009 23:41:22 +0000 (01:41 +0200)
		1	2
include/linux/hardirq.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/interrupt.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/irq.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/sched.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/exit.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/handle.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/irq/manage.c	patch \|	diff1 \|	diff2 \|	blob \| history