From: Linus Torvalds Date: Fri, 2 Jan 2009 19:44:09 +0000 (-0800) Subject: Merge branch 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel... X-Git-Tag: v2.6.29-rc1~538 X-Git-Url: https://git.kernel.dk/?a=commitdiff_plain;h=b840d79631c882786925303c2b0f4fefc31845ed;p=linux-2.6-block.git Merge branch 'cpus4096-for-linus-2' of git://git./linux/kernel/git/tip/linux-2.6-tip * 'cpus4096-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (66 commits) x86: export vector_used_by_percpu_irq x86: use logical apicid in x2apic_cluster's x2apic_cpu_mask_to_apicid_and() sched: nominate preferred wakeup cpu, fix x86: fix lguest used_vectors breakage, -v2 x86: fix warning in arch/x86/kernel/io_apic.c sched: fix warning in kernel/sched.c sched: move test_sd_parent() to an SMP section of sched.h sched: add SD_BALANCE_NEWIDLE at MC and CPU level for sched_mc>0 sched: activate active load balancing in new idle cpus sched: bias task wakeups to preferred semi-idle packages sched: nominate preferred wakeup cpu sched: favour lower logical cpu number for sched_mc balance sched: framework for sched_mc/smt_power_savings=N sched: convert BALANCE_FOR_xx_POWER to inline functions x86: use possible_cpus=NUM to extend the possible cpus allowed x86: fix cpu_mask_to_apicid_and to include cpu_online_mask x86: update io_apic.c to the new cpumask code x86: Introduce topology_core_cpumask()/topology_thread_cpumask() x86: xen: use smp_call_function_many() x86: use work_on_cpu in x86/kernel/cpu/mcheck/mce_amd_64.c ... Fixed up trivial conflict in kernel/time/tick-sched.c manually --- b840d79631c882786925303c2b0f4fefc31845ed diff --cc arch/powerpc/kernel/smp.c index 8ac3f721d235,d1165566f064..65484b2200b3 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@@ -57,10 -57,9 +57,8 @@@ #define DBG(fmt...) #endif -int smp_hw_index[NR_CPUS]; struct thread_info *secondary_ti; - cpumask_t cpu_possible_map = CPU_MASK_NONE; - cpumask_t cpu_online_map = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; DEFINE_PER_CPU(cpumask_t, cpu_core_map) = CPU_MASK_NONE; diff --cc arch/sparc/kernel/irq_64.c index a3ea2bcb95de,000000000000..cab8e0286871 mode 100644,000000..100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@@ -1,1101 -1,0 +1,1104 @@@ +/* irq.c: UltraSparc IRQ handling/init/registry. + * + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" + +#define NUM_IVECS (IMAP_INR + 1) + +struct ino_bucket *ivector_table; +unsigned long ivector_table_pa; + +/* On several sun4u processors, it is illegal to mix bypass and + * non-bypass accesses. Therefore we access all INO buckets + * using bypass accesses only. + */ +static unsigned long bucket_get_chain_pa(unsigned long bucket_pa) +{ + unsigned long ret; + + __asm__ __volatile__("ldxa [%1] %2, %0" + : "=&r" (ret) + : "r" (bucket_pa + + offsetof(struct ino_bucket, + __irq_chain_pa)), + "i" (ASI_PHYS_USE_EC)); + + return ret; +} + +static void bucket_clear_chain_pa(unsigned long bucket_pa) +{ + __asm__ __volatile__("stxa %%g0, [%0] %1" + : /* no outputs */ + : "r" (bucket_pa + + offsetof(struct ino_bucket, + __irq_chain_pa)), + "i" (ASI_PHYS_USE_EC)); +} + +static unsigned int bucket_get_virt_irq(unsigned long bucket_pa) +{ + unsigned int ret; + + __asm__ __volatile__("lduwa [%1] %2, %0" + : "=&r" (ret) + : "r" (bucket_pa + + offsetof(struct ino_bucket, + __virt_irq)), + "i" (ASI_PHYS_USE_EC)); + + return ret; +} + +static void bucket_set_virt_irq(unsigned long bucket_pa, + unsigned int virt_irq) +{ + __asm__ __volatile__("stwa %0, [%1] %2" + : /* no outputs */ + : "r" (virt_irq), + "r" (bucket_pa + + offsetof(struct ino_bucket, + __virt_irq)), + "i" (ASI_PHYS_USE_EC)); +} + +#define irq_work_pa(__cpu) &(trap_block[(__cpu)].irq_worklist_pa) + +static struct { + unsigned int dev_handle; + unsigned int dev_ino; + unsigned int in_use; +} virt_irq_table[NR_IRQS]; +static DEFINE_SPINLOCK(virt_irq_alloc_lock); + +unsigned char virt_irq_alloc(unsigned int dev_handle, + unsigned int dev_ino) +{ + unsigned long flags; + unsigned char ent; + + BUILD_BUG_ON(NR_IRQS >= 256); + + spin_lock_irqsave(&virt_irq_alloc_lock, flags); + + for (ent = 1; ent < NR_IRQS; ent++) { + if (!virt_irq_table[ent].in_use) + break; + } + if (ent >= NR_IRQS) { + printk(KERN_ERR "IRQ: Out of virtual IRQs.\n"); + ent = 0; + } else { + virt_irq_table[ent].dev_handle = dev_handle; + virt_irq_table[ent].dev_ino = dev_ino; + virt_irq_table[ent].in_use = 1; + } + + spin_unlock_irqrestore(&virt_irq_alloc_lock, flags); + + return ent; +} + +#ifdef CONFIG_PCI_MSI +void virt_irq_free(unsigned int virt_irq) +{ + unsigned long flags; + + if (virt_irq >= NR_IRQS) + return; + + spin_lock_irqsave(&virt_irq_alloc_lock, flags); + + virt_irq_table[virt_irq].in_use = 0; + + spin_unlock_irqrestore(&virt_irq_alloc_lock, flags); +} +#endif + +/* + * /proc/interrupts printing: + */ + +int show_interrupts(struct seq_file *p, void *v) +{ + int i = *(loff_t *) v, j; + struct irqaction * action; + unsigned long flags; + + if (i == 0) { + seq_printf(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%d ",j); + seq_putc(p, '\n'); + } + + if (i < NR_IRQS) { + spin_lock_irqsave(&irq_desc[i].lock, flags); + action = irq_desc[i].action; + if (!action) + goto skip; + seq_printf(p, "%3d: ",i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); +#endif + seq_printf(p, " %9s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) + seq_printf(p, ", %s", action->name); + + seq_putc(p, '\n'); +skip: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); + } + return 0; +} + +static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) +{ + unsigned int tid; + + if (this_is_starfire) { + tid = starfire_translate(imap, cpuid); + tid <<= IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } else { + if (tlb_type == cheetah || tlb_type == cheetah_plus) { + unsigned long ver; + + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + if ((ver >> 32UL) == __JALAPENO_ID || + (ver >> 32UL) == __SERRANO_ID) { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_JBUS; + } else { + unsigned int a = cpuid & 0x1f; + unsigned int n = (cpuid >> 5) & 0x1f; + + tid = ((a << IMAP_AID_SHIFT) | + (n << IMAP_NID_SHIFT)); + tid &= (IMAP_AID_SAFARI | + IMAP_NID_SAFARI);; + } + } else { + tid = cpuid << IMAP_TID_SHIFT; + tid &= IMAP_TID_UPA; + } + } + + return tid; +} + +struct irq_handler_data { + unsigned long iclr; + unsigned long imap; + + void (*pre_handler)(unsigned int, void *, void *); + void *arg1; + void *arg2; +}; + +#ifdef CONFIG_SMP +static int irq_choose_cpu(unsigned int virt_irq) +{ + cpumask_t mask = irq_desc[virt_irq].affinity; + int cpuid; + + if (cpus_equal(mask, CPU_MASK_ALL)) { + static int irq_rover; + static DEFINE_SPINLOCK(irq_rover_lock); + unsigned long flags; + + /* Round-robin distribution... */ + do_round_robin: + spin_lock_irqsave(&irq_rover_lock, flags); + + while (!cpu_online(irq_rover)) { + if (++irq_rover >= NR_CPUS) + irq_rover = 0; + } + cpuid = irq_rover; + do { + if (++irq_rover >= NR_CPUS) + irq_rover = 0; + } while (!cpu_online(irq_rover)); + + spin_unlock_irqrestore(&irq_rover_lock, flags); + } else { + cpumask_t tmp; + + cpus_and(tmp, cpu_online_map, mask); + + if (cpus_empty(tmp)) + goto do_round_robin; + + cpuid = first_cpu(tmp); + } + + return cpuid; +} +#else +static int irq_choose_cpu(unsigned int virt_irq) +{ + return real_hard_smp_processor_id(); +} +#endif + +static void sun4u_irq_enable(unsigned int virt_irq) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + + if (likely(data)) { + unsigned long cpuid, imap, val; + unsigned int tid; + + cpuid = irq_choose_cpu(virt_irq); + imap = data->imap; + + tid = sun4u_compute_tid(imap, cpuid); + + val = upa_readq(imap); + val &= ~(IMAP_TID_UPA | IMAP_TID_JBUS | + IMAP_AID_SAFARI | IMAP_NID_SAFARI); + val |= tid | IMAP_VALID; + upa_writeq(val, imap); + upa_writeq(ICLR_IDLE, data->iclr); + } +} + - static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask) ++static void sun4u_set_affinity(unsigned int virt_irq, ++ const struct cpumask *mask) +{ + sun4u_irq_enable(virt_irq); +} + +static void sun4u_irq_disable(unsigned int virt_irq) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + + if (likely(data)) { + unsigned long imap = data->imap; + unsigned long tmp = upa_readq(imap); + + tmp &= ~IMAP_VALID; + upa_writeq(tmp, imap); + } +} + +static void sun4u_irq_eoi(unsigned int virt_irq) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + struct irq_desc *desc = irq_desc + virt_irq; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; + + if (likely(data)) + upa_writeq(ICLR_IDLE, data->iclr); +} + +static void sun4v_irq_enable(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + unsigned long cpuid = irq_choose_cpu(virt_irq); + int err; + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " + "err(%d)\n", ino, cpuid, err); + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setstate(%x): " + "err(%d)\n", ino, err); + err = sun4v_intr_setenabled(ino, HV_INTR_ENABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setenabled(%x): err(%d)\n", + ino, err); +} + - static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask) ++static void sun4v_set_affinity(unsigned int virt_irq, ++ const struct cpumask *mask) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + unsigned long cpuid = irq_choose_cpu(virt_irq); + int err; + + err = sun4v_intr_settarget(ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): " + "err(%d)\n", ino, cpuid, err); +} + +static void sun4v_irq_disable(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + int err; + + err = sun4v_intr_setenabled(ino, HV_INTR_DISABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setenabled(%x): " + "err(%d)\n", ino, err); +} + +static void sun4v_irq_eoi(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + struct irq_desc *desc = irq_desc + virt_irq; + int err; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; + + err = sun4v_intr_setstate(ino, HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_intr_setstate(%x): " + "err(%d)\n", ino, err); +} + +static void sun4v_virq_enable(unsigned int virt_irq) +{ + unsigned long cpuid, dev_handle, dev_ino; + int err; + + cpuid = irq_choose_cpu(virt_irq); + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " + "err(%d)\n", + dev_handle, dev_ino, cpuid, err); + err = sun4v_vintr_set_state(dev_handle, dev_ino, + HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_STATE_IDLE): err(%d)\n", + dev_handle, dev_ino, err); + err = sun4v_vintr_set_valid(dev_handle, dev_ino, + HV_INTR_ENABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_ENABLED): err(%d)\n", + dev_handle, dev_ino, err); +} + - static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask) ++static void sun4v_virt_set_affinity(unsigned int virt_irq, ++ const struct cpumask *mask) +{ + unsigned long cpuid, dev_handle, dev_ino; + int err; + + cpuid = irq_choose_cpu(virt_irq); + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): " + "err(%d)\n", + dev_handle, dev_ino, cpuid, err); +} + +static void sun4v_virq_disable(unsigned int virt_irq) +{ + unsigned long dev_handle, dev_ino; + int err; + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_valid(dev_handle, dev_ino, + HV_INTR_DISABLED); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_DISABLED): err(%d)\n", + dev_handle, dev_ino, err); +} + +static void sun4v_virq_eoi(unsigned int virt_irq) +{ + struct irq_desc *desc = irq_desc + virt_irq; + unsigned long dev_handle, dev_ino; + int err; + + if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS))) + return; + + dev_handle = virt_irq_table[virt_irq].dev_handle; + dev_ino = virt_irq_table[virt_irq].dev_ino; + + err = sun4v_vintr_set_state(dev_handle, dev_ino, + HV_INTR_STATE_IDLE); + if (err != HV_EOK) + printk(KERN_ERR "sun4v_vintr_set_state(%lx,%lx," + "HV_INTR_STATE_IDLE): err(%d)\n", + dev_handle, dev_ino, err); +} + +static struct irq_chip sun4u_irq = { + .typename = "sun4u", + .enable = sun4u_irq_enable, + .disable = sun4u_irq_disable, + .eoi = sun4u_irq_eoi, + .set_affinity = sun4u_set_affinity, +}; + +static struct irq_chip sun4v_irq = { + .typename = "sun4v", + .enable = sun4v_irq_enable, + .disable = sun4v_irq_disable, + .eoi = sun4v_irq_eoi, + .set_affinity = sun4v_set_affinity, +}; + +static struct irq_chip sun4v_virq = { + .typename = "vsun4v", + .enable = sun4v_virq_enable, + .disable = sun4v_virq_disable, + .eoi = sun4v_virq_eoi, + .set_affinity = sun4v_virt_set_affinity, +}; + +static void pre_flow_handler(unsigned int virt_irq, + struct irq_desc *desc) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + + data->pre_handler(ino, data->arg1, data->arg2); + + handle_fasteoi_irq(virt_irq, desc); +} + +void irq_install_pre_handler(int virt_irq, + void (*func)(unsigned int, void *, void *), + void *arg1, void *arg2) +{ + struct irq_handler_data *data = get_irq_chip_data(virt_irq); + struct irq_desc *desc = irq_desc + virt_irq; + + data->pre_handler = func; + data->arg1 = arg1; + data->arg2 = arg2; + + desc->handle_irq = pre_flow_handler; +} + +unsigned int build_irq(int inofixup, unsigned long iclr, unsigned long imap) +{ + struct ino_bucket *bucket; + struct irq_handler_data *data; + unsigned int virt_irq; + int ino; + + BUG_ON(tlb_type == hypervisor); + + ino = (upa_readq(imap) & (IMAP_IGN | IMAP_INO)) + inofixup; + bucket = &ivector_table[ino]; + virt_irq = bucket_get_virt_irq(__pa(bucket)); + if (!virt_irq) { + virt_irq = virt_irq_alloc(0, ino); + bucket_set_virt_irq(__pa(bucket), virt_irq); + set_irq_chip_and_handler_name(virt_irq, + &sun4u_irq, + handle_fasteoi_irq, + "IVEC"); + } + + data = get_irq_chip_data(virt_irq); + if (unlikely(data)) + goto out; + + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); + prom_halt(); + } + set_irq_chip_data(virt_irq, data); + + data->imap = imap; + data->iclr = iclr; + +out: + return virt_irq; +} + +static unsigned int sun4v_build_common(unsigned long sysino, + struct irq_chip *chip) +{ + struct ino_bucket *bucket; + struct irq_handler_data *data; + unsigned int virt_irq; + + BUG_ON(tlb_type != hypervisor); + + bucket = &ivector_table[sysino]; + virt_irq = bucket_get_virt_irq(__pa(bucket)); + if (!virt_irq) { + virt_irq = virt_irq_alloc(0, sysino); + bucket_set_virt_irq(__pa(bucket), virt_irq); + set_irq_chip_and_handler_name(virt_irq, chip, + handle_fasteoi_irq, + "IVEC"); + } + + data = get_irq_chip_data(virt_irq); + if (unlikely(data)) + goto out; + + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) { + prom_printf("IRQ: kzalloc(irq_handler_data) failed.\n"); + prom_halt(); + } + set_irq_chip_data(virt_irq, data); + + /* Catch accidental accesses to these things. IMAP/ICLR handling + * is done by hypervisor calls on sun4v platforms, not by direct + * register accesses. + */ + data->imap = ~0UL; + data->iclr = ~0UL; + +out: + return virt_irq; +} + +unsigned int sun4v_build_irq(u32 devhandle, unsigned int devino) +{ + unsigned long sysino = sun4v_devino_to_sysino(devhandle, devino); + + return sun4v_build_common(sysino, &sun4v_irq); +} + +unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) +{ + struct irq_handler_data *data; + unsigned long hv_err, cookie; + struct ino_bucket *bucket; + struct irq_desc *desc; + unsigned int virt_irq; + + bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); + if (unlikely(!bucket)) + return 0; + __flush_dcache_range((unsigned long) bucket, + ((unsigned long) bucket + + sizeof(struct ino_bucket))); + + virt_irq = virt_irq_alloc(devhandle, devino); + bucket_set_virt_irq(__pa(bucket), virt_irq); + + set_irq_chip_and_handler_name(virt_irq, &sun4v_virq, + handle_fasteoi_irq, + "IVEC"); + + data = kzalloc(sizeof(struct irq_handler_data), GFP_ATOMIC); + if (unlikely(!data)) + return 0; + + /* In order to make the LDC channel startup sequence easier, + * especially wrt. locking, we do not let request_irq() enable + * the interrupt. + */ + desc = irq_desc + virt_irq; + desc->status |= IRQ_NOAUTOEN; + + set_irq_chip_data(virt_irq, data); + + /* Catch accidental accesses to these things. IMAP/ICLR handling + * is done by hypervisor calls on sun4v platforms, not by direct + * register accesses. + */ + data->imap = ~0UL; + data->iclr = ~0UL; + + cookie = ~__pa(bucket); + hv_err = sun4v_vintr_set_cookie(devhandle, devino, cookie); + if (hv_err) { + prom_printf("IRQ: Fatal, cannot set cookie for [%x:%x] " + "err=%lu\n", devhandle, devino, hv_err); + prom_halt(); + } + + return virt_irq; +} + +void ack_bad_irq(unsigned int virt_irq) +{ + unsigned int ino = virt_irq_table[virt_irq].dev_ino; + + if (!ino) + ino = 0xdeadbeef; + + printk(KERN_CRIT "Unexpected IRQ from ino[%x] virt_irq[%u]\n", + ino, virt_irq); +} + +void *hardirq_stack[NR_CPUS]; +void *softirq_stack[NR_CPUS]; + +static __attribute__((always_inline)) void *set_hardirq_stack(void) +{ + void *orig_sp, *sp = hardirq_stack[smp_processor_id()]; + + __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp)); + if (orig_sp < sp || + orig_sp > (sp + THREAD_SIZE)) { + sp += THREAD_SIZE - 192 - STACK_BIAS; + __asm__ __volatile__("mov %0, %%sp" : : "r" (sp)); + } + + return orig_sp; +} +static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp) +{ + __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); +} + +void handler_irq(int irq, struct pt_regs *regs) +{ + unsigned long pstate, bucket_pa; + struct pt_regs *old_regs; + void *orig_sp; + + clear_softint(1 << irq); + + old_regs = set_irq_regs(regs); + irq_enter(); + + /* Grab an atomic snapshot of the pending IVECs. */ + __asm__ __volatile__("rdpr %%pstate, %0\n\t" + "wrpr %0, %3, %%pstate\n\t" + "ldx [%2], %1\n\t" + "stx %%g0, [%2]\n\t" + "wrpr %0, 0x0, %%pstate\n\t" + : "=&r" (pstate), "=&r" (bucket_pa) + : "r" (irq_work_pa(smp_processor_id())), + "i" (PSTATE_IE) + : "memory"); + + orig_sp = set_hardirq_stack(); + + while (bucket_pa) { + struct irq_desc *desc; + unsigned long next_pa; + unsigned int virt_irq; + + next_pa = bucket_get_chain_pa(bucket_pa); + virt_irq = bucket_get_virt_irq(bucket_pa); + bucket_clear_chain_pa(bucket_pa); + + desc = irq_desc + virt_irq; + + desc->handle_irq(virt_irq, desc); + + bucket_pa = next_pa; + } + + restore_hardirq_stack(orig_sp); + + irq_exit(); + set_irq_regs(old_regs); +} + +void do_softirq(void) +{ + unsigned long flags; + + if (in_interrupt()) + return; + + local_irq_save(flags); + + if (local_softirq_pending()) { + void *orig_sp, *sp = softirq_stack[smp_processor_id()]; + + sp += THREAD_SIZE - 192 - STACK_BIAS; + + __asm__ __volatile__("mov %%sp, %0\n\t" + "mov %1, %%sp" + : "=&r" (orig_sp) + : "r" (sp)); + __do_softirq(); + __asm__ __volatile__("mov %0, %%sp" + : : "r" (orig_sp)); + } + + local_irq_restore(flags); +} + +static void unhandled_perf_irq(struct pt_regs *regs) +{ + unsigned long pcr, pic; + + read_pcr(pcr); + read_pic(pic); + + write_pcr(0); + + printk(KERN_EMERG "CPU %d: Got unexpected perf counter IRQ.\n", + smp_processor_id()); + printk(KERN_EMERG "CPU %d: PCR[%016lx] PIC[%016lx]\n", + smp_processor_id(), pcr, pic); +} + +/* Almost a direct copy of the powerpc PMC code. */ +static DEFINE_SPINLOCK(perf_irq_lock); +static void *perf_irq_owner_caller; /* mostly for debugging */ +static void (*perf_irq)(struct pt_regs *regs) = unhandled_perf_irq; + +/* Invoked from level 15 PIL handler in trap table. */ +void perfctr_irq(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + perf_irq(regs); +} + +int register_perfctr_intr(void (*handler)(struct pt_regs *)) +{ + int ret; + + if (!handler) + return -EINVAL; + + spin_lock(&perf_irq_lock); + if (perf_irq != unhandled_perf_irq) { + printk(KERN_WARNING "register_perfctr_intr: " + "perf IRQ busy (reserved by caller %p)\n", + perf_irq_owner_caller); + ret = -EBUSY; + goto out; + } + + perf_irq_owner_caller = __builtin_return_address(0); + perf_irq = handler; + + ret = 0; +out: + spin_unlock(&perf_irq_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(register_perfctr_intr); + +void release_perfctr_intr(void (*handler)(struct pt_regs *)) +{ + spin_lock(&perf_irq_lock); + perf_irq_owner_caller = NULL; + perf_irq = unhandled_perf_irq; + spin_unlock(&perf_irq_lock); +} +EXPORT_SYMBOL_GPL(release_perfctr_intr); + +#ifdef CONFIG_HOTPLUG_CPU +void fixup_irqs(void) +{ + unsigned int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + unsigned long flags; + + spin_lock_irqsave(&irq_desc[irq].lock, flags); + if (irq_desc[irq].action && + !(irq_desc[irq].status & IRQ_PER_CPU)) { + if (irq_desc[irq].chip->set_affinity) + irq_desc[irq].chip->set_affinity(irq, - irq_desc[irq].affinity); ++ &irq_desc[irq].affinity); + } + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } + + tick_ops->disable_irq(); +} +#endif + +struct sun5_timer { + u64 count0; + u64 limit0; + u64 count1; + u64 limit1; +}; + +static struct sun5_timer *prom_timers; +static u64 prom_limit0, prom_limit1; + +static void map_prom_timers(void) +{ + struct device_node *dp; + const unsigned int *addr; + + /* PROM timer node hangs out in the top level of device siblings... */ + dp = of_find_node_by_path("/"); + dp = dp->child; + while (dp) { + if (!strcmp(dp->name, "counter-timer")) + break; + dp = dp->sibling; + } + + /* Assume if node is not present, PROM uses different tick mechanism + * which we should not care about. + */ + if (!dp) { + prom_timers = (struct sun5_timer *) 0; + return; + } + + /* If PROM is really using this, it must be mapped by him. */ + addr = of_get_property(dp, "address", NULL); + if (!addr) { + prom_printf("PROM does not have timer mapped, trying to continue.\n"); + prom_timers = (struct sun5_timer *) 0; + return; + } + prom_timers = (struct sun5_timer *) ((unsigned long)addr[0]); +} + +static void kill_prom_timer(void) +{ + if (!prom_timers) + return; + + /* Save them away for later. */ + prom_limit0 = prom_timers->limit0; + prom_limit1 = prom_timers->limit1; + + /* Just as in sun4c/sun4m PROM uses timer which ticks at IRQ 14. + * We turn both off here just to be paranoid. + */ + prom_timers->limit0 = 0; + prom_timers->limit1 = 0; + + /* Wheee, eat the interrupt packet too... */ + __asm__ __volatile__( +" mov 0x40, %%g2\n" +" ldxa [%%g0] %0, %%g1\n" +" ldxa [%%g2] %1, %%g1\n" +" stxa %%g0, [%%g0] %0\n" +" membar #Sync\n" + : /* no outputs */ + : "i" (ASI_INTR_RECEIVE), "i" (ASI_INTR_R) + : "g1", "g2"); +} + +void notrace init_irqwork_curcpu(void) +{ + int cpu = hard_smp_processor_id(); + + trap_block[cpu].irq_worklist_pa = 0UL; +} + +/* Please be very careful with register_one_mondo() and + * sun4v_register_mondo_queues(). + * + * On SMP this gets invoked from the CPU trampoline before + * the cpu has fully taken over the trap table from OBP, + * and it's kernel stack + %g6 thread register state is + * not fully cooked yet. + * + * Therefore you cannot make any OBP calls, not even prom_printf, + * from these two routines. + */ +static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask) +{ + unsigned long num_entries = (qmask + 1) / 64; + unsigned long status; + + status = sun4v_cpu_qconf(type, paddr, num_entries); + if (status != HV_EOK) { + prom_printf("SUN4V: sun4v_cpu_qconf(%lu:%lx:%lu) failed, " + "err %lu\n", type, paddr, num_entries, status); + prom_halt(); + } +} + +void __cpuinit notrace sun4v_register_mondo_queues(int this_cpu) +{ + struct trap_per_cpu *tb = &trap_block[this_cpu]; + + register_one_mondo(tb->cpu_mondo_pa, HV_CPU_QUEUE_CPU_MONDO, + tb->cpu_mondo_qmask); + register_one_mondo(tb->dev_mondo_pa, HV_CPU_QUEUE_DEVICE_MONDO, + tb->dev_mondo_qmask); + register_one_mondo(tb->resum_mondo_pa, HV_CPU_QUEUE_RES_ERROR, + tb->resum_qmask); + register_one_mondo(tb->nonresum_mondo_pa, HV_CPU_QUEUE_NONRES_ERROR, + tb->nonresum_qmask); +} + +static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask) +{ + unsigned long size = PAGE_ALIGN(qmask + 1); + void *p = __alloc_bootmem(size, size, 0); + if (!p) { + prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); + prom_halt(); + } + + *pa_ptr = __pa(p); +} + +static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask) +{ + unsigned long size = PAGE_ALIGN(qmask + 1); + void *p = __alloc_bootmem(size, size, 0); + + if (!p) { + prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); + prom_halt(); + } + + *pa_ptr = __pa(p); +} + +static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) +{ +#ifdef CONFIG_SMP + void *page; + + BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); + + page = alloc_bootmem_pages(PAGE_SIZE); + if (!page) { + prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); + prom_halt(); + } + + tb->cpu_mondo_block_pa = __pa(page); + tb->cpu_list_pa = __pa(page + 64); +#endif +} + +/* Allocate mondo and error queues for all possible cpus. */ +static void __init sun4v_init_mondo_queues(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask); + alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask); + alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask); + alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask); + alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask); + alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, + tb->nonresum_qmask); + } +} + +static void __init init_send_mondo_info(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + init_cpu_send_mondo_info(tb); + } +} + +static struct irqaction timer_irq_action = { + .name = "timer", +}; + +/* Only invoked on boot processor. */ +void __init init_IRQ(void) +{ + unsigned long size; + + map_prom_timers(); + kill_prom_timer(); + + size = sizeof(struct ino_bucket) * NUM_IVECS; + ivector_table = alloc_bootmem(size); + if (!ivector_table) { + prom_printf("Fatal error, cannot allocate ivector_table\n"); + prom_halt(); + } + __flush_dcache_range((unsigned long) ivector_table, + ((unsigned long) ivector_table) + size); + + ivector_table_pa = __pa(ivector_table); + + if (tlb_type == hypervisor) + sun4v_init_mondo_queues(); + + init_send_mondo_info(); + + if (tlb_type == hypervisor) { + /* Load up the boot cpu's entries. */ + sun4v_register_mondo_queues(hard_smp_processor_id()); + } + + /* We need to clear any IRQ's pending in the soft interrupt + * registers, a spurious one could be left around from the + * PROM timer which we just disabled. + */ + clear_softint(get_softint()); + + /* Now that ivector table is initialized, it is safe + * to receive IRQ vector traps. We will normally take + * one or two right now, in case some device PROM used + * to boot us wants to speak to us. We just ignore them. + */ + __asm__ __volatile__("rdpr %%pstate, %%g1\n\t" + "or %%g1, %0, %%g1\n\t" + "wrpr %%g1, 0x0, %%pstate" + : /* No outputs */ + : "i" (PSTATE_IE) + : "g1"); + + irq_desc[0].action = &timer_irq_action; +} diff --cc arch/sparc/kernel/of_device_64.c index 46e231f7c5ce,000000000000..322046cdf85f mode 100644,000000..100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@@ -1,898 -1,0 +1,898 @@@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name) +{ + unsigned long ret = res->start + offset; + struct resource *r; + + if (res->flags & IORESOURCE_MEM) + r = request_mem_region(ret, size, name); + else + r = request_region(ret, size, name); + if (!r) + ret = 0; + + return (void __iomem *) ret; +} +EXPORT_SYMBOL(of_ioremap); + +void of_iounmap(struct resource *res, void __iomem *base, unsigned long size) +{ + if (res->flags & IORESOURCE_MEM) + release_mem_region((unsigned long) base, size); + else + release_region((unsigned long) base, size); +} +EXPORT_SYMBOL(of_iounmap); + +static int node_match(struct device *dev, void *data) +{ + struct of_device *op = to_of_device(dev); + struct device_node *dp = data; + + return (op->node == dp); +} + +struct of_device *of_find_device_by_node(struct device_node *dp) +{ + struct device *dev = bus_find_device(&of_platform_bus_type, NULL, + dp, node_match); + + if (dev) + return to_of_device(dev); + + return NULL; +} +EXPORT_SYMBOL(of_find_device_by_node); + +unsigned int irq_of_parse_and_map(struct device_node *node, int index) +{ + struct of_device *op = of_find_device_by_node(node); + + if (!op || index >= op->num_irqs) + return 0; + + return op->irqs[index]; +} +EXPORT_SYMBOL(irq_of_parse_and_map); + +/* Take the archdata values for IOMMU, STC, and HOSTDATA found in + * BUS and propagate to all child of_device objects. + */ +void of_propagate_archdata(struct of_device *bus) +{ + struct dev_archdata *bus_sd = &bus->dev.archdata; + struct device_node *bus_dp = bus->node; + struct device_node *dp; + + for (dp = bus_dp->child; dp; dp = dp->sibling) { + struct of_device *op = of_find_device_by_node(dp); + + op->dev.archdata.iommu = bus_sd->iommu; + op->dev.archdata.stc = bus_sd->stc; + op->dev.archdata.host_controller = bus_sd->host_controller; + op->dev.archdata.numa_node = bus_sd->numa_node; + + if (dp->child) + of_propagate_archdata(op); + } +} + +struct bus_type of_platform_bus_type; +EXPORT_SYMBOL(of_platform_bus_type); + +static inline u64 of_read_addr(const u32 *cell, int size) +{ + u64 r = 0; + while (size--) + r = (r << 32) | *(cell++); + return r; +} + +static void __init get_cells(struct device_node *dp, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = of_n_addr_cells(dp); + if (sizec) + *sizec = of_n_size_cells(dp); +} + +/* Max address size we deal with */ +#define OF_MAX_ADDR_CELLS 4 + +struct of_bus { + const char *name; + const char *addr_prop_name; + int (*match)(struct device_node *parent); + void (*count_cells)(struct device_node *child, + int *addrc, int *sizec); + int (*map)(u32 *addr, const u32 *range, + int na, int ns, int pna); + unsigned long (*get_flags)(const u32 *addr, unsigned long); +}; + +/* + * Default translator (generic bus) + */ + +static void of_bus_default_count_cells(struct device_node *dev, + int *addrc, int *sizec) +{ + get_cells(dev, addrc, sizec); +} + +/* Make sure the least significant 64-bits are in-range. Even + * for 3 or 4 cell values it is a good enough approximation. + */ +static int of_out_of_range(const u32 *addr, const u32 *base, + const u32 *size, int na, int ns) +{ + u64 a = of_read_addr(addr, na); + u64 b = of_read_addr(base, na); + + if (a < b) + return 1; + + b += of_read_addr(size, ns); + if (a >= b) + return 1; + + return 0; +} + +static int of_bus_default_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + if (ns > 2) { + printk("of_device: Cannot handle size cells (%d) > 2.", ns); + return -EINVAL; + } + + if (of_out_of_range(addr, range, range + na + pna, na, ns)) + return -EINVAL; + + /* Start with the parent range base. */ + memcpy(result, range + na, pna * 4); + + /* Add in the child address offset. */ + for (i = 0; i < na; i++) + result[pna - 1 - i] += + (addr[na - 1 - i] - + range[na - 1 - i]); + + memcpy(addr, result, pna * 4); + + return 0; +} + +static unsigned long of_bus_default_get_flags(const u32 *addr, unsigned long flags) +{ + if (flags) + return flags; + return IORESOURCE_MEM; +} + +/* + * PCI bus specific translator + */ + +static int of_bus_pci_match(struct device_node *np) +{ + if (!strcmp(np->name, "pci")) { + const char *model = of_get_property(np, "model", NULL); + + if (model && !strcmp(model, "SUNW,simba")) + return 0; + + /* Do not do PCI specific frobbing if the + * PCI bridge lacks a ranges property. We + * want to pass it through up to the next + * parent as-is, not with the PCI translate + * method which chops off the top address cell. + */ + if (!of_find_property(np, "ranges", NULL)) + return 0; + + return 1; + } + + return 0; +} + +static int of_bus_simba_match(struct device_node *np) +{ + const char *model = of_get_property(np, "model", NULL); + + if (model && !strcmp(model, "SUNW,simba")) + return 1; + + /* Treat PCI busses lacking ranges property just like + * simba. + */ + if (!strcmp(np->name, "pci")) { + if (!of_find_property(np, "ranges", NULL)) + return 1; + } + + return 0; +} + +static int of_bus_simba_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + return 0; +} + +static void of_bus_pci_count_cells(struct device_node *np, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 3; + if (sizec) + *sizec = 2; +} + +static int of_bus_pci_map(u32 *addr, const u32 *range, + int na, int ns, int pna) +{ + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + /* Check address type match */ + if ((addr[0] ^ range[0]) & 0x03000000) + return -EINVAL; + + if (of_out_of_range(addr + 1, range + 1, range + na + pna, + na - 1, ns)) + return -EINVAL; + + /* Start with the parent range base. */ + memcpy(result, range + na, pna * 4); + + /* Add in the child address offset, skipping high cell. */ + for (i = 0; i < na - 1; i++) + result[pna - 1 - i] += + (addr[na - 1 - i] - + range[na - 1 - i]); + + memcpy(addr, result, pna * 4); + + return 0; +} + +static unsigned long of_bus_pci_get_flags(const u32 *addr, unsigned long flags) +{ + u32 w = addr[0]; + + /* For PCI, we override whatever child busses may have used. */ + flags = 0; + switch((w >> 24) & 0x03) { + case 0x01: + flags |= IORESOURCE_IO; + break; + + case 0x02: /* 32 bits */ + case 0x03: /* 64 bits */ + flags |= IORESOURCE_MEM; + break; + } + if (w & 0x40000000) + flags |= IORESOURCE_PREFETCH; + return flags; +} + +/* + * SBUS bus specific translator + */ + +static int of_bus_sbus_match(struct device_node *np) +{ + return !strcmp(np->name, "sbus") || + !strcmp(np->name, "sbi"); +} + +static void of_bus_sbus_count_cells(struct device_node *child, + int *addrc, int *sizec) +{ + if (addrc) + *addrc = 2; + if (sizec) + *sizec = 1; +} + +/* + * FHC/Central bus specific translator. + * + * This is just needed to hard-code the address and size cell + * counts. 'fhc' and 'central' nodes lack the #address-cells and + * #size-cells properties, and if you walk to the root on such + * Enterprise boxes all you'll get is a #size-cells of 2 which is + * not what we want to use. + */ +static int of_bus_fhc_match(struct device_node *np) +{ + return !strcmp(np->name, "fhc") || + !strcmp(np->name, "central"); +} + +#define of_bus_fhc_count_cells of_bus_sbus_count_cells + +/* + * Array of bus specific translators + */ + +static struct of_bus of_busses[] = { + /* PCI */ + { + .name = "pci", + .addr_prop_name = "assigned-addresses", + .match = of_bus_pci_match, + .count_cells = of_bus_pci_count_cells, + .map = of_bus_pci_map, + .get_flags = of_bus_pci_get_flags, + }, + /* SIMBA */ + { + .name = "simba", + .addr_prop_name = "assigned-addresses", + .match = of_bus_simba_match, + .count_cells = of_bus_pci_count_cells, + .map = of_bus_simba_map, + .get_flags = of_bus_pci_get_flags, + }, + /* SBUS */ + { + .name = "sbus", + .addr_prop_name = "reg", + .match = of_bus_sbus_match, + .count_cells = of_bus_sbus_count_cells, + .map = of_bus_default_map, + .get_flags = of_bus_default_get_flags, + }, + /* FHC */ + { + .name = "fhc", + .addr_prop_name = "reg", + .match = of_bus_fhc_match, + .count_cells = of_bus_fhc_count_cells, + .map = of_bus_default_map, + .get_flags = of_bus_default_get_flags, + }, + /* Default */ + { + .name = "default", + .addr_prop_name = "reg", + .match = NULL, + .count_cells = of_bus_default_count_cells, + .map = of_bus_default_map, + .get_flags = of_bus_default_get_flags, + }, +}; + +static struct of_bus *of_match_bus(struct device_node *np) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(of_busses); i ++) + if (!of_busses[i].match || of_busses[i].match(np)) + return &of_busses[i]; + BUG(); + return NULL; +} + +static int __init build_one_resource(struct device_node *parent, + struct of_bus *bus, + struct of_bus *pbus, + u32 *addr, + int na, int ns, int pna) +{ + const u32 *ranges; + int rone, rlen; + + ranges = of_get_property(parent, "ranges", &rlen); + if (ranges == NULL || rlen == 0) { + u32 result[OF_MAX_ADDR_CELLS]; + int i; + + memset(result, 0, pna * 4); + for (i = 0; i < na; i++) + result[pna - 1 - i] = + addr[na - 1 - i]; + + memcpy(addr, result, pna * 4); + return 0; + } + + /* Now walk through the ranges */ + rlen /= 4; + rone = na + pna + ns; + for (; rlen >= rone; rlen -= rone, ranges += rone) { + if (!bus->map(addr, ranges, na, ns, pna)) + return 0; + } + + /* When we miss an I/O space match on PCI, just pass it up + * to the next PCI bridge and/or controller. + */ + if (!strcmp(bus->name, "pci") && + (addr[0] & 0x03000000) == 0x01000000) + return 0; + + return 1; +} + +static int __init use_1to1_mapping(struct device_node *pp) +{ + /* If we have a ranges property in the parent, use it. */ + if (of_find_property(pp, "ranges", NULL) != NULL) + return 0; + + /* If the parent is the dma node of an ISA bus, pass + * the translation up to the root. + * + * Some SBUS devices use intermediate nodes to express + * hierarchy within the device itself. These aren't + * real bus nodes, and don't have a 'ranges' property. + * But, we should still pass the translation work up + * to the SBUS itself. + */ + if (!strcmp(pp->name, "dma") || + !strcmp(pp->name, "espdma") || + !strcmp(pp->name, "ledma") || + !strcmp(pp->name, "lebuffer")) + return 0; + + /* Similarly for all PCI bridges, if we get this far + * it lacks a ranges property, and this will include + * cases like Simba. + */ + if (!strcmp(pp->name, "pci")) + return 0; + + return 1; +} + +static int of_resource_verbose; + +static void __init build_device_resources(struct of_device *op, + struct device *parent) +{ + struct of_device *p_op; + struct of_bus *bus; + int na, ns; + int index, num_reg; + const void *preg; + + if (!parent) + return; + + p_op = to_of_device(parent); + bus = of_match_bus(p_op->node); + bus->count_cells(op->node, &na, &ns); + + preg = of_get_property(op->node, bus->addr_prop_name, &num_reg); + if (!preg || num_reg == 0) + return; + + /* Convert to num-cells. */ + num_reg /= 4; + + /* Convert to num-entries. */ + num_reg /= na + ns; + + /* Prevent overrunning the op->resources[] array. */ + if (num_reg > PROMREG_MAX) { + printk(KERN_WARNING "%s: Too many regs (%d), " + "limiting to %d.\n", + op->node->full_name, num_reg, PROMREG_MAX); + num_reg = PROMREG_MAX; + } + + for (index = 0; index < num_reg; index++) { + struct resource *r = &op->resource[index]; + u32 addr[OF_MAX_ADDR_CELLS]; + const u32 *reg = (preg + (index * ((na + ns) * 4))); + struct device_node *dp = op->node; + struct device_node *pp = p_op->node; + struct of_bus *pbus, *dbus; + u64 size, result = OF_BAD_ADDR; + unsigned long flags; + int dna, dns; + int pna, pns; + + size = of_read_addr(reg + na, ns); + memcpy(addr, reg, na * 4); + + flags = bus->get_flags(addr, 0); + + if (use_1to1_mapping(pp)) { + result = of_read_addr(addr, na); + goto build_res; + } + + dna = na; + dns = ns; + dbus = bus; + + while (1) { + dp = pp; + pp = dp->parent; + if (!pp) { + result = of_read_addr(addr, dna); + break; + } + + pbus = of_match_bus(pp); + pbus->count_cells(dp, &pna, &pns); + + if (build_one_resource(dp, dbus, pbus, addr, + dna, dns, pna)) + break; + + flags = pbus->get_flags(addr, flags); + + dna = pna; + dns = pns; + dbus = pbus; + } + + build_res: + memset(r, 0, sizeof(*r)); + + if (of_resource_verbose) + printk("%s reg[%d] -> %lx\n", + op->node->full_name, index, + result); + + if (result != OF_BAD_ADDR) { + if (tlb_type == hypervisor) + result &= 0x0fffffffffffffffUL; + + r->start = result; + r->end = result + size - 1; + r->flags = flags; + } + r->name = op->node->name; + } +} + +static struct device_node * __init +apply_interrupt_map(struct device_node *dp, struct device_node *pp, + const u32 *imap, int imlen, const u32 *imask, + unsigned int *irq_p) +{ + struct device_node *cp; + unsigned int irq = *irq_p; + struct of_bus *bus; + phandle handle; + const u32 *reg; + int na, num_reg, i; + + bus = of_match_bus(pp); + bus->count_cells(dp, &na, NULL); + + reg = of_get_property(dp, "reg", &num_reg); + if (!reg || !num_reg) + return NULL; + + imlen /= ((na + 3) * 4); + handle = 0; + for (i = 0; i < imlen; i++) { + int j; + + for (j = 0; j < na; j++) { + if ((reg[j] & imask[j]) != imap[j]) + goto next; + } + if (imap[na] == irq) { + handle = imap[na + 1]; + irq = imap[na + 2]; + break; + } + + next: + imap += (na + 3); + } + if (i == imlen) { + /* Psycho and Sabre PCI controllers can have 'interrupt-map' + * properties that do not include the on-board device + * interrupts. Instead, the device's 'interrupts' property + * is already a fully specified INO value. + * + * Handle this by deciding that, if we didn't get a + * match in the parent's 'interrupt-map', and the + * parent is an IRQ translater, then use the parent as + * our IRQ controller. + */ + if (pp->irq_trans) + return pp; + + return NULL; + } + + *irq_p = irq; + cp = of_find_node_by_phandle(handle); + + return cp; +} + +static unsigned int __init pci_irq_swizzle(struct device_node *dp, + struct device_node *pp, + unsigned int irq) +{ + const struct linux_prom_pci_registers *regs; + unsigned int bus, devfn, slot, ret; + + if (irq < 1 || irq > 4) + return irq; + + regs = of_get_property(dp, "reg", NULL); + if (!regs) + return irq; + + bus = (regs->phys_hi >> 16) & 0xff; + devfn = (regs->phys_hi >> 8) & 0xff; + slot = (devfn >> 3) & 0x1f; + + if (pp->irq_trans) { + /* Derived from Table 8-3, U2P User's Manual. This branch + * is handling a PCI controller that lacks a proper set of + * interrupt-map and interrupt-map-mask properties. The + * Ultra-E450 is one example. + * + * The bit layout is BSSLL, where: + * B: 0 on bus A, 1 on bus B + * D: 2-bit slot number, derived from PCI device number as + * (dev - 1) for bus A, or (dev - 2) for bus B + * L: 2-bit line number + */ + if (bus & 0x80) { + /* PBM-A */ + bus = 0x00; + slot = (slot - 1) << 2; + } else { + /* PBM-B */ + bus = 0x10; + slot = (slot - 2) << 2; + } + irq -= 1; + + ret = (bus | slot | irq); + } else { + /* Going through a PCI-PCI bridge that lacks a set of + * interrupt-map and interrupt-map-mask properties. + */ + ret = ((irq - 1 + (slot & 3)) & 3) + 1; + } + + return ret; +} + +static int of_irq_verbose; + +static unsigned int __init build_one_device_irq(struct of_device *op, + struct device *parent, + unsigned int irq) +{ + struct device_node *dp = op->node; + struct device_node *pp, *ip; + unsigned int orig_irq = irq; + int nid; + + if (irq == 0xffffffff) + return irq; + + if (dp->irq_trans) { + irq = dp->irq_trans->irq_build(dp, irq, + dp->irq_trans->data); + + if (of_irq_verbose) + printk("%s: direct translate %x --> %x\n", + dp->full_name, orig_irq, irq); + + goto out; + } + + /* Something more complicated. Walk up to the root, applying + * interrupt-map or bus specific translations, until we hit + * an IRQ translator. + * + * If we hit a bus type or situation we cannot handle, we + * stop and assume that the original IRQ number was in a + * format which has special meaning to it's immediate parent. + */ + pp = dp->parent; + ip = NULL; + while (pp) { + const void *imap, *imsk; + int imlen; + + imap = of_get_property(pp, "interrupt-map", &imlen); + imsk = of_get_property(pp, "interrupt-map-mask", NULL); + if (imap && imsk) { + struct device_node *iret; + int this_orig_irq = irq; + + iret = apply_interrupt_map(dp, pp, + imap, imlen, imsk, + &irq); + + if (of_irq_verbose) + printk("%s: Apply [%s:%x] imap --> [%s:%x]\n", + op->node->full_name, + pp->full_name, this_orig_irq, + (iret ? iret->full_name : "NULL"), irq); + + if (!iret) + break; + + if (iret->irq_trans) { + ip = iret; + break; + } + } else { + if (!strcmp(pp->name, "pci")) { + unsigned int this_orig_irq = irq; + + irq = pci_irq_swizzle(dp, pp, irq); + if (of_irq_verbose) + printk("%s: PCI swizzle [%s] " + "%x --> %x\n", + op->node->full_name, + pp->full_name, this_orig_irq, + irq); + + } + + if (pp->irq_trans) { + ip = pp; + break; + } + } + dp = pp; + pp = pp->parent; + } + if (!ip) + return orig_irq; + + irq = ip->irq_trans->irq_build(op->node, irq, + ip->irq_trans->data); + if (of_irq_verbose) + printk("%s: Apply IRQ trans [%s] %x --> %x\n", + op->node->full_name, ip->full_name, orig_irq, irq); + +out: + nid = of_node_to_nid(dp); + if (nid != -1) { + cpumask_t numa_mask = node_to_cpumask(nid); + - irq_set_affinity(irq, numa_mask); ++ irq_set_affinity(irq, &numa_mask); + } + + return irq; +} + +static struct of_device * __init scan_one_device(struct device_node *dp, + struct device *parent) +{ + struct of_device *op = kzalloc(sizeof(*op), GFP_KERNEL); + const unsigned int *irq; + struct dev_archdata *sd; + int len, i; + + if (!op) + return NULL; + + sd = &op->dev.archdata; + sd->prom_node = dp; + sd->op = op; + + op->node = dp; + + op->clock_freq = of_getintprop_default(dp, "clock-frequency", + (25*1000*1000)); + op->portid = of_getintprop_default(dp, "upa-portid", -1); + if (op->portid == -1) + op->portid = of_getintprop_default(dp, "portid", -1); + + irq = of_get_property(dp, "interrupts", &len); + if (irq) { + op->num_irqs = len / 4; + + /* Prevent overrunning the op->irqs[] array. */ + if (op->num_irqs > PROMINTR_MAX) { + printk(KERN_WARNING "%s: Too many irqs (%d), " + "limiting to %d.\n", + dp->full_name, op->num_irqs, PROMINTR_MAX); + op->num_irqs = PROMINTR_MAX; + } + memcpy(op->irqs, irq, op->num_irqs * 4); + } else { + op->num_irqs = 0; + } + + build_device_resources(op, parent); + for (i = 0; i < op->num_irqs; i++) + op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]); + + op->dev.parent = parent; + op->dev.bus = &of_platform_bus_type; + if (!parent) + dev_set_name(&op->dev, "root"); + else + dev_set_name(&op->dev, "%08x", dp->node); + + if (of_device_register(op)) { + printk("%s: Could not register of device.\n", + dp->full_name); + kfree(op); + op = NULL; + } + + return op; +} + +static void __init scan_tree(struct device_node *dp, struct device *parent) +{ + while (dp) { + struct of_device *op = scan_one_device(dp, parent); + + if (op) + scan_tree(dp->child, &op->dev); + + dp = dp->sibling; + } +} + +static void __init scan_of_devices(void) +{ + struct device_node *root = of_find_node_by_path("/"); + struct of_device *parent; + + parent = scan_one_device(root, NULL); + if (!parent) + return; + + scan_tree(root->child, &parent->dev); +} + +static int __init of_bus_driver_init(void) +{ + int err; + + err = of_bus_type_init(&of_platform_bus_type, "of"); + if (!err) + scan_of_devices(); + + return err; +} + +postcore_initcall(of_bus_driver_init); + +static int __init of_debug(char *str) +{ + int val = 0; + + get_option(&str, &val); + if (val & 1) + of_resource_verbose = 1; + if (val & 2) + of_irq_verbose = 1; + return 1; +} + +__setup("of_debug=", of_debug); diff --cc arch/sparc/kernel/pci_msi.c index 2e680f34f727,000000000000..0d0cd815e83e mode 100644,000000..100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@@ -1,447 -1,0 +1,447 @@@ +/* pci_msi.c: Sparc64 MSI support common layer. + * + * Copyright (C) 2007 David S. Miller (davem@davemloft.net) + */ +#include +#include +#include + +#include "pci_impl.h" + +static irqreturn_t sparc64_msiq_interrupt(int irq, void *cookie) +{ + struct sparc64_msiq_cookie *msiq_cookie = cookie; + struct pci_pbm_info *pbm = msiq_cookie->pbm; + unsigned long msiqid = msiq_cookie->msiqid; + const struct sparc64_msiq_ops *ops; + unsigned long orig_head, head; + int err; + + ops = pbm->msi_ops; + + err = ops->get_head(pbm, msiqid, &head); + if (unlikely(err < 0)) + goto err_get_head; + + orig_head = head; + for (;;) { + unsigned long msi; + + err = ops->dequeue_msi(pbm, msiqid, &head, &msi); + if (likely(err > 0)) { + struct irq_desc *desc; + unsigned int virt_irq; + + virt_irq = pbm->msi_irq_table[msi - pbm->msi_first]; + desc = irq_desc + virt_irq; + + desc->handle_irq(virt_irq, desc); + } + + if (unlikely(err < 0)) + goto err_dequeue; + + if (err == 0) + break; + } + if (likely(head != orig_head)) { + err = ops->set_head(pbm, msiqid, head); + if (unlikely(err < 0)) + goto err_set_head; + } + return IRQ_HANDLED; + +err_get_head: + printk(KERN_EMERG "MSI: Get head on msiqid[%lu] gives error %d\n", + msiqid, err); + goto err_out; + +err_dequeue: + printk(KERN_EMERG "MSI: Dequeue head[%lu] from msiqid[%lu] " + "gives error %d\n", + head, msiqid, err); + goto err_out; + +err_set_head: + printk(KERN_EMERG "MSI: Set head[%lu] on msiqid[%lu] " + "gives error %d\n", + head, msiqid, err); + goto err_out; + +err_out: + return IRQ_NONE; +} + +static u32 pick_msiq(struct pci_pbm_info *pbm) +{ + static DEFINE_SPINLOCK(rotor_lock); + unsigned long flags; + u32 ret, rotor; + + spin_lock_irqsave(&rotor_lock, flags); + + rotor = pbm->msiq_rotor; + ret = pbm->msiq_first + rotor; + + if (++rotor >= pbm->msiq_num) + rotor = 0; + pbm->msiq_rotor = rotor; + + spin_unlock_irqrestore(&rotor_lock, flags); + + return ret; +} + + +static int alloc_msi(struct pci_pbm_info *pbm) +{ + int i; + + for (i = 0; i < pbm->msi_num; i++) { + if (!test_and_set_bit(i, pbm->msi_bitmap)) + return i + pbm->msi_first; + } + + return -ENOENT; +} + +static void free_msi(struct pci_pbm_info *pbm, int msi_num) +{ + msi_num -= pbm->msi_first; + clear_bit(msi_num, pbm->msi_bitmap); +} + +static struct irq_chip msi_irq = { + .typename = "PCI-MSI", + .mask = mask_msi_irq, + .unmask = unmask_msi_irq, + .enable = unmask_msi_irq, + .disable = mask_msi_irq, + /* XXX affinity XXX */ +}; + +static int sparc64_setup_msi_irq(unsigned int *virt_irq_p, + struct pci_dev *pdev, + struct msi_desc *entry) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + const struct sparc64_msiq_ops *ops = pbm->msi_ops; + struct msi_msg msg; + int msi, err; + u32 msiqid; + + *virt_irq_p = virt_irq_alloc(0, 0); + err = -ENOMEM; + if (!*virt_irq_p) + goto out_err; + + set_irq_chip_and_handler_name(*virt_irq_p, &msi_irq, + handle_simple_irq, "MSI"); + + err = alloc_msi(pbm); + if (unlikely(err < 0)) + goto out_virt_irq_free; + + msi = err; + + msiqid = pick_msiq(pbm); + + err = ops->msi_setup(pbm, msiqid, msi, + (entry->msi_attrib.is_64 ? 1 : 0)); + if (err) + goto out_msi_free; + + pbm->msi_irq_table[msi - pbm->msi_first] = *virt_irq_p; + + if (entry->msi_attrib.is_64) { + msg.address_hi = pbm->msi64_start >> 32; + msg.address_lo = pbm->msi64_start & 0xffffffff; + } else { + msg.address_hi = 0; + msg.address_lo = pbm->msi32_start; + } + msg.data = msi; + + set_irq_msi(*virt_irq_p, entry); + write_msi_msg(*virt_irq_p, &msg); + + return 0; + +out_msi_free: + free_msi(pbm, msi); + +out_virt_irq_free: + set_irq_chip(*virt_irq_p, NULL); + virt_irq_free(*virt_irq_p); + *virt_irq_p = 0; + +out_err: + return err; +} + +static void sparc64_teardown_msi_irq(unsigned int virt_irq, + struct pci_dev *pdev) +{ + struct pci_pbm_info *pbm = pdev->dev.archdata.host_controller; + const struct sparc64_msiq_ops *ops = pbm->msi_ops; + unsigned int msi_num; + int i, err; + + for (i = 0; i < pbm->msi_num; i++) { + if (pbm->msi_irq_table[i] == virt_irq) + break; + } + if (i >= pbm->msi_num) { + printk(KERN_ERR "%s: teardown: No MSI for irq %u\n", + pbm->name, virt_irq); + return; + } + + msi_num = pbm->msi_first + i; + pbm->msi_irq_table[i] = ~0U; + + err = ops->msi_teardown(pbm, msi_num); + if (err) { + printk(KERN_ERR "%s: teardown: ops->teardown() on MSI %u, " + "irq %u, gives error %d\n", + pbm->name, msi_num, virt_irq, err); + return; + } + + free_msi(pbm, msi_num); + + set_irq_chip(virt_irq, NULL); + virt_irq_free(virt_irq); +} + +static int msi_bitmap_alloc(struct pci_pbm_info *pbm) +{ + unsigned long size, bits_per_ulong; + + bits_per_ulong = sizeof(unsigned long) * 8; + size = (pbm->msi_num + (bits_per_ulong - 1)) & ~(bits_per_ulong - 1); + size /= 8; + BUG_ON(size % sizeof(unsigned long)); + + pbm->msi_bitmap = kzalloc(size, GFP_KERNEL); + if (!pbm->msi_bitmap) + return -ENOMEM; + + return 0; +} + +static void msi_bitmap_free(struct pci_pbm_info *pbm) +{ + kfree(pbm->msi_bitmap); + pbm->msi_bitmap = NULL; +} + +static int msi_table_alloc(struct pci_pbm_info *pbm) +{ + int size, i; + + size = pbm->msiq_num * sizeof(struct sparc64_msiq_cookie); + pbm->msiq_irq_cookies = kzalloc(size, GFP_KERNEL); + if (!pbm->msiq_irq_cookies) + return -ENOMEM; + + for (i = 0; i < pbm->msiq_num; i++) { + struct sparc64_msiq_cookie *p; + + p = &pbm->msiq_irq_cookies[i]; + p->pbm = pbm; + p->msiqid = pbm->msiq_first + i; + } + + size = pbm->msi_num * sizeof(unsigned int); + pbm->msi_irq_table = kzalloc(size, GFP_KERNEL); + if (!pbm->msi_irq_table) { + kfree(pbm->msiq_irq_cookies); + pbm->msiq_irq_cookies = NULL; + return -ENOMEM; + } + + return 0; +} + +static void msi_table_free(struct pci_pbm_info *pbm) +{ + kfree(pbm->msiq_irq_cookies); + pbm->msiq_irq_cookies = NULL; + + kfree(pbm->msi_irq_table); + pbm->msi_irq_table = NULL; +} + +static int bringup_one_msi_queue(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops, + unsigned long msiqid, + unsigned long devino) +{ + int irq = ops->msiq_build_irq(pbm, msiqid, devino); + int err, nid; + + if (irq < 0) + return irq; + + nid = pbm->numa_node; + if (nid != -1) { + cpumask_t numa_mask = node_to_cpumask(nid); + - irq_set_affinity(irq, numa_mask); ++ irq_set_affinity(irq, &numa_mask); + } + err = request_irq(irq, sparc64_msiq_interrupt, 0, + "MSIQ", + &pbm->msiq_irq_cookies[msiqid - pbm->msiq_first]); + if (err) + return err; + + return 0; +} + +static int sparc64_bringup_msi_queues(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops) +{ + int i; + + for (i = 0; i < pbm->msiq_num; i++) { + unsigned long msiqid = i + pbm->msiq_first; + unsigned long devino = i + pbm->msiq_first_devino; + int err; + + err = bringup_one_msi_queue(pbm, ops, msiqid, devino); + if (err) + return err; + } + + return 0; +} + +void sparc64_pbm_msi_init(struct pci_pbm_info *pbm, + const struct sparc64_msiq_ops *ops) +{ + const u32 *val; + int len; + + val = of_get_property(pbm->op->node, "#msi-eqs", &len); + if (!val || len != 4) + goto no_msi; + pbm->msiq_num = *val; + if (pbm->msiq_num) { + const struct msiq_prop { + u32 first_msiq; + u32 num_msiq; + u32 first_devino; + } *mqp; + const struct msi_range_prop { + u32 first_msi; + u32 num_msi; + } *mrng; + const struct addr_range_prop { + u32 msi32_high; + u32 msi32_low; + u32 msi32_len; + u32 msi64_high; + u32 msi64_low; + u32 msi64_len; + } *arng; + + val = of_get_property(pbm->op->node, "msi-eq-size", &len); + if (!val || len != 4) + goto no_msi; + + pbm->msiq_ent_count = *val; + + mqp = of_get_property(pbm->op->node, + "msi-eq-to-devino", &len); + if (!mqp) + mqp = of_get_property(pbm->op->node, + "msi-eq-devino", &len); + if (!mqp || len != sizeof(struct msiq_prop)) + goto no_msi; + + pbm->msiq_first = mqp->first_msiq; + pbm->msiq_first_devino = mqp->first_devino; + + val = of_get_property(pbm->op->node, "#msi", &len); + if (!val || len != 4) + goto no_msi; + pbm->msi_num = *val; + + mrng = of_get_property(pbm->op->node, "msi-ranges", &len); + if (!mrng || len != sizeof(struct msi_range_prop)) + goto no_msi; + pbm->msi_first = mrng->first_msi; + + val = of_get_property(pbm->op->node, "msi-data-mask", &len); + if (!val || len != 4) + goto no_msi; + pbm->msi_data_mask = *val; + + val = of_get_property(pbm->op->node, "msix-data-width", &len); + if (!val || len != 4) + goto no_msi; + pbm->msix_data_width = *val; + + arng = of_get_property(pbm->op->node, "msi-address-ranges", + &len); + if (!arng || len != sizeof(struct addr_range_prop)) + goto no_msi; + pbm->msi32_start = ((u64)arng->msi32_high << 32) | + (u64) arng->msi32_low; + pbm->msi64_start = ((u64)arng->msi64_high << 32) | + (u64) arng->msi64_low; + pbm->msi32_len = arng->msi32_len; + pbm->msi64_len = arng->msi64_len; + + if (msi_bitmap_alloc(pbm)) + goto no_msi; + + if (msi_table_alloc(pbm)) { + msi_bitmap_free(pbm); + goto no_msi; + } + + if (ops->msiq_alloc(pbm)) { + msi_table_free(pbm); + msi_bitmap_free(pbm); + goto no_msi; + } + + if (sparc64_bringup_msi_queues(pbm, ops)) { + ops->msiq_free(pbm); + msi_table_free(pbm); + msi_bitmap_free(pbm); + goto no_msi; + } + + printk(KERN_INFO "%s: MSI Queue first[%u] num[%u] count[%u] " + "devino[0x%x]\n", + pbm->name, + pbm->msiq_first, pbm->msiq_num, + pbm->msiq_ent_count, + pbm->msiq_first_devino); + printk(KERN_INFO "%s: MSI first[%u] num[%u] mask[0x%x] " + "width[%u]\n", + pbm->name, + pbm->msi_first, pbm->msi_num, pbm->msi_data_mask, + pbm->msix_data_width); + printk(KERN_INFO "%s: MSI addr32[0x%lx:0x%x] " + "addr64[0x%lx:0x%x]\n", + pbm->name, + pbm->msi32_start, pbm->msi32_len, + pbm->msi64_start, pbm->msi64_len); + printk(KERN_INFO "%s: MSI queues at RA [%016lx]\n", + pbm->name, + __pa(pbm->msi_queues)); + + pbm->msi_ops = ops; + pbm->setup_msi_irq = sparc64_setup_msi_irq; + pbm->teardown_msi_irq = sparc64_teardown_msi_irq; + } + return; + +no_msi: + pbm->msiq_num = 0; + printk(KERN_INFO "%s: No MSI support.\n", pbm->name); +} diff --cc arch/sparc/kernel/smp_32.c index e396c1f17a92,000000000000..1e5ac4e282e1 mode 100644,000000..100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@@ -1,423 -1,0 +1,421 @@@ +/* smp.c: Sparc SMP support. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz) + * Copyright (C) 2004 Keith M Wesolowski (wesolows@foobazco.org) + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "irq.h" + +volatile unsigned long cpu_callin_map[NR_CPUS] __cpuinitdata = {0,}; +unsigned char boot_cpu_id = 0; +unsigned char boot_cpu_id4 = 0; /* boot_cpu_id << 2 */ + - cpumask_t cpu_online_map = CPU_MASK_NONE; - cpumask_t phys_cpu_present_map = CPU_MASK_NONE; +cpumask_t smp_commenced_mask = CPU_MASK_NONE; + +/* The only guaranteed locking primitive available on all Sparc + * processors is 'ldstub [%reg + immediate], %dest_reg' which atomically + * places the current byte at the effective address into dest_reg and + * places 0xff there afterwards. Pretty lame locking primitive + * compared to the Alpha and the Intel no? Most Sparcs have 'swap' + * instruction which is much better... + */ + +void __cpuinit smp_store_cpu_info(int id) +{ + int cpu_node; + + cpu_data(id).udelay_val = loops_per_jiffy; + + cpu_find_by_mid(id, &cpu_node); + cpu_data(id).clock_tick = prom_getintdefault(cpu_node, + "clock-frequency", 0); + cpu_data(id).prom_node = cpu_node; + cpu_data(id).mid = cpu_get_hwmid(cpu_node); + + if (cpu_data(id).mid < 0) + panic("No MID found for CPU%d at node 0x%08d", id, cpu_node); +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ + extern void smp4m_smp_done(void); + extern void smp4d_smp_done(void); + unsigned long bogosum = 0; + int cpu, num; + + for (cpu = 0, num = 0; cpu < NR_CPUS; cpu++) + if (cpu_online(cpu)) { + num++; + bogosum += cpu_data(cpu).udelay_val; + } + + printk("Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + num, bogosum/(500000/HZ), + (bogosum/(5000/HZ))%100); + + switch(sparc_cpu_model) { + case sun4: + printk("SUN4\n"); + BUG(); + break; + case sun4c: + printk("SUN4C\n"); + BUG(); + break; + case sun4m: + smp4m_smp_done(); + break; + case sun4d: + smp4d_smp_done(); + break; + case sun4e: + printk("SUN4E\n"); + BUG(); + break; + case sun4u: + printk("SUN4U\n"); + BUG(); + break; + default: + printk("UNKNOWN!\n"); + BUG(); + break; + }; +} + +void cpu_panic(void) +{ + printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id()); + panic("SMP bolixed\n"); +} + +struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 }; + +void smp_send_reschedule(int cpu) +{ + /* See sparc64 */ +} + +void smp_send_stop(void) +{ +} + +void smp_flush_cache_all(void) +{ + xc0((smpfunc_t) BTFIXUP_CALL(local_flush_cache_all)); + local_flush_cache_all(); +} + +void smp_flush_tlb_all(void) +{ + xc0((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_all)); + local_flush_tlb_all(); +} + +void smp_flush_cache_mm(struct mm_struct *mm) +{ + if(mm->context != NO_CONTEXT) { + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) + xc1((smpfunc_t) BTFIXUP_CALL(local_flush_cache_mm), (unsigned long) mm); + local_flush_cache_mm(mm); + } +} + +void smp_flush_tlb_mm(struct mm_struct *mm) +{ + if(mm->context != NO_CONTEXT) { + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) { + xc1((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_mm), (unsigned long) mm); + if(atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) + mm->cpu_vm_mask = cpumask_of_cpu(smp_processor_id()); + } + local_flush_tlb_mm(mm); + } +} + +void smp_flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) + xc3((smpfunc_t) BTFIXUP_CALL(local_flush_cache_range), (unsigned long) vma, start, end); + local_flush_cache_range(vma, start, end); + } +} + +void smp_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + + if (mm->context != NO_CONTEXT) { + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) + xc3((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_range), (unsigned long) vma, start, end); + local_flush_tlb_range(vma, start, end); + } +} + +void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) +{ + struct mm_struct *mm = vma->vm_mm; + + if(mm->context != NO_CONTEXT) { + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) + xc2((smpfunc_t) BTFIXUP_CALL(local_flush_cache_page), (unsigned long) vma, page); + local_flush_cache_page(vma, page); + } +} + +void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + struct mm_struct *mm = vma->vm_mm; + + if(mm->context != NO_CONTEXT) { + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) + xc2((smpfunc_t) BTFIXUP_CALL(local_flush_tlb_page), (unsigned long) vma, page); + local_flush_tlb_page(vma, page); + } +} + +void smp_reschedule_irq(void) +{ + set_need_resched(); +} + +void smp_flush_page_to_ram(unsigned long page) +{ + /* Current theory is that those who call this are the one's + * who have just dirtied their cache with the pages contents + * in kernel space, therefore we only run this on local cpu. + * + * XXX This experiment failed, research further... -DaveM + */ +#if 1 + xc1((smpfunc_t) BTFIXUP_CALL(local_flush_page_to_ram), page); +#endif + local_flush_page_to_ram(page); +} + +void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) +{ + cpumask_t cpu_mask = mm->cpu_vm_mask; + cpu_clear(smp_processor_id(), cpu_mask); + if (!cpus_empty(cpu_mask)) + xc2((smpfunc_t) BTFIXUP_CALL(local_flush_sig_insns), (unsigned long) mm, insn_addr); + local_flush_sig_insns(mm, insn_addr); +} + +extern unsigned int lvl14_resolution; + +/* /proc/profile writes can call this, don't __init it please. */ +static DEFINE_SPINLOCK(prof_setup_lock); + +int setup_profiling_timer(unsigned int multiplier) +{ + int i; + unsigned long flags; + + /* Prevent level14 ticker IRQ flooding. */ + if((!multiplier) || (lvl14_resolution / multiplier) < 500) + return -EINVAL; + + spin_lock_irqsave(&prof_setup_lock, flags); + for_each_possible_cpu(i) { + load_profile_irq(i, lvl14_resolution / multiplier); + prof_multiplier(i) = multiplier; + } + spin_unlock_irqrestore(&prof_setup_lock, flags); + + return 0; +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + extern void __init smp4m_boot_cpus(void); + extern void __init smp4d_boot_cpus(void); + int i, cpuid, extra; + + printk("Entering SMP Mode...\n"); + + extra = 0; + for (i = 0; !cpu_find_by_instance(i, NULL, &cpuid); i++) { + if (cpuid >= NR_CPUS) + extra++; + } + /* i = number of cpus */ + if (extra && max_cpus > i - extra) + printk("Warning: NR_CPUS is too low to start all cpus\n"); + + smp_store_cpu_info(boot_cpu_id); + + switch(sparc_cpu_model) { + case sun4: + printk("SUN4\n"); + BUG(); + break; + case sun4c: + printk("SUN4C\n"); + BUG(); + break; + case sun4m: + smp4m_boot_cpus(); + break; + case sun4d: + smp4d_boot_cpus(); + break; + case sun4e: + printk("SUN4E\n"); + BUG(); + break; + case sun4u: + printk("SUN4U\n"); + BUG(); + break; + default: + printk("UNKNOWN!\n"); + BUG(); + break; + }; +} + +/* Set this up early so that things like the scheduler can init + * properly. We use the same cpu mask for both the present and + * possible cpu map. + */ +void __init smp_setup_cpu_possible_map(void) +{ + int instance, mid; + + instance = 0; + while (!cpu_find_by_instance(instance, NULL, &mid)) { + if (mid < NR_CPUS) { - cpu_set(mid, phys_cpu_present_map); ++ cpu_set(mid, cpu_possible_map); + cpu_set(mid, cpu_present_map); + } + instance++; + } +} + +void __init smp_prepare_boot_cpu(void) +{ + int cpuid = hard_smp_processor_id(); + + if (cpuid >= NR_CPUS) { + prom_printf("Serious problem, boot cpu id >= NR_CPUS\n"); + prom_halt(); + } + if (cpuid != 0) + printk("boot cpu id != 0, this could work but is untested\n"); + + current_thread_info()->cpu = cpuid; + cpu_set(cpuid, cpu_online_map); - cpu_set(cpuid, phys_cpu_present_map); ++ cpu_set(cpuid, cpu_possible_map); +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + extern int __cpuinit smp4m_boot_one_cpu(int); + extern int __cpuinit smp4d_boot_one_cpu(int); + int ret=0; + + switch(sparc_cpu_model) { + case sun4: + printk("SUN4\n"); + BUG(); + break; + case sun4c: + printk("SUN4C\n"); + BUG(); + break; + case sun4m: + ret = smp4m_boot_one_cpu(cpu); + break; + case sun4d: + ret = smp4d_boot_one_cpu(cpu); + break; + case sun4e: + printk("SUN4E\n"); + BUG(); + break; + case sun4u: + printk("SUN4U\n"); + BUG(); + break; + default: + printk("UNKNOWN!\n"); + BUG(); + break; + }; + + if (!ret) { + cpu_set(cpu, smp_commenced_mask); + while (!cpu_online(cpu)) + mb(); + } + return ret; +} + +void smp_bogo(struct seq_file *m) +{ + int i; + + for_each_online_cpu(i) { + seq_printf(m, + "Cpu%dBogo\t: %lu.%02lu\n", + i, + cpu_data(i).udelay_val/(500000/HZ), + (cpu_data(i).udelay_val/(5000/HZ))%100); + } +} + +void smp_info(struct seq_file *m) +{ + int i; + + seq_printf(m, "State:\n"); + for_each_online_cpu(i) + seq_printf(m, "CPU%d\t\t: online\n", i); +} diff --cc arch/sparc/kernel/smp_64.c index bfe99d82d458,000000000000..46329799f346 mode 100644,000000..100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@@ -1,1412 -1,0 +1,1408 @@@ +/* smp.c: Sparc64 SMP support. + * + * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int sparc64_multi_core __read_mostly; + - cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE; - cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly = + { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; + - EXPORT_SYMBOL(cpu_possible_map); - EXPORT_SYMBOL(cpu_online_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); +EXPORT_SYMBOL(cpu_core_map); + +static cpumask_t smp_commenced_mask; + +void smp_info(struct seq_file *m) +{ + int i; + + seq_printf(m, "State:\n"); + for_each_online_cpu(i) + seq_printf(m, "CPU%d:\t\tonline\n", i); +} + +void smp_bogo(struct seq_file *m) +{ + int i; + + for_each_online_cpu(i) + seq_printf(m, + "Cpu%dClkTck\t: %016lx\n", + i, cpu_data(i).clock_tick); +} + +extern void setup_sparc64_timer(void); + +static volatile unsigned long callin_flag = 0; + +void __cpuinit smp_callin(void) +{ + int cpuid = hard_smp_processor_id(); + + __local_per_cpu_offset = __per_cpu_offset(cpuid); + + if (tlb_type == hypervisor) + sun4v_ktsb_register(); + + __flush_tlb_all(); + + setup_sparc64_timer(); + + if (cheetah_pcache_forced_on) + cheetah_enable_pcache(); + + local_irq_enable(); + + callin_flag = 1; + __asm__ __volatile__("membar #Sync\n\t" + "flush %%g6" : : : "memory"); + + /* Clear this or we will die instantly when we + * schedule back to this idler... + */ + current_thread_info()->new_child = 0; + + /* Attach to the address space of init_task. */ + atomic_inc(&init_mm.mm_count); + current->active_mm = &init_mm; + + /* inform the notifiers about the new cpu */ + notify_cpu_starting(cpuid); + + while (!cpu_isset(cpuid, smp_commenced_mask)) + rmb(); + + ipi_call_lock(); + cpu_set(cpuid, cpu_online_map); + ipi_call_unlock(); + + /* idle thread is expected to have preempt disabled */ + preempt_disable(); +} + +void cpu_panic(void) +{ + printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id()); + panic("SMP bolixed\n"); +} + +/* This tick register synchronization scheme is taken entirely from + * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit. + * + * The only change I've made is to rework it so that the master + * initiates the synchonization instead of the slave. -DaveM + */ + +#define MASTER 0 +#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long)) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static DEFINE_SPINLOCK(itc_sync_lock); +static unsigned long go[SLAVE + 1]; + +#define DEBUG_TICK_SYNC 0 + +static inline long get_delta (long *rt, long *master) +{ + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm; + unsigned long i; + + for (i = 0; i < NUM_ITERS; i++) { + t0 = tick_ops->get_tick(); + go[MASTER] = 1; + membar_safe("#StoreLoad"); + while (!(tm = go[SLAVE])) + rmb(); + go[SLAVE] = 0; + wmb(); + t1 = tick_ops->get_tick(); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + tcenter++; + return tcenter - best_tm; +} + +void smp_synchronize_tick_client(void) +{ + long i, delta, adj, adjust_latency = 0, done = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_TICK_SYNC + struct { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of itc adjustment latency */ + } t[NUM_ROUNDS]; +#endif + + go[MASTER] = 1; + + while (go[MASTER]) + rmb(); + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS; i++) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... */ + bound = rt; + } + + if (!done) { + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + tick_ops->add_tick(adj); + } +#if DEBUG_TICK_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + local_irq_restore(flags); + +#if DEBUG_TICK_SYNC + for (i = 0; i < NUM_ROUNDS; i++) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk(KERN_INFO "CPU %d: synchronized TICK with master CPU " + "(last diff %ld cycles, maxerr %lu cycles)\n", + smp_processor_id(), delta, rt); +} + +static void smp_start_sync_tick_client(int cpu); + +static void smp_synchronize_one_tick(int cpu) +{ + unsigned long flags, i; + + go[MASTER] = 0; + + smp_start_sync_tick_client(cpu); + + /* wait for client to be ready */ + while (!go[MASTER]) + rmb(); + + /* now let the client proceed into his loop */ + go[MASTER] = 0; + membar_safe("#StoreLoad"); + + spin_lock_irqsave(&itc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) { + while (!go[MASTER]) + rmb(); + go[MASTER] = 0; + wmb(); + go[SLAVE] = tick_ops->get_tick(); + membar_safe("#StoreLoad"); + } + } + spin_unlock_irqrestore(&itc_sync_lock, flags); +} + +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) +/* XXX Put this in some common place. XXX */ +static unsigned long kimage_addr_to_ra(void *p) +{ + unsigned long val = (unsigned long) p; + + return kern_base + (val - KERNBASE); +} + +static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) +{ + extern unsigned long sparc64_ttable_tl0; + extern unsigned long kern_locked_tte_data; + struct hvtramp_descr *hdesc; + unsigned long trampoline_ra; + struct trap_per_cpu *tb; + u64 tte_vaddr, tte_data; + unsigned long hv_err; + int i; + + hdesc = kzalloc(sizeof(*hdesc) + + (sizeof(struct hvtramp_mapping) * + num_kernel_image_mappings - 1), + GFP_KERNEL); + if (!hdesc) { + printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate " + "hvtramp_descr.\n"); + return; + } + + hdesc->cpu = cpu; + hdesc->num_mappings = num_kernel_image_mappings; + + tb = &trap_block[cpu]; + tb->hdesc = hdesc; + + hdesc->fault_info_va = (unsigned long) &tb->fault_info; + hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info); + + hdesc->thread_reg = thread_reg; + + tte_vaddr = (unsigned long) KERNBASE; + tte_data = kern_locked_tte_data; + + for (i = 0; i < hdesc->num_mappings; i++) { + hdesc->maps[i].vaddr = tte_vaddr; + hdesc->maps[i].tte = tte_data; + tte_vaddr += 0x400000; + tte_data += 0x400000; + } + + trampoline_ra = kimage_addr_to_ra(hv_cpu_startup); + + hv_err = sun4v_cpu_start(cpu, trampoline_ra, + kimage_addr_to_ra(&sparc64_ttable_tl0), + __pa(hdesc)); + if (hv_err) + printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() " + "gives error %lu\n", hv_err); +} +#endif + +extern unsigned long sparc64_cpu_startup; + +/* The OBP cpu startup callback truncates the 3rd arg cookie to + * 32-bits (I think) so to be safe we have it read the pointer + * contained here so we work on >4GB machines. -DaveM + */ +static struct thread_info *cpu_new_thread = NULL; + +static int __cpuinit smp_boot_one_cpu(unsigned int cpu) +{ + struct trap_per_cpu *tb = &trap_block[cpu]; + unsigned long entry = + (unsigned long)(&sparc64_cpu_startup); + unsigned long cookie = + (unsigned long)(&cpu_new_thread); + struct task_struct *p; + int timeout, ret; + + p = fork_idle(cpu); + if (IS_ERR(p)) + return PTR_ERR(p); + callin_flag = 0; + cpu_new_thread = task_thread_info(p); + + if (tlb_type == hypervisor) { +#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) + if (ldom_domaining_enabled) + ldom_startcpu_cpuid(cpu, + (unsigned long) cpu_new_thread); + else +#endif + prom_startcpu_cpuid(cpu, entry, cookie); + } else { + struct device_node *dp = of_find_node_by_cpuid(cpu); + + prom_startcpu(dp->node, entry, cookie); + } + + for (timeout = 0; timeout < 50000; timeout++) { + if (callin_flag) + break; + udelay(100); + } + + if (callin_flag) { + ret = 0; + } else { + printk("Processor %d is stuck.\n", cpu); + ret = -ENODEV; + } + cpu_new_thread = NULL; + + if (tb->hdesc) { + kfree(tb->hdesc); + tb->hdesc = NULL; + } + + return ret; +} + +static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu) +{ + u64 result, target; + int stuck, tmp; + + if (this_is_starfire) { + /* map to real upaid */ + cpu = (((cpu & 0x3c) << 1) | + ((cpu & 0x40) >> 4) | + (cpu & 0x3)); + } + + target = (cpu << 14) | 0x70; +again: + /* Ok, this is the real Spitfire Errata #54. + * One must read back from a UDB internal register + * after writes to the UDB interrupt dispatch, but + * before the membar Sync for that write. + * So we use the high UDB control register (ASI 0x7f, + * ADDR 0x20) for the dummy read. -DaveM + */ + tmp = 0x40; + __asm__ __volatile__( + "wrpr %1, %2, %%pstate\n\t" + "stxa %4, [%0] %3\n\t" + "stxa %5, [%0+%8] %3\n\t" + "add %0, %8, %0\n\t" + "stxa %6, [%0+%8] %3\n\t" + "membar #Sync\n\t" + "stxa %%g0, [%7] %3\n\t" + "membar #Sync\n\t" + "mov 0x20, %%g1\n\t" + "ldxa [%%g1] 0x7f, %%g0\n\t" + "membar #Sync" + : "=r" (tmp) + : "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W), + "r" (data0), "r" (data1), "r" (data2), "r" (target), + "r" (0x10), "0" (tmp) + : "g1"); + + /* NOTE: PSTATE_IE is still clear. */ + stuck = 100000; + do { + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (result) + : "i" (ASI_INTR_DISPATCH_STAT)); + if (result == 0) { + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + return; + } + stuck -= 1; + if (stuck == 0) + break; + } while (result & 0x1); + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + if (stuck == 0) { + printk("CPU[%d]: mondo stuckage result[%016lx]\n", + smp_processor_id(), result); + } else { + udelay(2); + goto again; + } +} + +static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt) +{ + u64 *mondo, data0, data1, data2; + u16 *cpu_list; + u64 pstate; + int i; + + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + cpu_list = __va(tb->cpu_list_pa); + mondo = __va(tb->cpu_mondo_block_pa); + data0 = mondo[0]; + data1 = mondo[1]; + data2 = mondo[2]; + for (i = 0; i < cnt; i++) + spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]); +} + +/* Cheetah now allows to send the whole 64-bytes of data in the interrupt + * packet, but we have no use for that. However we do take advantage of + * the new pipelining feature (ie. dispatch to multiple cpus simultaneously). + */ +static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt) +{ + int nack_busy_id, is_jbus, need_more; + u64 *mondo, pstate, ver, busy_mask; + u16 *cpu_list; + + cpu_list = __va(tb->cpu_list_pa); + mondo = __va(tb->cpu_mondo_block_pa); + + /* Unfortunately, someone at Sun had the brilliant idea to make the + * busy/nack fields hard-coded by ITID number for this Ultra-III + * derivative processor. + */ + __asm__ ("rdpr %%ver, %0" : "=r" (ver)); + is_jbus = ((ver >> 32) == __JALAPENO_ID || + (ver >> 32) == __SERRANO_ID); + + __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); + +retry: + need_more = 0; + __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" + : : "r" (pstate), "i" (PSTATE_IE)); + + /* Setup the dispatch data registers. */ + __asm__ __volatile__("stxa %0, [%3] %6\n\t" + "stxa %1, [%4] %6\n\t" + "stxa %2, [%5] %6\n\t" + "membar #Sync\n\t" + : /* no outputs */ + : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]), + "r" (0x40), "r" (0x50), "r" (0x60), + "i" (ASI_INTR_W)); + + nack_busy_id = 0; + busy_mask = 0; + { + int i; + + for (i = 0; i < cnt; i++) { + u64 target, nr; + + nr = cpu_list[i]; + if (nr == 0xffff) + continue; + + target = (nr << 14) | 0x70; + if (is_jbus) { + busy_mask |= (0x1UL << (nr * 2)); + } else { + target |= (nack_busy_id << 24); + busy_mask |= (0x1UL << + (nack_busy_id * 2)); + } + __asm__ __volatile__( + "stxa %%g0, [%0] %1\n\t" + "membar #Sync\n\t" + : /* no outputs */ + : "r" (target), "i" (ASI_INTR_W)); + nack_busy_id++; + if (nack_busy_id == 32) { + need_more = 1; + break; + } + } + } + + /* Now, poll for completion. */ + { + u64 dispatch_stat, nack_mask; + long stuck; + + stuck = 100000 * nack_busy_id; + nack_mask = busy_mask << 1; + do { + __asm__ __volatile__("ldxa [%%g0] %1, %0" + : "=r" (dispatch_stat) + : "i" (ASI_INTR_DISPATCH_STAT)); + if (!(dispatch_stat & (busy_mask | nack_mask))) { + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + if (unlikely(need_more)) { + int i, this_cnt = 0; + for (i = 0; i < cnt; i++) { + if (cpu_list[i] == 0xffff) + continue; + cpu_list[i] = 0xffff; + this_cnt++; + if (this_cnt == 32) + break; + } + goto retry; + } + return; + } + if (!--stuck) + break; + } while (dispatch_stat & busy_mask); + + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : : "r" (pstate)); + + if (dispatch_stat & busy_mask) { + /* Busy bits will not clear, continue instead + * of freezing up on this cpu. + */ + printk("CPU[%d]: mondo stuckage result[%016lx]\n", + smp_processor_id(), dispatch_stat); + } else { + int i, this_busy_nack = 0; + + /* Delay some random time with interrupts enabled + * to prevent deadlock. + */ + udelay(2 * nack_busy_id); + + /* Clear out the mask bits for cpus which did not + * NACK us. + */ + for (i = 0; i < cnt; i++) { + u64 check_mask, nr; + + nr = cpu_list[i]; + if (nr == 0xffff) + continue; + + if (is_jbus) + check_mask = (0x2UL << (2*nr)); + else + check_mask = (0x2UL << + this_busy_nack); + if ((dispatch_stat & check_mask) == 0) + cpu_list[i] = 0xffff; + this_busy_nack += 2; + if (this_busy_nack == 64) + break; + } + + goto retry; + } + } +} + +/* Multi-cpu list version. */ +static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt) +{ + int retries, this_cpu, prev_sent, i, saw_cpu_error; + unsigned long status; + u16 *cpu_list; + + this_cpu = smp_processor_id(); + + cpu_list = __va(tb->cpu_list_pa); + + saw_cpu_error = 0; + retries = 0; + prev_sent = 0; + do { + int forward_progress, n_sent; + + status = sun4v_cpu_mondo_send(cnt, + tb->cpu_list_pa, + tb->cpu_mondo_block_pa); + + /* HV_EOK means all cpus received the xcall, we're done. */ + if (likely(status == HV_EOK)) + break; + + /* First, see if we made any forward progress. + * + * The hypervisor indicates successful sends by setting + * cpu list entries to the value 0xffff. + */ + n_sent = 0; + for (i = 0; i < cnt; i++) { + if (likely(cpu_list[i] == 0xffff)) + n_sent++; + } + + forward_progress = 0; + if (n_sent > prev_sent) + forward_progress = 1; + + prev_sent = n_sent; + + /* If we get a HV_ECPUERROR, then one or more of the cpus + * in the list are in error state. Use the cpu_state() + * hypervisor call to find out which cpus are in error state. + */ + if (unlikely(status == HV_ECPUERROR)) { + for (i = 0; i < cnt; i++) { + long err; + u16 cpu; + + cpu = cpu_list[i]; + if (cpu == 0xffff) + continue; + + err = sun4v_cpu_state(cpu); + if (err == HV_CPU_STATE_ERROR) { + saw_cpu_error = (cpu + 1); + cpu_list[i] = 0xffff; + } + } + } else if (unlikely(status != HV_EWOULDBLOCK)) + goto fatal_mondo_error; + + /* Don't bother rewriting the CPU list, just leave the + * 0xffff and non-0xffff entries in there and the + * hypervisor will do the right thing. + * + * Only advance timeout state if we didn't make any + * forward progress. + */ + if (unlikely(!forward_progress)) { + if (unlikely(++retries > 10000)) + goto fatal_mondo_timeout; + + /* Delay a little bit to let other cpus catch up + * on their cpu mondo queue work. + */ + udelay(2 * cnt); + } + } while (1); + + if (unlikely(saw_cpu_error)) + goto fatal_mondo_cpu_error; + + return; + +fatal_mondo_cpu_error: + printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus " + "(including %d) were in error state\n", + this_cpu, saw_cpu_error - 1); + return; + +fatal_mondo_timeout: + printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward " + " progress after %d retries.\n", + this_cpu, retries); + goto dump_cpu_list_and_out; + +fatal_mondo_error: + printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n", + this_cpu, status); + printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) " + "mondo_block_pa(%lx)\n", + this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa); + +dump_cpu_list_and_out: + printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu); + for (i = 0; i < cnt; i++) + printk("%u ", cpu_list[i]); + printk("]\n"); +} + +static void (*xcall_deliver_impl)(struct trap_per_cpu *, int); + +static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask) +{ + struct trap_per_cpu *tb; + int this_cpu, i, cnt; + unsigned long flags; + u16 *cpu_list; + u64 *mondo; + + /* We have to do this whole thing with interrupts fully disabled. + * Otherwise if we send an xcall from interrupt context it will + * corrupt both our mondo block and cpu list state. + * + * One consequence of this is that we cannot use timeout mechanisms + * that depend upon interrupts being delivered locally. So, for + * example, we cannot sample jiffies and expect it to advance. + * + * Fortunately, udelay() uses %stick/%tick so we can use that. + */ + local_irq_save(flags); + + this_cpu = smp_processor_id(); + tb = &trap_block[this_cpu]; + + mondo = __va(tb->cpu_mondo_block_pa); + mondo[0] = data0; + mondo[1] = data1; + mondo[2] = data2; + wmb(); + + cpu_list = __va(tb->cpu_list_pa); + + /* Setup the initial cpu list. */ + cnt = 0; + for_each_cpu(i, mask) { + if (i == this_cpu || !cpu_online(i)) + continue; + cpu_list[cnt++] = i; + } + + if (cnt) + xcall_deliver_impl(tb, cnt); + + local_irq_restore(flags); +} + +/* Send cross call to all processors mentioned in MASK_P + * except self. Really, there are only two cases currently, + * "&cpu_online_map" and "&mm->cpu_vm_mask". + */ +static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask) +{ + u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff)); + + xcall_deliver(data0, data1, data2, mask); +} + +/* Send cross call to all processors except self. */ +static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2) +{ + smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map); +} + +extern unsigned long xcall_sync_tick; + +static void smp_start_sync_tick_client(int cpu) +{ + xcall_deliver((u64) &xcall_sync_tick, 0, 0, + &cpumask_of_cpu(cpu)); +} + +extern unsigned long xcall_call_function; + +void arch_send_call_function_ipi(cpumask_t mask) +{ + xcall_deliver((u64) &xcall_call_function, 0, 0, &mask); +} + +extern unsigned long xcall_call_function_single; + +void arch_send_call_function_single_ipi(int cpu) +{ + xcall_deliver((u64) &xcall_call_function_single, 0, 0, + &cpumask_of_cpu(cpu)); +} + +void smp_call_function_client(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + generic_smp_call_function_interrupt(); +} + +void smp_call_function_single_client(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + generic_smp_call_function_single_interrupt(); +} + +static void tsb_sync(void *info) +{ + struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()]; + struct mm_struct *mm = info; + + /* It is not valid to test "currrent->active_mm == mm" here. + * + * The value of "current" is not changed atomically with + * switch_mm(). But that's OK, we just need to check the + * current cpu's trap block PGD physical address. + */ + if (tp->pgd_paddr == __pa(mm->pgd)) + tsb_context_switch(mm); +} + +void smp_tsb_sync(struct mm_struct *mm) +{ + smp_call_function_mask(mm->cpu_vm_mask, tsb_sync, mm, 1); +} + +extern unsigned long xcall_flush_tlb_mm; +extern unsigned long xcall_flush_tlb_pending; +extern unsigned long xcall_flush_tlb_kernel_range; +extern unsigned long xcall_fetch_glob_regs; +extern unsigned long xcall_receive_signal; +extern unsigned long xcall_new_mmu_context_version; +#ifdef CONFIG_KGDB +extern unsigned long xcall_kgdb_capture; +#endif + +#ifdef DCACHE_ALIASING_POSSIBLE +extern unsigned long xcall_flush_dcache_page_cheetah; +#endif +extern unsigned long xcall_flush_dcache_page_spitfire; + +#ifdef CONFIG_DEBUG_DCFLUSH +extern atomic_t dcpage_flushes; +extern atomic_t dcpage_flushes_xcall; +#endif + +static inline void __local_flush_dcache_page(struct page *page) +{ +#ifdef DCACHE_ALIASING_POSSIBLE + __flush_dcache_page(page_address(page), + ((tlb_type == spitfire) && + page_mapping(page) != NULL)); +#else + if (page_mapping(page) != NULL && + tlb_type == spitfire) + __flush_icache_page(__pa(page_address(page))); +#endif +} + +void smp_flush_dcache_page_impl(struct page *page, int cpu) +{ + int this_cpu; + + if (tlb_type == hypervisor) + return; + +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes); +#endif + + this_cpu = get_cpu(); + + if (cpu == this_cpu) { + __local_flush_dcache_page(page); + } else if (cpu_online(cpu)) { + void *pg_addr = page_address(page); + u64 data0 = 0; + + if (tlb_type == spitfire) { + data0 = ((u64)&xcall_flush_dcache_page_spitfire); + if (page_mapping(page) != NULL) + data0 |= ((u64)1 << 32); + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { +#ifdef DCACHE_ALIASING_POSSIBLE + data0 = ((u64)&xcall_flush_dcache_page_cheetah); +#endif + } + if (data0) { + xcall_deliver(data0, __pa(pg_addr), + (u64) pg_addr, &cpumask_of_cpu(cpu)); +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes_xcall); +#endif + } + } + + put_cpu(); +} + +void flush_dcache_page_all(struct mm_struct *mm, struct page *page) +{ + void *pg_addr; + int this_cpu; + u64 data0; + + if (tlb_type == hypervisor) + return; + + this_cpu = get_cpu(); + +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes); +#endif + data0 = 0; + pg_addr = page_address(page); + if (tlb_type == spitfire) { + data0 = ((u64)&xcall_flush_dcache_page_spitfire); + if (page_mapping(page) != NULL) + data0 |= ((u64)1 << 32); + } else if (tlb_type == cheetah || tlb_type == cheetah_plus) { +#ifdef DCACHE_ALIASING_POSSIBLE + data0 = ((u64)&xcall_flush_dcache_page_cheetah); +#endif + } + if (data0) { + xcall_deliver(data0, __pa(pg_addr), + (u64) pg_addr, &cpu_online_map); +#ifdef CONFIG_DEBUG_DCFLUSH + atomic_inc(&dcpage_flushes_xcall); +#endif + } + __local_flush_dcache_page(page); + + put_cpu(); +} + +void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) +{ + struct mm_struct *mm; + unsigned long flags; + + clear_softint(1 << irq); + + /* See if we need to allocate a new TLB context because + * the version of the one we are using is now out of date. + */ + mm = current->active_mm; + if (unlikely(!mm || (mm == &init_mm))) + return; + + spin_lock_irqsave(&mm->context.lock, flags); + + if (unlikely(!CTX_VALID(mm->context))) + get_new_mmu_context(mm); + + spin_unlock_irqrestore(&mm->context.lock, flags); + + load_secondary_context(mm); + __flush_tlb_mm(CTX_HWBITS(mm->context), + SECONDARY_CONTEXT); +} + +void smp_new_mmu_context_version(void) +{ + smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0); +} + +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(unsigned long flags) +{ + smp_cross_call(&xcall_kgdb_capture, 0, 0, 0); +} +#endif + +void smp_fetch_global_regs(void) +{ + smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0); +} + +/* We know that the window frames of the user have been flushed + * to the stack before we get here because all callers of us + * are flush_tlb_*() routines, and these run after flush_cache_*() + * which performs the flushw. + * + * The SMP TLB coherency scheme we use works as follows: + * + * 1) mm->cpu_vm_mask is a bit mask of which cpus an address + * space has (potentially) executed on, this is the heuristic + * we use to avoid doing cross calls. + * + * Also, for flushing from kswapd and also for clones, we + * use cpu_vm_mask as the list of cpus to make run the TLB. + * + * 2) TLB context numbers are shared globally across all processors + * in the system, this allows us to play several games to avoid + * cross calls. + * + * One invariant is that when a cpu switches to a process, and + * that processes tsk->active_mm->cpu_vm_mask does not have the + * current cpu's bit set, that tlb context is flushed locally. + * + * If the address space is non-shared (ie. mm->count == 1) we avoid + * cross calls when we want to flush the currently running process's + * tlb state. This is done by clearing all cpu bits except the current + * processor's in current->active_mm->cpu_vm_mask and performing the + * flush locally only. This will force any subsequent cpus which run + * this task to flush the context from the local tlb if the process + * migrates to another cpu (again). + * + * 3) For shared address spaces (threads) and swapping we bite the + * bullet for most cases and perform the cross call (but only to + * the cpus listed in cpu_vm_mask). + * + * The performance gain from "optimizing" away the cross call for threads is + * questionable (in theory the big win for threads is the massive sharing of + * address space state across processors). + */ + +/* This currently is only used by the hugetlb arch pre-fault + * hook on UltraSPARC-III+ and later when changing the pagesize + * bits of the context register for an address space. + */ +void smp_flush_tlb_mm(struct mm_struct *mm) +{ + u32 ctx = CTX_HWBITS(mm->context); + int cpu = get_cpu(); + + if (atomic_read(&mm->mm_users) == 1) { + mm->cpu_vm_mask = cpumask_of_cpu(cpu); + goto local_flush_and_out; + } + + smp_cross_call_masked(&xcall_flush_tlb_mm, + ctx, 0, 0, + &mm->cpu_vm_mask); + +local_flush_and_out: + __flush_tlb_mm(ctx, SECONDARY_CONTEXT); + + put_cpu(); +} + +void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs) +{ + u32 ctx = CTX_HWBITS(mm->context); + int cpu = get_cpu(); + + if (mm == current->active_mm && atomic_read(&mm->mm_users) == 1) + mm->cpu_vm_mask = cpumask_of_cpu(cpu); + else + smp_cross_call_masked(&xcall_flush_tlb_pending, + ctx, nr, (unsigned long) vaddrs, + &mm->cpu_vm_mask); + + __flush_tlb_pending(ctx, nr, vaddrs); + + put_cpu(); +} + +void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + start &= PAGE_MASK; + end = PAGE_ALIGN(end); + if (start != end) { + smp_cross_call(&xcall_flush_tlb_kernel_range, + 0, start, end); + + __flush_tlb_kernel_range(start, end); + } +} + +/* CPU capture. */ +/* #define CAPTURE_DEBUG */ +extern unsigned long xcall_capture; + +static atomic_t smp_capture_depth = ATOMIC_INIT(0); +static atomic_t smp_capture_registry = ATOMIC_INIT(0); +static unsigned long penguins_are_doing_time; + +void smp_capture(void) +{ + int result = atomic_add_ret(1, &smp_capture_depth); + + if (result == 1) { + int ncpus = num_online_cpus(); + +#ifdef CAPTURE_DEBUG + printk("CPU[%d]: Sending penguins to jail...", + smp_processor_id()); +#endif + penguins_are_doing_time = 1; + atomic_inc(&smp_capture_registry); + smp_cross_call(&xcall_capture, 0, 0, 0); + while (atomic_read(&smp_capture_registry) != ncpus) + rmb(); +#ifdef CAPTURE_DEBUG + printk("done\n"); +#endif + } +} + +void smp_release(void) +{ + if (atomic_dec_and_test(&smp_capture_depth)) { +#ifdef CAPTURE_DEBUG + printk("CPU[%d]: Giving pardon to " + "imprisoned penguins\n", + smp_processor_id()); +#endif + penguins_are_doing_time = 0; + membar_safe("#StoreLoad"); + atomic_dec(&smp_capture_registry); + } +} + +/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE + * set, so they can service tlb flush xcalls... + */ +extern void prom_world(int); + +void smp_penguin_jailcell(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); + + preempt_disable(); + + __asm__ __volatile__("flushw"); + prom_world(1); + atomic_inc(&smp_capture_registry); + membar_safe("#StoreLoad"); + while (penguins_are_doing_time) + rmb(); + atomic_dec(&smp_capture_registry); + prom_world(0); + + preempt_enable(); +} + +/* /proc/profile writes can call this, don't __init it please. */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ +} + +void __devinit smp_prepare_boot_cpu(void) +{ +} + +void __init smp_setup_processor_id(void) +{ + if (tlb_type == spitfire) + xcall_deliver_impl = spitfire_xcall_deliver; + else if (tlb_type == cheetah || tlb_type == cheetah_plus) + xcall_deliver_impl = cheetah_xcall_deliver; + else + xcall_deliver_impl = hypervisor_xcall_deliver; +} + +void __devinit smp_fill_in_sib_core_maps(void) +{ + unsigned int i; + + for_each_present_cpu(i) { + unsigned int j; + + cpus_clear(cpu_core_map[i]); + if (cpu_data(i).core_id == 0) { + cpu_set(i, cpu_core_map[i]); + continue; + } + + for_each_present_cpu(j) { + if (cpu_data(i).core_id == + cpu_data(j).core_id) + cpu_set(j, cpu_core_map[i]); + } + } + + for_each_present_cpu(i) { + unsigned int j; + + cpus_clear(per_cpu(cpu_sibling_map, i)); + if (cpu_data(i).proc_id == -1) { + cpu_set(i, per_cpu(cpu_sibling_map, i)); + continue; + } + + for_each_present_cpu(j) { + if (cpu_data(i).proc_id == + cpu_data(j).proc_id) + cpu_set(j, per_cpu(cpu_sibling_map, i)); + } + } +} + +int __cpuinit __cpu_up(unsigned int cpu) +{ + int ret = smp_boot_one_cpu(cpu); + + if (!ret) { + cpu_set(cpu, smp_commenced_mask); + while (!cpu_isset(cpu, cpu_online_map)) + mb(); + if (!cpu_isset(cpu, cpu_online_map)) { + ret = -ENODEV; + } else { + /* On SUN4V, writes to %tick and %stick are + * not allowed. + */ + if (tlb_type != hypervisor) + smp_synchronize_one_tick(cpu); + } + } + return ret; +} + +#ifdef CONFIG_HOTPLUG_CPU +void cpu_play_dead(void) +{ + int cpu = smp_processor_id(); + unsigned long pstate; + + idle_task_exit(); + + if (tlb_type == hypervisor) { + struct trap_per_cpu *tb = &trap_block[cpu]; + + sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO, + tb->cpu_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO, + tb->dev_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR, + tb->resum_mondo_pa, 0); + sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR, + tb->nonresum_mondo_pa, 0); + } + + cpu_clear(cpu, smp_commenced_mask); + membar_safe("#Sync"); + + local_irq_disable(); + + __asm__ __volatile__( + "rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + while (1) + barrier(); +} + +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + cpuinfo_sparc *c; + int i; + + for_each_cpu_mask(i, cpu_core_map[cpu]) + cpu_clear(cpu, cpu_core_map[i]); + cpus_clear(cpu_core_map[cpu]); + + for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu)) + cpu_clear(cpu, per_cpu(cpu_sibling_map, i)); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + + c = &cpu_data(cpu); + + c->core_id = 0; + c->proc_id = -1; + + smp_wmb(); + + /* Make sure no interrupts point to this cpu. */ + fixup_irqs(); + + local_irq_enable(); + mdelay(1); + local_irq_disable(); + + ipi_call_lock(); + cpu_clear(cpu, cpu_online_map); + ipi_call_unlock(); + + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + int i; + + for (i = 0; i < 100; i++) { + smp_rmb(); + if (!cpu_isset(cpu, smp_commenced_mask)) + break; + msleep(100); + } + if (cpu_isset(cpu, smp_commenced_mask)) { + printk(KERN_ERR "CPU %u didn't die...\n", cpu); + } else { +#if defined(CONFIG_SUN_LDOMS) + unsigned long hv_err; + int limit = 100; + + do { + hv_err = sun4v_cpu_stop(cpu); + if (hv_err == HV_EOK) { + cpu_clear(cpu, cpu_present_map); + break; + } + } while (--limit > 0); + if (limit <= 0) { + printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n", + hv_err); + } +#endif + } +} +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +void smp_send_reschedule(int cpu) +{ + xcall_deliver((u64) &xcall_receive_signal, 0, 0, + &cpumask_of_cpu(cpu)); +} + +void smp_receive_signal_client(int irq, struct pt_regs *regs) +{ + clear_softint(1 << irq); +} + +/* This is a nop because we capture all other cpus + * anyways when making the PROM active. + */ +void smp_send_stop(void) +{ +} + +unsigned long __per_cpu_base __read_mostly; +unsigned long __per_cpu_shift __read_mostly; + +EXPORT_SYMBOL(__per_cpu_base); +EXPORT_SYMBOL(__per_cpu_shift); + +void __init real_setup_per_cpu_areas(void) +{ + unsigned long paddr, goal, size, i; + char *ptr; + + /* Copy section for each CPU (we discard the original) */ + goal = PERCPU_ENOUGH_ROOM; + + __per_cpu_shift = PAGE_SHIFT; + for (size = PAGE_SIZE; size < goal; size <<= 1UL) + __per_cpu_shift++; + + paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE); + if (!paddr) { + prom_printf("Cannot allocate per-cpu memory.\n"); + prom_halt(); + } + + ptr = __va(paddr); + __per_cpu_base = ptr - __per_cpu_start; + + for (i = 0; i < NR_CPUS; i++, ptr += size) + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + + /* Setup %g5 for the boot cpu. */ + __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); +} diff --cc arch/sparc/kernel/sparc_ksyms_32.c index a4d45fc29b21,000000000000..e1e97639231b mode 100644,000000..100644 --- a/arch/sparc/kernel/sparc_ksyms_32.c +++ b/arch/sparc/kernel/sparc_ksyms_32.c @@@ -1,257 -1,0 +1,253 @@@ +/* + * arch/sparc/kernel/ksyms.c: Sparc specific ksyms support. + * + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) + * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) + */ + +/* Tell string.h we don't want memcpy etc. as cpp defines */ +#define EXPORT_SYMTAB_STROPS +#define PROMLIB_INTERNAL + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_PCI +#include +#endif +#include +#ifdef CONFIG_HIGHMEM +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_SBUS +#include +#endif +#include +#include + +extern spinlock_t rtc_lock; + +struct poll { + int fd; + short events; + short revents; +}; + +extern void (*__copy_1page)(void *, const void *); +extern void __memmove(void *, const void *, __kernel_size_t); +extern void (*bzero_1page)(void *); +extern void *__bzero(void *, size_t); +extern void *__memscan_zero(void *, size_t); +extern void *__memscan_generic(void *, int, size_t); +extern int __strncmp(const char *, const char *, __kernel_size_t); + +extern int __ashrdi3(int, int); +extern int __ashldi3(int, int); +extern int __lshrdi3(int, int); +extern int __muldi3(int, int); +extern int __divdi3(int, int); + +/* Private functions with odd calling conventions. */ +extern void ___atomic24_add(void); +extern void ___atomic24_sub(void); +extern void ___rw_read_enter(void); +extern void ___rw_read_try(void); +extern void ___rw_read_exit(void); +extern void ___rw_write_enter(void); + +/* Alias functions whose names begin with "." and export the aliases. + * The module references will be fixed up by module_frob_arch_sections. + */ +extern int _Div(int, int); +extern int _Mul(int, int); +extern int _Rem(int, int); +extern unsigned _Udiv(unsigned, unsigned); +extern unsigned _Umul(unsigned, unsigned); +extern unsigned _Urem(unsigned, unsigned); + +/* used by various drivers */ +EXPORT_SYMBOL(sparc_cpu_model); +EXPORT_SYMBOL(kernel_thread); +#ifdef CONFIG_SMP +// XXX find what uses (or used) these. AV: see asm/spinlock.h +EXPORT_SYMBOL(___rw_read_enter); +EXPORT_SYMBOL(___rw_read_try); +EXPORT_SYMBOL(___rw_read_exit); +EXPORT_SYMBOL(___rw_write_enter); +#endif + +EXPORT_SYMBOL(sparc_valid_addr_bitmap); +EXPORT_SYMBOL(phys_base); +EXPORT_SYMBOL(pfn_base); + +/* Atomic operations. */ +EXPORT_SYMBOL(___atomic24_add); +EXPORT_SYMBOL(___atomic24_sub); + +/* Per-CPU information table */ +EXPORT_PER_CPU_SYMBOL(__cpu_data); + +#ifdef CONFIG_SMP +/* IRQ implementation. */ +EXPORT_SYMBOL(synchronize_irq); - - /* CPU online map and active count. */ - EXPORT_SYMBOL(cpu_online_map); - EXPORT_SYMBOL(phys_cpu_present_map); +#endif + +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__ndelay); +EXPORT_SYMBOL(rtc_lock); +EXPORT_SYMBOL(set_auxio); +EXPORT_SYMBOL(get_auxio); +EXPORT_SYMBOL(io_remap_pfn_range); + +#ifndef CONFIG_SMP +EXPORT_SYMBOL(BTFIXUP_CALL(___xchg32)); +#else +EXPORT_SYMBOL(BTFIXUP_CALL(__hard_smp_processor_id)); +#endif +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_unlockarea)); +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_lockarea)); +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_sgl)); +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_get_scsi_one)); +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_sgl)); +EXPORT_SYMBOL(BTFIXUP_CALL(mmu_release_scsi_one)); + +EXPORT_SYMBOL(BTFIXUP_CALL(pgprot_noncached)); + +#ifdef CONFIG_SBUS +EXPORT_SYMBOL(sbus_set_sbus64); +#endif +#ifdef CONFIG_PCI +EXPORT_SYMBOL(insb); +EXPORT_SYMBOL(outsb); +EXPORT_SYMBOL(insw); +EXPORT_SYMBOL(outsw); +EXPORT_SYMBOL(insl); +EXPORT_SYMBOL(outsl); +EXPORT_SYMBOL(pci_alloc_consistent); +EXPORT_SYMBOL(pci_free_consistent); +EXPORT_SYMBOL(pci_map_single); +EXPORT_SYMBOL(pci_unmap_single); +EXPORT_SYMBOL(pci_dma_sync_single_for_cpu); +EXPORT_SYMBOL(pci_dma_sync_single_for_device); +EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu); +EXPORT_SYMBOL(pci_dma_sync_sg_for_device); +EXPORT_SYMBOL(pci_map_sg); +EXPORT_SYMBOL(pci_unmap_sg); +EXPORT_SYMBOL(pci_map_page); +EXPORT_SYMBOL(pci_unmap_page); +/* Actually, ioremap/iounmap are not PCI specific. But it is ok for drivers. */ +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(iounmap); +#endif + +/* in arch/sparc/mm/highmem.c */ +#ifdef CONFIG_HIGHMEM +EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kunmap_atomic); +#endif + +/* prom symbols */ +EXPORT_SYMBOL(idprom); +EXPORT_SYMBOL(prom_root_node); +EXPORT_SYMBOL(prom_getchild); +EXPORT_SYMBOL(prom_getsibling); +EXPORT_SYMBOL(prom_searchsiblings); +EXPORT_SYMBOL(prom_firstprop); +EXPORT_SYMBOL(prom_nextprop); +EXPORT_SYMBOL(prom_getproplen); +EXPORT_SYMBOL(prom_getproperty); +EXPORT_SYMBOL(prom_node_has_property); +EXPORT_SYMBOL(prom_setprop); +EXPORT_SYMBOL(saved_command_line); +EXPORT_SYMBOL(prom_apply_obio_ranges); +EXPORT_SYMBOL(prom_feval); +EXPORT_SYMBOL(prom_getbool); +EXPORT_SYMBOL(prom_getstring); +EXPORT_SYMBOL(prom_getint); +EXPORT_SYMBOL(prom_getintdefault); +EXPORT_SYMBOL(prom_finddevice); +EXPORT_SYMBOL(romvec); +EXPORT_SYMBOL(__prom_getchild); +EXPORT_SYMBOL(__prom_getsibling); + +/* sparc library symbols */ +EXPORT_SYMBOL(memscan); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(page_kernel); + +/* Special internal versions of library functions. */ +EXPORT_SYMBOL(__copy_1page); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(bzero_1page); +EXPORT_SYMBOL(__bzero); +EXPORT_SYMBOL(__memscan_zero); +EXPORT_SYMBOL(__memscan_generic); +EXPORT_SYMBOL(__strncmp); +EXPORT_SYMBOL(__memmove); + +/* Moving data to/from userspace. */ +EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(__strncpy_from_user); +EXPORT_SYMBOL(__strnlen_user); + +/* Networking helper routines. */ +EXPORT_SYMBOL(__csum_partial_copy_sparc_generic); +EXPORT_SYMBOL(csum_partial); + +/* Cache flushing. */ +EXPORT_SYMBOL(sparc_flush_page_to_ram); + +/* For when serial stuff is built as modules. */ +EXPORT_SYMBOL(sun_do_break); + +EXPORT_SYMBOL(__ret_efault); + +EXPORT_SYMBOL(memcmp); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__ashrdi3); +EXPORT_SYMBOL(__ashldi3); +EXPORT_SYMBOL(__lshrdi3); +EXPORT_SYMBOL(__muldi3); +EXPORT_SYMBOL(__divdi3); + +EXPORT_SYMBOL(_Rem); +EXPORT_SYMBOL(_Urem); +EXPORT_SYMBOL(_Mul); +EXPORT_SYMBOL(_Umul); +EXPORT_SYMBOL(_Div); +EXPORT_SYMBOL(_Udiv); + +#ifdef CONFIG_DEBUG_BUGVERBOSE +EXPORT_SYMBOL(do_BUG); +#endif + +/* Sun Power Management Idle Handler */ +EXPORT_SYMBOL(pm_idle); + +EXPORT_SYMBOL(empty_zero_page); diff --cc arch/sparc/kernel/time_64.c index 141da3759091,000000000000..9df8f095a8b1 mode 100644,000000..100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@@ -1,862 -1,0 +1,862 @@@ +/* time.c: UltraSparc timer and TOD clock support. + * + * Copyright (C) 1997, 2008 David S. Miller (davem@davemloft.net) + * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) + * + * Based largely on code which is: + * + * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" + +DEFINE_SPINLOCK(rtc_lock); + +#define TICK_PRIV_BIT (1UL << 63) +#define TICKCMP_IRQ_BIT (1UL << 63) + +#ifdef CONFIG_SMP +unsigned long profile_pc(struct pt_regs *regs) +{ + unsigned long pc = instruction_pointer(regs); + + if (in_lock_functions(pc)) + return regs->u_regs[UREG_RETPC]; + return pc; +} +EXPORT_SYMBOL(profile_pc); +#endif + +static void tick_disable_protection(void) +{ + /* Set things up so user can access tick register for profiling + * purposes. Also workaround BB_ERRATA_1 by doing a dummy + * read back of %tick after writing it. + */ + __asm__ __volatile__( + " ba,pt %%xcc, 1f\n" + " nop\n" + " .align 64\n" + "1: rd %%tick, %%g2\n" + " add %%g2, 6, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wrpr %%g2, 0, %%tick\n" + " rdpr %%tick, %%g0" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g2"); +} + +static void tick_disable_irq(void) +{ + __asm__ __volatile__( + " ba,pt %%xcc, 1f\n" + " nop\n" + " .align 64\n" + "1: wr %0, 0x0, %%tick_cmpr\n" + " rd %%tick_cmpr, %%g0" + : /* no outputs */ + : "r" (TICKCMP_IRQ_BIT)); +} + +static void tick_init_tick(void) +{ + tick_disable_protection(); + tick_disable_irq(); +} + +static unsigned long tick_get_tick(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%tick, %0\n\t" + "mov %0, %0" + : "=r" (ret)); + + return ret & ~TICK_PRIV_BIT; +} + +static int tick_add_compare(unsigned long adj) +{ + unsigned long orig_tick, new_tick, new_compare; + + __asm__ __volatile__("rd %%tick, %0" + : "=r" (orig_tick)); + + orig_tick &= ~TICKCMP_IRQ_BIT; + + /* Workaround for Spitfire Errata (#54 I think??), I discovered + * this via Sun BugID 4008234, mentioned in Solaris-2.5.1 patch + * number 103640. + * + * On Blackbird writes to %tick_cmpr can fail, the + * workaround seems to be to execute the wr instruction + * at the start of an I-cache line, and perform a dummy + * read back from %tick_cmpr right after writing to it. -DaveM + */ + __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" + " add %1, %2, %0\n\t" + ".align 64\n" + "1:\n\t" + "wr %0, 0, %%tick_cmpr\n\t" + "rd %%tick_cmpr, %%g0\n\t" + : "=r" (new_compare) + : "r" (orig_tick), "r" (adj)); + + __asm__ __volatile__("rd %%tick, %0" + : "=r" (new_tick)); + new_tick &= ~TICKCMP_IRQ_BIT; + + return ((long)(new_tick - (orig_tick+adj))) > 0L; +} + +static unsigned long tick_add_tick(unsigned long adj) +{ + unsigned long new_tick; + + /* Also need to handle Blackbird bug here too. */ + __asm__ __volatile__("rd %%tick, %0\n\t" + "add %0, %1, %0\n\t" + "wrpr %0, 0, %%tick\n\t" + : "=&r" (new_tick) + : "r" (adj)); + + return new_tick; +} + +static struct sparc64_tick_ops tick_operations __read_mostly = { + .name = "tick", + .init_tick = tick_init_tick, + .disable_irq = tick_disable_irq, + .get_tick = tick_get_tick, + .add_tick = tick_add_tick, + .add_compare = tick_add_compare, + .softint_mask = 1UL << 0, +}; + +struct sparc64_tick_ops *tick_ops __read_mostly = &tick_operations; + +static void stick_disable_irq(void) +{ + __asm__ __volatile__( + "wr %0, 0x0, %%asr25" + : /* no outputs */ + : "r" (TICKCMP_IRQ_BIT)); +} + +static void stick_init_tick(void) +{ + /* Writes to the %tick and %stick register are not + * allowed on sun4v. The Hypervisor controls that + * bit, per-strand. + */ + if (tlb_type != hypervisor) { + tick_disable_protection(); + tick_disable_irq(); + + /* Let the user get at STICK too. */ + __asm__ __volatile__( + " rd %%asr24, %%g2\n" + " andn %%g2, %0, %%g2\n" + " wr %%g2, 0, %%asr24" + : /* no outputs */ + : "r" (TICK_PRIV_BIT) + : "g1", "g2"); + } + + stick_disable_irq(); +} + +static unsigned long stick_get_tick(void) +{ + unsigned long ret; + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (ret)); + + return ret & ~TICK_PRIV_BIT; +} + +static unsigned long stick_add_tick(unsigned long adj) +{ + unsigned long new_tick; + + __asm__ __volatile__("rd %%asr24, %0\n\t" + "add %0, %1, %0\n\t" + "wr %0, 0, %%asr24\n\t" + : "=&r" (new_tick) + : "r" (adj)); + + return new_tick; +} + +static int stick_add_compare(unsigned long adj) +{ + unsigned long orig_tick, new_tick; + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (orig_tick)); + orig_tick &= ~TICKCMP_IRQ_BIT; + + __asm__ __volatile__("wr %0, 0, %%asr25" + : /* no outputs */ + : "r" (orig_tick + adj)); + + __asm__ __volatile__("rd %%asr24, %0" + : "=r" (new_tick)); + new_tick &= ~TICKCMP_IRQ_BIT; + + return ((long)(new_tick - (orig_tick+adj))) > 0L; +} + +static struct sparc64_tick_ops stick_operations __read_mostly = { + .name = "stick", + .init_tick = stick_init_tick, + .disable_irq = stick_disable_irq, + .get_tick = stick_get_tick, + .add_tick = stick_add_tick, + .add_compare = stick_add_compare, + .softint_mask = 1UL << 16, +}; + +/* On Hummingbird the STICK/STICK_CMPR register is implemented + * in I/O space. There are two 64-bit registers each, the + * first holds the low 32-bits of the value and the second holds + * the high 32-bits. + * + * Since STICK is constantly updating, we have to access it carefully. + * + * The sequence we use to read is: + * 1) read high + * 2) read low + * 3) read high again, if it rolled re-read both low and high again. + * + * Writing STICK safely is also tricky: + * 1) write low to zero + * 2) write high + * 3) write low + */ +#define HBIRD_STICKCMP_ADDR 0x1fe0000f060UL +#define HBIRD_STICK_ADDR 0x1fe0000f070UL + +static unsigned long __hbird_read_stick(void) +{ + unsigned long ret, tmp1, tmp2, tmp3; + unsigned long addr = HBIRD_STICK_ADDR+8; + + __asm__ __volatile__("ldxa [%1] %5, %2\n" + "1:\n\t" + "sub %1, 0x8, %1\n\t" + "ldxa [%1] %5, %3\n\t" + "add %1, 0x8, %1\n\t" + "ldxa [%1] %5, %4\n\t" + "cmp %4, %2\n\t" + "bne,a,pn %%xcc, 1b\n\t" + " mov %4, %2\n\t" + "sllx %4, 32, %4\n\t" + "or %3, %4, %0\n\t" + : "=&r" (ret), "=&r" (addr), + "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3) + : "i" (ASI_PHYS_BYPASS_EC_E), "1" (addr)); + + return ret; +} + +static void __hbird_write_stick(unsigned long val) +{ + unsigned long low = (val & 0xffffffffUL); + unsigned long high = (val >> 32UL); + unsigned long addr = HBIRD_STICK_ADDR; + + __asm__ __volatile__("stxa %%g0, [%0] %4\n\t" + "add %0, 0x8, %0\n\t" + "stxa %3, [%0] %4\n\t" + "sub %0, 0x8, %0\n\t" + "stxa %2, [%0] %4" + : "=&r" (addr) + : "0" (addr), "r" (low), "r" (high), + "i" (ASI_PHYS_BYPASS_EC_E)); +} + +static void __hbird_write_compare(unsigned long val) +{ + unsigned long low = (val & 0xffffffffUL); + unsigned long high = (val >> 32UL); + unsigned long addr = HBIRD_STICKCMP_ADDR + 0x8UL; + + __asm__ __volatile__("stxa %3, [%0] %4\n\t" + "sub %0, 0x8, %0\n\t" + "stxa %2, [%0] %4" + : "=&r" (addr) + : "0" (addr), "r" (low), "r" (high), + "i" (ASI_PHYS_BYPASS_EC_E)); +} + +static void hbtick_disable_irq(void) +{ + __hbird_write_compare(TICKCMP_IRQ_BIT); +} + +static void hbtick_init_tick(void) +{ + tick_disable_protection(); + + /* XXX This seems to be necessary to 'jumpstart' Hummingbird + * XXX into actually sending STICK interrupts. I think because + * XXX of how we store %tick_cmpr in head.S this somehow resets the + * XXX {TICK + STICK} interrupt mux. -DaveM + */ + __hbird_write_stick(__hbird_read_stick()); + + hbtick_disable_irq(); +} + +static unsigned long hbtick_get_tick(void) +{ + return __hbird_read_stick() & ~TICK_PRIV_BIT; +} + +static unsigned long hbtick_add_tick(unsigned long adj) +{ + unsigned long val; + + val = __hbird_read_stick() + adj; + __hbird_write_stick(val); + + return val; +} + +static int hbtick_add_compare(unsigned long adj) +{ + unsigned long val = __hbird_read_stick(); + unsigned long val2; + + val &= ~TICKCMP_IRQ_BIT; + val += adj; + __hbird_write_compare(val); + + val2 = __hbird_read_stick() & ~TICKCMP_IRQ_BIT; + + return ((long)(val2 - val)) > 0L; +} + +static struct sparc64_tick_ops hbtick_operations __read_mostly = { + .name = "hbtick", + .init_tick = hbtick_init_tick, + .disable_irq = hbtick_disable_irq, + .get_tick = hbtick_get_tick, + .add_tick = hbtick_add_tick, + .add_compare = hbtick_add_compare, + .softint_mask = 1UL << 0, +}; + +static unsigned long timer_ticks_per_nsec_quotient __read_mostly; + +int update_persistent_clock(struct timespec now) +{ + struct rtc_device *rtc = rtc_class_open("rtc0"); + int err = -1; + + if (rtc) { + err = rtc_set_mmss(rtc, now.tv_sec); + rtc_class_close(rtc); + } + + return err; +} + +unsigned long cmos_regs; +EXPORT_SYMBOL(cmos_regs); + +static struct resource rtc_cmos_resource; + +static struct platform_device rtc_cmos_device = { + .name = "rtc_cmos", + .id = -1, + .resource = &rtc_cmos_resource, + .num_resources = 1, +}; + +static int __devinit rtc_probe(struct of_device *op, const struct of_device_id *match) +{ + struct resource *r; + + printk(KERN_INFO "%s: RTC regs at 0x%lx\n", + op->node->full_name, op->resource[0].start); + + /* The CMOS RTC driver only accepts IORESOURCE_IO, so cons + * up a fake resource so that the probe works for all cases. + * When the RTC is behind an ISA bus it will have IORESOURCE_IO + * already, whereas when it's behind EBUS is will be IORESOURCE_MEM. + */ + + r = &rtc_cmos_resource; + r->flags = IORESOURCE_IO; + r->name = op->resource[0].name; + r->start = op->resource[0].start; + r->end = op->resource[0].end; + + cmos_regs = op->resource[0].start; + return platform_device_register(&rtc_cmos_device); +} + +static struct of_device_id __initdata rtc_match[] = { + { + .name = "rtc", + .compatible = "m5819", + }, + { + .name = "rtc", + .compatible = "isa-m5819p", + }, + { + .name = "rtc", + .compatible = "isa-m5823p", + }, + { + .name = "rtc", + .compatible = "ds1287", + }, + {}, +}; + +static struct of_platform_driver rtc_driver = { + .match_table = rtc_match, + .probe = rtc_probe, + .driver = { + .name = "rtc", + }, +}; + +static struct platform_device rtc_bq4802_device = { + .name = "rtc-bq4802", + .id = -1, + .num_resources = 1, +}; + +static int __devinit bq4802_probe(struct of_device *op, const struct of_device_id *match) +{ + + printk(KERN_INFO "%s: BQ4802 regs at 0x%lx\n", + op->node->full_name, op->resource[0].start); + + rtc_bq4802_device.resource = &op->resource[0]; + return platform_device_register(&rtc_bq4802_device); +} + +static struct of_device_id __initdata bq4802_match[] = { + { + .name = "rtc", + .compatible = "bq4802", + }, + {}, +}; + +static struct of_platform_driver bq4802_driver = { + .match_table = bq4802_match, + .probe = bq4802_probe, + .driver = { + .name = "bq4802", + }, +}; + +static unsigned char mostek_read_byte(struct device *dev, u32 ofs) +{ + struct platform_device *pdev = to_platform_device(dev); + void __iomem *regs = (void __iomem *) pdev->resource[0].start; + + return readb(regs + ofs); +} + +static void mostek_write_byte(struct device *dev, u32 ofs, u8 val) +{ + struct platform_device *pdev = to_platform_device(dev); + void __iomem *regs = (void __iomem *) pdev->resource[0].start; + + writeb(val, regs + ofs); +} + +static struct m48t59_plat_data m48t59_data = { + .read_byte = mostek_read_byte, + .write_byte = mostek_write_byte, +}; + +static struct platform_device m48t59_rtc = { + .name = "rtc-m48t59", + .id = 0, + .num_resources = 1, + .dev = { + .platform_data = &m48t59_data, + }, +}; + +static int __devinit mostek_probe(struct of_device *op, const struct of_device_id *match) +{ + struct device_node *dp = op->node; + + /* On an Enterprise system there can be multiple mostek clocks. + * We should only match the one that is on the central FHC bus. + */ + if (!strcmp(dp->parent->name, "fhc") && + strcmp(dp->parent->parent->name, "central") != 0) + return -ENODEV; + + printk(KERN_INFO "%s: Mostek regs at 0x%lx\n", + dp->full_name, op->resource[0].start); + + m48t59_rtc.resource = &op->resource[0]; + return platform_device_register(&m48t59_rtc); +} + +static struct of_device_id __initdata mostek_match[] = { + { + .name = "eeprom", + }, + {}, +}; + +static struct of_platform_driver mostek_driver = { + .match_table = mostek_match, + .probe = mostek_probe, + .driver = { + .name = "mostek", + }, +}; + +static struct platform_device rtc_sun4v_device = { + .name = "rtc-sun4v", + .id = -1, +}; + +static struct platform_device rtc_starfire_device = { + .name = "rtc-starfire", + .id = -1, +}; + +static int __init clock_init(void) +{ + if (this_is_starfire) + return platform_device_register(&rtc_starfire_device); + + if (tlb_type == hypervisor) + return platform_device_register(&rtc_sun4v_device); + + (void) of_register_driver(&rtc_driver, &of_platform_bus_type); + (void) of_register_driver(&mostek_driver, &of_platform_bus_type); + (void) of_register_driver(&bq4802_driver, &of_platform_bus_type); + + return 0; +} + +/* Must be after subsys_initcall() so that busses are probed. Must + * be before device_initcall() because things like the RTC driver + * need to see the clock registers. + */ +fs_initcall(clock_init); + +/* This is gets the master TICK_INT timer going. */ +static unsigned long sparc64_init_timers(void) +{ + struct device_node *dp; + unsigned long freq; + + dp = of_find_node_by_path("/"); + if (tlb_type == spitfire) { + unsigned long ver, manuf, impl; + + __asm__ __volatile__ ("rdpr %%ver, %0" + : "=&r" (ver)); + manuf = ((ver >> 48) & 0xffff); + impl = ((ver >> 32) & 0xffff); + if (manuf == 0x17 && impl == 0x13) { + /* Hummingbird, aka Ultra-IIe */ + tick_ops = &hbtick_operations; + freq = of_getintprop_default(dp, "stick-frequency", 0); + } else { + tick_ops = &tick_operations; + freq = local_cpu_data().clock_tick; + } + } else { + tick_ops = &stick_operations; + freq = of_getintprop_default(dp, "stick-frequency", 0); + } + + return freq; +} + +struct freq_table { + unsigned long clock_tick_ref; + unsigned int ref_freq; +}; +static DEFINE_PER_CPU(struct freq_table, sparc64_freq_table) = { 0, 0 }; + +unsigned long sparc64_get_clock_tick(unsigned int cpu) +{ + struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + + if (ft->clock_tick_ref) + return ft->clock_tick_ref; + return cpu_data(cpu).clock_tick; +} + +#ifdef CONFIG_CPU_FREQ + +static int sparc64_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + unsigned int cpu = freq->cpu; + struct freq_table *ft = &per_cpu(sparc64_freq_table, cpu); + + if (!ft->ref_freq) { + ft->ref_freq = freq->old; + ft->clock_tick_ref = cpu_data(cpu).clock_tick; + } + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || + (val == CPUFREQ_RESUMECHANGE)) { + cpu_data(cpu).clock_tick = + cpufreq_scale(ft->clock_tick_ref, + ft->ref_freq, + freq->new); + } + + return 0; +} + +static struct notifier_block sparc64_cpufreq_notifier_block = { + .notifier_call = sparc64_cpufreq_notifier +}; + +static int __init register_sparc64_cpufreq_notifier(void) +{ + + cpufreq_register_notifier(&sparc64_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + return 0; +} + +core_initcall(register_sparc64_cpufreq_notifier); + +#endif /* CONFIG_CPU_FREQ */ + +static int sparc64_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return tick_ops->add_compare(delta) ? -ETIME : 0; +} + +static void sparc64_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + case CLOCK_EVT_MODE_RESUME: + break; + + case CLOCK_EVT_MODE_SHUTDOWN: + tick_ops->disable_irq(); + break; + + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_UNUSED: + WARN_ON(1); + break; + }; +} + +static struct clock_event_device sparc64_clockevent = { + .features = CLOCK_EVT_FEAT_ONESHOT, + .set_mode = sparc64_timer_setup, + .set_next_event = sparc64_next_event, + .rating = 100, + .shift = 30, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, sparc64_events); + +void timer_interrupt(int irq, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned long tick_mask = tick_ops->softint_mask; + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(sparc64_events, cpu); + + clear_softint(tick_mask); + + irq_enter(); + + kstat_this_cpu.irqs[0]++; + + if (unlikely(!evt->event_handler)) { + printk(KERN_WARNING + "Spurious SPARC64 timer interrupt on cpu %d\n", cpu); + } else + evt->event_handler(evt); + + irq_exit(); + + set_irq_regs(old_regs); +} + +void __devinit setup_sparc64_timer(void) +{ + struct clock_event_device *sevt; + unsigned long pstate; + + /* Guarantee that the following sequences execute + * uninterrupted. + */ + __asm__ __volatile__("rdpr %%pstate, %0\n\t" + "wrpr %0, %1, %%pstate" + : "=r" (pstate) + : "i" (PSTATE_IE)); + + tick_ops->init_tick(); + + /* Restore PSTATE_IE. */ + __asm__ __volatile__("wrpr %0, 0x0, %%pstate" + : /* no outputs */ + : "r" (pstate)); + + sevt = &__get_cpu_var(sparc64_events); + + memcpy(sevt, &sparc64_clockevent, sizeof(*sevt)); - sevt->cpumask = cpumask_of_cpu(smp_processor_id()); ++ sevt->cpumask = cpumask_of(smp_processor_id()); + + clockevents_register_device(sevt); +} + +#define SPARC64_NSEC_PER_CYC_SHIFT 10UL + +static struct clocksource clocksource_tick = { + .rating = 100, + .mask = CLOCKSOURCE_MASK(64), + .shift = 16, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static void __init setup_clockevent_multiplier(unsigned long hz) +{ + unsigned long mult, shift = 32; + + while (1) { + mult = div_sc(hz, NSEC_PER_SEC, shift); + if (mult && (mult >> 32UL) == 0UL) + break; + + shift--; + } + + sparc64_clockevent.shift = shift; + sparc64_clockevent.mult = mult; +} + +static unsigned long tb_ticks_per_usec __read_mostly; + +void __delay(unsigned long loops) +{ + unsigned long bclock, now; + + bclock = tick_ops->get_tick(); + do { + now = tick_ops->get_tick(); + } while ((now-bclock) < loops); +} +EXPORT_SYMBOL(__delay); + +void udelay(unsigned long usecs) +{ + __delay(tb_ticks_per_usec * usecs); +} +EXPORT_SYMBOL(udelay); + +void __init time_init(void) +{ + unsigned long freq = sparc64_init_timers(); + + tb_ticks_per_usec = freq / USEC_PER_SEC; + + timer_ticks_per_nsec_quotient = + clocksource_hz2mult(freq, SPARC64_NSEC_PER_CYC_SHIFT); + + clocksource_tick.name = tick_ops->name; + clocksource_tick.mult = + clocksource_hz2mult(freq, + clocksource_tick.shift); + clocksource_tick.read = tick_ops->get_tick; + + printk("clocksource: mult[%x] shift[%d]\n", + clocksource_tick.mult, clocksource_tick.shift); + + clocksource_register(&clocksource_tick); + + sparc64_clockevent.name = tick_ops->name; + + setup_clockevent_multiplier(freq); + + sparc64_clockevent.max_delta_ns = + clockevent_delta2ns(0x7fffffffffffffffUL, &sparc64_clockevent); + sparc64_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &sparc64_clockevent); + + printk("clockevent: mult[%lx] shift[%d]\n", + sparc64_clockevent.mult, sparc64_clockevent.shift); + + setup_sparc64_timer(); +} + +unsigned long long sched_clock(void) +{ + unsigned long ticks = tick_ops->get_tick(); + + return (ticks * timer_ticks_per_nsec_quotient) + >> SPARC64_NSEC_PER_CYC_SHIFT; +} + +int __devinit read_current_timer(unsigned long *timer_val) +{ + *timer_val = tick_ops->get_tick(); + return 0; +} diff --cc arch/x86/include/asm/irq.h index 28e409fc73f3,4bb732e45a85..592688ed04d3 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@@ -31,9 -31,13 +31,9 @@@ static inline int irq_canonicalize(int # endif #endif -#ifdef CONFIG_IRQBALANCE -extern int irqbalance_disable(char *str); -#endif - #ifdef CONFIG_HOTPLUG_CPU #include - extern void fixup_irqs(cpumask_t map); + extern void fixup_irqs(void); #endif extern unsigned int do_IRQ(struct pt_regs *regs); diff --cc arch/x86/kernel/io_apic.c index 74917658b004,1cbf7c8d46e0..62ecfc991e1e --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@@ -152,25 -152,25 +152,25 @@@ static struct irq_cfg irq_cfgx[] = #else static struct irq_cfg irq_cfgx[NR_IRQS] = { #endif - [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + [0] = { .vector = IRQ0_VECTOR, }, + [1] = { .vector = IRQ1_VECTOR, }, + [2] = { .vector = IRQ2_VECTOR, }, + [3] = { .vector = IRQ3_VECTOR, }, + [4] = { .vector = IRQ4_VECTOR, }, + [5] = { .vector = IRQ5_VECTOR, }, + [6] = { .vector = IRQ6_VECTOR, }, + [7] = { .vector = IRQ7_VECTOR, }, + [8] = { .vector = IRQ8_VECTOR, }, + [9] = { .vector = IRQ9_VECTOR, }, + [10] = { .vector = IRQ10_VECTOR, }, + [11] = { .vector = IRQ11_VECTOR, }, + [12] = { .vector = IRQ12_VECTOR, }, + [13] = { .vector = IRQ13_VECTOR, }, + [14] = { .vector = IRQ14_VECTOR, }, + [15] = { .vector = IRQ15_VECTOR, }, }; -void __init arch_early_irq_init(void) +int __init arch_early_irq_init(void) { struct irq_cfg *cfg; struct irq_desc *desc; @@@ -183,9 -183,11 +183,13 @@@ for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; + alloc_bootmem_cpumask_var(&cfg[i].domain); + alloc_bootmem_cpumask_var(&cfg[i].old_domain); + if (i < NR_IRQS_LEGACY) + cpumask_setall(cfg[i].domain); } + + return 0; } #ifdef CONFIG_SPARSE_IRQ @@@ -1349,8 -1400,10 +1404,8 @@@ void __setup_vector_irq(int cpu /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { - if (!desc) - continue; cfg = desc->chip_data; - if (!cpu_isset(cpu, cfg->domain)) + if (!cpumask_test_cpu(cpu, cfg->domain)) continue; vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; diff --cc kernel/sched.c index fff1c4a20b65,756d981d91a4..27ba1d642f0f --- a/kernel/sched.c +++ b/kernel/sched.c @@@ -5426,8 -5477,17 +5495,16 @@@ long sched_setaffinity(pid_t pid, cons get_task_struct(p); read_unlock(&tasklist_lock); + if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_put_task; + } + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) { + retval = -ENOMEM; + goto out_free_cpus_allowed; + } retval = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && - !capable(CAP_SYS_NICE)) + if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --cc kernel/time/tick-sched.c index 8f3fc2582d38,70f872c71f4e..76a574bbef97 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@@ -282,31 -282,8 +282,31 @@@ void tick_nohz_stop_sched_tick(int inid /* Schedule the tick, if we are at least one jiffie off */ if ((long)delta_jiffies >= 1) { + /* + * calculate the expiry time for the next timer wheel + * timer + */ + expires = ktime_add_ns(last_update, tick_period.tv64 * + delta_jiffies); + + /* + * If this cpu is the one which updates jiffies, then + * give up the assignment and let it be taken by the + * cpu which runs the tick timer next, which might be + * this cpu as well. If we don't drop this here the + * jiffies might be stale and do_timer() never + * invoked. + */ + if (cpu == tick_do_timer_cpu) + tick_do_timer_cpu = TICK_DO_TIMER_NONE; + if (delta_jiffies > 1) - cpu_set(cpu, nohz_cpu_mask); + cpumask_set_cpu(cpu, nohz_cpu_mask); + + /* Skip reprogram of event if its not changed */ + if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) + goto out; + /* * nohz_stop_sched_tick can be called several times before * the nohz_restart_sched_tick is called. This happens when