perfcounters: rename struct hw_perf_counter_ops into struct pmu
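
The architecture-level counter ops table is now simply called "struct pmu".  A
minimal sketch of the renamed interface and of how this file hands it back to
the core, showing only the fields this file actually initialises (the real
definition lives in include/linux/perf_counter.h and may carry more members):

	/* formerly struct hw_perf_counter_ops */
	struct pmu {
		int  (*enable)(struct perf_counter *counter);	/* add the counter to the PMU */
		void (*disable)(struct perf_counter *counter);	/* remove the counter from the PMU */
		void (*read)(struct perf_counter *counter);	/* fold the hardware count into the counter */
	};

	/* powerpc exposes one instance, filled with the renamed callbacks ... */
	struct pmu power_pmu = {
		.enable		= power_pmu_enable,
		.disable	= power_pmu_disable,
		.read		= power_pmu_read,
	};

	/* ... and hw_perf_counter_init() now returns it, or an ERR_PTR() value
	   (e.g. -ENXIO, -EINVAL) instead of NULL on failure. */
	const struct pmu *hw_perf_counter_init(struct perf_counter *counter);
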
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index df007fe0cc0bb6e6c989dc273a82349df8e5af99..d9bbe5efc64924c640c91f9b10b351339057a659 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -41,6 +41,8 @@ struct power_pmu *ppmu;
  */
 static unsigned int freeze_counters_kernel = MMCR0_FCS;
 
+static void perf_counter_interrupt(struct pt_regs *regs);
+
 void perf_counter_print_debug(void)
 {
 }
@@ -254,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
        return 0;
 }
 
-static void power_perf_read(struct perf_counter *counter)
+static void power_pmu_read(struct perf_counter *counter)
 {
        long val, delta, prev;
 
@@ -304,6 +306,15 @@ u64 hw_perf_save_disable(void)
                        cpuhw->pmcs_enabled = 1;
                }
 
+               /*
+                * Disable instruction sampling if it was enabled
+                */
+               if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+                       mtspr(SPRN_MMCRA,
+                             cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+                       mb();
+               }
+
                /*
                 * Set the 'freeze counters' bit.
                 * The barrier is to make sure the mtspr has been
@@ -345,12 +356,11 @@ void hw_perf_restore(u64 disable)
         * (possibly updated for removal of counters).
         */
        if (!cpuhw->n_added) {
-               mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+               mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
                mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-               mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
                if (cpuhw->n_counters == 0)
                        get_lppaca()->pmcregs_in_use = 0;
-               goto out;
+               goto out_enable;
        }
 
        /*
@@ -383,7 +393,7 @@ void hw_perf_restore(u64 disable)
         * Then unfreeze the counters.
         */
        get_lppaca()->pmcregs_in_use = 1;
-       mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+       mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
        mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
        mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
                                | MMCR0_FC);
@@ -395,7 +405,7 @@ void hw_perf_restore(u64 disable)
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
-                       power_perf_read(counter);
+                       power_pmu_read(counter);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
                }
@@ -419,10 +429,20 @@ void hw_perf_restore(u64 disable)
                write_pmc(counter->hw.idx, val);
                perf_counter_update_userpage(counter);
        }
-       mb();
        cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+       mb();
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 
+       /*
+        * Enable instruction sampling if necessary
+        */
+       if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+               mb();
+               mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+       }
+
  out:
        local_irq_restore(flags);
 }
@@ -455,10 +475,9 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
        counter->state = PERF_COUNTER_STATE_ACTIVE;
        counter->oncpu = cpu;
-       counter->tstamp_running += counter->ctx->time_now -
-               counter->tstamp_stopped;
+       counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
        if (is_software_counter(counter))
-               counter->hw_ops->enable(counter);
+               counter->pmu->enable(counter);
 }
 
 /*
@@ -514,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
  * re-enable the PMU in order to get hw_perf_restore to do the
  * actual work of reconfiguring the PMU.
  */
-static int power_perf_enable(struct perf_counter *counter)
+static int power_pmu_enable(struct perf_counter *counter)
 {
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
@@ -554,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter)
 /*
  * Remove a counter from the PMU.
  */
-static void power_perf_disable(struct perf_counter *counter)
+static void power_pmu_disable(struct perf_counter *counter)
 {
        struct cpu_hw_counters *cpuhw;
        long i;
@@ -564,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter)
        local_irq_save(flags);
        pmudis = hw_perf_save_disable();
 
-       power_perf_read(counter);
+       power_pmu_read(counter);
 
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        for (i = 0; i < cpuhw->n_counters; ++i) {
@@ -588,28 +607,46 @@ static void power_perf_disable(struct perf_counter *counter)
        local_irq_restore(flags);
 }
 
-struct hw_perf_counter_ops power_perf_ops = {
-       .enable = power_perf_enable,
-       .disable = power_perf_disable,
-       .read = power_perf_read
+struct pmu power_pmu = {
+       .enable         = power_pmu_enable,
+       .disable        = power_pmu_disable,
+       .read           = power_pmu_read,
 };
 
-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+       if (!atomic_add_unless(&num_counters, -1, 1)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_dec_return(&num_counters) == 0)
+                       release_pmc_hardware();
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+}
+
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
        unsigned long ev;
        struct perf_counter *ctrs[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        int n;
+       int err;
 
        if (!ppmu)
-               return NULL;
+               return ERR_PTR(-ENXIO);
        if ((s64)counter->hw_event.irq_period < 0)
-               return NULL;
+               return ERR_PTR(-EINVAL);
        if (!perf_event_raw(&counter->hw_event)) {
                ev = perf_event_id(&counter->hw_event);
                if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
-                       return NULL;
+                       return ERR_PTR(-EOPNOTSUPP);
                ev = ppmu->generic_events[ev];
        } else {
                ev = perf_event_config(&counter->hw_event);
@@ -635,36 +672,39 @@ hw_perf_counter_init(struct perf_counter *counter)
                n = collect_events(counter->group_leader, ppmu->n_counter - 1,
                                   ctrs, events);
                if (n < 0)
-                       return NULL;
+                       return ERR_PTR(-EINVAL);
        }
        events[n] = ev;
        ctrs[n] = counter;
        if (check_excludes(ctrs, n, 1))
-               return NULL;
+               return ERR_PTR(-EINVAL);
        if (power_check_constraints(events, n + 1))
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        counter->hw.config = events[n];
        atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
-       return &power_perf_ops;
-}
 
-/*
- * Handle wakeups.
- */
-void perf_counter_do_pending(void)
-{
-       int i;
-       struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
-       struct perf_counter *counter;
-
-       for (i = 0; i < cpuhw->n_counters; ++i) {
-               counter = cpuhw->counter[i];
-               if (counter && counter->wakeup_pending) {
-                       counter->wakeup_pending = 0;
-                       wake_up(&counter->waitq);
-               }
+       /*
+        * See if we need to reserve the PMU.
+        * If no counters are currently in use, then we have to take a
+        * mutex to ensure that we don't race with another task doing
+        * reserve_pmc_hardware or release_pmc_hardware.
+        */
+       err = 0;
+       if (!atomic_inc_not_zero(&num_counters)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_read(&num_counters) == 0 &&
+                   reserve_pmc_hardware(perf_counter_interrupt))
+                       err = -EBUSY;
+               else
+                       atomic_inc(&num_counters);
+               mutex_unlock(&pmc_reserve_mutex);
        }
+       counter->destroy = hw_perf_counter_destroy;
+
+       if (err)
+               return ERR_PTR(err);
+       return &power_pmu;
 }
 
 /*
@@ -673,7 +713,7 @@ void perf_counter_do_pending(void)
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_counter *counter, long val,
-                              struct pt_regs *regs)
+                              struct pt_regs *regs, int nmi)
 {
        s64 prev, delta, left;
        int record = 0;
@@ -708,7 +748,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
         * Finally record data if requested.
         */
        if (record)
-               perf_counter_output(counter, 1, regs);
+               perf_counter_overflow(counter, nmi, regs, 0);
 }
 
 /*
@@ -720,7 +760,18 @@ static void perf_counter_interrupt(struct pt_regs *regs)
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;
        long val;
-       int need_wakeup = 0, found = 0;
+       int found = 0;
+       int nmi;
+
+       /*
+        * If interrupts were soft-disabled when this PMU interrupt
+        * occurred, treat it as an NMI.
+        */
+       nmi = !regs->softe;
+       if (nmi)
+               nmi_enter();
+       else
+               irq_enter();
 
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
@@ -728,7 +779,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
                if ((int)val < 0) {
                        /* counter has overflowed */
                        found = 1;
-                       record_and_restart(counter, val, regs);
+                       record_and_restart(counter, val, regs, nmi);
                }
        }
 
@@ -755,18 +806,10 @@ static void perf_counter_interrupt(struct pt_regs *regs)
         */
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 
-       /*
-        * If we need a wakeup, check whether interrupts were soft-enabled
-        * when we took the interrupt.  If they were, we can wake stuff up
-        * immediately; otherwise we'll have do the wakeup when interrupts
-        * get soft-enabled.
-        */
-       if (get_perf_counter_pending() && regs->softe) {
-               irq_enter();
-               clear_perf_counter_pending();
-               perf_counter_do_pending();
+       if (nmi)
+               nmi_exit();
+       else
                irq_exit();
-       }
 }
 
 void hw_perf_counter_setup(int cpu)
@@ -787,11 +830,6 @@ static int init_perf_counters(void)
 {
        unsigned long pvr;
 
-       if (reserve_pmc_hardware(perf_counter_interrupt)) {
-               printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
-               return -EBUSY;
-       }
-
        /* XXX should get this from cputable */
        pvr = mfspr(SPRN_PVR);
        switch (PVR_VER(pvr)) {