perfcounters: rename struct hw_perf_counter_ops into struct pmu
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 6e27913ec0d88735747e8dec8bdd7325b026da6a..d9bbe5efc64924c640c91f9b10b351339057a659 100644
@@ -41,6 +41,8 @@ struct power_pmu *ppmu;
  */
 static unsigned int freeze_counters_kernel = MMCR0_FCS;
 
+static void perf_counter_interrupt(struct pt_regs *regs);
+
 void perf_counter_print_debug(void)
 {
 }
@@ -254,7 +256,7 @@ static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new)
        return 0;
 }
 
-static void power_perf_read(struct perf_counter *counter)
+static void power_pmu_read(struct perf_counter *counter)
 {
        long val, delta, prev;
 
@@ -304,6 +306,15 @@ u64 hw_perf_save_disable(void)
                        cpuhw->pmcs_enabled = 1;
                }
 
+               /*
+                * Disable instruction sampling if it was enabled
+                */
+               if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+                       mtspr(SPRN_MMCRA,
+                             cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+                       mb();
+               }
+
                /*
                 * Set the 'freeze counters' bit.
                 * The barrier is to make sure the mtspr has been
@@ -345,12 +356,11 @@ void hw_perf_restore(u64 disable)
         * (possibly updated for removal of counters).
         */
        if (!cpuhw->n_added) {
-               mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+               mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
                mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-               mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
                if (cpuhw->n_counters == 0)
                        get_lppaca()->pmcregs_in_use = 0;
-               goto out;
+               goto out_enable;
        }
 
        /*
@@ -383,7 +393,7 @@ void hw_perf_restore(u64 disable)
         * Then unfreeze the counters.
         */
        get_lppaca()->pmcregs_in_use = 1;
-       mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+       mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
        mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
        mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
                                | MMCR0_FC);
@@ -395,7 +405,7 @@ void hw_perf_restore(u64 disable)
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
                if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
-                       power_perf_read(counter);
+                       power_pmu_read(counter);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
                }
@@ -417,11 +427,22 @@ void hw_perf_restore(u64 disable)
                atomic64_set(&counter->hw.prev_count, val);
                counter->hw.idx = hwc_index[i] + 1;
                write_pmc(counter->hw.idx, val);
+               perf_counter_update_userpage(counter);
        }
-       mb();
        cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+
+ out_enable:
+       mb();
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 
+       /*
+        * Enable instruction sampling if necessary
+        */
+       if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+               mb();
+               mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+       }
+
  out:
        local_irq_restore(flags);
 }
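
The two hunks above move instruction sampling out of the main freeze/unfreeze path: hw_perf_save_disable() clears MMCRA_SAMPLE_ENABLE before freezing the counters, and hw_perf_restore() keeps the bit cleared until MMCR0 has unfrozen the counters at out_enable, restoring it last. A minimal sketch of that pairing, assuming the kernel's mtspr()/mb() and SPRN_MMCRA definitions are in scope; the helper names are illustrative and not part of this diff:

static inline void mmcra_sample_disable(unsigned long mmcra)
{
	/* Turn instruction sampling off before the counters are frozen. */
	if (mmcra & MMCRA_SAMPLE_ENABLE) {
		mtspr(SPRN_MMCRA, mmcra & ~MMCRA_SAMPLE_ENABLE);
		mb();
	}
}

static inline void mmcra_sample_restore(unsigned long mmcra)
{
	/* Turn sampling back on only after MMCR0 has unfrozen the counters. */
	if (mmcra & MMCRA_SAMPLE_ENABLE) {
		mb();
		mtspr(SPRN_MMCRA, mmcra);
	}
}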
@@ -454,8 +475,9 @@ static void counter_sched_in(struct perf_counter *counter, int cpu)
 {
        counter->state = PERF_COUNTER_STATE_ACTIVE;
        counter->oncpu = cpu;
+       counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped;
        if (is_software_counter(counter))
-               counter->hw_ops->enable(counter);
+               counter->pmu->enable(counter);
 }
 
 /*
@@ -511,7 +533,7 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
  * re-enable the PMU in order to get hw_perf_restore to do the
  * actual work of reconfiguring the PMU.
  */
-static int power_perf_enable(struct perf_counter *counter)
+static int power_pmu_enable(struct perf_counter *counter)
 {
        struct cpu_hw_counters *cpuhw;
        unsigned long flags;
@@ -551,7 +573,7 @@ static int power_perf_enable(struct perf_counter *counter)
 /*
  * Remove a counter from the PMU.
  */
-static void power_perf_disable(struct perf_counter *counter)
+static void power_pmu_disable(struct perf_counter *counter)
 {
        struct cpu_hw_counters *cpuhw;
        long i;
@@ -561,7 +583,7 @@ static void power_perf_disable(struct perf_counter *counter)
        local_irq_save(flags);
        pmudis = hw_perf_save_disable();
 
-       power_perf_read(counter);
+       power_pmu_read(counter);
 
        cpuhw = &__get_cpu_var(cpu_hw_counters);
        for (i = 0; i < cpuhw->n_counters; ++i) {
@@ -572,6 +594,7 @@ static void power_perf_disable(struct perf_counter *counter)
                        ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr);
                        write_pmc(counter->hw.idx, 0);
                        counter->hw.idx = 0;
+                       perf_counter_update_userpage(counter);
                        break;
                }
        }
@@ -584,30 +607,49 @@ static void power_perf_disable(struct perf_counter *counter)
        local_irq_restore(flags);
 }
 
-struct hw_perf_counter_ops power_perf_ops = {
-       .enable = power_perf_enable,
-       .disable = power_perf_disable,
-       .read = power_perf_read
+struct pmu power_pmu = {
+       .enable         = power_pmu_enable,
+       .disable        = power_pmu_disable,
+       .read           = power_pmu_read,
 };
 
-const struct hw_perf_counter_ops *
-hw_perf_counter_init(struct perf_counter *counter)
+/* Number of perf_counters counting hardware events */
+static atomic_t num_counters;
+/* Used to avoid races in calling reserve/release_pmc_hardware */
+static DEFINE_MUTEX(pmc_reserve_mutex);
+
+/*
+ * Release the PMU if this is the last perf_counter.
+ */
+static void hw_perf_counter_destroy(struct perf_counter *counter)
+{
+       if (!atomic_add_unless(&num_counters, -1, 1)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_dec_return(&num_counters) == 0)
+                       release_pmc_hardware();
+               mutex_unlock(&pmc_reserve_mutex);
+       }
+}
+
+const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 {
        unsigned long ev;
        struct perf_counter *ctrs[MAX_HWCOUNTERS];
        unsigned int events[MAX_HWCOUNTERS];
        int n;
+       int err;
 
        if (!ppmu)
-               return NULL;
+               return ERR_PTR(-ENXIO);
        if ((s64)counter->hw_event.irq_period < 0)
-               return NULL;
-       ev = counter->hw_event.type;
-       if (!counter->hw_event.raw) {
-               if (ev >= ppmu->n_generic ||
-                   ppmu->generic_events[ev] == 0)
-                       return NULL;
+               return ERR_PTR(-EINVAL);
+       if (!perf_event_raw(&counter->hw_event)) {
+               ev = perf_event_id(&counter->hw_event);
+               if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
+                       return ERR_PTR(-EOPNOTSUPP);
                ev = ppmu->generic_events[ev];
+       } else {
+               ev = perf_event_config(&counter->hw_event);
        }
        counter->hw.config_base = ev;
        counter->hw.idx = 0;
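
For reference, the renamed interface has roughly the following shape as this file uses it; this is a sketch inferred from the three callbacks registered above, and the authoritative definition in include/linux/perf_counter.h may carry additional members:

struct pmu {
	int	(*enable)(struct perf_counter *counter);
	void	(*disable)(struct perf_counter *counter);
	void	(*read)(struct perf_counter *counter);
};

Generic code now reaches these callbacks through counter->pmu (as in the counter_sched_in() hunk above) instead of the old counter->hw_ops pointer.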
@@ -630,71 +672,39 @@ hw_perf_counter_init(struct perf_counter *counter)
                n = collect_events(counter->group_leader, ppmu->n_counter - 1,
                                   ctrs, events);
                if (n < 0)
-                       return NULL;
+                       return ERR_PTR(-EINVAL);
        }
        events[n] = ev;
+       ctrs[n] = counter;
        if (check_excludes(ctrs, n, 1))
-               return NULL;
+               return ERR_PTR(-EINVAL);
        if (power_check_constraints(events, n + 1))
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        counter->hw.config = events[n];
        atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
-       return &power_perf_ops;
-}
 
-/*
- * Handle wakeups.
- */
-void perf_counter_do_pending(void)
-{
-       int i;
-       struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
-       struct perf_counter *counter;
-
-       set_perf_counter_pending(0);
-       for (i = 0; i < cpuhw->n_counters; ++i) {
-               counter = cpuhw->counter[i];
-               if (counter && counter->wakeup_pending) {
-                       counter->wakeup_pending = 0;
-                       wake_up(&counter->waitq);
-               }
-       }
-}
-
-/*
- * Record data for an irq counter.
- * This function was lifted from the x86 code; maybe it should
- * go in the core?
- */
-static void perf_store_irq_data(struct perf_counter *counter, u64 data)
-{
-       struct perf_data *irqdata = counter->irqdata;
-
-       if (irqdata->len > PERF_DATA_BUFLEN - sizeof(u64)) {
-               irqdata->overrun++;
-       } else {
-               u64 *p = (u64 *) &irqdata->data[irqdata->len];
-
-               *p = data;
-               irqdata->len += sizeof(u64);
+       /*
+        * See if we need to reserve the PMU.
+        * If no counters are currently in use, then we have to take a
+        * mutex to ensure that we don't race with another task doing
+        * reserve_pmc_hardware or release_pmc_hardware.
+        */
+       err = 0;
+       if (!atomic_inc_not_zero(&num_counters)) {
+               mutex_lock(&pmc_reserve_mutex);
+               if (atomic_read(&num_counters) == 0 &&
+                   reserve_pmc_hardware(perf_counter_interrupt))
+                       err = -EBUSY;
+               else
+                       atomic_inc(&num_counters);
+               mutex_unlock(&pmc_reserve_mutex);
        }
-}
+       counter->destroy = hw_perf_counter_destroy;
 
-/*
- * Record all the values of the counters in a group
- */
-static void perf_handle_group(struct perf_counter *counter)
-{
-       struct perf_counter *leader, *sub;
-
-       leader = counter->group_leader;
-       list_for_each_entry(sub, &leader->sibling_list, list_entry) {
-               if (sub != counter)
-                       sub->hw_ops->read(sub);
-               perf_store_irq_data(counter, sub->hw_event.type);
-               perf_store_irq_data(counter, atomic64_read(&sub->count));
-       }
+       if (err)
+               return ERR_PTR(err);
+       return &power_pmu;
 }
 
 /*
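
hw_perf_counter_init() now reports failure with ERR_PTR() codes instead of a bare NULL. A sketch of the caller-side handling this implies, assuming the usual IS_ERR()/PTR_ERR() helpers; the snippet is illustrative and not taken from the generic perf_counter code:

	const struct pmu *pmu;

	pmu = hw_perf_counter_init(counter);
	if (IS_ERR(pmu))
		return PTR_ERR(pmu);	/* -ENXIO, -EINVAL or -EOPNOTSUPP */
	counter->pmu = pmu;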
@@ -703,7 +713,7 @@ static void perf_handle_group(struct perf_counter *counter)
  * here so there is no possibility of being interrupted.
  */
 static void record_and_restart(struct perf_counter *counter, long val,
-                              struct pt_regs *regs)
+                              struct pt_regs *regs, int nmi)
 {
        s64 prev, delta, left;
        int record = 0;
@@ -732,24 +742,13 @@ static void record_and_restart(struct perf_counter *counter, long val,
        write_pmc(counter->hw.idx, val);
        atomic64_set(&counter->hw.prev_count, val);
        atomic64_set(&counter->hw.period_left, left);
+       perf_counter_update_userpage(counter);
 
        /*
         * Finally record data if requested.
         */
-       if (record) {
-               switch (counter->hw_event.record_type) {
-               case PERF_RECORD_SIMPLE:
-                       break;
-               case PERF_RECORD_IRQ:
-                       perf_store_irq_data(counter, instruction_pointer(regs));
-                       counter->wakeup_pending = 1;
-                       break;
-               case PERF_RECORD_GROUP:
-                       perf_handle_group(counter);
-                       counter->wakeup_pending = 1;
-                       break;
-               }
-       }
+       if (record)
+               perf_counter_overflow(counter, nmi, regs, 0);
 }
 
 /*
@@ -761,7 +760,18 @@ static void perf_counter_interrupt(struct pt_regs *regs)
        struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
        struct perf_counter *counter;
        long val;
-       int need_wakeup = 0, found = 0;
+       int found = 0;
+       int nmi;
+
+       /*
+        * If interrupts were soft-disabled when this PMU interrupt
+        * occurred, treat it as an NMI.
+        */
+       nmi = !regs->softe;
+       if (nmi)
+               nmi_enter();
+       else
+               irq_enter();
 
        for (i = 0; i < cpuhw->n_counters; ++i) {
                counter = cpuhw->counter[i];
@@ -769,9 +779,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
                if ((int)val < 0) {
                        /* counter has overflowed */
                        found = 1;
-                       record_and_restart(counter, val, regs);
-                       if (counter->wakeup_pending)
-                               need_wakeup = 1;
+                       record_and_restart(counter, val, regs, nmi);
                }
        }
 
@@ -798,21 +806,10 @@ static void perf_counter_interrupt(struct pt_regs *regs)
         */
        mtspr(SPRN_MMCR0, cpuhw->mmcr[0]);
 
-       /*
-        * If we need a wakeup, check whether interrupts were soft-enabled
-        * when we took the interrupt.  If they were, we can wake stuff up
-        * immediately; otherwise we'll have to set a flag and do the
-        * wakeup when interrupts get soft-enabled.
-        */
-       if (need_wakeup) {
-               if (regs->softe) {
-                       irq_enter();
-                       perf_counter_do_pending();
-                       irq_exit();
-               } else {
-                       set_perf_counter_pending(1);
-               }
-       }
+       if (nmi)
+               nmi_exit();
+       else
+               irq_exit();
 }
 
 void hw_perf_counter_setup(int cpu)
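
Condensed view of the context handling introduced in the interrupt-path hunks above; the handler body is elided and the function name is illustrative. A PMU exception taken while interrupts were soft-disabled (regs->softe clear) is treated as an NMI, and the flag is passed down so perf_counter_overflow() can defer wakeups rather than performing them from this context, which is what the removed need_wakeup/set_perf_counter_pending() code used to do by hand:

static void pmu_exception_sketch(struct pt_regs *regs)
{
	int nmi = !regs->softe;

	if (nmi)
		nmi_enter();
	else
		irq_enter();

	/* ... read the PMCs, record_and_restart(counter, val, regs, nmi) ... */

	if (nmi)
		nmi_exit();
	else
		irq_exit();
}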
@@ -823,26 +820,34 @@ void hw_perf_counter_setup(int cpu)
        cpuhw->mmcr[0] = MMCR0_FC;
 }
 
+extern struct power_pmu power4_pmu;
 extern struct power_pmu ppc970_pmu;
+extern struct power_pmu power5_pmu;
+extern struct power_pmu power5p_pmu;
 extern struct power_pmu power6_pmu;
 
 static int init_perf_counters(void)
 {
        unsigned long pvr;
 
-       if (reserve_pmc_hardware(perf_counter_interrupt)) {
-               printk(KERN_ERR "Couldn't init performance monitor subsystem\n");
-               return -EBUSY;
-       }
-
        /* XXX should get this from cputable */
        pvr = mfspr(SPRN_PVR);
        switch (PVR_VER(pvr)) {
+       case PV_POWER4:
+       case PV_POWER4p:
+               ppmu = &power4_pmu;
+               break;
        case PV_970:
        case PV_970FX:
        case PV_970MP:
                ppmu = &ppc970_pmu;
                break;
+       case PV_POWER5:
+               ppmu = &power5_pmu;
+               break;
+       case PV_POWER5p:
+               ppmu = &power5p_pmu;
+               break;
        case 0x3e:
                ppmu = &power6_pmu;
                break;