Merge tag 'powerpc-4.17-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...

author Linus Torvalds <torvalds@linux-foundation.org>

Sat, 28 Apr 2018 16:45:34 +0000 (09:45 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 28 Apr 2018 16:45:34 +0000 (09:45 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Apr 2018 16:45:34 +0000 (09:45 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 28 Apr 2018 16:45:34 +0000 (09:45 -0700)
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h

index d1c2d2e658cf4d5b1d3c3718a377efa345067128..2f3ff7a278815a131f9ebec73bffcdaedf0abab3 100644 (file)
--- a/arch/powerpc/include/asm/powernv.h
+++ b/arch/powerpc/include/asm/powernv.h
@@ -15,7 +15,7 @@
  extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
  extern struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                         unsigned long flags,
-                       struct npu_context *(*cb)(struct npu_context *, void *),
+                       void (*cb)(struct npu_context *, void *),
                         void *priv);
  extern void pnv_npu2_destroy_context(struct npu_context *context,
                                 struct pci_dev *gpdev);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c

index fe6fc63251fec70e7cf2dee7f6deab47fe8ab256..38c5b4764bfed0e0aeb647418f53875bb0f7b2a0 100644 (file)
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -441,7 +441,6 @@ static int mce_handle_ierror(struct pt_regs *regs,
                                         if (pfn != ULONG_MAX) {
                                                 *phys_addr =
                                                         (pfn << PAGE_SHIFT);
-                                               handled = 1;
                                         }
                                 }
                         }
@@ -532,9 +531,7 @@ static int mce_handle_derror(struct pt_regs *regs,
                          * kernel/exception-64s.h
                          */
                         if (get_paca()->in_mce < MAX_MCE_DEPTH)
-                               if (!mce_find_instr_ea_and_pfn(regs, addr,
-                                                               phys_addr))
-                                       handled = 1;
+                               mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
                 }
                 found = 1;
         }
@@ -572,7 +569,7 @@ static long mce_handle_error(struct pt_regs *regs,
                 const struct mce_ierror_table itable[])
  {
         struct mce_error_info mce_err = { 0 };
-       uint64_t addr, phys_addr;
+       uint64_t addr, phys_addr = ULONG_MAX;
         uint64_t srr1 = regs->msr;
         long handled;
  
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c

index e16ec7b3b427ea2d2ce2ca0a1229b16df51dc0e1..9ca7148b5881a098abf7bf0a3eff99536f99a7ec 100644 (file)
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -566,10 +566,35 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
  #endif
  
  #ifdef CONFIG_NMI_IPI
-static void stop_this_cpu(struct pt_regs *regs)
-#else
+static void nmi_stop_this_cpu(struct pt_regs *regs)
+{
+       /*
+        * This is a special case because it never returns, so the NMI IPI
+        * handling would never mark it as done, which makes any later
+        * smp_send_nmi_ipi() call spin forever. Mark it done now.
+        *
+        * IRQs are already hard disabled by the smp_handle_nmi_ipi.
+        */
+       nmi_ipi_lock();
+       nmi_ipi_busy_count--;
+       nmi_ipi_unlock();
+
+       /* Remove this CPU */
+       set_cpu_online(smp_processor_id(), false);
+
+       spin_begin();
+       while (1)
+               spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+       smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
+}
+
+#else /* CONFIG_NMI_IPI */
+
  static void stop_this_cpu(void *dummy)
-#endif
  {
         /* Remove this CPU */
         set_cpu_online(smp_processor_id(), false);
@@ -582,12 +607,22 @@ static void stop_this_cpu(void *dummy)
  
  void smp_send_stop(void)
  {
-#ifdef CONFIG_NMI_IPI
-       smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, stop_this_cpu, 1000000);
-#else
+       static bool stopped = false;
+
+       /*
+        * Prevent waiting on csd lock from a previous smp_send_stop.
+        * This is racy, but in general callers try to do the right
+        * thing and only fire off one smp_send_stop (e.g., see
+        * kernel/panic.c)
+        */
+       if (stopped)
+               return;
+
+       stopped = true;
+
         smp_call_function(stop_this_cpu, NULL, 0);
-#endif
  }
+#endif /* CONFIG_NMI_IPI */
  
  struct thread_info *current_set[NR_CPUS];
  
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c

index 6038e2e7aee03c2b29edb65624be370f01a6c928..876d4f294fdd89fc160439073a3c04c9c8e81174 100644 (file)
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -305,6 +305,13 @@ void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
  }
  
+#ifdef CONFIG_ALTIVEC
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
+{
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+}
+#endif
+
  void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
  {
         kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c

index 737f8a4632ccc68abfdd61a1900380834b125340..c3c39b02b2ba7b0ae58df5bf293103b382b2b708 100644 (file)
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -133,6 +133,7 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
                         start, start + size, rc);
                 return -EFAULT;
         }
+       flush_inval_dcache_range(start, start + size);
  
         return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
  }
@@ -159,6 +160,7 @@ int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap
  
         /* Remove htab bolted mappings for this section of memory */
         start = (unsigned long)__va(start);
+       flush_inval_dcache_range(start, start + size);
         ret = remove_section_mapping(start, start + size);
  
         /* Ensure all vmalloc mappings are flushed in case they also
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c

index de470caf07848e28864cbb8ff0129e4ed7aaf021..fc222a0c2ac46b06095ed831827d40e84994e3ca 100644 (file)
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -82,19 +82,6 @@ static const struct file_operations memtrace_fops = {
         .open   = simple_open,
  };
  
-static void flush_memory_region(u64 base, u64 size)
-{
-       unsigned long line_size = ppc64_caches.l1d.size;
-       u64 end = base + size;
-       u64 addr;
-
-       base = round_down(base, line_size);
-       end = round_up(end, line_size);
-
-       for (addr = base; addr < end; addr += line_size)
-               asm volatile("dcbf 0,%0" : "=r" (addr) :: "memory");
-}
-
  static int check_memblock_online(struct memory_block *mem, void *arg)
  {
         if (mem->state != MEM_ONLINE)
@@ -132,10 +119,6 @@ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
         walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
                           change_memblock_state);
  
-       /* RCU grace period? */
-       flush_memory_region((u64)__va(start_pfn << PAGE_SHIFT),
-                           nr_pages << PAGE_SHIFT);
-
         lock_device_hotplug();
         remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
         unlock_device_hotplug();
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c

index 69a4f9e8bd554f137dd01b930b1b3d87e204fd47..525e966dce3418cef4a82f494da331793b628668 100644 (file)
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -33,6 +33,19 @@
  
  #define npu_to_phb(x) container_of(x, struct pnv_phb, npu)
  
+/*
+ * spinlock to protect initialisation of an npu_context for a particular
+ * mm_struct.
+ */
+static DEFINE_SPINLOCK(npu_context_lock);
+
+/*
+ * When an address shootdown range exceeds this threshold we invalidate the
+ * entire TLB on the GPU for the given PID rather than each specific address in
+ * the range.
+ */
+#define ATSD_THRESHOLD (2*1024*1024)
+
  /*
   * Other types of TCE cache invalidation are not functional in the
   * hardware.
@@ -401,7 +414,7 @@ struct npu_context {
         bool nmmu_flush;
  
         /* Callback to stop translation requests on a given GPU */
-       struct npu_context *(*release_cb)(struct npu_context *, void *);
+       void (*release_cb)(struct npu_context *context, void *priv);
  
         /*
          * Private pointer passed to the above callback for usage by
@@ -671,11 +684,19 @@ static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
         struct npu_context *npu_context = mn_to_npu_context(mn);
         unsigned long address;
  
-       for (address = start; address < end; address += PAGE_SIZE)
-               mmio_invalidate(npu_context, 1, address, false);
+       if (end - start > ATSD_THRESHOLD) {
+               /*
+                * Just invalidate the entire PID if the address range is too
+                * large.
+                */
+               mmio_invalidate(npu_context, 0, 0, true);
+       } else {
+               for (address = start; address < end; address += PAGE_SIZE)
+                       mmio_invalidate(npu_context, 1, address, false);
  
-       /* Do the flush only on the final addess == end */
-       mmio_invalidate(npu_context, 1, address, true);
+               /* Do the flush only on the final addess == end */
+               mmio_invalidate(npu_context, 1, address, true);
+       }
  }
  
  static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
@@ -696,11 +717,12 @@ static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
   * Returns an error if there no contexts are currently available or a
   * npu_context which should be passed to pnv_npu2_handle_fault().
   *
- * mmap_sem must be held in write mode.
+ * mmap_sem must be held in write mode and must not be called from interrupt
+ * context.
   */
  struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                         unsigned long flags,
-                       struct npu_context *(*cb)(struct npu_context *, void *),
+                       void (*cb)(struct npu_context *, void *),
                         void *priv)
  {
         int rc;
@@ -743,7 +765,9 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
         /*
          * Setup the NPU context table for a particular GPU. These need to be
          * per-GPU as we need the tables to filter ATSDs when there are no
-        * active contexts on a particular GPU.
+        * active contexts on a particular GPU. It is safe for these to be
+        * called concurrently with destroy as the OPAL call takes appropriate
+        * locks and refcounts on init/destroy.
          */
         rc = opal_npu_init_context(nphb->opal_id, mm->context.id, flags,
                                 PCI_DEVID(gpdev->bus->number, gpdev->devfn));
@@ -754,8 +778,29 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
          * We store the npu pci device so we can more easily get at the
          * associated npus.
          */
+       spin_lock(&npu_context_lock);
         npu_context = mm->context.npu_context;
+       if (npu_context) {
+               if (npu_context->release_cb != cb ||
+                       npu_context->priv != priv) {
+                       spin_unlock(&npu_context_lock);
+                       opal_npu_destroy_context(nphb->opal_id, mm->context.id,
+                                               PCI_DEVID(gpdev->bus->number,
+                                                       gpdev->devfn));
+                       return ERR_PTR(-EINVAL);
+               }
+
+               WARN_ON(!kref_get_unless_zero(&npu_context->kref));
+       }
+       spin_unlock(&npu_context_lock);
+
         if (!npu_context) {
+               /*
+                * We can set up these fields without holding the
+                * npu_context_lock as the npu_context hasn't been returned to
+                * the caller meaning it can't be destroyed. Parallel allocation
+                * is protected against by mmap_sem.
+                */
                 rc = -ENOMEM;
                 npu_context = kzalloc(sizeof(struct npu_context), GFP_KERNEL);
                 if (npu_context) {
@@ -774,8 +819,6 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
                 }
  
                 mm->context.npu_context = npu_context;
-       } else {
-               WARN_ON(!kref_get_unless_zero(&npu_context->kref));
         }
  
         npu_context->release_cb = cb;
@@ -814,15 +857,16 @@ static void pnv_npu2_release_context(struct kref *kref)
                 mm_context_remove_copro(npu_context->mm);
  
         npu_context->mm->context.npu_context = NULL;
-       mmu_notifier_unregister(&npu_context->mn,
-                               npu_context->mm);
-
-       kfree(npu_context);
  }
  
+/*
+ * Destroy a context on the given GPU. May free the npu_context if it is no
+ * longer active on any GPUs. Must not be called from interrupt context.
+ */
  void pnv_npu2_destroy_context(struct npu_context *npu_context,
                         struct pci_dev *gpdev)
  {
+       int removed;
         struct pnv_phb *nphb;
         struct npu *npu;
         struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
@@ -844,7 +888,21 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
         WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
         opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
                                 PCI_DEVID(gpdev->bus->number, gpdev->devfn));
-       kref_put(&npu_context->kref, pnv_npu2_release_context);
+       spin_lock(&npu_context_lock);
+       removed = kref_put(&npu_context->kref, pnv_npu2_release_context);
+       spin_unlock(&npu_context_lock);
+
+       /*
+        * We need to do this outside of pnv_npu2_release_context so that it is
+        * outside the spinlock as mmu_notifier_destroy uses SRCU.
+        */
+       if (removed) {
+               mmu_notifier_unregister(&npu_context->mn,
+                                       npu_context->mm);
+
+               kfree(npu_context);
+       }
+
  }
  EXPORT_SYMBOL(pnv_npu2_destroy_context);
  
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c

index f8868864f373ed5eecba333a5b1a2b83e38e0ec6..aa2a5139462ea5f6c81f51c8f025c9af84ce0be2 100644 (file)
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -48,10 +48,12 @@ unsigned long __init opal_get_boot_time(void)
  
         while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                 rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-               if (rc == OPAL_BUSY_EVENT)
+               if (rc == OPAL_BUSY_EVENT) {
+                       mdelay(OPAL_BUSY_DELAY_MS);
                         opal_poll_events(NULL);
-               else if (rc == OPAL_BUSY)
-                       mdelay(10);
+               } else if (rc == OPAL_BUSY) {
+                       mdelay(OPAL_BUSY_DELAY_MS);
+               }
         }
         if (rc != OPAL_SUCCESS)
                 return 0;
diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c

index 0591874856d3c8ba037537bc863c4bbbfe21e0a5..54edaec1e60837dfcc515430012979482652271a 100644 (file)
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -679,6 +679,16 @@ void gpstate_timer_handler(struct timer_list *t)
  
         if (!spin_trylock(&gpstates->gpstate_lock))
                 return;
+       /*
+        * If the timer has migrated to the different cpu then bring
+        * it back to one of the policy->cpus
+        */
+       if (!cpumask_test_cpu(raw_smp_processor_id(), policy->cpus)) {
+               gpstates->timer.expires = jiffies + msecs_to_jiffies(1);
+               add_timer_on(&gpstates->timer, cpumask_first(policy->cpus));
+               spin_unlock(&gpstates->gpstate_lock);
+               return;
+       }
  
         /*
          * If PMCR was last updated was using fast_swtich then
@@ -718,10 +728,8 @@ void gpstate_timer_handler(struct timer_list *t)
         if (gpstate_idx != gpstates->last_lpstate_idx)
                 queue_gpstate_timer(gpstates);
  
+       set_pstate(&freq_data);
         spin_unlock(&gpstates->gpstate_lock);
-
-       /* Timer may get migrated to a different cpu on cpu hot unplug */
-       smp_call_function_any(policy->cpus, set_pstate, &freq_data, 1);
  }
  
  /*
diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c

index 304e891e35fcb060a8ff26f78122fa3c63a1eb87..60f2250fd96bec2cec1dd33f652edaac474d2182 100644 (file)
--- a/drivers/rtc/rtc-opal.c
+++ b/drivers/rtc/rtc-opal.c
@@ -57,7 +57,7 @@ static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms)
  
  static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
  {
-       long rc = OPAL_BUSY;
+       s64 rc = OPAL_BUSY;
         int retries = 10;
         u32 y_m_d;
         u64 h_m_s_ms;
@@ -66,13 +66,17 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
  
         while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                 rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
-               if (rc == OPAL_BUSY_EVENT)
+               if (rc == OPAL_BUSY_EVENT) {
+                       msleep(OPAL_BUSY_DELAY_MS);
                         opal_poll_events(NULL);
-               else if (retries-- && (rc == OPAL_HARDWARE
-                                      || rc == OPAL_INTERNAL_ERROR))
-                       msleep(10);
-               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-                       break;
+               } else if (rc == OPAL_BUSY) {
+                       msleep(OPAL_BUSY_DELAY_MS);
+               } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+                       if (retries--) {
+                               msleep(10); /* Wait 10ms before retry */
+                               rc = OPAL_BUSY; /* go around again */
+                       }
+               }
         }
  
         if (rc != OPAL_SUCCESS)
@@ -87,21 +91,26 @@ static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm)
  
  static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm)
  {
-       long rc = OPAL_BUSY;
+       s64 rc = OPAL_BUSY;
         int retries = 10;
         u32 y_m_d = 0;
         u64 h_m_s_ms = 0;
  
         tm_to_opal(tm, &y_m_d, &h_m_s_ms);
+
         while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
                 rc = opal_rtc_write(y_m_d, h_m_s_ms);
-               if (rc == OPAL_BUSY_EVENT)
+               if (rc == OPAL_BUSY_EVENT) {
+                       msleep(OPAL_BUSY_DELAY_MS);
                         opal_poll_events(NULL);
-               else if (retries-- && (rc == OPAL_HARDWARE
-                                      || rc == OPAL_INTERNAL_ERROR))
-                       msleep(10);
-               else if (rc != OPAL_BUSY && rc != OPAL_BUSY_EVENT)
-                       break;
+               } else if (rc == OPAL_BUSY) {
+                       msleep(OPAL_BUSY_DELAY_MS);
+               } else if (rc == OPAL_HARDWARE || rc == OPAL_INTERNAL_ERROR) {
+                       if (retries--) {
+                               msleep(10); /* Wait 10ms before retry */
+                               rc = OPAL_BUSY; /* go around again */
+                       }
+               }
         }
  
         return rc == OPAL_SUCCESS ? 0 : -EIO;
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 28 Apr 2018 16:45:34 +0000 (09:45 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 28 Apr 2018 16:45:34 +0000 (09:45 -0700)
arch/powerpc/include/asm/powernv.h		patch \| blob \| blame \| history
arch/powerpc/kernel/mce_power.c		patch \| blob \| blame \| history
arch/powerpc/kernel/smp.c		patch \| blob \| blame \| history
arch/powerpc/kvm/booke.c		patch \| blob \| blame \| history
arch/powerpc/mm/mem.c		patch \| blob \| blame \| history
arch/powerpc/platforms/powernv/memtrace.c		patch \| blob \| blame \| history
arch/powerpc/platforms/powernv/npu-dma.c		patch \| blob \| blame \| history
arch/powerpc/platforms/powernv/opal-rtc.c		patch \| blob \| blame \| history
drivers/cpufreq/powernv-cpufreq.c		patch \| blob \| blame \| history
drivers/rtc/rtc-opal.c		patch \| blob \| blame \| history