powerpc/pseries: Introduce rwlock to gatekeep DTL buffer usage
arch/powerpc/platforms/pseries/lpar.c
1 /*
2  * pSeries_lpar.c
3  * Copyright (C) 2001 Todd Inglett, IBM Corporation
4  *
5  * pSeries LPAR support.
6  * 
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  * 
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  * 
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
20  */
21
22 /* Enables debugging of low-level hash table routines - careful! */
23 #undef DEBUG
24 #define pr_fmt(fmt) "lpar: " fmt
25
26 #include <linux/kernel.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/console.h>
29 #include <linux/export.h>
30 #include <linux/jump_label.h>
31 #include <linux/delay.h>
32 #include <linux/stop_machine.h>
33 #include <asm/processor.h>
34 #include <asm/mmu.h>
35 #include <asm/page.h>
36 #include <asm/pgtable.h>
37 #include <asm/machdep.h>
38 #include <asm/mmu_context.h>
39 #include <asm/iommu.h>
40 #include <asm/tlb.h>
41 #include <asm/prom.h>
42 #include <asm/cputable.h>
43 #include <asm/udbg.h>
44 #include <asm/smp.h>
45 #include <asm/trace.h>
46 #include <asm/firmware.h>
47 #include <asm/plpar_wrappers.h>
48 #include <asm/kexec.h>
49 #include <asm/fadump.h>
50 #include <asm/asm-prototypes.h>
51 #include <asm/debugfs.h>
52
53 #include "pseries.h"
54
55 /* Flag bits for H_BULK_REMOVE */
56 #define HBR_REQUEST     0x4000000000000000UL
57 #define HBR_RESPONSE    0x8000000000000000UL
58 #define HBR_END         0xc000000000000000UL
59 #define HBR_AVPN        0x0200000000000000UL
60 #define HBR_ANDCOND     0x0100000000000000UL
61
62
63 /* in hvCall.S */
64 EXPORT_SYMBOL(plpar_hcall);
65 EXPORT_SYMBOL(plpar_hcall9);
66 EXPORT_SYMBOL(plpar_hcall_norets);
67
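/*
 * Allocate a dispatch trace log (DTL) buffer for each possible CPU and
 * hang it off the CPU's paca. The buffers are only registered with the
 * hypervisor later, from register_dtl_buffer().
 */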
68 void alloc_dtl_buffers(void)
69 {
70         int cpu;
71         struct paca_struct *pp;
72         struct dtl_entry *dtl;
73
74         for_each_possible_cpu(cpu) {
75                 pp = paca_ptrs[cpu];
76                 dtl = kmem_cache_alloc(dtl_cache, GFP_KERNEL);
77                 if (!dtl) {
78                         pr_warn("Failed to allocate dispatch trace log for cpu %d\n",
79                                 cpu);
80                         pr_warn("Stolen time statistics will be unreliable\n");
81                         break;
82                 }
83
84                 pp->dtl_ridx = 0;
85                 pp->dispatch_log = dtl;
86                 pp->dispatch_log_end = dtl + N_DISPATCH_LOG;
87                 pp->dtl_curr = dtl;
88         }
89 }
90
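/*
 * Register this CPU's previously allocated DTL buffer with the
 * hypervisor via register_dtl() and turn on logging of preemption
 * (dispatch) events.
 */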
91 void register_dtl_buffer(int cpu)
92 {
93         long ret;
94         struct paca_struct *pp;
95         struct dtl_entry *dtl;
96         int hwcpu = get_hard_smp_processor_id(cpu);
97
98         pp = paca_ptrs[cpu];
99         dtl = pp->dispatch_log;
100         if (dtl) {
101                 pp->dtl_ridx = 0;
102                 pp->dtl_curr = dtl;
103                 lppaca_of(cpu).dtl_idx = 0;
104
105                 /* hypervisor reads buffer length from this field */
106                 dtl->enqueue_to_dispatch_time = cpu_to_be32(DISPATCH_LOG_BYTES);
107                 ret = register_dtl(hwcpu, __pa(dtl));
108                 if (ret)
109                         pr_err("WARNING: DTL registration of cpu %d (hw %d) failed with %ld\n",
110                                cpu, hwcpu, ret);
111
112                 lppaca_of(cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
113         }
114 }
115
116 #ifdef CONFIG_PPC_SPLPAR
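/*
 * Serializes ownership of the DTL buffers between their different
 * consumers (e.g. the debugfs dtl interface); the users live outside
 * this file. Purpose inferred from the introducing commit subject.
 */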
117 DEFINE_RWLOCK(dtl_access_lock);
118 #endif /* CONFIG_PPC_SPLPAR */
119
120 void vpa_init(int cpu)
121 {
122         int hwcpu = get_hard_smp_processor_id(cpu);
123         unsigned long addr;
124         long ret;
125
126         /*
127          * The spec says it "may be problematic" if CPU x registers the VPA of
128          * CPU y. We should never do that, but warn if we ever do.
129          */
130         WARN_ON(cpu != smp_processor_id());
131
132         if (cpu_has_feature(CPU_FTR_ALTIVEC))
133                 lppaca_of(cpu).vmxregs_in_use = 1;
134
135         if (cpu_has_feature(CPU_FTR_ARCH_207S))
136                 lppaca_of(cpu).ebb_regs_in_use = 1;
137
138         addr = __pa(&lppaca_of(cpu));
139         ret = register_vpa(hwcpu, addr);
140
141         if (ret) {
142                 pr_err("WARNING: VPA registration for cpu %d (hw %d) of area "
143                        "%lx failed with %ld\n", cpu, hwcpu, addr, ret);
144                 return;
145         }
146
147 #ifdef CONFIG_PPC_BOOK3S_64
148         /*
149          * PAPR says this feature is SLB-Buffer but firmware never
150          * reports it. All SPLPARs support the SLB shadow buffer.
151          */
152         if (!radix_enabled() && firmware_has_feature(FW_FEATURE_SPLPAR)) {
153                 addr = __pa(paca_ptrs[cpu]->slb_shadow_ptr);
154                 ret = register_slb_shadow(hwcpu, addr);
155                 if (ret)
156                         pr_err("WARNING: SLB shadow buffer registration for "
157                                "cpu %d (hw %d) of area %lx failed with %ld\n",
158                                cpu, hwcpu, addr, ret);
159         }
160 #endif /* CONFIG_PPC_BOOK3S_64 */
161
162         /*
163          * Register dispatch trace log, if one has been allocated.
164          */
165         register_dtl_buffer(cpu);
166 }
167
168 #ifdef CONFIG_PPC_BOOK3S_64
169
170 static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
171                                      unsigned long vpn, unsigned long pa,
172                                      unsigned long rflags, unsigned long vflags,
173                                      int psize, int apsize, int ssize)
174 {
175         unsigned long lpar_rc;
176         unsigned long flags;
177         unsigned long slot;
178         unsigned long hpte_v, hpte_r;
179
180         if (!(vflags & HPTE_V_BOLTED))
181                 pr_devel("hpte_insert(group=%lx, vpn=%016lx, "
182                          "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
183                          hpte_group, vpn,  pa, rflags, vflags, psize);
184
185         hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
186         hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
187
188         if (!(vflags & HPTE_V_BOLTED))
189                 pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
190
191         /* Now fill in the actual HPTE */
192         /* Set CEC cookie to 0         */
193         /* Zero page = 0               */
194         /* I-cache Invalidate = 0      */
195         /* I-cache synchronize = 0     */
196         /* Exact = 0                   */
197         flags = 0;
198
199         if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
200                 flags |= H_COALESCE_CAND;
201
202         lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
203         if (unlikely(lpar_rc == H_PTEG_FULL)) {
204                 pr_devel("Hash table group is full\n");
205                 return -1;
206         }
207
208         /*
209          * Since we try to ioremap PHBs we don't own, the pte insert
210          * will fail. However we must catch the failure in hash_page
211          * or we will loop forever, so return -2 in this case.
212          */
213         if (unlikely(lpar_rc != H_SUCCESS)) {
214                 pr_err("Failed hash pte insert with error %ld\n", lpar_rc);
215                 return -2;
216         }
217         if (!(vflags & HPTE_V_BOLTED))
218                 pr_devel(" -> slot: %lu\n", slot & 7);
219
220         /* Because of iSeries, we have to pass down the secondary
221          * bucket bit here as well
222          */
223         return (slot & 7) | (!!(vflags & HPTE_V_SECONDARY) << 3);
224 }
225
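/*
 * Used to serialize invalidations when the CPU lacks
 * MMU_FTR_LOCKLESS_TLBIE, so that concurrent flushes don't bounce the
 * hypervisor's internal tlbie lock.
 */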
226 static DEFINE_SPINLOCK(pSeries_lpar_tlbie_lock);
227
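/*
 * Evict one non-bolted entry from @hpte_group, starting from a
 * pseudo-random slot, so that a new HPTE can be inserted. Returns the
 * index of the freed slot, or -1 if every slot in the group is bolted.
 */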
228 static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
229 {
230         unsigned long slot_offset;
231         unsigned long lpar_rc;
232         int i;
233         unsigned long dummy1, dummy2;
234
235         /* pick a random slot to start at */
236         slot_offset = mftb() & 0x7;
237
238         for (i = 0; i < HPTES_PER_GROUP; i++) {
239
240                 /* don't remove a bolted entry */
241                 lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
242                                            (0x1UL << 4), &dummy1, &dummy2);
243                 if (lpar_rc == H_SUCCESS)
244                         return i;
245
246                 /*
247                  * The test for adjunct partition is performed before the
248                  * ANDCOND test.  H_RESOURCE may be returned, so we need to
249                  * check for that as well.
250                  */
251                 BUG_ON(lpar_rc != H_NOT_FOUND && lpar_rc != H_RESOURCE);
252
253                 slot_offset++;
254                 slot_offset &= 0x7;
255         }
256
257         return -1;
258 }
259
260 static void manual_hpte_clear_all(void)
261 {
262         unsigned long size_bytes = 1UL << ppc64_pft_size;
263         unsigned long hpte_count = size_bytes >> 4;
264         struct {
265                 unsigned long pteh;
266                 unsigned long ptel;
267         } ptes[4];
268         long lpar_rc;
269         unsigned long i, j;
270
271         /*
272          * Read in batches of 4; invalidate only valid entries that are not
273          * in the VRMA. hpte_count will be a multiple of 4.
274          */
275         for (i = 0; i < hpte_count; i += 4) {
276                 lpar_rc = plpar_pte_read_4_raw(0, i, (void *)ptes);
277                 if (lpar_rc != H_SUCCESS) {
278                         pr_info("Failed to read hash page table at %ld err %ld\n",
279                                 i, lpar_rc);
280                         continue;
281                 }
282                 for (j = 0; j < 4; j++){
283                         if ((ptes[j].pteh & HPTE_V_VRMA_MASK) ==
284                                 HPTE_V_VRMA_MASK)
285                                 continue;
286                         if (ptes[j].pteh & HPTE_V_VALID)
287                                 plpar_pte_remove_raw(0, i + j, 0,
288                                         &(ptes[j].pteh), &(ptes[j].ptel));
289                 }
290         }
291 }
292
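/*
 * Ask the hypervisor to clear the entire HPT in one go. H_CLEAR_HPT may
 * return H_CONTINUE, in which case the hcall simply needs to be
 * repeated until it completes.
 */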
293 static int hcall_hpte_clear_all(void)
294 {
295         int rc;
296
297         do {
298                 rc = plpar_hcall_norets(H_CLEAR_HPT);
299         } while (rc == H_CONTINUE);
300
301         return rc;
302 }
303
304 static void pseries_hpte_clear_all(void)
305 {
306         int rc;
307
308         rc = hcall_hpte_clear_all();
309         if (rc != H_SUCCESS)
310                 manual_hpte_clear_all();
311
312 #ifdef __LITTLE_ENDIAN__
313         /*
314          * Reset exceptions to big endian.
315          *
316          * FIXME this is a hack for kexec, we need to reset the exception
317          * endian before starting the new kernel and this is a convenient place
318          * to do it.
319          *
320          * This is also called on boot when a fadump happens. In that case we
321          * must not change the exception endian mode.
322          */
323         if (firmware_has_feature(FW_FEATURE_SET_MODE) && !is_fadump_active())
324                 pseries_big_endian_exceptions();
325 #endif
326 }
327
328 /*
329  * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
330  * the low 3 bits of flags happen to line up.  So no transform is needed.
331  * We can probably optimize here and assume the high bits of newpp are
332  * already zero.  For now I am paranoid.
333  */
334 static long pSeries_lpar_hpte_updatepp(unsigned long slot,
335                                        unsigned long newpp,
336                                        unsigned long vpn,
337                                        int psize, int apsize,
338                                        int ssize, unsigned long inv_flags)
339 {
340         unsigned long lpar_rc;
341         unsigned long flags;
342         unsigned long want_v;
343
344         want_v = hpte_encode_avpn(vpn, psize, ssize);
345
346         flags = (newpp & 7) | H_AVPN;
347         if (mmu_has_feature(MMU_FTR_KERNEL_RO))
348                 /* Move pp0 into bit 8 (IBM 55) */
349                 flags |= (newpp & HPTE_R_PP0) >> 55;
350
351         pr_devel("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
352                  want_v, slot, flags, psize);
353
354         lpar_rc = plpar_pte_protect(flags, slot, want_v);
355
356         if (lpar_rc == H_NOT_FOUND) {
357                 pr_devel("not found !\n");
358                 return -1;
359         }
360
361         pr_devel("ok\n");
362
363         BUG_ON(lpar_rc != H_SUCCESS);
364
365         return 0;
366 }
367
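/*
 * Scan one HPTE group, reading it back from the hypervisor four entries
 * at a time, and return the index within the group of the valid entry
 * whose AVPN matches @want_v, or -1 if there is no match.
 */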
368 static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group)
369 {
370         long lpar_rc;
371         unsigned long i, j;
372         struct {
373                 unsigned long pteh;
374                 unsigned long ptel;
375         } ptes[4];
376
377         for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
378
379                 lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
380                 if (lpar_rc != H_SUCCESS) {
381                         pr_info("Failed to read hash page table at %ld err %ld\n",
382                                 hpte_group, lpar_rc);
383                         continue;
384                 }
385
386                 for (j = 0; j < 4; j++) {
387                         if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
388                             (ptes[j].pteh & HPTE_V_VALID))
389                                 return i + j;
390                 }
391         }
392
393         return -1;
394 }
395
396 static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
397 {
398         long slot;
399         unsigned long hash;
400         unsigned long want_v;
401         unsigned long hpte_group;
402
403         hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
404         want_v = hpte_encode_avpn(vpn, psize, ssize);
405
406         /* Bolted entries are always in the primary group */
407         hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
408         slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
409         if (slot < 0)
410                 return -1;
411         return hpte_group + slot;
412 }
413
414 static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
415                                              unsigned long ea,
416                                              int psize, int ssize)
417 {
418         unsigned long vpn;
419         unsigned long lpar_rc, slot, vsid, flags;
420
421         vsid = get_kernel_vsid(ea, ssize);
422         vpn = hpt_vpn(ea, vsid, ssize);
423
424         slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
425         BUG_ON(slot == -1);
426
427         flags = newpp & 7;
428         if (mmu_has_feature(MMU_FTR_KERNEL_RO))
429                 /* Move pp0 into bit 8 (IBM 55) */
430                 flags |= (newpp & HPTE_R_PP0) >> 55;
431
432         lpar_rc = plpar_pte_protect(flags, slot, 0);
433
434         BUG_ON(lpar_rc != H_SUCCESS);
435 }
436
437 static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
438                                          int psize, int apsize,
439                                          int ssize, int local)
440 {
441         unsigned long want_v;
442         unsigned long lpar_rc;
443         unsigned long dummy1, dummy2;
444
445         pr_devel("    inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
446                  slot, vpn, psize, local);
447
448         want_v = hpte_encode_avpn(vpn, psize, ssize);
449         lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
450         if (lpar_rc == H_NOT_FOUND)
451                 return;
452
453         BUG_ON(lpar_rc != H_SUCCESS);
454 }
455
456
457 /*
458  * As defined in PAPR section 14.5.4.1.8, the control mask doesn't
459  * include the reference and change bits returned from the processed
460  * PTE.
461  */
462 #define HBLKR_AVPN              0x0100000000000000UL
463 #define HBLKR_CTRL_MASK         0xf800000000000000UL
464 #define HBLKR_CTRL_SUCCESS      0x8000000000000000UL
465 #define HBLKR_CTRL_ERRNOTFOUND  0x8800000000000000UL
466 #define HBLKR_CTRL_ERRBUSY      0xa000000000000000UL
467
468 /**
469  * H_BLOCK_REMOVE caller.
470  * @idx should point to the latest @param entry set with a PTEX.
471  * If a PTE cannot be processed because another CPU has already locked its
472  * group, that entry is put back in @param, starting at index 1.
473  * If entries have to be retried and @retry_busy is true, they are retried
474  * until they succeed. If @retry_busy is false, the return value is the
475  * number of entries yet to be processed.
476  */
477 static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
478                                        bool retry_busy)
479 {
480         unsigned long i, rc, new_idx;
481         unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
482
483         if (idx < 2) {
484                 pr_warn("Unexpected empty call to H_BLOCK_REMOVE\n");
485                 return 0;
486         }
487 again:
488         new_idx = 0;
489         if (idx > PLPAR_HCALL9_BUFSIZE) {
490                 pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE\n", idx);
491                 idx = PLPAR_HCALL9_BUFSIZE;
492         } else if (idx < PLPAR_HCALL9_BUFSIZE)
493                 param[idx] = HBR_END;
494
495         rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
496                           param[0], /* AVA */
497                           param[1],  param[2],  param[3],  param[4], /* TS0-7 */
498                           param[5],  param[6],  param[7],  param[8]);
499         if (rc == H_SUCCESS)
500                 return 0;
501
502         BUG_ON(rc != H_PARTIAL);
503
504         /* Check that the unprocessed entries were 'not found' or 'busy' */
505         for (i = 0; i < idx-1; i++) {
506                 unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
507
508                 if (ctrl == HBLKR_CTRL_ERRBUSY) {
509                         param[++new_idx] = param[i+1];
510                         continue;
511                 }
512
513                 BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
514                        && ctrl != HBLKR_CTRL_ERRNOTFOUND);
515         }
516
517         /*
518          * If some entries were found busy, retry them if requested, or if
519          * all of the entries have to be retried.
520          */
521         if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
522                 idx = new_idx + 1;
523                 goto again;
524         }
525
526         return new_idx;
527 }
528
529 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
530 /*
531  * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
532  * to make sure that we avoid bouncing the hypervisor tlbie lock.
533  */
534 #define PPC64_HUGE_HPTE_BATCH 12
535
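/*
 * Invalidate a batch of hugepage HPTEs using H_BLOCK_REMOVE, regrouping
 * the slots into naturally aligned 8-page virtual address blocks as
 * required by the hcall.
 */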
536 static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
537                                       int count, int psize, int ssize)
538 {
539         unsigned long param[PLPAR_HCALL9_BUFSIZE];
540         unsigned long shift, current_vpgb, vpgb;
541         int i, pix = 0;
542
543         shift = mmu_psize_defs[psize].shift;
544
545         for (i = 0; i < count; i++) {
546                 /*
547                  * Shift 3 more bits to the right to get an
548                  * 8-page aligned virtual address.
549                  */
550                 vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
551                 if (!pix || vpgb != current_vpgb) {
552                         /*
553                          * Need to start a new 8-page block; flush
554                          * the current one if needed.
555                          */
556                         if (pix)
557                                 (void)call_block_remove(pix, param, true);
558                         current_vpgb = vpgb;
559                         param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
560                         pix = 1;
561                 }
562
563                 param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
564                 if (pix == PLPAR_HCALL9_BUFSIZE) {
565                         pix = call_block_remove(pix, param, false);
566                         /*
567                          * pix = 0 means that all the entries were
568                          * removed, so we can start a new block.
569                          * Otherwise, there are entries left to retry,
570                          * and pix points to the latest one, so we
571                          * should increment it and try to continue
572                          * the same block.
573                          */
574                         if (pix)
575                                 pix++;
576                 }
577         }
578         if (pix)
579                 (void)call_block_remove(pix, param, true);
580 }
581
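/*
 * Invalidate a batch of hugepage HPTEs using H_BULK_REMOVE (four
 * slot/AVPN pairs per hcall), or one at a time if the firmware doesn't
 * support bulk removal.
 */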
582 static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
583                                      int count, int psize, int ssize)
584 {
585         unsigned long param[PLPAR_HCALL9_BUFSIZE];
586         int i = 0, pix = 0, rc;
587
588         for (i = 0; i < count; i++) {
589
590                 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
591                         pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
592                                                      ssize, 0);
593                 } else {
594                         param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
595                         param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
596                         pix += 2;
597                         if (pix == 8) {
598                                 rc = plpar_hcall9(H_BULK_REMOVE, param,
599                                                   param[0], param[1], param[2],
600                                                   param[3], param[4], param[5],
601                                                   param[6], param[7]);
602                                 BUG_ON(rc != H_SUCCESS);
603                                 pix = 0;
604                         }
605                 }
606         }
607         if (pix) {
608                 param[pix] = HBR_END;
609                 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
610                                   param[2], param[3], param[4], param[5],
611                                   param[6], param[7]);
612                 BUG_ON(rc != H_SUCCESS);
613         }
614 }
615
616 static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
617                                                       unsigned long *vpn,
618                                                       int count, int psize,
619                                                       int ssize)
620 {
621         unsigned long flags = 0;
622         int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
623
624         if (lock_tlbie)
625                 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
626
627         if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
628                 hugepage_block_invalidate(slot, vpn, count, psize, ssize);
629         else
630                 hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
631
632         if (lock_tlbie)
633                 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
634 }
635
636 static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
637                                              unsigned long addr,
638                                              unsigned char *hpte_slot_array,
639                                              int psize, int ssize, int local)
640 {
641         int i, index = 0;
642         unsigned long s_addr = addr;
643         unsigned int max_hpte_count, valid;
644         unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
645         unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
646         unsigned long shift, hidx, vpn = 0, hash, slot;
647
648         shift = mmu_psize_defs[psize].shift;
649         max_hpte_count = 1U << (PMD_SHIFT - shift);
650
651         for (i = 0; i < max_hpte_count; i++) {
652                 valid = hpte_valid(hpte_slot_array, i);
653                 if (!valid)
654                         continue;
655                 hidx =  hpte_hash_index(hpte_slot_array, i);
656
657                 /* get the vpn */
658                 addr = s_addr + (i * (1ul << shift));
659                 vpn = hpt_vpn(addr, vsid, ssize);
660                 hash = hpt_hash(vpn, shift, ssize);
661                 if (hidx & _PTEIDX_SECONDARY)
662                         hash = ~hash;
663
664                 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
665                 slot += hidx & _PTEIDX_GROUP_IX;
666
667                 slot_array[index] = slot;
668                 vpn_array[index] = vpn;
669                 if (index == PPC64_HUGE_HPTE_BATCH - 1) {
670                         /*
671                          * Now do a bulk invalidate
672                          */
673                         __pSeries_lpar_hugepage_invalidate(slot_array,
674                                                            vpn_array,
675                                                            PPC64_HUGE_HPTE_BATCH,
676                                                            psize, ssize);
677                         index = 0;
678                 } else
679                         index++;
680         }
681         if (index)
682                 __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
683                                                    index, psize, ssize);
684 }
685 #else
686 static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
687                                              unsigned long addr,
688                                              unsigned char *hpte_slot_array,
689                                              int psize, int ssize, int local)
690 {
691         WARN(1, "%s called without THP support\n", __func__);
692 }
693 #endif
694
695 static int pSeries_lpar_hpte_removebolted(unsigned long ea,
696                                           int psize, int ssize)
697 {
698         unsigned long vpn;
699         unsigned long slot, vsid;
700
701         vsid = get_kernel_vsid(ea, ssize);
702         vpn = hpt_vpn(ea, vsid, ssize);
703
704         slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
705         if (slot == -1)
706                 return -ENOENT;
707
708         /*
709          * lpar doesn't use the passed actual page size
710          */
711         pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
712         return 0;
713 }
714
715
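/*
 * Compute the global HPT slot for a (vpn, subpage index) pair, folding
 * in the secondary-hash bit recorded in the real PTE.
 */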
716 static inline unsigned long compute_slot(real_pte_t pte,
717                                          unsigned long vpn,
718                                          unsigned long index,
719                                          unsigned long shift,
720                                          int ssize)
721 {
722         unsigned long slot, hash, hidx;
723
724         hash = hpt_hash(vpn, shift, ssize);
725         hidx = __rpte_to_hidx(pte, index);
726         if (hidx & _PTEIDX_SECONDARY)
727                 hash = ~hash;
728         slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
729         slot += hidx & _PTEIDX_GROUP_IX;
730         return slot;
731 }
732
733 /**
734  * The hcall H_BLOCK_REMOVE requires that the virtual pages to be processed are
735  * "all within the same naturally aligned 8 page virtual address block".
736  */
737 static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
738                             unsigned long *param)
739 {
740         unsigned long vpn;
741         unsigned long i, pix = 0;
742         unsigned long index, shift, slot, current_vpgb, vpgb;
743         real_pte_t pte;
744         int psize, ssize;
745
746         psize = batch->psize;
747         ssize = batch->ssize;
748
749         for (i = 0; i < number; i++) {
750                 vpn = batch->vpn[i];
751                 pte = batch->pte[i];
752                 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
753                         /*
754                          * Shift 3 more bits to the right to get an
755                          * 8-page aligned virtual address.
756                          */
757                         vpgb = (vpn >> (shift - VPN_SHIFT + 3));
758                         if (!pix || vpgb != current_vpgb) {
759                                 /*
760                                  * Need to start a new 8-page block; flush
761                                  * the current one if needed.
762                                  */
763                                 if (pix)
764                                         (void)call_block_remove(pix, param,
765                                                                 true);
766                                 current_vpgb = vpgb;
767                                 param[0] = hpte_encode_avpn(vpn, psize,
768                                                             ssize);
769                                 pix = 1;
770                         }
771
772                         slot = compute_slot(pte, vpn, index, shift, ssize);
773                         param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
774
775                         if (pix == PLPAR_HCALL9_BUFSIZE) {
776                                 pix = call_block_remove(pix, param, false);
777                                 /*
778                                  * pix = 0 means that all the entries were
779                                  * removed, so we can start a new block.
780                                  * Otherwise, there are entries left to retry,
781                                  * and pix points to the latest one, so we
782                                  * should increment it and try to continue
783                                  * the same block.
784                                  */
785                                 if (pix)
786                                         pix++;
787                         }
788                 } pte_iterate_hashed_end();
789         }
790
791         if (pix)
792                 (void)call_block_remove(pix, param, true);
793 }
794
795 /*
796  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
797  * lock.
798  */
799 static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
800 {
801         unsigned long vpn;
802         unsigned long i, pix, rc;
803         unsigned long flags = 0;
804         struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
805         int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
806         unsigned long param[PLPAR_HCALL9_BUFSIZE];
807         unsigned long index, shift, slot;
808         real_pte_t pte;
809         int psize, ssize;
810
811         if (lock_tlbie)
812                 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
813
814         if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
815                 do_block_remove(number, batch, param);
816                 goto out;
817         }
818
819         psize = batch->psize;
820         ssize = batch->ssize;
821         pix = 0;
822         for (i = 0; i < number; i++) {
823                 vpn = batch->vpn[i];
824                 pte = batch->pte[i];
825                 pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
826                         slot = compute_slot(pte, vpn, index, shift, ssize);
827                         if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
828                                 /*
829                                  * lpar doesn't use the passed actual page size
830                                  */
831                                 pSeries_lpar_hpte_invalidate(slot, vpn, psize,
832                                                              0, ssize, local);
833                         } else {
834                                 param[pix] = HBR_REQUEST | HBR_AVPN | slot;
835                                 param[pix+1] = hpte_encode_avpn(vpn, psize,
836                                                                 ssize);
837                                 pix += 2;
838                                 if (pix == 8) {
839                                         rc = plpar_hcall9(H_BULK_REMOVE, param,
840                                                 param[0], param[1], param[2],
841                                                 param[3], param[4], param[5],
842                                                 param[6], param[7]);
843                                         BUG_ON(rc != H_SUCCESS);
844                                         pix = 0;
845                                 }
846                         }
847                 } pte_iterate_hashed_end();
848         }
849         if (pix) {
850                 param[pix] = HBR_END;
851                 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
852                                   param[2], param[3], param[4], param[5],
853                                   param[6], param[7]);
854                 BUG_ON(rc != H_SUCCESS);
855         }
856
857 out:
858         if (lock_tlbie)
859                 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
860 }
861
862 static int __init disable_bulk_remove(char *str)
863 {
864         if (strcmp(str, "off") == 0 &&
865             firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
866                 pr_info("Disabling BULK_REMOVE firmware feature\n");
867                 powerpc_firmware_features &= ~FW_FEATURE_BULK_REMOVE;
868         }
869         return 1;
870 }
871
872 __setup("bulk_remove=", disable_bulk_remove);
873
874 #define HPT_RESIZE_TIMEOUT      10000 /* ms */
875
876 struct hpt_resize_state {
877         unsigned long shift;
878         int commit_rc;
879 };
880
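/*
 * Runs under stop_machine(): all other CPUs are held with interrupts
 * off while the hypervisor switches to the new hash table, so the
 * global HPT geometry can be updated without further locking.
 */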
881 static int pseries_lpar_resize_hpt_commit(void *data)
882 {
883         struct hpt_resize_state *state = data;
884
885         state->commit_rc = plpar_resize_hpt_commit(0, state->shift);
886         if (state->commit_rc != H_SUCCESS)
887                 return -EIO;
888
889         /* Hypervisor has transitioned the HTAB, update our globals */
890         ppc64_pft_size = state->shift;
891         htab_size_bytes = 1UL << ppc64_pft_size;
892         htab_hash_mask = (htab_size_bytes >> 7) - 1;
893
894         return 0;
895 }
896
897 /* Must be called in process context */
898 static int pseries_lpar_resize_hpt(unsigned long shift)
899 {
900         struct hpt_resize_state state = {
901                 .shift = shift,
902                 .commit_rc = H_FUNCTION,
903         };
904         unsigned int delay, total_delay = 0;
905         int rc;
906         ktime_t t0, t1, t2;
907
908         might_sleep();
909
910         if (!firmware_has_feature(FW_FEATURE_HPT_RESIZE))
911                 return -ENODEV;
912
913         pr_info("Attempting to resize HPT to shift %lu\n", shift);
914
915         t0 = ktime_get();
916
917         rc = plpar_resize_hpt_prepare(0, shift);
918         while (H_IS_LONG_BUSY(rc)) {
919                 delay = get_longbusy_msecs(rc);
920                 total_delay += delay;
921                 if (total_delay > HPT_RESIZE_TIMEOUT) {
922                         /* prepare with shift==0 cancels an in-progress resize */
923                         rc = plpar_resize_hpt_prepare(0, 0);
924                         if (rc != H_SUCCESS)
925                                 pr_warn("Unexpected error %d cancelling timed out HPT resize\n",
926                                        rc);
927                         return -ETIMEDOUT;
928                 }
929                 msleep(delay);
930                 rc = plpar_resize_hpt_prepare(0, shift);
931         }
932
933         switch (rc) {
934         case H_SUCCESS:
935                 /* Continue on */
936                 break;
937
938         case H_PARAMETER:
939                 pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
940                 return -EINVAL;
941         case H_RESOURCE:
942                 pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
943                 return -EPERM;
944         default:
945                 pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
946                 return -EIO;
947         }
948
949         t1 = ktime_get();
950
951         rc = stop_machine(pseries_lpar_resize_hpt_commit, &state, NULL);
952
953         t2 = ktime_get();
954
955         if (rc != 0) {
956                 switch (state.commit_rc) {
957                 case H_PTEG_FULL:
958                         return -ENOSPC;
959
960                 default:
961                         pr_warn("Unexpected error %d from H_RESIZE_HPT_COMMIT\n",
962                                 state.commit_rc);
963                         return -EIO;
964                 }
965         }
966
967         pr_info("HPT resize to shift %lu complete (%lld ms / %lld ms)\n",
968                 shift, (long long) ktime_ms_delta(t1, t0),
969                 (long long) ktime_ms_delta(t2, t1));
970
971         return 0;
972 }
973
974 static int pseries_lpar_register_process_table(unsigned long base,
975                         unsigned long page_size, unsigned long table_size)
976 {
977         long rc;
978         unsigned long flags = 0;
979
980         if (table_size)
981                 flags |= PROC_TABLE_NEW;
982         if (radix_enabled())
983                 flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
984         else
985                 flags |= PROC_TABLE_HPT_SLB;
986         for (;;) {
987                 rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
988                                         page_size, table_size);
989                 if (!H_IS_LONG_BUSY(rc))
990                         break;
991                 mdelay(get_longbusy_msecs(rc));
992         }
993         if (rc != H_SUCCESS) {
994                 pr_err("Failed to register process table (rc=%ld)\n", rc);
995                 BUG();
996         }
997         return rc;
998 }
999
1000 void __init hpte_init_pseries(void)
1001 {
1002         mmu_hash_ops.hpte_invalidate     = pSeries_lpar_hpte_invalidate;
1003         mmu_hash_ops.hpte_updatepp       = pSeries_lpar_hpte_updatepp;
1004         mmu_hash_ops.hpte_updateboltedpp = pSeries_lpar_hpte_updateboltedpp;
1005         mmu_hash_ops.hpte_insert         = pSeries_lpar_hpte_insert;
1006         mmu_hash_ops.hpte_remove         = pSeries_lpar_hpte_remove;
1007         mmu_hash_ops.hpte_removebolted   = pSeries_lpar_hpte_removebolted;
1008         mmu_hash_ops.flush_hash_range    = pSeries_lpar_flush_hash_range;
1009         mmu_hash_ops.hpte_clear_all      = pseries_hpte_clear_all;
1010         mmu_hash_ops.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
1011         register_process_table           = pseries_lpar_register_process_table;
1012
1013         if (firmware_has_feature(FW_FEATURE_HPT_RESIZE))
1014                 mmu_hash_ops.resize_hpt = pseries_lpar_resize_hpt;
1015 }
1016
1017 void radix_init_pseries(void)
1018 {
1019         pr_info("Using radix MMU under hypervisor\n");
1020         register_process_table = pseries_lpar_register_process_table;
1021 }
1022
1023 #ifdef CONFIG_PPC_SMLPAR
1024 #define CMO_FREE_HINT_DEFAULT 1
1025 static int cmo_free_hint_flag = CMO_FREE_HINT_DEFAULT;
1026
1027 static int __init cmo_free_hint(char *str)
1028 {
1029         char *parm;
1030         parm = strstrip(str);
1031
1032         if (strcasecmp(parm, "no") == 0 || strcasecmp(parm, "off") == 0) {
1033                 pr_info("%s: CMO free page hinting is not active.\n", __func__);
1034                 cmo_free_hint_flag = 0;
1035                 return 1;
1036         }
1037
1038         cmo_free_hint_flag = 1;
1039         pr_info("%s: CMO free page hinting is active.\n", __func__);
1040
1041         if (strcasecmp(parm, "yes") == 0 || strcasecmp(parm, "on") == 0)
1042                 return 1;
1043
1044         return 0;
1045 }
1046
1047 __setup("cmo_free_hint=", cmo_free_hint);
1048
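/*
 * Mark the pages backing @page (an order-@order allocation) as being in
 * @state, one CMO page at a time, via the H_PAGE_INIT hcall.
 */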
1049 static void pSeries_set_page_state(struct page *page, int order,
1050                                    unsigned long state)
1051 {
1052         int i, j;
1053         unsigned long cmo_page_sz, addr;
1054
1055         cmo_page_sz = cmo_get_page_size();
1056         addr = __pa((unsigned long)page_address(page));
1057
1058         for (i = 0; i < (1 << order); i++, addr += PAGE_SIZE) {
1059                 for (j = 0; j < PAGE_SIZE; j += cmo_page_sz)
1060                         plpar_hcall_norets(H_PAGE_INIT, state, addr + j, 0);
1061         }
1062 }
1063
1064 void arch_free_page(struct page *page, int order)
1065 {
1066         if (radix_enabled())
1067                 return;
1068         if (!cmo_free_hint_flag || !firmware_has_feature(FW_FEATURE_CMO))
1069                 return;
1070
1071         pSeries_set_page_state(page, order, H_PAGE_SET_UNUSED);
1072 }
1073 EXPORT_SYMBOL(arch_free_page);
1074
1075 #endif /* CONFIG_PPC_SMLPAR */
1076 #endif /* CONFIG_PPC_BOOK3S_64 */
1077
1078 #ifdef CONFIG_TRACEPOINTS
1079 #ifdef CONFIG_JUMP_LABEL
1080 struct static_key hcall_tracepoint_key = STATIC_KEY_INIT;
1081
1082 int hcall_tracepoint_regfunc(void)
1083 {
1084         static_key_slow_inc(&hcall_tracepoint_key);
1085         return 0;
1086 }
1087
1088 void hcall_tracepoint_unregfunc(void)
1089 {
1090         static_key_slow_dec(&hcall_tracepoint_key);
1091 }
1092 #else
1093 /*
1094  * We optimise our hcall path by placing hcall_tracepoint_refcount
1095  * directly in the TOC so we can check if the hcall tracepoints are
1096  * enabled via a single load.
1097  */
1098
1099 /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
1100 extern long hcall_tracepoint_refcount;
1101
1102 int hcall_tracepoint_regfunc(void)
1103 {
1104         hcall_tracepoint_refcount++;
1105         return 0;
1106 }
1107
1108 void hcall_tracepoint_unregfunc(void)
1109 {
1110         hcall_tracepoint_refcount--;
1111 }
1112 #endif
1113
1114 /*
1115  * Since the tracing code might execute hcalls we need to guard against
1116  * recursion. One example of this are spinlocks calling H_YIELD on
1117  * shared processor partitions.
1118  */
1119 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
1120
1121
1122 void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
1123 {
1124         unsigned long flags;
1125         unsigned int *depth;
1126
1127         /*
1128          * We cannot call tracepoints inside RCU idle regions which
1129          * means we must not trace H_CEDE.
1130          */
1131         if (opcode == H_CEDE)
1132                 return;
1133
1134         local_irq_save(flags);
1135
1136         depth = this_cpu_ptr(&hcall_trace_depth);
1137
1138         if (*depth)
1139                 goto out;
1140
1141         (*depth)++;
1142         preempt_disable();
1143         trace_hcall_entry(opcode, args);
1144         (*depth)--;
1145
1146 out:
1147         local_irq_restore(flags);
1148 }
1149
1150 void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
1151 {
1152         unsigned long flags;
1153         unsigned int *depth;
1154
1155         if (opcode == H_CEDE)
1156                 return;
1157
1158         local_irq_save(flags);
1159
1160         depth = this_cpu_ptr(&hcall_trace_depth);
1161
1162         if (*depth)
1163                 goto out;
1164
1165         (*depth)++;
1166         trace_hcall_exit(opcode, retval, retbuf);
1167         preempt_enable();
1168         (*depth)--;
1169
1170 out:
1171         local_irq_restore(flags);
1172 }
1173 #endif
1174
1175 /**
1176  * h_get_mpp
1177  * H_GET_MPP hcall returns info in 7 parms
1178  */
1179 int h_get_mpp(struct hvcall_mpp_data *mpp_data)
1180 {
1181         int rc;
1182         unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
1183
1184         rc = plpar_hcall9(H_GET_MPP, retbuf);
1185
1186         mpp_data->entitled_mem = retbuf[0];
1187         mpp_data->mapped_mem = retbuf[1];
1188
1189         mpp_data->group_num = (retbuf[2] >> 2 * 8) & 0xffff;
1190         mpp_data->pool_num = retbuf[2] & 0xffff;
1191
1192         mpp_data->mem_weight = (retbuf[3] >> 7 * 8) & 0xff;
1193         mpp_data->unallocated_mem_weight = (retbuf[3] >> 6 * 8) & 0xff;
1194         mpp_data->unallocated_entitlement = retbuf[3] & 0xffffffffffffUL;
1195
1196         mpp_data->pool_size = retbuf[4];
1197         mpp_data->loan_request = retbuf[5];
1198         mpp_data->backing_mem = retbuf[6];
1199
1200         return rc;
1201 }
1202 EXPORT_SYMBOL(h_get_mpp);
1203
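/**
 * h_get_mpp_x
 * H_GET_MPP_X hcall returns extended info in 4 parms
 */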
1204 int h_get_mpp_x(struct hvcall_mpp_x_data *mpp_x_data)
1205 {
1206         int rc;
1207         unsigned long retbuf[PLPAR_HCALL9_BUFSIZE] = { 0 };
1208
1209         rc = plpar_hcall9(H_GET_MPP_X, retbuf);
1210
1211         mpp_x_data->coalesced_bytes = retbuf[0];
1212         mpp_x_data->pool_coalesced_bytes = retbuf[1];
1213         mpp_x_data->pool_purr_cycles = retbuf[2];
1214         mpp_x_data->pool_spurr_cycles = retbuf[3];
1215
1216         return rc;
1217 }
1218
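/*
 * Undo the VSID scrambling: recover the protovsid from a VSID by
 * multiplying with the modular multiplicative inverse of the VSID
 * multiplier, splitting the multiplication to avoid 64-bit overflow.
 */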
1219 static unsigned long vsid_unscramble(unsigned long vsid, int ssize)
1220 {
1221         unsigned long protovsid;
1222         unsigned long va_bits = VA_BITS;
1223         unsigned long modinv, vsid_modulus;
1224         unsigned long max_mod_inv, tmp_modinv;
1225
1226         if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
1227                 va_bits = 65;
1228
1229         if (ssize == MMU_SEGSIZE_256M) {
1230                 modinv = VSID_MULINV_256M;
1231                 vsid_modulus = ((1UL << (va_bits - SID_SHIFT)) - 1);
1232         } else {
1233                 modinv = VSID_MULINV_1T;
1234                 vsid_modulus = ((1UL << (va_bits - SID_SHIFT_1T)) - 1);
1235         }
1236
1237         /*
1238          * vsid outside our range.
1239          */
1240         if (vsid >= vsid_modulus)
1241                 return 0;
1242
1243         /*
1244          * If modinv is the modular multiplicative inverse of (x % vsid_modulus)
1245          * and vsid = (protovsid * x) % vsid_modulus, then we say:
1246          *   protovsid = (vsid * modinv) % vsid_modulus
1247          */
1248
1249         /* Check if (vsid * modinv) overflow (63 bits) */
1250         max_mod_inv = 0x7fffffffffffffffull / vsid;
1251         if (modinv < max_mod_inv)
1252                 return (vsid * modinv) % vsid_modulus;
1253
1254         tmp_modinv = modinv/max_mod_inv;
1255         modinv %= max_mod_inv;
1256
1257         protovsid = (((vsid * max_mod_inv) % vsid_modulus) * tmp_modinv) % vsid_modulus;
1258         protovsid = (protovsid + vsid * modinv) % vsid_modulus;
1259
1260         return protovsid;
1261 }
1262
1263 static int __init reserve_vrma_context_id(void)
1264 {
1265         unsigned long protovsid;
1266
1267         /*
1268          * Reserve context ids which map to reserved virtual addresses. For now
1269          * we only reserve the context id which maps to the VRMA VSID. We ignore
1270          * the addresses in "ibm,adjunct-virtual-addresses" because we don't
1271          * enable adjunct support via the "ibm,client-architecture-support"
1272          * interface.
1273          */
1274         protovsid = vsid_unscramble(VRMA_VSID, MMU_SEGSIZE_1T);
1275         hash__reserve_context_id(protovsid >> ESID_BITS_1T);
1276         return 0;
1277 }
1278 machine_device_initcall(pseries, reserve_vrma_context_id);
1279
1280 #ifdef CONFIG_DEBUG_FS
1281 /* debugfs file interface for vpa data */
1282 static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
1283                               loff_t *pos)
1284 {
1285         int cpu = (long)filp->private_data;
1286         struct lppaca *lppaca = &lppaca_of(cpu);
1287
1288         return simple_read_from_buffer(buf, len, pos, lppaca,
1289                                 sizeof(struct lppaca));
1290 }
1291
1292 static const struct file_operations vpa_fops = {
1293         .open           = simple_open,
1294         .read           = vpa_file_read,
1295         .llseek         = default_llseek,
1296 };
1297
1298 static int __init vpa_debugfs_init(void)
1299 {
1300         char name[16];
1301         long i;
1302         static struct dentry *vpa_dir;
1303
1304         if (!firmware_has_feature(FW_FEATURE_SPLPAR))
1305                 return 0;
1306
1307         vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
1308         if (!vpa_dir) {
1309                 pr_warn("%s: can't create vpa root dir\n", __func__);
1310                 return -ENOMEM;
1311         }
1312
1313         /* set up the per-cpu vpa file */
1314         for_each_possible_cpu(i) {
1315                 struct dentry *d;
1316
1317                 sprintf(name, "cpu-%ld", i);
1318
1319                 d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
1320                                         &vpa_fops);
1321                 if (!d) {
1322                         pr_warn("%s: can't create per-cpu vpa file\n",
1323                                         __func__);
1324                         return -ENOMEM;
1325                 }
1326         }
1327
1328         return 0;
1329 }
1330 machine_arch_initcall(pseries, vpa_debugfs_init);
1331 #endif /* CONFIG_DEBUG_FS */