Merge branch 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Tue, 9 Jul 2019 00:34:44 +0000 (17:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Tue, 9 Jul 2019 00:34:44 +0000 (17:34 -0700)
Pull x86 paravirt updates from Ingo Molnar:
 "A handful of paravirt patching code enhancements to make it more
  robust against patching failures, plus related (and not so related)
  cleanups - by Thomas Gleixner and myself"

* 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/paravirt: Rename paravirt_patch_site::instrtype to paravirt_patch_site::type
  x86/paravirt: Standardize 'insn_buff' variable names
  x86/paravirt: Match paravirt patchlet field definition ordering to initialization ordering
  x86/paravirt: Replace the paravirt patch asm magic
  x86/paravirt: Unify the 32/64 bit paravirt patching code
  x86/paravirt: Detect over-sized patching bugs in paravirt_patch_call()
  x86/paravirt: Detect over-sized patching bugs in paravirt_patch_insns()
  x86/paravirt: Remove bogus extern declarations
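
The two "Detect over-sized patching bugs" entries above amount to refusing to patch when a replacement sequence does not fit the patch site. As a rough sketch only (not the exact upstream code), a check of that shape looks like the helper below; the function name patch_insns_checked() is made up for this illustration, while the insn_buff/len/start/end naming follows the diffs further down.

	#include <linux/bug.h>		/* BUG_ON() */
	#include <linux/string.h>	/* memcpy() */

	/*
	 * Sketch: copy a replacement instruction sequence into a patch-site
	 * buffer, refusing loudly if it cannot fit.  Illustrative only.
	 */
	static unsigned int patch_insns_checked(void *insn_buff, unsigned int len,
						const char *start, const char *end)
	{
		unsigned int insn_len = end - start;

		/*
		 * An over-sized patchlet is a bug in the patch-site definition:
		 * stop here instead of silently overwriting the code that
		 * follows the patch site.
		 */
		BUG_ON(insn_len > len);

		memcpy(insn_buff, start, insn_len);
		return insn_len;
	}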

arch/x86/events/intel/ds.c
arch/x86/kernel/Makefile
arch/x86/kernel/alternative.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/paravirt.c
arch/x86/tools/insn_decoder_test.c
arch/x86/tools/insn_sanity.c

diff --combined arch/x86/events/intel/ds.c
index 505c73dc6a730ee87cf9db820dd9b5a6c5848180,50f647e131bc028936b3ccef52aa6ad3158a8bfe..2c8db2c19328bcbf95e8c14b4f58e0d2d6d7d7ba
@@@ -337,7 -337,7 +337,7 @@@ static int alloc_pebs_buffer(int cpu
        struct debug_store *ds = hwev->ds;
        size_t bsiz = x86_pmu.pebs_buffer_size;
        int max, node = cpu_to_node(cpu);
-       void *buffer, *ibuffer, *cea;
+       void *buffer, *insn_buff, *cea;
  
        if (!x86_pmu.pebs)
                return 0;
         * buffer then.
         */
        if (x86_pmu.intel_cap.pebs_format < 2) {
-               ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
-               if (!ibuffer) {
+               insn_buff = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+               if (!insn_buff) {
                        dsfree_pages(buffer, bsiz);
                        return -ENOMEM;
                }
-               per_cpu(insn_buffer, cpu) = ibuffer;
+               per_cpu(insn_buffer, cpu) = insn_buff;
        }
        hwev->ds_pebs_vaddr = buffer;
        /* Update the cpu entry area mapping */
@@@ -684,7 -684,7 +684,7 @@@ struct event_constraint intel_core2_peb
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
        EVENT_CONSTRAINT_END
  };
  
@@@ -693,7 -693,7 +693,7 @@@ struct event_constraint intel_atom_pebs
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0x1),    /* MEM_LOAD_RETIRED.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x01),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x01),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
  
  struct event_constraint intel_slm_pebs_event_constraints[] = {
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x1),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x1),
        /* Allow all events as PEBS with no flags */
        INTEL_ALL_EVENT_CONSTRAINT(0, 0x1),
        EVENT_CONSTRAINT_END
@@@ -726,7 -726,7 +726,7 @@@ struct event_constraint intel_nehalem_p
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
  };
  
@@@ -743,7 -743,7 +743,7 @@@ struct event_constraint intel_westmere_
        INTEL_FLAGS_EVENT_CONSTRAINT(0xcb, 0xf),    /* MEM_LOAD_RETIRED.* */
        INTEL_FLAGS_EVENT_CONSTRAINT(0xf7, 0xf),    /* FP_ASSIST.* */
        /* INST_RETIRED.ANY_P, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        EVENT_CONSTRAINT_END
  };
  
@@@ -752,7 -752,7 +752,7 @@@ struct event_constraint intel_snb_pebs_
        INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
          INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
          INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
          INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@@ -767,9 -767,9 +767,9 @@@ struct event_constraint intel_ivb_pebs_
          INTEL_PLD_CONSTRAINT(0x01cd, 0x8),    /* MEM_TRANS_RETIRED.LAT_ABOVE_THR */
        INTEL_PST_CONSTRAINT(0x02cd, 0x8),    /* MEM_TRANS_RETIRED.PRECISE_STORES */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
@@@ -783,9 -783,9 +783,9 @@@ struct event_constraint intel_hsw_pebs_
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_XLD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
@@@ -806,9 -806,9 +806,9 @@@ struct event_constraint intel_bdw_pebs_
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
        INTEL_PLD_CONSTRAINT(0x01cd, 0xf),    /* MEM_TRANS_RETIRED.* */
        /* UOPS_RETIRED.ALL, inv=1, cmask=16 (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c2, 0xf),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c2, 0xf),
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_NA(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */
  struct event_constraint intel_skl_pebs_event_constraints[] = {
        INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x2),      /* INST_RETIRED.PREC_DIST */
        /* INST_RETIRED.PREC_DIST, inv=1, cmask=16 (cycles:ppp). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108001c0, 0x2),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108001c0, 0x2),
        /* INST_RETIRED.TOTAL_CYCLES_PS (inv=1, cmask=16) (cycles:p). */
 -      INTEL_FLAGS_EVENT_CONSTRAINT(0x108000c0, 0x0f),
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x108000c0, 0x0f),
        INTEL_PLD_CONSTRAINT(0x1cd, 0xf),                     /* MEM_TRANS_RETIRED.* */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
        INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
        EVENT_CONSTRAINT_END
  };
  
 +struct event_constraint intel_icl_pebs_event_constraints[] = {
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL),   /* INST_RETIRED.PREC_DIST */
 +      INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL),  /* SLOTS */
 +
 +      INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
 +      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
 +      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
 +
 +      INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
 +
 +      INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf),                /* MEM_INST_RETIRED.* */
 +
 +      /*
 +       * Everything else is handled by PMU_FL_PEBS_ALL, because we
 +       * need the full constraints from the main table.
 +       */
 +
 +      EVENT_CONSTRAINT_END
 +};
 +
  struct event_constraint *intel_pebs_constraints(struct perf_event *event)
  {
        struct event_constraint *c;
  
        if (x86_pmu.pebs_constraints) {
                for_each_event_constraint(c, x86_pmu.pebs_constraints) {
 -                      if ((event->hw.config & c->cmask) == c->code) {
 +                      if (constraint_match(c, event->hw.config)) {
                                event->hw.flags |= c->flags;
                                return c;
                        }
@@@ -926,87 -906,17 +926,87 @@@ static inline void pebs_update_threshol
  
        if (cpuc->n_pebs == cpuc->n_large_pebs) {
                threshold = ds->pebs_absolute_maximum -
 -                      reserved * x86_pmu.pebs_record_size;
 +                      reserved * cpuc->pebs_record_size;
        } else {
 -              threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
 +              threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
        }
  
        ds->pebs_interrupt_threshold = threshold;
  }
  
 +static void adaptive_pebs_record_size_update(void)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      u64 pebs_data_cfg = cpuc->pebs_data_cfg;
 +      int sz = sizeof(struct pebs_basic);
 +
 +      if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
 +              sz += sizeof(struct pebs_meminfo);
 +      if (pebs_data_cfg & PEBS_DATACFG_GP)
 +              sz += sizeof(struct pebs_gprs);
 +      if (pebs_data_cfg & PEBS_DATACFG_XMMS)
 +              sz += sizeof(struct pebs_xmm);
 +      if (pebs_data_cfg & PEBS_DATACFG_LBRS)
 +              sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
 +
 +      cpuc->pebs_record_size = sz;
 +}
 +
 +#define PERF_PEBS_MEMINFO_TYPE        (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC |   \
 +                              PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
 +                              PERF_SAMPLE_TRANSACTION)
 +
 +static u64 pebs_update_adaptive_cfg(struct perf_event *event)
 +{
 +      struct perf_event_attr *attr = &event->attr;
 +      u64 sample_type = attr->sample_type;
 +      u64 pebs_data_cfg = 0;
 +      bool gprs, tsx_weight;
 +
 +      if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
 +          attr->precise_ip > 1)
 +              return pebs_data_cfg;
 +
 +      if (sample_type & PERF_PEBS_MEMINFO_TYPE)
 +              pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
 +
 +      /*
 +       * We need GPRs when:
 +       * + user requested them
 +       * + precise_ip < 2 for the non event IP
 +       * + For RTM TSX weight we need GPRs for the abort code.
 +       */
 +      gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
 +             (attr->sample_regs_intr & PEBS_GP_REGS);
 +
 +      tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
 +                   ((attr->config & INTEL_ARCH_EVENT_MASK) ==
 +                    x86_pmu.rtm_abort_event);
 +
 +      if (gprs || (attr->precise_ip < 2) || tsx_weight)
 +              pebs_data_cfg |= PEBS_DATACFG_GP;
 +
 +      if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
 +          (attr->sample_regs_intr & PERF_REG_EXTENDED_MASK))
 +              pebs_data_cfg |= PEBS_DATACFG_XMMS;
 +
 +      if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
 +              /*
 +               * For now always log all LBRs. Could configure this
 +               * later.
 +               */
 +              pebs_data_cfg |= PEBS_DATACFG_LBRS |
 +                      ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
 +      }
 +
 +      return pebs_data_cfg;
 +}
 +
  static void
 -pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
 +pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
 +                struct perf_event *event, bool add)
  {
 +      struct pmu *pmu = event->ctx->pmu;
        /*
         * Make sure we get updated with the first PEBS
         * event. It will trigger also during removal, but
                update = true;
        }
  
 +      /*
 +       * The PEBS record doesn't shrink on pmu::del(). Doing so would require
 +       * iterating all remaining PEBS events to reconstruct the config.
 +       */
 +      if (x86_pmu.intel_cap.pebs_baseline && add) {
 +              u64 pebs_data_cfg;
 +
 +              /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
 +              if (cpuc->n_pebs == 1) {
 +                      cpuc->pebs_data_cfg = 0;
 +                      cpuc->pebs_record_size = sizeof(struct pebs_basic);
 +              }
 +
 +              pebs_data_cfg = pebs_update_adaptive_cfg(event);
 +
 +              /* Update pebs_record_size if new event requires more data. */
 +              if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
 +                      cpuc->pebs_data_cfg |= pebs_data_cfg;
 +                      adaptive_pebs_record_size_update();
 +                      update = true;
 +              }
 +      }
 +
        if (update)
                pebs_update_threshold(cpuc);
  }
@@@ -1060,7 -947,7 +1060,7 @@@ void intel_pmu_pebs_add(struct perf_eve
        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
                cpuc->n_large_pebs++;
  
 -      pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 +      pebs_update_state(needed_cb, cpuc, event, true);
  }
  
  void intel_pmu_pebs_enable(struct perf_event *event)
  
        cpuc->pebs_enabled |= 1ULL << hwc->idx;
  
 -      if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 +      if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
                cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled |= 1ULL << 63;
  
 +      if (x86_pmu.intel_cap.pebs_baseline) {
 +              hwc->config |= ICL_EVENTSEL_ADAPTIVE;
 +              if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
 +                      wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
 +                      cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
 +              }
 +      }
 +
        /*
         * Use auto-reload if possible to save a MSR write in the PMI.
         * This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
@@@ -1112,7 -991,7 +1112,7 @@@ void intel_pmu_pebs_del(struct perf_eve
        if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
                cpuc->n_large_pebs--;
  
 -      pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
 +      pebs_update_state(needed_cb, cpuc, event, false);
  }
  
  void intel_pmu_pebs_disable(struct perf_event *event)
  
        cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
  
 -      if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
 +      if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
 +          (x86_pmu.version < 5))
                cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
        else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
                cpuc->pebs_enabled &= ~(1ULL << 63);
@@@ -1247,57 -1125,34 +1247,57 @@@ static int intel_pmu_pebs_fixup_ip(stru
        return 0;
  }
  
 -static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
 +static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
  {
 -      if (pebs->tsx_tuning) {
 -              union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
 +      if (tsx_tuning) {
 +              union hsw_tsx_tuning tsx = { .value = tsx_tuning };
                return tsx.cycles_last_block;
        }
        return 0;
  }
  
 -static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
 +static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
  {
 -      u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
 +      u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
  
        /* For RTM XABORTs also log the abort code from AX */
 -      if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
 -              txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
 +      if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
 +              txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
        return txn;
  }
  
 -static void setup_pebs_sample_data(struct perf_event *event,
 -                                 struct pt_regs *iregs, void *__pebs,
 -                                 struct perf_sample_data *data,
 -                                 struct pt_regs *regs)
 +static inline u64 get_pebs_status(void *n)
  {
 +      if (x86_pmu.intel_cap.pebs_format < 4)
 +              return ((struct pebs_record_nhm *)n)->status;
 +      return ((struct pebs_basic *)n)->applicable_counters;
 +}
 +
  #define PERF_X86_EVENT_PEBS_HSW_PREC \
                (PERF_X86_EVENT_PEBS_ST_HSW | \
                 PERF_X86_EVENT_PEBS_LD_HSW | \
                 PERF_X86_EVENT_PEBS_NA_HSW)
 +
 +static u64 get_data_src(struct perf_event *event, u64 aux)
 +{
 +      u64 val = PERF_MEM_NA;
 +      int fl = event->hw.flags;
 +      bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
 +
 +      if (fl & PERF_X86_EVENT_PEBS_LDLAT)
 +              val = load_latency_data(aux);
 +      else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
 +              val = precise_datala_hsw(event, aux);
 +      else if (fst)
 +              val = precise_store_data(aux);
 +      return val;
 +}
 +
 +static void setup_pebs_fixed_sample_data(struct perf_event *event,
 +                                 struct pt_regs *iregs, void *__pebs,
 +                                 struct perf_sample_data *data,
 +                                 struct pt_regs *regs)
 +{
        /*
         * We cast to the biggest pebs_record but are careful not to
         * unconditionally access the 'extra' entries.
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct pebs_record_skl *pebs = __pebs;
        u64 sample_type;
 -      int fll, fst, dsrc;
 -      int fl = event->hw.flags;
 +      int fll;
  
        if (pebs == NULL)
                return;
  
        sample_type = event->attr.sample_type;
 -      dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
 -
 -      fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
 -      fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
 +      fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
  
        perf_sample_data_init(data, 0, event->hw.last_period);
  
        /*
         * data.data_src encodes the data source
         */
 -      if (dsrc) {
 -              u64 val = PERF_MEM_NA;
 -              if (fll)
 -                      val = load_latency_data(pebs->dse);
 -              else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
 -                      val = precise_datala_hsw(event, pebs->dse);
 -              else if (fst)
 -                      val = precise_store_data(pebs->dse);
 -              data->data_src.val = val;
 -      }
 +      if (sample_type & PERF_SAMPLE_DATA_SRC)
 +              data->data_src.val = get_data_src(event, pebs->dse);
  
        /*
         * We must however always use iregs for the unwinder to stay sane; the
        if (x86_pmu.intel_cap.pebs_format >= 2) {
                /* Only set the TSX weight when no memory weight. */
                if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
 -                      data->weight = intel_hsw_weight(pebs);
 +                      data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
  
                if (sample_type & PERF_SAMPLE_TRANSACTION)
 -                      data->txn = intel_hsw_transaction(pebs);
 +                      data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
 +                                                            pebs->ax);
        }
  
        /*
                data->br_stack = &cpuc->lbr_stack;
  }
  
 +static void adaptive_pebs_save_regs(struct pt_regs *regs,
 +                                  struct pebs_gprs *gprs)
 +{
 +      regs->ax = gprs->ax;
 +      regs->bx = gprs->bx;
 +      regs->cx = gprs->cx;
 +      regs->dx = gprs->dx;
 +      regs->si = gprs->si;
 +      regs->di = gprs->di;
 +      regs->bp = gprs->bp;
 +      regs->sp = gprs->sp;
 +#ifndef CONFIG_X86_32
 +      regs->r8 = gprs->r8;
 +      regs->r9 = gprs->r9;
 +      regs->r10 = gprs->r10;
 +      regs->r11 = gprs->r11;
 +      regs->r12 = gprs->r12;
 +      regs->r13 = gprs->r13;
 +      regs->r14 = gprs->r14;
 +      regs->r15 = gprs->r15;
 +#endif
 +}
 +
 +/*
 + * With adaptive PEBS the layout depends on what fields are configured.
 + */
 +
 +static void setup_pebs_adaptive_sample_data(struct perf_event *event,
 +                                          struct pt_regs *iregs, void *__pebs,
 +                                          struct perf_sample_data *data,
 +                                          struct pt_regs *regs)
 +{
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct pebs_basic *basic = __pebs;
 +      void *next_record = basic + 1;
 +      u64 sample_type;
 +      u64 format_size;
 +      struct pebs_meminfo *meminfo = NULL;
 +      struct pebs_gprs *gprs = NULL;
 +      struct x86_perf_regs *perf_regs;
 +
 +      if (basic == NULL)
 +              return;
 +
 +      perf_regs = container_of(regs, struct x86_perf_regs, regs);
 +      perf_regs->xmm_regs = NULL;
 +
 +      sample_type = event->attr.sample_type;
 +      format_size = basic->format_size;
 +      perf_sample_data_init(data, 0, event->hw.last_period);
 +      data->period = event->hw.last_period;
 +
 +      if (event->attr.use_clockid == 0)
 +              data->time = native_sched_clock_from_tsc(basic->tsc);
 +
 +      /*
 +       * We must however always use iregs for the unwinder to stay sane; the
 +       * record BP,SP,IP can point into thin air when the record is from a
 +       * previous PMI context or an (I)RET happened between the record and
 +       * PMI.
 +       */
 +      if (sample_type & PERF_SAMPLE_CALLCHAIN)
 +              data->callchain = perf_callchain(event, iregs);
 +
 +      *regs = *iregs;
 +      /* The ip in basic is EventingIP */
 +      set_linear_ip(regs, basic->ip);
 +      regs->flags = PERF_EFLAGS_EXACT;
 +
 +      /*
 +       * The record for MEMINFO is in front of GP
 +       * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
 +       * Save the pointer here but process later.
 +       */
 +      if (format_size & PEBS_DATACFG_MEMINFO) {
 +              meminfo = next_record;
 +              next_record = meminfo + 1;
 +      }
 +
 +      if (format_size & PEBS_DATACFG_GP) {
 +              gprs = next_record;
 +              next_record = gprs + 1;
 +
 +              if (event->attr.precise_ip < 2) {
 +                      set_linear_ip(regs, gprs->ip);
 +                      regs->flags &= ~PERF_EFLAGS_EXACT;
 +              }
 +
 +              if (sample_type & PERF_SAMPLE_REGS_INTR)
 +                      adaptive_pebs_save_regs(regs, gprs);
 +      }
 +
 +      if (format_size & PEBS_DATACFG_MEMINFO) {
 +              if (sample_type & PERF_SAMPLE_WEIGHT)
 +                      data->weight = meminfo->latency ?:
 +                              intel_get_tsx_weight(meminfo->tsx_tuning);
 +
 +              if (sample_type & PERF_SAMPLE_DATA_SRC)
 +                      data->data_src.val = get_data_src(event, meminfo->aux);
 +
 +              if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
 +                      data->addr = meminfo->address;
 +
 +              if (sample_type & PERF_SAMPLE_TRANSACTION)
 +                      data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
 +                                                        gprs ? gprs->ax : 0);
 +      }
 +
 +      if (format_size & PEBS_DATACFG_XMMS) {
 +              struct pebs_xmm *xmm = next_record;
 +
 +              next_record = xmm + 1;
 +              perf_regs->xmm_regs = xmm->xmm;
 +      }
 +
 +      if (format_size & PEBS_DATACFG_LBRS) {
 +              struct pebs_lbr *lbr = next_record;
 +              int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
 +                                      & 0xff) + 1;
 +              next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
 +
 +              if (has_branch_stack(event)) {
 +                      intel_pmu_store_pebs_lbrs(lbr);
 +                      data->br_stack = &cpuc->lbr_stack;
 +              }
 +      }
 +
 +      WARN_ONCE(next_record != __pebs + (format_size >> 48),
 +                      "PEBS record size %llu, expected %llu, config %llx\n",
 +                      format_size >> 48,
 +                      (u64)(next_record - __pebs),
 +                      basic->format_size);
 +}
 +
  static inline void *
  get_next_pebs_record_by_bit(void *base, void *top, int bit)
  {
        if (base == NULL)
                return NULL;
  
 -      for (at = base; at < top; at += x86_pmu.pebs_record_size) {
 -              struct pebs_record_nhm *p = at;
 +      for (at = base; at < top; at += cpuc->pebs_record_size) {
 +              unsigned long status = get_pebs_status(at);
  
 -              if (test_bit(bit, (unsigned long *)&p->status)) {
 +              if (test_bit(bit, (unsigned long *)&status)) {
                        /* PEBS v3 has accurate status bits */
                        if (x86_pmu.intel_cap.pebs_format >= 3)
                                return at;
  
 -                      if (p->status == (1 << bit))
 +                      if (status == (1 << bit))
                                return at;
  
                        /* clear non-PEBS bit and re-check */
 -                      pebs_status = p->status & cpuc->pebs_enabled;
 +                      pebs_status = status & cpuc->pebs_enabled;
                        pebs_status &= PEBS_COUNTER_MASK;
                        if (pebs_status == (1 << bit))
                                return at;
@@@ -1678,18 -1410,11 +1678,18 @@@ intel_pmu_save_and_restart_reload(struc
  static void __intel_pmu_pebs_event(struct perf_event *event,
                                   struct pt_regs *iregs,
                                   void *base, void *top,
 -                                 int bit, int count)
 +                                 int bit, int count,
 +                                 void (*setup_sample)(struct perf_event *,
 +                                              struct pt_regs *,
 +                                              void *,
 +                                              struct perf_sample_data *,
 +                                              struct pt_regs *))
  {
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        struct perf_sample_data data;
 -      struct pt_regs regs;
 +      struct x86_perf_regs perf_regs;
 +      struct pt_regs *regs = &perf_regs.regs;
        void *at = get_next_pebs_record_by_bit(base, top, bit);
  
        if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
                return;
  
        while (count > 1) {
 -              setup_pebs_sample_data(event, iregs, at, &data, &regs);
 -              perf_event_output(event, &data, &regs);
 -              at += x86_pmu.pebs_record_size;
 +              setup_sample(event, iregs, at, &data, regs);
 +              perf_event_output(event, &data, regs);
 +              at += cpuc->pebs_record_size;
                at = get_next_pebs_record_by_bit(at, top, bit);
                count--;
        }
  
 -      setup_pebs_sample_data(event, iregs, at, &data, &regs);
 +      setup_sample(event, iregs, at, &data, regs);
  
        /*
         * All but the last records are processed.
         * The last one is left to be able to call the overflow handler.
         */
 -      if (perf_event_overflow(event, &data, &regs)) {
 +      if (perf_event_overflow(event, &data, regs)) {
                x86_pmu_stop(event, 0);
                return;
        }
@@@ -1758,27 -1483,7 +1758,27 @@@ static void intel_pmu_drain_pebs_core(s
                return;
        }
  
 -      __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
 +      __intel_pmu_pebs_event(event, iregs, at, top, 0, n,
 +                             setup_pebs_fixed_sample_data);
 +}
 +
 +static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
 +{
 +      struct perf_event *event;
 +      int bit;
 +
 +      /*
 +       * The drain_pebs() could be called twice in a short period
 +       * for auto-reload event in pmu::read(). There are no
 +       * overflows have happened in between.
 +       * It needs to call intel_pmu_save_and_restart_reload() to
 +       * update the event->count for this case.
 +       */
 +      for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
 +              event = cpuc->events[bit];
 +              if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
 +                      intel_pmu_save_and_restart_reload(event, 0);
 +      }
  }
  
  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
        }
  
        if (unlikely(base >= top)) {
 -              /*
 -               * The drain_pebs() could be called twice in a short period
 -               * for auto-reload event in pmu::read(). There are no
 -               * overflows have happened in between.
 -               * It needs to call intel_pmu_save_and_restart_reload() to
 -               * update the event->count for this case.
 -               */
 -              for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
 -                               size) {
 -                      event = cpuc->events[bit];
 -                      if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
 -                              intel_pmu_save_and_restart_reload(event, 0);
 -              }
 +              intel_pmu_pebs_event_update_no_drain(cpuc, size);
                return;
        }
  
  
                /* PEBS v3 has more accurate status bits */
                if (x86_pmu.intel_cap.pebs_format >= 3) {
 -                      for_each_set_bit(bit, (unsigned long *)&pebs_status,
 -                                       size)
 +                      for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
                                counts[bit]++;
  
                        continue;
                 * If collision happened, the record will be dropped.
                 */
                if (p->status != (1ULL << bit)) {
 -                      for_each_set_bit(i, (unsigned long *)&pebs_status,
 -                                       x86_pmu.max_pebs_events)
 +                      for_each_set_bit(i, (unsigned long *)&pebs_status, size)
                                error[i]++;
                        continue;
                }
                counts[bit]++;
        }
  
 -      for (bit = 0; bit < size; bit++) {
 +      for_each_set_bit(bit, (unsigned long *)&mask, size) {
                if ((counts[bit] == 0) && (error[bit] == 0))
                        continue;
  
  
                if (counts[bit]) {
                        __intel_pmu_pebs_event(event, iregs, base,
 -                                             top, bit, counts[bit]);
 +                                             top, bit, counts[bit],
 +                                             setup_pebs_fixed_sample_data);
                }
        }
  }
  
 +static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs)
 +{
 +      short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
 +      struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 +      struct debug_store *ds = cpuc->ds;
 +      struct perf_event *event;
 +      void *base, *at, *top;
 +      int bit, size;
 +      u64 mask;
 +
 +      if (!x86_pmu.pebs_active)
 +              return;
 +
 +      base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
 +      top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
 +
 +      ds->pebs_index = ds->pebs_buffer_base;
 +
 +      mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
 +             (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
 +      size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
 +
 +      if (unlikely(base >= top)) {
 +              intel_pmu_pebs_event_update_no_drain(cpuc, size);
 +              return;
 +      }
 +
 +      for (at = base; at < top; at += cpuc->pebs_record_size) {
 +              u64 pebs_status;
 +
 +              pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
 +              pebs_status &= mask;
 +
 +              for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
 +                      counts[bit]++;
 +      }
 +
 +      for_each_set_bit(bit, (unsigned long *)&mask, size) {
 +              if (counts[bit] == 0)
 +                      continue;
 +
 +              event = cpuc->events[bit];
 +              if (WARN_ON_ONCE(!event))
 +                      continue;
 +
 +              if (WARN_ON_ONCE(!event->attr.precise_ip))
 +                      continue;
 +
 +              __intel_pmu_pebs_event(event, iregs, base,
 +                                     top, bit, counts[bit],
 +                                     setup_pebs_adaptive_sample_data);
 +      }
 +}
 +
  /*
   * BTS, PEBS probe and setup
   */
@@@ -1966,15 -1630,10 +1966,15 @@@ void __init intel_ds_init(void
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
        if (x86_pmu.version <= 4)
                x86_pmu.pebs_no_isolation = 1;
 +
        if (x86_pmu.pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
 +              char *pebs_qual = "";
                int format = x86_pmu.intel_cap.pebs_format;
  
 +              if (format < 4)
 +                      x86_pmu.intel_cap.pebs_baseline = 0;
 +
                switch (format) {
                case 0:
                        pr_cont("PEBS fmt0%c, ", pebs_type);
                        x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
                        break;
  
 +              case 4:
 +                      x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
 +                      x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
 +                      if (x86_pmu.intel_cap.pebs_baseline) {
 +                              x86_pmu.large_pebs_flags |=
 +                                      PERF_SAMPLE_BRANCH_STACK |
 +                                      PERF_SAMPLE_TIME;
 +                              x86_pmu.flags |= PMU_FL_PEBS_ALL;
 +                              pebs_qual = "-baseline";
 +                              x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
 +                      } else {
 +                              /* Only basic record supported */
 +                              x86_pmu.large_pebs_flags &=
 +                                      ~(PERF_SAMPLE_ADDR |
 +                                        PERF_SAMPLE_TIME |
 +                                        PERF_SAMPLE_DATA_SRC |
 +                                        PERF_SAMPLE_TRANSACTION |
 +                                        PERF_SAMPLE_REGS_USER |
 +                                        PERF_SAMPLE_REGS_INTR);
 +                      }
 +                      pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
 +                      break;
 +
                default:
                        pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.pebs = 0;
diff --combined arch/x86/kernel/Makefile
index ce1b5cc360a27ba13160856ddeffd4040ebeffca,62e78a3fd31e02bf3f900f0f4f475dfe10795f51..3578ad248bc98319e3a0f5769615c72e191d5569
@@@ -30,7 -30,7 +30,7 @@@ KASAN_SANITIZE_paravirt.o                             := 
  
  OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o   := y
  OBJECT_FILES_NON_STANDARD_test_nx.o                   := y
- OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o    := y
+ OBJECT_FILES_NON_STANDARD_paravirt_patch.o            := y
  
  ifdef CONFIG_FRAME_POINTER
  OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o            := y
@@@ -42,7 -42,7 +42,7 @@@ endi
  # non-deterministic coverage.
  KCOV_INSTRUMENT               := n
  
 -CFLAGS_irq.o := -I$(src)/../include/asm/trace
 +CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace
  
  obj-y                 := process_$(BITS).o signal.o
  obj-$(CONFIG_COMPAT)  += signal_compat.o
@@@ -112,7 -112,7 +112,7 @@@ obj-$(CONFIG_AMD_NB)               += amd_nb.
  obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
  
  obj-$(CONFIG_KVM_GUEST)               += kvm.o kvmclock.o
- obj-$(CONFIG_PARAVIRT)                += paravirt.o paravirt_patch_$(BITS).o
+ obj-$(CONFIG_PARAVIRT)                += paravirt.o paravirt_patch.o
  obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
  obj-$(CONFIG_PARAVIRT_CLOCK)  += pvclock.o
  obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o
diff --combined arch/x86/kernel/alternative.c
index c3468b5242fdeec3edb0f81704049ed1bc658fc7,7ea5a3764fccf50f49e8b71cfe0399a06c9f3c69..99ef8b6f9a1a5abf2e89c37a79da906d2103b66c
@@@ -1,4 -1,3 +1,4 @@@
 +// SPDX-License-Identifier: GPL-2.0-only
  #define pr_fmt(fmt) "SMP alternatives: " fmt
  
  #include <linux/module.h>
@@@ -13,8 -12,6 +13,8 @@@
  #include <linux/slab.h>
  #include <linux/kdebug.h>
  #include <linux/kprobes.h>
 +#include <linux/mmu_context.h>
 +#include <linux/bsearch.h>
  #include <asm/text-patching.h>
  #include <asm/alternative.h>
  #include <asm/sections.h>
@@@ -267,7 -264,7 +267,7 @@@ static void __init_or_module add_nops(v
  
  extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
  extern s32 __smp_locks[], __smp_locks_end[];
 -void *text_poke_early(void *addr, const void *opcode, size_t len);
 +void text_poke_early(void *addr, const void *opcode, size_t len);
  
  /*
   * Are we looking at a near JMP with a 1 or 4-byte displacement.
@@@ -278,7 -275,7 +278,7 @@@ static inline bool is_jmp(const u8 opco
  }
  
  static void __init_or_module
- recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+ recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insn_buff)
  {
        u8 *next_rip, *tgt_rip;
        s32 n_dspl, o_dspl;
        if (a->replacementlen != 5)
                return;
  
-       o_dspl = *(s32 *)(insnbuf + 1);
+       o_dspl = *(s32 *)(insn_buff + 1);
  
        /* next_rip of the replacement JMP */
        next_rip = repl_insn + a->replacementlen;
  two_byte_jmp:
        n_dspl -= 2;
  
-       insnbuf[0] = 0xeb;
-       insnbuf[1] = (s8)n_dspl;
-       add_nops(insnbuf + 2, 3);
+       insn_buff[0] = 0xeb;
+       insn_buff[1] = (s8)n_dspl;
+       add_nops(insn_buff + 2, 3);
  
        repl_len = 2;
        goto done;
  five_byte_jmp:
        n_dspl -= 5;
  
-       insnbuf[0] = 0xe9;
-       *(s32 *)&insnbuf[1] = n_dspl;
+       insn_buff[0] = 0xe9;
+       *(s32 *)&insn_buff[1] = n_dspl;
  
        repl_len = 5;
  
@@@ -371,7 -368,7 +371,7 @@@ void __init_or_module noinline apply_al
  {
        struct alt_instr *a;
        u8 *instr, *replacement;
-       u8 insnbuf[MAX_PATCH_LEN];
+       u8 insn_buff[MAX_PATCH_LEN];
  
        DPRINTK("alt table %px, -> %px", start, end);
        /*
         * order.
         */
        for (a = start; a < end; a++) {
-               int insnbuf_sz = 0;
+               int insn_buff_sz = 0;
  
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
-               BUG_ON(a->instrlen > sizeof(insnbuf));
+               BUG_ON(a->instrlen > sizeof(insn_buff));
                BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
                if (!boot_cpu_has(a->cpuid)) {
                        if (a->padlen > 1)
                DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr);
                DUMP_BYTES(replacement, a->replacementlen, "%px: rpl_insn: ", replacement);
  
-               memcpy(insnbuf, replacement, a->replacementlen);
-               insnbuf_sz = a->replacementlen;
+               memcpy(insn_buff, replacement, a->replacementlen);
+               insn_buff_sz = a->replacementlen;
  
                /*
                 * 0xe8 is a relative jump; fix the offset.
                 * Instruction length is checked before the opcode to avoid
                 * accessing uninitialized bytes for zero-length replacements.
                 */
-               if (a->replacementlen == 5 && *insnbuf == 0xe8) {
-                       *(s32 *)(insnbuf + 1) += replacement - instr;
+               if (a->replacementlen == 5 && *insn_buff == 0xe8) {
+                       *(s32 *)(insn_buff + 1) += replacement - instr;
                        DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
-                               *(s32 *)(insnbuf + 1),
-                               (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+                               *(s32 *)(insn_buff + 1),
+                               (unsigned long)instr + *(s32 *)(insn_buff + 1) + 5);
                }
  
                if (a->replacementlen && is_jmp(replacement[0]))
-                       recompute_jump(a, instr, replacement, insnbuf);
+                       recompute_jump(a, instr, replacement, insn_buff);
  
                if (a->instrlen > a->replacementlen) {
-                       add_nops(insnbuf + a->replacementlen,
+                       add_nops(insn_buff + a->replacementlen,
                                 a->instrlen - a->replacementlen);
-                       insnbuf_sz += a->instrlen - a->replacementlen;
+                       insn_buff_sz += a->instrlen - a->replacementlen;
                }
-               DUMP_BYTES(insnbuf, insnbuf_sz, "%px: final_insn: ", instr);
+               DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
  
-               text_poke_early(instr, insnbuf, insnbuf_sz);
+               text_poke_early(instr, insn_buff, insn_buff_sz);
        }
  }
  
@@@ -594,105 -591,32 +594,104 @@@ void __init_or_module apply_paravirt(st
                                     struct paravirt_patch_site *end)
  {
        struct paravirt_patch_site *p;
-       char insnbuf[MAX_PATCH_LEN];
+       char insn_buff[MAX_PATCH_LEN];
  
        for (p = start; p < end; p++) {
                unsigned int used;
  
                BUG_ON(p->len > MAX_PATCH_LEN);
                /* prep the buffer with the original instructions */
-               memcpy(insnbuf, p->instr, p->len);
-               used = pv_ops.init.patch(p->instrtype, insnbuf,
-                                        (unsigned long)p->instr, p->len);
+               memcpy(insn_buff, p->instr, p->len);
+               used = pv_ops.init.patch(p->type, insn_buff, (unsigned long)p->instr, p->len);
  
                BUG_ON(used > p->len);
  
                /* Pad the rest with nops */
-               add_nops(insnbuf + used, p->len - used);
-               text_poke_early(p->instr, insnbuf, p->len);
+               add_nops(insn_buff + used, p->len - used);
+               text_poke_early(p->instr, insn_buff, p->len);
        }
  }
  extern struct paravirt_patch_site __start_parainstructions[],
        __stop_parainstructions[];
  #endif        /* CONFIG_PARAVIRT */
  
 +/*
 + * Self-test for the INT3 based CALL emulation code.
 + *
 + * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up
 + * properly and that there is a stack gap between the INT3 frame and the
 + * previous context. Without this gap doing a virtual PUSH on the interrupted
 + * stack would corrupt the INT3 IRET frame.
 + *
 + * See entry_{32,64}.S for more details.
 + */
 +static void __init int3_magic(unsigned int *ptr)
 +{
 +      *ptr = 1;
 +}
 +
 +extern __initdata unsigned long int3_selftest_ip; /* defined in asm below */
 +
 +static int __init
 +int3_exception_notify(struct notifier_block *self, unsigned long val, void *data)
 +{
 +      struct die_args *args = data;
 +      struct pt_regs *regs = args->regs;
 +
 +      if (!regs || user_mode(regs))
 +              return NOTIFY_DONE;
 +
 +      if (val != DIE_INT3)
 +              return NOTIFY_DONE;
 +
 +      if (regs->ip - INT3_INSN_SIZE != int3_selftest_ip)
 +              return NOTIFY_DONE;
 +
 +      int3_emulate_call(regs, (unsigned long)&int3_magic);
 +      return NOTIFY_STOP;
 +}
 +
 +static void __init int3_selftest(void)
 +{
 +      static __initdata struct notifier_block int3_exception_nb = {
 +              .notifier_call  = int3_exception_notify,
 +              .priority       = INT_MAX-1, /* last */
 +      };
 +      unsigned int val = 0;
 +
 +      BUG_ON(register_die_notifier(&int3_exception_nb));
 +
 +      /*
 +       * Basically: int3_magic(&val); but really complicated :-)
 +       *
 +       * Stick the address of the INT3 instruction into int3_selftest_ip,
 +       * then trigger the INT3, padded with NOPs to match a CALL instruction
 +       * length.
 +       */
 +      asm volatile ("1: int3; nop; nop; nop; nop\n\t"
 +                    ".pushsection .init.data,\"aw\"\n\t"
 +                    ".align " __ASM_SEL(4, 8) "\n\t"
 +                    ".type int3_selftest_ip, @object\n\t"
 +                    ".size int3_selftest_ip, " __ASM_SEL(4, 8) "\n\t"
 +                    "int3_selftest_ip:\n\t"
 +                    __ASM_SEL(.long, .quad) " 1b\n\t"
 +                    ".popsection\n\t"
 +                    : : __ASM_SEL_RAW(a, D) (&val) : "memory");
 +
 +      BUG_ON(val != 1);
 +
 +      unregister_die_notifier(&int3_exception_nb);
 +}
 +
  void __init alternative_instructions(void)
  {
 -      /* The patching is not fully atomic, so try to avoid local interruptions
 -         that might execute the to be patched code.
 -         Other CPUs are not running. */
 +      int3_selftest();
 +
 +      /*
 +       * The patching is not fully atomic, so try to avoid local
 +       * interruptions that might execute the to be patched code.
 +       * Other CPUs are not running.
 +       */
        stop_nmi();
  
        /*
                                            _text, _etext);
        }
  
 -      if (!uniproc_patched || num_possible_cpus() == 1)
 +      if (!uniproc_patched || num_possible_cpus() == 1) {
                free_init_pages("SMP alternatives",
                                (unsigned long)__smp_locks,
                                (unsigned long)__smp_locks_end);
 +      }
  #endif
  
        apply_paravirt(__parainstructions, __parainstructions_end);
   * instructions. And on the local CPU you need to be protected again NMI or MCE
   * handlers seeing an inconsistent instruction while you patch.
   */
 -void *__init_or_module text_poke_early(void *addr, const void *opcode,
 -                                            size_t len)
 +void __init_or_module text_poke_early(void *addr, const void *opcode,
 +                                    size_t len)
 +{
 +      unsigned long flags;
 +
 +      if (boot_cpu_has(X86_FEATURE_NX) &&
 +          is_module_text_address((unsigned long)addr)) {
 +              /*
 +               * Modules text is marked initially as non-executable, so the
 +               * code cannot be running and speculative code-fetches are
 +               * prevented. Just change the code.
 +               */
 +              memcpy(addr, opcode, len);
 +      } else {
 +              local_irq_save(flags);
 +              memcpy(addr, opcode, len);
 +              local_irq_restore(flags);
 +              sync_core();
 +
 +              /*
 +               * Could also do a CLFLUSH here to speed up CPU recovery; but
 +               * that causes hangs on some VIA CPUs.
 +               */
 +      }
 +}
 +
 +__ro_after_init struct mm_struct *poking_mm;
 +__ro_after_init unsigned long poking_addr;
 +
 +static void *__text_poke(void *addr, const void *opcode, size_t len)
  {
 +      bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
 +      struct page *pages[2] = {NULL};
 +      temp_mm_state_t prev;
        unsigned long flags;
 +      pte_t pte, *ptep;
 +      spinlock_t *ptl;
 +      pgprot_t pgprot;
 +
 +      /*
 +       * While boot memory allocator is running we cannot use struct pages as
 +       * they are not yet initialized. There is no way to recover.
 +       */
 +      BUG_ON(!after_bootmem);
 +
 +      if (!core_kernel_text((unsigned long)addr)) {
 +              pages[0] = vmalloc_to_page(addr);
 +              if (cross_page_boundary)
 +                      pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 +      } else {
 +              pages[0] = virt_to_page(addr);
 +              WARN_ON(!PageReserved(pages[0]));
 +              if (cross_page_boundary)
 +                      pages[1] = virt_to_page(addr + PAGE_SIZE);
 +      }
 +      /*
 +       * If something went wrong, crash and burn since recovery paths are not
 +       * implemented.
 +       */
 +      BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
 +
        local_irq_save(flags);
 -      memcpy(addr, opcode, len);
 +
 +      /*
 +       * Map the page without the global bit, as TLB flushing is done with
 +       * flush_tlb_mm_range(), which is intended for non-global PTEs.
 +       */
 +      pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
 +
 +      /*
 +       * The lock is not really needed, but this allows to avoid open-coding.
 +       */
 +      ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
 +
 +      /*
 +       * This must not fail; preallocated in poking_init().
 +       */
 +      VM_BUG_ON(!ptep);
 +
 +      pte = mk_pte(pages[0], pgprot);
 +      set_pte_at(poking_mm, poking_addr, ptep, pte);
 +
 +      if (cross_page_boundary) {
 +              pte = mk_pte(pages[1], pgprot);
 +              set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
 +      }
 +
 +      /*
 +       * Loading the temporary mm behaves as a compiler barrier, which
 +       * guarantees that the PTE will be set at the time memcpy() is done.
 +       */
 +      prev = use_temporary_mm(poking_mm);
 +
 +      kasan_disable_current();
 +      memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
 +      kasan_enable_current();
 +
 +      /*
 +       * Ensure that the PTE is only cleared after the instructions of memcpy
 +       * were issued by using a compiler barrier.
 +       */
 +      barrier();
 +
 +      pte_clear(poking_mm, poking_addr, ptep);
 +      if (cross_page_boundary)
 +              pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
 +
 +      /*
 +       * Loading the previous page-table hierarchy requires a serializing
 +       * instruction that already allows the core to see the updated version.
 +       * Xen-PV is assumed to serialize execution in a similar manner.
 +       */
 +      unuse_temporary_mm(prev);
 +
 +      /*
 +       * Flushing the TLB might involve IPIs, which would require enabled
 +       * IRQs, but not if the mm is not used, as it is in this point.
 +       */
 +      flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
 +                         (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
 +                         PAGE_SHIFT, false);
 +
 +      /*
 +       * If the text does not match what we just wrote then something is
 +       * fundamentally screwy; there's nothing we can really do about that.
 +       */
 +      BUG_ON(memcmp(addr, opcode, len));
 +
 +      pte_unmap_unlock(ptep, ptl);
        local_irq_restore(flags);
 -      sync_core();
 -      /* Could also do a CLFLUSH here to speed up CPU recovery; but
 -         that causes hangs on some VIA CPUs. */
        return addr;
  }
  
   * It means the size must be writable atomically and the address must be aligned
   * in a way that permits an atomic write. It also makes sure we fit on a single
   * page.
 + *
 + * Note that the caller must ensure that if the modified code is part of a
 + * module, the module would not be removed during poking. This can be achieved
 + * by registering a module notifier, and ordering module removal and patching
 + * trough a mutex.
   */
  void *text_poke(void *addr, const void *opcode, size_t len)
  {
 -      unsigned long flags;
 -      char *vaddr;
 -      struct page *pages[2];
 -      int i;
 -
 -      /*
 -       * While boot memory allocator is runnig we cannot use struct
 -       * pages as they are not yet initialized.
 -       */
 -      BUG_ON(!after_bootmem);
 -
        lockdep_assert_held(&text_mutex);
  
 -      if (!core_kernel_text((unsigned long)addr)) {
 -              pages[0] = vmalloc_to_page(addr);
 -              pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 -      } else {
 -              pages[0] = virt_to_page(addr);
 -              WARN_ON(!PageReserved(pages[0]));
 -              pages[1] = virt_to_page(addr + PAGE_SIZE);
 -      }
 -      BUG_ON(!pages[0]);
 -      local_irq_save(flags);
 -      set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
 -      if (pages[1])
 -              set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
 -      vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
 -      memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
 -      clear_fixmap(FIX_TEXT_POKE0);
 -      if (pages[1])
 -              clear_fixmap(FIX_TEXT_POKE1);
 -      local_flush_tlb();
 -      sync_core();
 -      /* Could also do a CLFLUSH here to speed up CPU recovery; but
 -         that causes hangs on some VIA CPUs. */
 -      for (i = 0; i < len; i++)
 -              BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
 -      local_irq_restore(flags);
 -      return addr;
 +      return __text_poke(addr, opcode, len);
 +}
 +
 +/**
 + * text_poke_kgdb - Update instructions on a live kernel by kgdb
 + * @addr: address to modify
 + * @opcode: source of the copy
 + * @len: length to copy
 + *
 + * Only atomic text poke/set should be allowed when not doing early patching.
 + * It means the size must be writable atomically and the address must be aligned
 + * in a way that permits an atomic write. It also makes sure we fit on a single
 + * page.
 + *
 + * Context: should only be used by kgdb, which ensures no other core is running,
 + *        despite the fact it does not hold the text_mutex.
 + */
 +void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
 +{
 +      return __text_poke(addr, opcode, len);
  }
  
  static void do_sync_core(void *info)
        sync_core();
  }
  
 -static bool bp_patching_in_progress;
 -static void *bp_int3_handler, *bp_int3_addr;
 +static struct bp_patching_desc {
 +      struct text_poke_loc *vec;
 +      int nr_entries;
 +} bp_patching;
 +
 +static int patch_cmp(const void *key, const void *elt)
 +{
 +      struct text_poke_loc *tp = (struct text_poke_loc *) elt;
 +
 +      if (key < tp->addr)
 +              return -1;
 +      if (key > tp->addr)
 +              return 1;
 +      return 0;
 +}
 +NOKPROBE_SYMBOL(patch_cmp);
  
  int poke_int3_handler(struct pt_regs *regs)
  {
 +      struct text_poke_loc *tp;
 +      unsigned char int3 = 0xcc;
 +      void *ip;
 +
        /*
         * Having observed our INT3 instruction, we now must observe
 -       * bp_patching_in_progress.
 +       * bp_patching.nr_entries.
         *
 -       *      in_progress = TRUE              INT3
 +       *      nr_entries != 0                 INT3
         *      WMB                             RMB
 -       *      write INT3                      if (in_progress)
 +       *      write INT3                      if (nr_entries)
         *
 -       * Idem for bp_int3_handler.
 +       * Idem for other elements in bp_patching.
         */
        smp_rmb();
  
 -      if (likely(!bp_patching_in_progress))
 +      if (likely(!bp_patching.nr_entries))
                return 0;
  
 -      if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
 +      if (user_mode(regs))
                return 0;
  
 -      /* set up the specified breakpoint handler */
 -      regs->ip = (unsigned long) bp_int3_handler;
 +      /*
 +       * Discount the sizeof(int3). See text_poke_bp_batch().
 +       */
 +      ip = (void *) regs->ip - sizeof(int3);
 +
 +      /*
 +       * Skip the binary search if there is a single member in the vector.
 +       */
 +      if (unlikely(bp_patching.nr_entries > 1)) {
 +              tp = bsearch(ip, bp_patching.vec, bp_patching.nr_entries,
 +                           sizeof(struct text_poke_loc),
 +                           patch_cmp);
 +              if (!tp)
 +                      return 0;
 +      } else {
 +              tp = bp_patching.vec;
 +              if (tp->addr != ip)
 +                      return 0;
 +      }
 +
 +      /* set up the specified breakpoint detour */
 +      regs->ip = (unsigned long) tp->detour;
  
        return 1;
  }
  NOKPROBE_SYMBOL(poke_int3_handler);
  
  /**
 - * text_poke_bp() -- update instructions on live kernel on SMP
 - * @addr:     address to patch
 - * @opcode:   opcode of new instruction
 - * @len:      length to copy
 - * @handler:  address to jump to when the temporary breakpoint is hit
 + * text_poke_bp_batch() -- update instructions on live kernel on SMP
 + * @tp:                       vector of instructions to patch
 + * @nr_entries:               number of entries in the vector
   *
   * Modify multi-byte instruction by using int3 breakpoint on SMP.
   * We completely avoid stop_machine() here, and achieve the
   * synchronization using int3 breakpoint.
   *
   * The way it is done:
 - *    - add a int3 trap to the address that will be patched
 + *    - For each entry in the vector:
 + *            - add an int3 trap to the address that will be patched
   *    - sync cores
 - *    - update all but the first byte of the patched range
 + *    - For each entry in the vector:
 + *            - update all but the first byte of the patched range
   *    - sync cores
 - *    - replace the first byte (int3) by the first byte of
 - *      replacing opcode
 + *    - For each entry in the vector:
 + *            - replace the first byte (int3) with the first byte of
 + *              the replacement opcode
   *    - sync cores
   */
 -void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 +void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
  {
 +      int patched_all_but_first = 0;
        unsigned char int3 = 0xcc;
 -
 -      bp_int3_handler = handler;
 -      bp_int3_addr = (u8 *)addr + sizeof(int3);
 -      bp_patching_in_progress = true;
 +      unsigned int i;
  
        lockdep_assert_held(&text_mutex);
  
 +      bp_patching.vec = tp;
 +      bp_patching.nr_entries = nr_entries;
 +
        /*
         * Corresponding read barrier in int3 notifier for making sure the
 -       * in_progress and handler are correctly ordered wrt. patching.
 +       * nr_entries and handler are correctly ordered wrt. patching.
         */
        smp_wmb();
  
 -      text_poke(addr, &int3, sizeof(int3));
 +      /*
 +       * First step: add an int3 trap to the address that will be patched.
 +       */
 +      for (i = 0; i < nr_entries; i++)
 +              text_poke(tp[i].addr, &int3, sizeof(int3));
  
        on_each_cpu(do_sync_core, NULL, 1);
  
 -      if (len - sizeof(int3) > 0) {
 -              /* patch all but the first byte */
 -              text_poke((char *)addr + sizeof(int3),
 -                        (const char *) opcode + sizeof(int3),
 -                        len - sizeof(int3));
 +      /*
 +       * Second step: update all but the first byte of the patched range.
 +       */
 +      for (i = 0; i < nr_entries; i++) {
 +              if (tp[i].len - sizeof(int3) > 0) {
 +                      text_poke((char *)tp[i].addr + sizeof(int3),
 +                                (const char *)tp[i].opcode + sizeof(int3),
 +                                tp[i].len - sizeof(int3));
 +                      patched_all_but_first++;
 +              }
 +      }
 +
 +      if (patched_all_but_first) {
                /*
                 * According to Intel, this core syncing is very likely
                 * not necessary and we'd be safe even without it. But
                on_each_cpu(do_sync_core, NULL, 1);
        }
  
 -      /* patch the first byte */
 -      text_poke(addr, opcode, sizeof(int3));
 +      /*
 +       * Third step: replace the first byte (int3) with the first byte of
 +       * the replacement opcode.
 +       */
 +      for (i = 0; i < nr_entries; i++)
 +              text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
  
        on_each_cpu(do_sync_core, NULL, 1);
        /*
         * sync_core() implies an smp_mb() and orders this store against
         * the writing of the new instruction.
         */
 -      bp_patching_in_progress = false;
 -
 -      return addr;
 +      bp_patching.vec = NULL;
 +      bp_patching.nr_entries = 0;
  }
  
 +/**
 + * text_poke_bp() -- update instructions on live kernel on SMP
 + * @addr:     address to patch
 + * @opcode:   opcode of new instruction
 + * @len:      length to copy
 + * @handler:  address to jump to when the temporary breakpoint is hit
 + *
 + * Update a single instruction using a vector allocated on the stack,
 + * avoiding dynamically allocated memory. This function should be used
 + * when it is not possible to allocate memory.
 + */
 +void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 +{
 +      struct text_poke_loc tp = {
 +              .detour = handler,
 +              .addr = addr,
 +              .len = len,
 +      };
 +
 +      if (len > POKE_MAX_OPCODE_SIZE) {
 +              WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
 +              return;
 +      }
 +
 +      memcpy((void *)tp.opcode, opcode, len);
 +
 +      text_poke_bp_batch(&tp, 1);
 +}
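
For context, a minimal caller sketch of the new batch interface follows. It is illustrative only: the example_batch_poke() name, the two patch sites, the 5-byte patch length and the shared detour are assumptions, and the declarations are assumed to come from asm/text-patching.h. Entries are filled in ascending address order so the bsearch() in poke_int3_handler() can locate them, and text_mutex is held across the call because text_poke() asserts it.

	#include <linux/memory.h>		/* text_mutex */
	#include <linux/mutex.h>
	#include <linux/string.h>
	#include <asm/text-patching.h>		/* struct text_poke_loc, text_poke_bp_batch() */

	static struct text_poke_loc tp_vec[2];

	/* Hypothetical caller: patch two 5-byte sites with one int3 round trip. */
	static void example_batch_poke(void *addr0, const void *insn0,
				       void *addr1, const void *insn1,
				       void *detour)
	{
		/* Caller guarantees addr0 < addr1, keeping the vector bsearch()-able. */
		tp_vec[0].addr   = addr0;
		tp_vec[0].len    = 5;
		tp_vec[0].detour = detour;
		memcpy((void *)tp_vec[0].opcode, insn0, 5);

		tp_vec[1].addr   = addr1;
		tp_vec[1].len    = 5;
		tp_vec[1].detour = detour;
		memcpy((void *)tp_vec[1].opcode, insn1, 5);

		mutex_lock(&text_mutex);
		text_poke_bp_batch(tp_vec, 2);	/* one int3 round trip for both sites */
		mutex_unlock(&text_mutex);
	}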
index 282b4eb67e30366167102fb1ecd268f59e6a769b,e77a895a9ecc3a1a161b08c567f95627a988b5be..9d4aedece363cc28c696be0dbfa1af8deaba9fa3
@@@ -1,7 -1,20 +1,7 @@@
 +// SPDX-License-Identifier: GPL-2.0-or-later
  /*
   *  Kernel Probes Jump Optimization (Optprobes)
   *
 - * This program is free software; you can redistribute it and/or modify
 - * it under the terms of the GNU General Public License as published by
 - * the Free Software Foundation; either version 2 of the License, or
 - * (at your option) any later version.
 - *
 - * This program is distributed in the hope that it will be useful,
 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 - * GNU General Public License for more details.
 - *
 - * You should have received a copy of the GNU General Public License
 - * along with this program; if not, write to the Free Software
 - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 - *
   * Copyright (C) IBM Corporation, 2002, 2004
   * Copyright (C) Hitachi Ltd., 2012
   */
@@@ -102,15 -115,14 +102,15 @@@ asm 
                        "optprobe_template_call:\n"
                        ASM_NOP5
                        /* Move flags to rsp */
 -                      "       movq 144(%rsp), %rdx\n"
 -                      "       movq %rdx, 152(%rsp)\n"
 +                      "       movq 18*8(%rsp), %rdx\n"
 +                      "       movq %rdx, 19*8(%rsp)\n"
                        RESTORE_REGS_STRING
                        /* Skip flags entry */
                        "       addq $8, %rsp\n"
                        "       popfq\n"
  #else /* CONFIG_X86_32 */
 -                      "       pushf\n"
 +                      "       pushl %esp\n"
 +                      "       pushfl\n"
                        SAVE_REGS_STRING
                        "       movl %esp, %edx\n"
                        ".global optprobe_template_val\n"
                        ".global optprobe_template_call\n"
                        "optprobe_template_call:\n"
                        ASM_NOP5
 +                      /* Move flags into esp */
 +                      "       movl 14*4(%esp), %edx\n"
 +                      "       movl %edx, 15*4(%esp)\n"
                        RESTORE_REGS_STRING
 -                      "       addl $4, %esp\n"        /* skip cs */
 -                      "       popf\n"
 +                      /* Skip flags entry */
 +                      "       addl $4, %esp\n"
 +                      "       popfl\n"
  #endif
                        ".global optprobe_template_end\n"
                        "optprobe_template_end:\n"
@@@ -157,9 -165,10 +157,9 @@@ optimized_callback(struct optimized_kpr
        } else {
                struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
                /* Save skipped registers */
 -#ifdef CONFIG_X86_64
                regs->cs = __KERNEL_CS;
 -#else
 -              regs->cs = __KERNEL_CS | get_kernel_rpl();
 +#ifdef CONFIG_X86_32
 +              regs->cs |= get_kernel_rpl();
                regs->gs = 0;
  #endif
                regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
@@@ -422,7 -431,7 +422,7 @@@ err
  void arch_optimize_kprobes(struct list_head *oplist)
  {
        struct optimized_kprobe *op, *tmp;
-       u8 insn_buf[RELATIVEJUMP_SIZE];
+       u8 insn_buff[RELATIVEJUMP_SIZE];
  
        list_for_each_entry_safe(op, tmp, oplist, list) {
                s32 rel = (s32)((long)op->optinsn.insn -
                memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
                       RELATIVE_ADDR_SIZE);
  
-               insn_buf[0] = RELATIVEJUMP_OPCODE;
-               *(s32 *)(&insn_buf[1]) = rel;
+               insn_buff[0] = RELATIVEJUMP_OPCODE;
+               *(s32 *)(&insn_buff[1]) = rel;
  
-               text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+               text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
                             op->optinsn.insn);
  
                list_del_init(&op->list);
  /* Replace a relative jump with a breakpoint (int3).  */
  void arch_unoptimize_kprobe(struct optimized_kprobe *op)
  {
-       u8 insn_buf[RELATIVEJUMP_SIZE];
+       u8 insn_buff[RELATIVEJUMP_SIZE];
  
        /* Set int3 to first byte for kprobes */
-       insn_buf[0] = BREAKPOINT_INSTRUCTION;
-       memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
-       text_poke_bp(op->kp.addr, insn_buf, RELATIVEJUMP_SIZE,
+       insn_buff[0] = BREAKPOINT_INSTRUCTION;
+       memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+       text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
                     op->optinsn.insn);
  }
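
As a side note on what those insn_buff bytes encode, here is a hedged sketch of the 5-byte relative jump both call sites above assemble; the make_reljump() helper is hypothetical, while RELATIVEJUMP_OPCODE and RELATIVEJUMP_SIZE are the constants the file already uses.

	#include <linux/types.h>
	#include <asm/kprobes.h>	/* RELATIVEJUMP_OPCODE, RELATIVEJUMP_SIZE */

	/* Encode 'jmp rel32' from 'from' to 'to'; the displacement is relative
	 * to the byte following the 5-byte instruction, matching 'rel' above. */
	static void make_reljump(u8 *insn_buff, unsigned long from, unsigned long to)
	{
		s32 rel = (s32)(to - (from + RELATIVEJUMP_SIZE));

		insn_buff[0] = RELATIVEJUMP_OPCODE;	/* 0xe9 */
		*(s32 *)(&insn_buff[1]) = rel;
	}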
  
index 06f6bb48d01891a31c668730697d00a093cd51e8,b7d22912e20b8100f067cbd92ff7373e9fdb594a..98039d7fb998c96152854d5488de834dc3537b3b
@@@ -1,7 -1,19 +1,7 @@@
 +// SPDX-License-Identifier: GPL-2.0-or-later
  /*  Paravirtualization interfaces
      Copyright (C) 2006 Rusty Russell IBM Corporation
  
 -    This program is free software; you can redistribute it and/or modify
 -    it under the terms of the GNU General Public License as published by
 -    the Free Software Foundation; either version 2 of the License, or
 -    (at your option) any later version.
 -
 -    This program is distributed in the hope that it will be useful,
 -    but WITHOUT ANY WARRANTY; without even the implied warranty of
 -    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 -    GNU General Public License for more details.
 -
 -    You should have received a copy of the GNU General Public License
 -    along with this program; if not, write to the Free Software
 -    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  
      2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
  */
@@@ -58,24 -70,24 +58,24 @@@ struct branch 
        u32 delta;
  } __attribute__((packed));
  
- static unsigned paravirt_patch_call(void *insnbuf, const void *target,
+ static unsigned paravirt_patch_call(void *insn_buff, const void *target,
                                    unsigned long addr, unsigned len)
  {
-       struct branch *b = insnbuf;
-       unsigned long delta = (unsigned long)target - (addr+5);
-       if (len < 5) {
- #ifdef CONFIG_RETPOLINE
-               WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
- #endif
-               return len;     /* call too long for patch site */
+       const int call_len = 5;
+       struct branch *b = insn_buff;
+       unsigned long delta = (unsigned long)target - (addr+call_len);
+       if (len < call_len) {
+               pr_warn("paravirt: Failed to patch indirect CALL at %ps\n", (void *)addr);
+               /* Kernel might not be viable if patching fails, bail out: */
+               BUG_ON(1);
        }
  
        b->opcode = 0xe8; /* call */
        b->delta = delta;
-       BUILD_BUG_ON(sizeof(*b) != 5);
+       BUILD_BUG_ON(sizeof(*b) != call_len);
  
-       return 5;
+       return call_len;
  }
  
  #ifdef CONFIG_PARAVIRT_XXL
@@@ -85,10 -97,10 +85,10 @@@ u64 notrace _paravirt_ident_64(u64 x
        return x;
  }
  
- static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
+ static unsigned paravirt_patch_jmp(void *insn_buff, const void *target,
                                   unsigned long addr, unsigned len)
  {
-       struct branch *b = insnbuf;
+       struct branch *b = insn_buff;
        unsigned long delta = (unsigned long)target - (addr+5);
  
        if (len < 5) {
@@@ -109,11 -121,11 +109,11 @@@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_k
  
  void __init native_pv_lock_init(void)
  {
 -      if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 +      if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
  }
  
- unsigned paravirt_patch_default(u8 type, void *insnbuf,
+ unsigned paravirt_patch_default(u8 type, void *insn_buff,
                                unsigned long addr, unsigned len)
  {
        /*
  
        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
-               ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
+               ret = paravirt_patch_insns(insn_buff, len, ud2a, ud2a+sizeof(ud2a));
        else if (opfunc == _paravirt_nop)
                ret = 0;
  
  #ifdef CONFIG_PARAVIRT_XXL
        /* identity functions just return their single argument */
        else if (opfunc == _paravirt_ident_64)
-               ret = paravirt_patch_ident_64(insnbuf, len);
+               ret = paravirt_patch_ident_64(insn_buff, len);
  
        else if (type == PARAVIRT_PATCH(cpu.iret) ||
                 type == PARAVIRT_PATCH(cpu.usergs_sysret64))
                /* If operation requires a jmp, then jmp */
-               ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
+               ret = paravirt_patch_jmp(insn_buff, opfunc, addr, len);
  #endif
        else
                /* Otherwise call the function. */
-               ret = paravirt_patch_call(insnbuf, opfunc, addr, len);
+               ret = paravirt_patch_call(insn_buff, opfunc, addr, len);
  
        return ret;
  }
  
- unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
+ unsigned paravirt_patch_insns(void *insn_buff, unsigned len,
                              const char *start, const char *end)
  {
        unsigned insn_len = end - start;
  
-       if (insn_len > len || start == NULL)
-               insn_len = len;
-       else
-               memcpy(insnbuf, start, insn_len);
+       /* Alternative instruction is too large for the patch site and we cannot continue: */
+       BUG_ON(insn_len > len || start == NULL);
+       memcpy(insn_buff, start, insn_len);
  
        return insn_len;
  }
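
To make the emitted patchlet concrete, here is an illustrative sketch of the 5 bytes paravirt_patch_call() writes into insn_buff; the emit_pv_call() name is an assumption, and the snippet relies on the file-local struct branch (packed opcode/delta pair) defined earlier in this file.

	/* A 'call rel32' (0xe8) whose 32-bit delta is taken relative to the
	 * end of the 5-byte instruction located at 'addr'. */
	static void emit_pv_call(void *insn_buff, const void *target, unsigned long addr)
	{
		struct branch *b = insn_buff;

		b->opcode = 0xe8;					/* call */
		b->delta  = (u32)((unsigned long)target - (addr + 5));
	}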
index e455349e0ab5e5a44b0041f9efac90b4727173bf,34c2b3691f4f868d85bcb8b00657ca450fae1ef7..34eda63c124b19cc1592d853f8b1862ddc269087
@@@ -1,5 -1,13 +1,5 @@@
 +// SPDX-License-Identifier: GPL-2.0-or-later
  /*
 - * This program is free software; you can redistribute it and/or modify
 - * it under the terms of the GNU General Public License as published by
 - * the Free Software Foundation; either version 2 of the License, or
 - * (at your option) any later version.
 - *
 - * This program is distributed in the hope that it will be useful,
 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 - * GNU General Public License for more details.
   *
   * Copyright (C) IBM Corporation, 2009
   */
@@@ -111,7 -119,7 +111,7 @@@ static void parse_args(int argc, char *
  int main(int argc, char **argv)
  {
        char line[BUFSIZE], sym[BUFSIZE] = "<unknown>";
-       unsigned char insn_buf[16];
+       unsigned char insn_buff[16];
        struct insn insn;
        int insns = 0;
        int warnings = 0;
                }
  
                insns++;
-               memset(insn_buf, 0, 16);
+               memset(insn_buff, 0, 16);
                strcpy(copy, line);
                tab1 = strchr(copy, '\t');
                if (!tab1)
                *tab2 = '\0';   /* Characters beyond tab2 aren't examined */
                while (s < tab2) {
                        if (sscanf(s, "%x", &b) == 1) {
-                               insn_buf[nb++] = (unsigned char) b;
+                               insn_buff[nb++] = (unsigned char) b;
                                s += 3;
                        } else
                                break;
                }
                /* Decode an instruction */
-               insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+               insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
                insn_get_length(&insn);
                if (insn.length != nb) {
                        warnings++;
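
For reference, a standalone sketch of the decode step the test performs with the renamed buffer; the decode_one() helper and the example call bytes are assumptions, while insn_init() and insn_get_length() are the same decoder calls the tool already makes with the headers it already includes.

	/* Decode one instruction from a zero-padded buffer and return its
	 * length; the bytes below encode a 5-byte 'call rel32' as an example. */
	static int decode_one(int x86_64)
	{
		unsigned char insn_buff[16] = { 0xe8, 0x00, 0x00, 0x00, 0x00 };
		struct insn insn;

		insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
		insn_get_length(&insn);

		return insn.length;	/* 5 for the example above */
	}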
index 14cf07916081182935e8e13c19f5e8dc5d24e587,7adec7b490fd806a6e4da45dc70a700eae9014d3..185ceba9d289bd5de91019cebc25502c9a385a2f
@@@ -1,7 -1,20 +1,7 @@@
 +// SPDX-License-Identifier: GPL-2.0-or-later
  /*
   * x86 decoder sanity test - based on test_get_insn.c
   *
 - * This program is free software; you can redistribute it and/or modify
 - * it under the terms of the GNU General Public License as published by
 - * the Free Software Foundation; either version 2 of the License, or
 - * (at your option) any later version.
 - *
 - * This program is distributed in the hope that it will be useful,
 - * but WITHOUT ANY WARRANTY; without even the implied warranty of
 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 - * GNU General Public License for more details.
 - *
 - * You should have received a copy of the GNU General Public License
 - * along with this program; if not, write to the Free Software
 - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 - *
   * Copyright (C) IBM Corporation, 2009
   * Copyright (C) Hitachi, Ltd., 2011
   */
@@@ -83,7 -96,7 +83,7 @@@ static void dump_insn(FILE *fp, struct 
  }
  
  static void dump_stream(FILE *fp, const char *msg, unsigned long nr_iter,
-                       unsigned char *insn_buf, struct insn *insn)
+                       unsigned char *insn_buff, struct insn *insn)
  {
        int i;
  
        /* Input a decoded instruction sequence directly */
        fprintf(fp, " $ echo ");
        for (i = 0; i < MAX_INSN_SIZE; i++)
-               fprintf(fp, " %02x", insn_buf[i]);
+               fprintf(fp, " %02x", insn_buff[i]);
        fprintf(fp, " | %s -i -\n", prog);
  
        if (!input_file) {
@@@ -124,7 -137,7 +124,7 @@@ fail
  }
  
  /* Read given instruction sequence from the input file */
- static int read_next_insn(unsigned char *insn_buf)
+ static int read_next_insn(unsigned char *insn_buff)
  {
        char buf[256]  = "", *tmp;
        int i;
                return 0;
  
        for (i = 0; i < MAX_INSN_SIZE; i++) {
-               insn_buf[i] = (unsigned char)strtoul(tmp, &tmp, 16);
+               insn_buff[i] = (unsigned char)strtoul(tmp, &tmp, 16);
                if (*tmp != ' ')
                        break;
        }
        return i;
  }
  
- static int generate_insn(unsigned char *insn_buf)
+ static int generate_insn(unsigned char *insn_buff)
  {
        int i;
  
        if (input_file)
-               return read_next_insn(insn_buf);
+               return read_next_insn(insn_buff);
  
        /* Fills buffer with random binary up to MAX_INSN_SIZE */
        for (i = 0; i < MAX_INSN_SIZE - 1; i += 2)
-               *(unsigned short *)(&insn_buf[i]) = random() & 0xffff;
+               *(unsigned short *)(&insn_buff[i]) = random() & 0xffff;
  
        while (i < MAX_INSN_SIZE)
-               insn_buf[i++] = random() & 0xff;
+               insn_buff[i++] = random() & 0xff;
  
        return i;
  }
@@@ -226,31 -239,31 +226,31 @@@ int main(int argc, char **argv
        int insns = 0;
        int errors = 0;
        unsigned long i;
-       unsigned char insn_buf[MAX_INSN_SIZE * 2];
+       unsigned char insn_buff[MAX_INSN_SIZE * 2];
  
        parse_args(argc, argv);
  
        /* Prepare stop bytes with NOPs */
-       memset(insn_buf + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
+       memset(insn_buff + MAX_INSN_SIZE, INSN_NOP, MAX_INSN_SIZE);
  
        for (i = 0; i < iter_end; i++) {
-               if (generate_insn(insn_buf) <= 0)
+               if (generate_insn(insn_buff) <= 0)
                        break;
  
                if (i < iter_start)     /* Skip to given iteration number */
                        continue;
  
                /* Decode an instruction */
-               insn_init(&insn, insn_buf, sizeof(insn_buf), x86_64);
+               insn_init(&insn, insn_buff, sizeof(insn_buff), x86_64);
                insn_get_length(&insn);
  
                if (insn.next_byte <= insn.kaddr ||
                    insn.kaddr + MAX_INSN_SIZE < insn.next_byte) {
                        /* Access out-of-range memory */
-                       dump_stream(stderr, "Error: Found an access violation", i, insn_buf, &insn);
+                       dump_stream(stderr, "Error: Found an access violation", i, insn_buff, &insn);
                        errors++;
                } else if (verbose && !insn_complete(&insn))
-                       dump_stream(stdout, "Info: Found an undecodable input", i, insn_buf, &insn);
+                       dump_stream(stdout, "Info: Found an undecodable input", i, insn_buff, &insn);
                else if (verbose >= 2)
                        dump_insn(stdout, &insn);
                insns++;