x86/resctrl: Abstract __rmid_read()
author: James Morse <james.morse@arm.com>
Fri, 2 Sep 2022 15:48:23 +0000 (15:48 +0000)
committer: Borislav Petkov <bp@suse.de>
Fri, 23 Sep 2022 12:17:20 +0000 (14:17 +0200)
__rmid_read() selects the specified eventid and returns the counter
value from the MSR. The error handling is architecture specific, and
handled by the callers, rdtgroup_mondata_show() and __mon_event_count().

Errors should be handled by architecture-specific code, as
a different architecture may have different requirements. MPAM's
counters can report that they are 'not ready', requiring a second
read after a short delay. This should be hidden from resctrl.

Make __rmid_read() the architecture specific function for reading
a counter. Rename it resctrl_arch_rmid_read() and move the error
handling into it.

A read from a counter that hardware supports, but resctrl does not,
now returns -EINVAL instead of -EIO from the default case in
__mon_event_count(). It isn't possible for user-space to see this
change as resctrl doesn't expose counters it doesn't support.

Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Jamie Iles <quic_jiles@quicinc.com>
Reviewed-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Xin Hao <xhao@linux.alibaba.com>
Tested-by: Shaopeng Tan <tan.shaopeng@fujitsu.com>
Tested-by: Cristian Marussi <cristian.marussi@arm.com>
Link: https://lore.kernel.org/r/20220902154829.30399-16-james.morse@arm.com
arch/x86/kernel/cpu/resctrl/ctrlmondata.c
arch/x86/kernel/cpu/resctrl/internal.h
arch/x86/kernel/cpu/resctrl/monitor.c
include/linux/resctrl.h

index 0ab92320de71699d037ecba601ba0729a4a58e6a..42a1abb378f018b775c4d13ee934a6f20836527a 100644 (file)
@@ -579,9 +579,9 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 
        mon_event_read(&rr, r, d, rdtgrp, evtid, false);
 
-       if (rr.val & RMID_VAL_ERROR)
+       if (rr.err == -EIO)
                seq_puts(m, "Error\n");
-       else if (rr.val & RMID_VAL_UNAVAIL)
+       else if (rr.err == -EINVAL)
                seq_puts(m, "Unavailable\n");
        else
                seq_printf(m, "%llu\n", rr.val * hw_res->mon_scale);
index b34a1403f033c44b666fcaa97b966a0e8df80bd5..1d2e7bd6305fc34e1c2d55aeff79a54802c15803 100644 (file)
@@ -94,6 +94,7 @@ struct rmid_read {
        struct rdt_domain       *d;
        enum resctrl_event_id   evtid;
        bool                    first;
+       int                     err;
        u64                     val;
 };
 
index e9755143492b43a66a50dd8e21df7497a127b97e..51ab76f2dfbc1f63f6f91d40ff71336b5304065f 100644 (file)
@@ -167,9 +167,9 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
                memset(am, 0, sizeof(*am));
 }
 
-static u64 __rmid_read(u32 rmid, enum resctrl_event_id eventid)
+int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
 {
-       u64 val;
+       u64 msr_val;
 
        /*
         * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
@@ -180,14 +180,24 @@ static u64 __rmid_read(u32 rmid, enum resctrl_event_id eventid)
         * are error bits.
         */
        wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
-       rdmsrl(MSR_IA32_QM_CTR, val);
+       rdmsrl(MSR_IA32_QM_CTR, msr_val);
 
-       return val;
+       if (msr_val & RMID_VAL_ERROR)
+               return -EIO;
+       if (msr_val & RMID_VAL_UNAVAIL)
+               return -EINVAL;
+
+       *val = msr_val;
+
+       return 0;
 }
 
 static bool rmid_dirty(struct rmid_entry *entry)
 {
-       u64 val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
+       u64 val = 0;
+
+       if (resctrl_arch_rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID, &val))
+               return true;
 
        return val >= resctrl_cqm_threshold;
 }
@@ -259,8 +269,8 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
 {
        struct rdt_resource *r;
        struct rdt_domain *d;
-       int cpu;
-       u64 val;
+       int cpu, err;
+       u64 val = 0;
 
        r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 
@@ -268,8 +278,10 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
        cpu = get_cpu();
        list_for_each_entry(d, &r->domains, list) {
                if (cpumask_test_cpu(cpu, &d->cpu_mask)) {
-                       val = __rmid_read(entry->rmid, QOS_L3_OCCUP_EVENT_ID);
-                       if (val <= resctrl_cqm_threshold)
+                       err = resctrl_arch_rmid_read(entry->rmid,
+                                                    QOS_L3_OCCUP_EVENT_ID,
+                                                    &val);
+                       if (err || val <= resctrl_cqm_threshold)
                                continue;
                }
 
@@ -315,19 +327,19 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
        return chunks >> shift;
 }
 
-static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
+static int __mon_event_count(u32 rmid, struct rmid_read *rr)
 {
        struct rdt_hw_resource *hw_res = resctrl_to_arch_res(rr->r);
        struct mbm_state *m;
-       u64 chunks, tval;
+       u64 chunks, tval = 0;
 
        if (rr->first)
                resctrl_arch_reset_rmid(rr->r, rr->d, rmid, rr->evtid);
 
-       tval = __rmid_read(rmid, rr->evtid);
-       if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
-               return tval;
-       }
+       rr->err = resctrl_arch_rmid_read(rmid, rr->evtid, &tval);
+       if (rr->err)
+               return rr->err;
+
        switch (rr->evtid) {
        case QOS_L3_OCCUP_EVENT_ID:
                rr->val += tval;
@@ -341,9 +353,9 @@ static u64 __mon_event_count(u32 rmid, struct rmid_read *rr)
        default:
                /*
                 * Code would never reach here because an invalid
-                * event id would fail the __rmid_read.
+                * event id would fail in resctrl_arch_rmid_read().
                 */
-               return RMID_VAL_ERROR;
+               return -EINVAL;
        }
 
        if (rr->first) {
@@ -399,11 +411,11 @@ void mon_event_count(void *info)
        struct rdtgroup *rdtgrp, *entry;
        struct rmid_read *rr = info;
        struct list_head *head;
-       u64 ret_val;
+       int ret;
 
        rdtgrp = rr->rgrp;
 
-       ret_val = __mon_event_count(rdtgrp->mon.rmid, rr);
+       ret = __mon_event_count(rdtgrp->mon.rmid, rr);
 
        /*
         * For Ctrl groups read data from child monitor groups and
@@ -415,13 +427,17 @@ void mon_event_count(void *info)
        if (rdtgrp->type == RDTCTRL_GROUP) {
                list_for_each_entry(entry, head, mon.crdtgrp_list) {
                        if (__mon_event_count(entry->mon.rmid, rr) == 0)
-                               ret_val = 0;
+                               ret = 0;
                }
        }
 
-       /* Report error if none of rmid_reads are successful */
-       if (ret_val)
-               rr->val = ret_val;
+       /*
+        * __mon_event_count() calls for newly created monitor groups may
+        * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
+        * Discard error if any of the monitor event reads succeeded.
+        */
+       if (ret == 0)
+               rr->err = 0;
 }
 
 /*
index 818456770176d30030f245b341e625d52e1199e0..efe60dd7fd211ac4b3323df7ef3eef9eadf86963 100644 (file)
@@ -219,6 +219,7 @@ u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
                            u32 closid, enum resctrl_conf_type type);
 int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
 void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+int resctrl_arch_rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *res);
 
 /**
  * resctrl_arch_reset_rmid() - Reset any private state associated with rmid