DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
+
+/* Mutex to protect rdtgroup access. */
+DEFINE_MUTEX(rdtgroup_mutex);
+
static struct kernfs_root *rdt_root;
struct rdtgroup rdtgroup_default;
LIST_HEAD(rdt_all_groups);
/* list of entries for the schemata file */
LIST_HEAD(resctrl_schema_all);
+/* The filesystem can only be mounted once. */
+bool resctrl_mounted;
+
/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;
static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];
+static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
+static void rdtgroup_destroy_root(void);
+
struct dentry *debugfs_resctrl;
+static bool resctrl_debug;
+
void rdt_last_cmd_clear(void)
{
lockdep_assert_held(&rdtgroup_mutex);
*
* Using a global CLOSID across all resources has some advantages and
* some drawbacks:
- * + We can simply set "current->closid" to assign a task to a resource
+ * + We can simply set current's closid to assign a task to a resource
* group.
* + Context switch code can avoid extra memory references deciding which
* CLOSID to load into the PQR_ASSOC MSR
* - Our choices on how to configure each resource become progressively more
* limited as the number of resources grows.
*/
-static int closid_free_map;
+static unsigned long closid_free_map;
static int closid_free_map_len;
int closids_supported(void)
closid_free_map = BIT_MASK(rdt_min_closid) - 1;
- /* CLOSID 0 is always reserved for the default group */
- closid_free_map &= ~1;
+ /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
+ __clear_bit(RESCTRL_RESERVED_CLOSID, &closid_free_map);
closid_free_map_len = rdt_min_closid;
}
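
For example, with rdt_min_closid = 4 the free map starts with the four low bits set and RESCTRL_RESERVED_CLOSID (bit 0) is then withdrawn. A worked trace of the two lines above, with an illustrative value:

        unsigned long closid_free_map;

        closid_free_map = BIT_MASK(4) - 1;              /* (1UL << 4) - 1 == 0b1111 */
        __clear_bit(RESCTRL_RESERVED_CLOSID,
                    &closid_free_map);                  /* bit 0 cleared -> 0b1110 */
        /* CLOSIDs 1, 2 and 3 remain free for new control groups. */
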
static int closid_alloc(void)
{
- u32 closid = ffs(closid_free_map);
+ int cleanest_closid;
+ u32 closid;
- if (closid == 0)
- return -ENOSPC;
- closid--;
- closid_free_map &= ~(1 << closid);
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+ cleanest_closid = resctrl_find_cleanest_closid();
+ if (cleanest_closid < 0)
+ return cleanest_closid;
+ closid = cleanest_closid;
+ } else {
+ closid = ffs(closid_free_map);
+ if (closid == 0)
+ return -ENOSPC;
+ closid--;
+ }
+ __clear_bit(closid, &closid_free_map);
return closid;
}
void closid_free(int closid)
{
- closid_free_map |= 1 << closid;
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ __set_bit(closid, &closid_free_map);
}
/**
* closid_allocated - Test if provided closid is in use
* @closid: closid to be tested, needs to be a valid closid
*
* Return: true if @closid is currently associated with a resource group,
* false if @closid is free
*/
-static bool closid_allocated(unsigned int closid)
+bool closid_allocated(unsigned int closid)
{
- return (closid_free_map & (1 << closid)) == 0;
+ lockdep_assert_held(&rdtgroup_mutex);
+
+ return !test_bit(closid, &closid_free_map);
}
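
Taken together, closid_alloc()/closid_free()/closid_allocated() form a lowest-bit-first bitmap allocator. A minimal user-space sketch of the non-RMID-dependent path, using plain C instead of the kernel bitmap helpers (names and the initial map value are illustrative):

        #include <stdio.h>
        #include <strings.h>                    /* ffs() */

        static unsigned int free_map = 0xe;     /* IDs 1-3 free, ID 0 reserved */

        static int id_alloc(void)
        {
                int id = ffs(free_map);         /* 1-based index of lowest set bit */

                if (id == 0)
                        return -1;              /* map empty: nothing free */
                id--;
                free_map &= ~(1U << id);
                return id;
        }

        static void id_free(int id)
        {
                free_map |= 1U << id;
        }

        int main(void)
        {
                printf("%d %d\n", id_alloc(), id_alloc());      /* prints "1 2" */
                id_free(1);
                printf("%d\n", id_alloc());     /* lowest free bit again: 1 */
                return 0;
        }
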
/**
_update_task_closid_rmid(t);
}
+static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
+{
+ u32 closid, rmid = rdtgrp->mon.rmid;
+
+ if (rdtgrp->type == RDTCTRL_GROUP)
+ closid = rdtgrp->closid;
+ else if (rdtgrp->type == RDTMON_GROUP)
+ closid = rdtgrp->mon.parent->closid;
+ else
+ return false;
+
+ return resctrl_arch_match_closid(tsk, closid) &&
+ resctrl_arch_match_rmid(tsk, closid, rmid);
+}
+
static int __rdtgroup_move_task(struct task_struct *tsk,
struct rdtgroup *rdtgrp)
{
/* If the task is already in rdtgrp, no need to move the task. */
- if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
- tsk->rmid == rdtgrp->mon.rmid) ||
- (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
- tsk->closid == rdtgrp->mon.parent->closid))
+ if (task_in_rdtgroup(tsk, rdtgrp))
return 0;
/*
* Tasks can be moved into a monitor group only from
* that group's parent CTRL group.
*/
-
- if (rdtgrp->type == RDTCTRL_GROUP) {
- WRITE_ONCE(tsk->closid, rdtgrp->closid);
- WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
- } else if (rdtgrp->type == RDTMON_GROUP) {
- if (rdtgrp->mon.parent->closid == tsk->closid) {
- WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
- } else {
- rdt_last_cmd_puts("Can't move task to different control group\n");
- return -EINVAL;
- }
+ if (rdtgrp->type == RDTMON_GROUP &&
+ !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
+ rdt_last_cmd_puts("Can't move task to different control group\n");
+ return -EINVAL;
}
+ if (rdtgrp->type == RDTMON_GROUP)
+ resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
+ rdtgrp->mon.rmid);
+ else
+ resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
+ rdtgrp->mon.rmid);
+
/*
* Ensure the task's closid and rmid are written before determining if
* the task is current; that check decides whether it must be interrupted.
static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
{
- return (rdt_alloc_capable &&
- (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
+ return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
+ resctrl_arch_match_closid(t, r->closid));
}
static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
{
- return (rdt_mon_capable &&
- (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
+ return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
+ resctrl_arch_match_rmid(t, r->mon.parent->closid,
+ r->mon.rmid));
}
/**
char *buf, size_t nbytes, loff_t off)
{
struct rdtgroup *rdtgrp;
+ char *pid_str;
int ret = 0;
pid_t pid;
- if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
- return -EINVAL;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
ret = -ENOENT;
goto unlock;
}
- ret = rdtgroup_move_task(pid, rdtgrp, of);
+ while (buf && buf[0] != '\0' && buf[0] != '\n') {
+ pid_str = strim(strsep(&buf, ","));
+
+ if (kstrtoint(pid_str, 0, &pid)) {
+ rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
+ ret = -EINVAL;
+ break;
+ }
+
+ if (pid < 0) {
+ rdt_last_cmd_printf("Invalid pid %d\n", pid);
+ ret = -EINVAL;
+ break;
+ }
+
+ ret = rdtgroup_move_task(pid, rdtgrp, of);
+ if (ret) {
+ rdt_last_cmd_printf("Error while processing task %d\n", pid);
+ break;
+ }
+ }
unlock:
rdtgroup_kn_unlock(of->kn);
return ret ?: nbytes;
}
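
With the parsing loop above, the tasks file now accepts a comma-separated PID list. The kernel processes the list left to right and stops at the first failure, so earlier PIDs may already have been moved when an error is reported. A user-space usage sketch (the group path is hypothetical; resctrl must be mounted):

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                const char *path = "/sys/fs/resctrl/grp0/tasks";
                const char *pids = "1234,5678,9012\n";
                int fd = open(path, O_WRONLY);

                if (fd < 0 || write(fd, pids, strlen(pids)) < 0)
                        perror(path);   /* details land in info/last_cmd_status */
                if (fd >= 0)
                        close(fd);
                return 0;
        }
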
+static int rdtgroup_closid_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp)
+ seq_printf(s, "%u\n", rdtgrp->closid);
+ else
+ ret = -ENOENT;
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
+static int rdtgroup_rmid_show(struct kernfs_open_file *of,
+ struct seq_file *s, void *v)
+{
+ struct rdtgroup *rdtgrp;
+ int ret = 0;
+
+ rdtgrp = rdtgroup_kn_lock_live(of->kn);
+ if (rdtgrp)
+ seq_printf(s, "%u\n", rdtgrp->mon.rmid);
+ else
+ ret = -ENOENT;
+ rdtgroup_kn_unlock(of->kn);
+
+ return ret;
+}
+
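
The two show handlers above expose the hardware CLOSID and RMID backing a group. Both files are tagged RFTYPE_DEBUG, so per the fflags handling later in this patch they only appear when the filesystem is mounted with the new "debug" option. A minimal read sketch, with a hypothetical group name:

        #include <stdio.h>

        int main(void)
        {
                char id[16];
                FILE *f = fopen("/sys/fs/resctrl/grp0/ctrl_hw_id", "r");

                if (f && fgets(id, sizeof(id), f))
                        printf("grp0 is backed by CLOSID %s", id);
                if (f)
                        fclose(f);
                return 0;
        }
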
#ifdef CONFIG_PROC_CPU_RESCTRL
/*
mutex_lock(&rdtgroup_mutex);
/* Return empty if resctrl has not been mounted. */
- if (!static_branch_unlikely(&rdt_enable_key)) {
+ if (!resctrl_mounted) {
seq_puts(s, "res:\nmon:\n");
goto unlock;
}
rdtg->mode != RDT_MODE_EXCLUSIVE)
continue;
- if (rdtg->closid != tsk->closid)
+ if (!resctrl_arch_match_closid(tsk, rdtg->closid))
continue;
seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
seq_puts(s, "mon:");
list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
mon.crdtgrp_list) {
- if (tsk->rmid != crg->mon.rmid)
+ if (!resctrl_arch_match_rmid(tsk, crg->mon.parent->closid,
+ crg->mon.rmid))
continue;
seq_printf(s, "%s", crg->kn->name);
break;
return 0;
}
-/**
+/*
* rdt_bit_usage_show - Display current usage of resources
*
* A domain is a shared resource that can now be allocated differently. Here
bool sep = false;
u32 ctrl_val;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
hw_shareable = r->cache.shareable_bits;
list_for_each_entry(dom, &r->domains, list) {
}
seq_putc(seq, '\n');
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return 0;
}
}
}
+static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
+ struct seq_file *seq, void *v)
+{
+ struct resctrl_schema *s = of->kn->parent->priv;
+ struct rdt_resource *r = s->res;
+
+ seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
+
+ return 0;
+}
+
/**
* __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
* @r: Resource to which domain instance @d belongs.
* @d: The domain instance for which @closid is being tested.
* @cbm: Capacity bitmask being tested.
* @closid: Intended closid for @cbm.
+ * @type: CDP type of @r.
* @exclusive: Only check if overlaps with exclusive resource groups
*
* Checks if provided @cbm intended to be used for @closid on domain
/**
* rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
+ * @rdtgrp: Resource group identified through its closid.
*
* An exclusive resource group implies that there should be no sharing of
* its allocated resources. At the time this group is considered to be
struct rdt_domain *d;
u32 ctrl;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
return true;
}
-/**
+/*
* rdtgroup_mode_write - Modify the resource group's mode
- *
*/
static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
return size;
}
-/**
+/*
* rdtgroup_size_show - Display size in bytes of allocated regions
*
* The "size" file mirrors the layout of the "schemata" file, printing the
* size in bytes of each region instead of the capacity bitmask.
- *
*/
static int rdtgroup_size_show(struct kernfs_open_file *of,
struct seq_file *s, void *v)
struct rdt_domain *dom;
bool sep = false;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
list_for_each_entry(dom, &r->domains, list) {
seq_puts(s, "\n");
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return 0;
}
wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
}
-static int mbm_config_write_domain(struct rdt_resource *r,
- struct rdt_domain *d, u32 evtid, u32 val)
+static void mbm_config_write_domain(struct rdt_resource *r,
+ struct rdt_domain *d, u32 evtid, u32 val)
{
struct mon_config_info mon_info = {0};
- int ret = 0;
-
- /* mon_config cannot be more than the supported set of events */
- if (val > MAX_EVT_CONFIG_BITS) {
- rdt_last_cmd_puts("Invalid event configuration\n");
- return -EINVAL;
- }
/*
* Read the current config value first. If both are the same then
mon_info.evtid = evtid;
mondata_config_read(d, &mon_info);
if (mon_info.mon_config == val)
- goto out;
+ return;
mon_info.mon_config = val;
* mbm_local and mbm_total counts for all the RMIDs.
*/
resctrl_arch_reset_rmid_all(r, d);
-
-out:
- return ret;
}
static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
struct rdt_domain *d;
- int ret = 0;
+
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
next:
if (!tok || tok[0] == '\0')
return -EINVAL;
}
+ /* Value from user cannot be more than the supported set of events */
+ if ((val & hw_res->mbm_cfg_mask) != val) {
+ rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
+ hw_res->mbm_cfg_mask);
+ return -EINVAL;
+ }
+
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
- ret = mbm_config_write_domain(r, d, evtid, val);
- if (ret)
- return -EINVAL;
+ mbm_config_write_domain(r, d, evtid, val);
goto next;
}
}
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
rdt_last_cmd_clear();
ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return ret ?: nbytes;
}
if (nbytes == 0 || buf[nbytes - 1] != '\n')
return -EINVAL;
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
rdt_last_cmd_clear();
ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
return ret ?: nbytes;
}
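
Both write handlers feed mon_config_write(), which takes semicolon-separated <domain_id>=<value> pairs and now rejects any value with bits outside hw_res->mbm_cfg_mask. An illustrative configuration write, assuming two L3 domains and a hardware mask that covers 0x30:

        #include <fcntl.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                /* The trailing newline is required by the write handler. */
                const char *cfg = "0=0x30;1=0x30\n";
                int fd = open("/sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config",
                              O_WRONLY);

                if (fd < 0)
                        return 1;
                write(fd, cfg, strlen(cfg));
                close(fd);
                return 0;
        }
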
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_last_cmd_status_show,
- .fflags = RF_TOP_INFO,
+ .fflags = RFTYPE_TOP_INFO,
},
{
.name = "num_closids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_num_closids_show,
- .fflags = RF_CTRL_INFO,
+ .fflags = RFTYPE_CTRL_INFO,
},
{
.name = "mon_features",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_mon_features_show,
- .fflags = RF_MON_INFO,
+ .fflags = RFTYPE_MON_INFO,
},
{
.name = "num_rmids",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_num_rmids_show,
- .fflags = RF_MON_INFO,
+ .fflags = RFTYPE_MON_INFO,
},
{
.name = "cbm_mask",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_default_ctrl_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "min_cbm_bits",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_min_cbm_bits_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "shareable_bits",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_shareable_bits_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "bit_usage",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_bit_usage_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
},
{
.name = "min_bandwidth",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_min_bw_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
},
{
.name = "bandwidth_gran",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_bw_gran_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
},
{
.name = "delay_linear",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdt_delay_linear_show,
- .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
},
/*
* Platform specific which (if any) capabilities are provided by
.kf_ops = &rdtgroup_kf_single_ops,
.write = max_threshold_occ_write,
.seq_show = max_threshold_occ_show,
- .fflags = RF_MON_INFO | RFTYPE_RES_CACHE,
+ .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
},
{
.name = "mbm_total_bytes_config",
.seq_show = rdtgroup_tasks_show,
.fflags = RFTYPE_BASE,
},
+ {
+ .name = "mon_hw_id",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_rmid_show,
+ .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG,
+ },
{
.name = "schemata",
.mode = 0644,
.kf_ops = &rdtgroup_kf_single_ops,
.write = rdtgroup_schemata_write,
.seq_show = rdtgroup_schemata_show,
- .fflags = RF_CTRL_BASE,
+ .fflags = RFTYPE_CTRL_BASE,
},
{
.name = "mode",
.kf_ops = &rdtgroup_kf_single_ops,
.write = rdtgroup_mode_write,
.seq_show = rdtgroup_mode_show,
- .fflags = RF_CTRL_BASE,
+ .fflags = RFTYPE_CTRL_BASE,
},
{
.name = "size",
.mode = 0444,
.kf_ops = &rdtgroup_kf_single_ops,
.seq_show = rdtgroup_size_show,
- .fflags = RF_CTRL_BASE,
+ .fflags = RFTYPE_CTRL_BASE,
+ },
+ {
+ .name = "sparse_masks",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdt_has_sparse_bitmasks_show,
+ .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
+ },
+ {
+ .name = "ctrl_hw_id",
+ .mode = 0444,
+ .kf_ops = &rdtgroup_kf_single_ops,
+ .seq_show = rdtgroup_closid_show,
+ .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
},
};
lockdep_assert_held(&rdtgroup_mutex);
+ if (resctrl_debug)
+ fflags |= RFTYPE_DEBUG;
+
for (rft = rfts; rft < rfts + len; rft++) {
if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
ret = rdtgroup_add_file(kn, rft);
if (!rft)
return;
- rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
+ rft->fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB;
}
void __init mbm_config_rftype_init(const char *config)
rft = rdtgroup_get_rftype_by_name(config);
if (rft)
- rft->fflags = RF_MON_INFO | RFTYPE_RES_CACHE;
+ rft->fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE;
}
/**
if (IS_ERR(kn_info))
return PTR_ERR(kn_info);
- ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
+ ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
if (ret)
goto out_destroy;
/* loop over enabled controls, these are all alloc_capable */
list_for_each_entry(s, &resctrl_schema_all, list) {
r = s->res;
- fflags = r->fflags | RF_CTRL_INFO;
+ fflags = r->fflags | RFTYPE_CTRL_INFO;
ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
if (ret)
goto out_destroy;
}
for_each_mon_capable_rdt_resource(r) {
- fflags = r->fflags | RF_MON_INFO;
+ fflags = r->fflags | RFTYPE_MON_INFO;
sprintf(name, "%s_MON", r->name);
ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
if (ret)
struct rdt_domain *d;
int cpu;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (level == RDT_RESOURCE_L3)
update = l3_qos_cfg_update;
else if (level == RDT_RESOURCE_L2)
return 0;
}
-static void cdp_disable_all(void)
-{
- if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
- resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
- if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
- resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
-}
-
/*
* We don't allow rdtgroup directories to be created anywhere
* except the root directory. Thus when looking for the rdtgroup
rdtgroup_kn_get(rdtgrp, kn);
+ cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
/* Was this group deleted while we waited? */
return;
mutex_unlock(&rdtgroup_mutex);
+ cpus_read_unlock();
+
rdtgroup_kn_put(rdtgrp, kn);
}
struct rdtgroup *prgrp,
struct kernfs_node **mon_data_kn);
+static void rdt_disable_ctx(void)
+{
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
+ set_mba_sc(false);
+
+ resctrl_debug = false;
+}
+
static int rdt_enable_ctx(struct rdt_fs_context *ctx)
{
int ret = 0;
- if (ctx->enable_cdpl2)
+ if (ctx->enable_cdpl2) {
ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
+ if (ret)
+ goto out_done;
+ }
- if (!ret && ctx->enable_cdpl3)
+ if (ctx->enable_cdpl3) {
ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
+ if (ret)
+ goto out_cdpl2;
+ }
- if (!ret && ctx->enable_mba_mbps)
+ if (ctx->enable_mba_mbps) {
ret = set_mba_sc(true);
+ if (ret)
+ goto out_cdpl3;
+ }
+
+ if (ctx->enable_debug)
+ resctrl_debug = true;
+
+ return 0;
+out_cdpl3:
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
+out_cdpl2:
+ resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
+out_done:
return ret;
}
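
rdt_enable_ctx() now tears down every stage that did succeed when a later one fails, instead of leaving cleanup to the caller. A generic, self-contained sketch of this goto-unwind idiom; the stub stages stand in for cdpl2/cdpl3/mba_sc:

        /* Illustrative stubs; setup_c() fails to show the unwind order. */
        static int setup_a(void) { return 0; }
        static int setup_b(void) { return 0; }
        static int setup_c(void) { return -1; }
        static void teardown_a(void) { }
        static void teardown_b(void) { }

        static int setup_all(void)
        {
                int ret;

                ret = setup_a();
                if (ret)
                        goto out;
                ret = setup_b();
                if (ret)
                        goto out_a;
                ret = setup_c();
                if (ret)
                        goto out_b;
                return 0;
        out_b:
                teardown_b();
        out_a:
                teardown_a();
        out:
                return ret;             /* all earlier stages are undone */
        }
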
static int rdt_get_tree(struct fs_context *fc)
{
struct rdt_fs_context *ctx = rdt_fc2context(fc);
+ unsigned long flags = RFTYPE_CTRL_BASE;
struct rdt_domain *dom;
struct rdt_resource *r;
int ret;
/*
* resctrl file system can only be mounted once.
*/
- if (static_branch_unlikely(&rdt_enable_key)) {
+ if (resctrl_mounted) {
ret = -EBUSY;
goto out;
}
+ ret = rdtgroup_setup_root(ctx);
+ if (ret)
+ goto out;
+
ret = rdt_enable_ctx(ctx);
- if (ret < 0)
- goto out_cdp;
+ if (ret)
+ goto out_root;
ret = schemata_list_create();
if (ret) {
schemata_list_destroy();
- goto out_mba;
+ goto out_ctx;
}
closid_init();
+ if (resctrl_arch_mon_capable())
+ flags |= RFTYPE_MON;
+
+ ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
+ if (ret)
+ goto out_schemata_free;
+
+ kernfs_activate(rdtgroup_default.kn);
+
ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
if (ret < 0)
goto out_schemata_free;
- if (rdt_mon_capable) {
+ if (resctrl_arch_mon_capable()) {
ret = mongroup_create_dir(rdtgroup_default.kn,
&rdtgroup_default, "mon_groups",
&kn_mongrp);
if (ret < 0)
goto out_psl;
- if (rdt_alloc_capable)
- static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
- if (rdt_mon_capable)
- static_branch_enable_cpuslocked(&rdt_mon_enable_key);
+ if (resctrl_arch_alloc_capable())
+ resctrl_arch_enable_alloc();
+ if (resctrl_arch_mon_capable())
+ resctrl_arch_enable_mon();
- if (rdt_alloc_capable || rdt_mon_capable)
- static_branch_enable_cpuslocked(&rdt_enable_key);
+ if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
+ resctrl_mounted = true;
if (is_mbm_enabled()) {
r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
list_for_each_entry(dom, &r->domains, list)
- mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
+ mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
}
goto out;
out_psl:
rdt_pseudo_lock_release();
out_mondata:
- if (rdt_mon_capable)
+ if (resctrl_arch_mon_capable())
kernfs_remove(kn_mondata);
out_mongrp:
- if (rdt_mon_capable)
+ if (resctrl_arch_mon_capable())
kernfs_remove(kn_mongrp);
out_info:
kernfs_remove(kn_info);
out_schemata_free:
schemata_list_destroy();
-out_mba:
- if (ctx->enable_mba_mbps)
- set_mba_sc(false);
-out_cdp:
- cdp_disable_all();
+out_ctx:
+ rdt_disable_ctx();
+out_root:
+ rdtgroup_destroy_root();
out:
rdt_last_cmd_clear();
mutex_unlock(&rdtgroup_mutex);
Opt_cdp,
Opt_cdpl2,
Opt_mba_mbps,
+ Opt_debug,
nr__rdt_params
};
fsparam_flag("cdp", Opt_cdp),
fsparam_flag("cdpl2", Opt_cdpl2),
fsparam_flag("mba_MBps", Opt_mba_mbps),
+ fsparam_flag("debug", Opt_debug),
{}
};
return -EINVAL;
ctx->enable_mba_mbps = true;
return 0;
+ case Opt_debug:
+ ctx->enable_debug = true;
+ return 0;
}
return -EINVAL;
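
User space passes these flags as mount options. A sketch using mount(2) directly, equivalent to "mount -t resctrl -o cdp,mba_MBps,debug resctrl /sys/fs/resctrl"; which options are accepted depends on hardware support:

        #include <stdio.h>
        #include <sys/mount.h>

        int main(void)
        {
                if (mount("resctrl", "/sys/fs/resctrl", "resctrl", 0,
                          "cdp,mba_MBps,debug"))
                        perror("mount");
                return 0;
        }
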
if (!ctx)
return -ENOMEM;
- ctx->kfc.root = rdt_root;
ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
fc->fs_private = &ctx->kfc;
fc->ops = &rdt_fs_context_ops;
struct rdt_domain *d;
int i;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
return -ENOMEM;
for_each_process_thread(p, t) {
if (!from || is_closid_match(t, from) ||
is_rmid_match(t, from)) {
- WRITE_ONCE(t->closid, to->closid);
- WRITE_ONCE(t->rmid, to->mon.rmid);
+ resctrl_arch_set_closid_rmid(t, to->closid,
+ to->mon.rmid);
/*
* Order the closid/rmid stores above before the loads
head = &rdtgrp->mon.crdtgrp_list;
list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
- free_rmid(sentry->mon.rmid);
+ free_rmid(sentry->closid, sentry->mon.rmid);
list_del(&sentry->mon.crdtgrp_list);
if (atomic_read(&sentry->waitcount) != 0)
cpumask_or(&rdtgroup_default.cpu_mask,
&rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
kernfs_remove(rdtgrp->kn);
list_del(&rdtgrp->rdtgroup_list);
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
- set_mba_sc(false);
+ rdt_disable_ctx();
/* Put everything back to default values. */
for_each_alloc_capable_rdt_resource(r)
reset_all_ctrls(r);
- cdp_disable_all();
rmdir_all_sub();
rdt_pseudo_lock_release();
rdtgroup_default.mode = RDT_MODE_SHAREABLE;
schemata_list_destroy();
- static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
- static_branch_disable_cpuslocked(&rdt_mon_enable_key);
- static_branch_disable_cpuslocked(&rdt_enable_key);
+ rdtgroup_destroy_root();
+ if (resctrl_arch_alloc_capable())
+ resctrl_arch_disable_alloc();
+ if (resctrl_arch_mon_capable())
+ resctrl_arch_disable_mon();
+ resctrl_mounted = false;
kernfs_kill_sb(sb);
mutex_unlock(&rdtgroup_mutex);
cpus_read_unlock();
struct rdt_domain *dom;
int ret;
+ /* Walking r->domains, ensure it can't race with cpuhp */
+ lockdep_assert_cpus_held();
+
list_for_each_entry(dom, &r->domains, list) {
ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
if (ret)
return ret;
}
+static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
+{
+ int ret;
+
+ if (!resctrl_arch_mon_capable())
+ return 0;
+
+ ret = alloc_rmid(rdtgrp->closid);
+ if (ret < 0) {
+ rdt_last_cmd_puts("Out of RMIDs\n");
+ return ret;
+ }
+ rdtgrp->mon.rmid = ret;
+
+ ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
+ if (ret) {
+ rdt_last_cmd_puts("kernfs subdir error\n");
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
+{
+ if (resctrl_arch_mon_capable())
+ free_rmid(rgrp->closid, rgrp->mon.rmid);
+}
+
static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
const char *name, umode_t mode,
enum rdt_group_type rtype, struct rdtgroup **r)
{
struct rdtgroup *prdtgrp, *rdtgrp;
+ unsigned long files = 0;
struct kernfs_node *kn;
- uint files = 0;
int ret;
prdtgrp = rdtgroup_kn_lock_live(parent_kn);
goto out_destroy;
}
- files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
+ if (rtype == RDTCTRL_GROUP) {
+ files = RFTYPE_BASE | RFTYPE_CTRL;
+ if (resctrl_arch_mon_capable())
+ files |= RFTYPE_MON;
+ } else {
+ files = RFTYPE_BASE | RFTYPE_MON;
+ }
+
ret = rdtgroup_add_files(kn, files);
if (ret) {
rdt_last_cmd_puts("kernfs fill error\n");
goto out_destroy;
}
- if (rdt_mon_capable) {
- ret = alloc_rmid();
- if (ret < 0) {
- rdt_last_cmd_puts("Out of RMIDs\n");
- goto out_destroy;
- }
- rdtgrp->mon.rmid = ret;
-
- ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
- if (ret) {
- rdt_last_cmd_puts("kernfs subdir error\n");
- goto out_idfree;
- }
- }
- kernfs_activate(kn);
-
/*
* The caller unlocks the parent_kn upon success.
*/
return 0;
-out_idfree:
- free_rmid(rdtgrp->mon.rmid);
out_destroy:
kernfs_put(rdtgrp->kn);
kernfs_remove(rdtgrp->kn);
static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
{
kernfs_remove(rgrp->kn);
- free_rmid(rgrp->mon.rmid);
rdtgroup_remove(rgrp);
}
prgrp = rdtgrp->mon.parent;
rdtgrp->closid = prgrp->closid;
+ ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+ if (ret) {
+ mkdir_rdt_prepare_clean(rdtgrp);
+ goto out_unlock;
+ }
+
+ kernfs_activate(rdtgrp->kn);
+
/*
* Add the rdtgrp to the list of rdtgrps the parent
* ctrl_mon group has to track.
*/
list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
+out_unlock:
rdtgroup_kn_unlock(parent_kn);
return ret;
}
ret = 0;
rdtgrp->closid = closid;
+
+ ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
+ if (ret)
+ goto out_closid_free;
+
+ kernfs_activate(rdtgrp->kn);
+
ret = rdtgroup_init_alloc(rdtgrp);
if (ret < 0)
- goto out_id_free;
+ goto out_rmid_free;
list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
- if (rdt_mon_capable) {
+ if (resctrl_arch_mon_capable()) {
/*
* Create an empty mon_groups directory to hold the subset
* of tasks and cpus to monitor.
out_del_list:
list_del(&rdtgrp->rdtgroup_list);
-out_id_free:
+out_rmid_free:
+ mkdir_rdt_prepare_rmid_free(rdtgrp);
+out_closid_free:
closid_free(closid);
out_common_fail:
mkdir_rdt_prepare_clean(rdtgrp);
* allocation is supported, add a control and monitoring
* subdirectory
*/
- if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
+ if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
/*
* If RDT monitoring is supported and the parent directory is a valid
* "mon_groups" directory, add a monitoring subdirectory.
*/
- if (rdt_mon_capable && is_mon_groups(parent_kn, name))
+ if (resctrl_arch_mon_capable() && is_mon_groups(parent_kn, name))
return rdtgroup_mkdir_mon(parent_kn, name, mode);
return -EPERM;
update_closid_rmid(tmpmask, NULL);
rdtgrp->flags = RDT_DELETED;
- free_rmid(rdtgrp->mon.rmid);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
/*
* Remove the rdtgrp from the parent ctrl_mon group's list
cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
update_closid_rmid(tmpmask, NULL);
+ free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
closid_free(rdtgrp->closid);
- free_rmid(rdtgrp->mon.rmid);
rdtgroup_ctrl_remove(rdtgrp);
if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
seq_puts(seq, ",mba_MBps");
+ if (resctrl_debug)
+ seq_puts(seq, ",debug");
+
return 0;
}
.show_options = rdtgroup_show_options,
};
-static int __init rdtgroup_setup_root(void)
+static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
{
- int ret;
-
rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
KERNFS_ROOT_CREATE_DEACTIVATED |
KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
if (IS_ERR(rdt_root))
return PTR_ERR(rdt_root);
+ ctx->kfc.root = rdt_root;
+ rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
+
+ return 0;
+}
+
+static void rdtgroup_destroy_root(void)
+{
+ kernfs_destroy_root(rdt_root);
+ rdtgroup_default.kn = NULL;
+}
+
+static void __init rdtgroup_setup_default(void)
+{
mutex_lock(&rdtgroup_mutex);
- rdtgroup_default.closid = 0;
- rdtgroup_default.mon.rmid = 0;
+ rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
+ rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
rdtgroup_default.type = RDTCTRL_GROUP;
INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
- ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RF_CTRL_BASE);
- if (ret) {
- kernfs_destroy_root(rdt_root);
- goto out;
- }
-
- rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
- kernfs_activate(rdtgroup_default.kn);
-
-out:
mutex_unlock(&rdtgroup_mutex);
-
- return ret;
}
static void domain_destroy_mon_state(struct rdt_domain *d)
void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
{
- lockdep_assert_held(&rdtgroup_mutex);
+ mutex_lock(&rdtgroup_mutex);
if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
mba_sc_domain_destroy(r, d);
if (!r->mon_capable)
- return;
+ goto out_unlock;
/*
* If resctrl is mounted, remove all the
* per domain monitor data directories.
*/
- if (static_branch_unlikely(&rdt_mon_enable_key))
+ if (resctrl_mounted && resctrl_arch_mon_capable())
rmdir_mondata_subdir_allrdtgrp(r, d->id);
if (is_mbm_enabled())
cancel_delayed_work(&d->mbm_over);
- if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
+ if (is_llc_occupancy_enabled() && has_busy_rmid(d)) {
/*
* When a package is going down, forcefully
* decrement rmid->ebusy. There is no way to know
}
domain_destroy_mon_state(d);
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
}
static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
{
+ u32 idx_limit = resctrl_arch_system_num_rmid_idx();
size_t tsize;
if (is_llc_occupancy_enabled()) {
- d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
+ d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
if (!d->rmid_busy_llc)
return -ENOMEM;
}
if (is_mbm_total_enabled()) {
tsize = sizeof(*d->mbm_total);
- d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_total) {
bitmap_free(d->rmid_busy_llc);
return -ENOMEM;
}
if (is_mbm_local_enabled()) {
tsize = sizeof(*d->mbm_local);
- d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
+ d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL);
if (!d->mbm_local) {
bitmap_free(d->rmid_busy_llc);
kfree(d->mbm_total);
int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
{
- int err;
+ int err = 0;
- lockdep_assert_held(&rdtgroup_mutex);
+ mutex_lock(&rdtgroup_mutex);
- if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
+ if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
/* RDT_RESOURCE_MBA is never mon_capable */
- return mba_sc_domain_allocate(r, d);
+ err = mba_sc_domain_allocate(r, d);
+ goto out_unlock;
+ }
if (!r->mon_capable)
- return 0;
+ goto out_unlock;
err = domain_setup_mon_state(r, d);
if (err)
- return err;
+ goto out_unlock;
if (is_mbm_enabled()) {
INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
- mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
+ mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
+ RESCTRL_PICK_ANY_CPU);
}
if (is_llc_occupancy_enabled())
INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
- /* If resctrl is mounted, add per domain monitor data directories. */
- if (static_branch_unlikely(&rdt_mon_enable_key))
+ /*
+ * If the filesystem is not mounted then only the default resource group
+ * exists. Creation of its directories is deferred until mount time
+ * by rdt_get_tree() calling mkdir_mondata_all().
+ * If resctrl is mounted, add per domain monitor data directories.
+ */
+ if (resctrl_mounted && resctrl_arch_mon_capable())
mkdir_mondata_subdir_allrdtgrp(r, d);
- return 0;
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
+
+ return err;
+}
+
+void resctrl_online_cpu(unsigned int cpu)
+{
+ mutex_lock(&rdtgroup_mutex);
+ /* A newly onlined CPU is assigned to the default rdtgroup. */
+ cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
+ mutex_unlock(&rdtgroup_mutex);
+}
+
+static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
+{
+ struct rdtgroup *cr;
+
+ list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
+ break;
+ }
+}
+
+void resctrl_offline_cpu(unsigned int cpu)
+{
+ struct rdt_resource *l3 = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+ struct rdtgroup *rdtgrp;
+ struct rdt_domain *d;
+
+ mutex_lock(&rdtgroup_mutex);
+ list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
+ if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
+ clear_childcpus(rdtgrp, cpu);
+ break;
+ }
+ }
+
+ if (!l3->mon_capable)
+ goto out_unlock;
+
+ d = get_domain_from_cpu(cpu, l3);
+ if (d) {
+ if (is_mbm_enabled() && cpu == d->mbm_work_cpu) {
+ cancel_delayed_work(&d->mbm_over);
+ mbm_setup_overflow_handler(d, 0, cpu);
+ }
+ if (is_llc_occupancy_enabled() && cpu == d->cqm_work_cpu &&
+ has_busy_rmid(d)) {
+ cancel_delayed_work(&d->cqm_limbo);
+ cqm_setup_limbo_handler(d, 0, cpu);
+ }
+ }
+
+out_unlock:
+ mutex_unlock(&rdtgroup_mutex);
}
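
resctrl_online_cpu() and resctrl_offline_cpu() are meant to be driven from the architecture's CPU hotplug path. A hedged sketch of how arch code might wire them up with cpuhp_setup_state() (from linux/cpuhotplug.h); the wrapper names and state string are illustrative, not part of this patch:

        static int arch_resctrl_online(unsigned int cpu)
        {
                /* arch per-CPU setup (e.g. default CLOSID/RMID MSR) goes here */
                resctrl_online_cpu(cpu);
                return 0;
        }

        static int arch_resctrl_offline(unsigned int cpu)
        {
                resctrl_offline_cpu(cpu);
                return 0;
        }

        static int __init arch_resctrl_hp_init(void)
        {
                int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
                                            "arch/resctrl:online",
                                            arch_resctrl_online,
                                            arch_resctrl_offline);

                return ret < 0 ? ret : 0;       /* dynamic states return an id */
        }
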
/*
seq_buf_init(&last_cmd_status, last_cmd_status_buf,
sizeof(last_cmd_status_buf));
- ret = rdtgroup_setup_root();
- if (ret)
- return ret;
+ rdtgroup_setup_default();
ret = sysfs_create_mount_point(fs_kobj, "resctrl");
if (ret)
- goto cleanup_root;
+ return ret;
ret = register_filesystem(&rdt_fs_type);
if (ret)
cleanup_mountpoint:
sysfs_remove_mount_point(fs_kobj, "resctrl");
-cleanup_root:
- kernfs_destroy_root(rdt_root);
return ret;
}
debugfs_remove_recursive(debugfs_resctrl);
unregister_filesystem(&rdt_fs_type);
sysfs_remove_mount_point(fs_kobj, "resctrl");
- kernfs_destroy_root(rdt_root);
}