Within cxl module, iteration over array 'adapter->afu' may be racy
at few points as it might be simultaneously read during an EEH and its
contents being set to NULL while driver is being unloaded or unbound
from the adapter. This might result in a NULL pointer to 'struct afu'
being de-referenced during an EEH thereby causing a kernel oops.
This patch fixes this by making sure that all access to the array
'adapter->afu' is wrapped within the context of spin-lock
'adapter->afu_list_lock'.
Fixes:
9e8df8a21963 ("cxl: EEH support")
Cc: stable@vger.kernel.org # v4.3+
Acked-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Acked-by: Christophe Lombard <clombard@linux.vnet.ibm.com>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
int i, rc;
pr_devel("Adapter reset request\n");
int i, rc;
pr_devel("Adapter reset request\n");
+ spin_lock(&adapter->afu_list_lock);
for (i = 0; i < adapter->slices; i++) {
if ((afu = adapter->afu[i])) {
pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
for (i = 0; i < adapter->slices; i++) {
if ((afu = adapter->afu[i])) {
pci_error_handlers(afu, CXL_ERROR_DETECTED_EVENT,
pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
}
}
pci_error_handlers(afu, CXL_RESUME_EVENT, 0);
}
}
+ spin_unlock(&adapter->afu_list_lock);
/* There should only be one entry, but go through the list
* anyway
*/
/* There should only be one entry, but go through the list
* anyway
*/
+ if (afu == NULL || afu->phb == NULL)
return result;
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
return result;
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
{
struct cxl *adapter = pci_get_drvdata(pdev);
struct cxl_afu *afu;
{
struct cxl *adapter = pci_get_drvdata(pdev);
struct cxl_afu *afu;
- pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET, afu_result;
+ pci_ers_result_t result = PCI_ERS_RESULT_NEED_RESET;
+ pci_ers_result_t afu_result = PCI_ERS_RESULT_NEED_RESET;
int i;
/* At this point, we could still have an interrupt pending.
int i;
/* At this point, we could still have an interrupt pending.
/* If we're permanently dead, give up. */
if (state == pci_channel_io_perm_failure) {
/* If we're permanently dead, give up. */
if (state == pci_channel_io_perm_failure) {
+ spin_lock(&adapter->afu_list_lock);
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
/*
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
/*
*/
cxl_vphb_error_detected(afu, state);
}
*/
cxl_vphb_error_detected(afu, state);
}
+ spin_unlock(&adapter->afu_list_lock);
return PCI_ERS_RESULT_DISCONNECT;
}
return PCI_ERS_RESULT_DISCONNECT;
}
* * In slot_reset, free the old resources and allocate new ones.
* * In resume, clear the flag to allow things to start.
*/
* * In slot_reset, free the old resources and allocate new ones.
* * In resume, clear the flag to allow things to start.
*/
+
+ /* Make sure no one else changes the afu list */
+ spin_lock(&adapter->afu_list_lock);
+
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
- afu_result = cxl_vphb_error_detected(afu, state);
+ if (afu == NULL)
+ continue;
+ afu_result = cxl_vphb_error_detected(afu, state);
cxl_context_detach_all(afu);
cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
pci_deconfigure_afu(afu);
cxl_context_detach_all(afu);
cxl_ops->afu_deactivate_mode(afu, afu->current_mode);
pci_deconfigure_afu(afu);
(result == PCI_ERS_RESULT_NEED_RESET))
result = PCI_ERS_RESULT_NONE;
}
(result == PCI_ERS_RESULT_NEED_RESET))
result = PCI_ERS_RESULT_NONE;
}
+ spin_unlock(&adapter->afu_list_lock);
/* should take the context lock here */
if (cxl_adapter_context_lock(adapter) != 0)
/* should take the context lock here */
if (cxl_adapter_context_lock(adapter) != 0)
*/
cxl_adapter_context_unlock(adapter);
*/
cxl_adapter_context_unlock(adapter);
+ spin_lock(&adapter->afu_list_lock);
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
+ if (afu == NULL)
+ continue;
+
if (pci_configure_afu(afu, adapter, pdev))
if (pci_configure_afu(afu, adapter, pdev))
if (cxl_afu_select_best_mode(afu))
if (cxl_afu_select_best_mode(afu))
if (afu->phb == NULL)
continue;
if (afu->phb == NULL)
continue;
ctx = cxl_get_context(afu_dev);
if (ctx && cxl_release_context(ctx))
ctx = cxl_get_context(afu_dev);
if (ctx && cxl_release_context(ctx))
ctx = cxl_dev_context_init(afu_dev);
if (IS_ERR(ctx))
ctx = cxl_dev_context_init(afu_dev);
if (IS_ERR(ctx))
afu_dev->dev.archdata.cxl_ctx = ctx;
if (cxl_ops->afu_check_and_enable(afu))
afu_dev->dev.archdata.cxl_ctx = ctx;
if (cxl_ops->afu_check_and_enable(afu))
afu_dev->error_state = pci_channel_io_normal;
afu_dev->error_state = pci_channel_io_normal;
result = PCI_ERS_RESULT_DISCONNECT;
}
}
result = PCI_ERS_RESULT_DISCONNECT;
}
}
+
+ spin_unlock(&adapter->afu_list_lock);
+err_unlock:
+ spin_unlock(&adapter->afu_list_lock);
+
err:
/* All the bits that happen in both error_detected and cxl_remove
* should be idempotent, so we don't need to worry about leaving a mix
err:
/* All the bits that happen in both error_detected and cxl_remove
* should be idempotent, so we don't need to worry about leaving a mix
* This is not the place to be checking if everything came back up
* properly, because there's no return value: do that in slot_reset.
*/
* This is not the place to be checking if everything came back up
* properly, because there's no return value: do that in slot_reset.
*/
+ spin_lock(&adapter->afu_list_lock);
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
for (i = 0; i < adapter->slices; i++) {
afu = adapter->afu[i];
+ if (afu == NULL || afu->phb == NULL)
continue;
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
continue;
list_for_each_entry(afu_dev, &afu->phb->bus->devices, bus_list) {
afu_dev->driver->err_handler->resume(afu_dev);
}
}
afu_dev->driver->err_handler->resume(afu_dev);
}
}
+ spin_unlock(&adapter->afu_list_lock);
}
static const struct pci_error_handlers cxl_err_handler = {
}
static const struct pci_error_handlers cxl_err_handler = {