// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
	struct workqueue_struct *wq;
	struct list_head devices;
	struct mutex lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: index into queue->devices
 * @partial: faults that are part of a Page Request Group for which the last
 *           request hasn't been submitted yet.
 */
struct iopf_device_param {
	struct device *dev;
	struct iopf_queue *queue;
	struct list_head queue_list;
	struct list_head partial;
};

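/*
 * A single page request and its fault descriptor. Non-last requests sit on
 * the device's @partial list until the last request of their group arrives,
 * at which point they are moved onto the group's fault list.
 */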
struct iopf_fault {
	struct iommu_fault fault;
	struct list_head list;
};

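/*
 * A complete Page Request Group, handled as one unit of work. The last fault
 * carries the PASID and group ID that the response is built from.
 */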
struct iopf_group {
	struct iopf_fault last_fault;
	struct list_head faults;
	struct work_struct work;
	struct device *dev;
};

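/*
 * Send the Page Request Group response for @iopf, which must be the last
 * fault of its group. The PASID is echoed back only when the device needs it.
 */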
static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
			       enum iommu_page_response_code status)
{
	struct iommu_page_response resp = {
		.version = IOMMU_PAGE_RESP_VERSION_1,
		.pasid = iopf->fault.prm.pasid,
		.grpid = iopf->fault.prm.grpid,
		.code = status,
	};

	if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
	    (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
		resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

	return iommu_page_response(dev, &resp);
}

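/*
 * Work function, run once per group: feed each fault to the page fault
 * handler of the domain attached to this PASID, then respond to the group.
 */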
static void iopf_handler(struct work_struct *work)
{
	struct iopf_group *group;
	struct iommu_domain *domain;
	struct iopf_fault *iopf, *next;
	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

	group = container_of(work, struct iopf_group, work);
	domain = iommu_get_domain_for_dev_pasid(group->dev,
				group->last_fault.fault.prm.pasid, 0);
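	/* A type argument of 0 above matches any domain type for the PASID */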
	if (!domain || !domain->iopf_handler)
		status = IOMMU_PAGE_RESP_INVALID;

	list_for_each_entry_safe(iopf, next, &group->faults, list) {
		/*
		 * For the moment, errors are sticky: don't handle subsequent
		 * faults in the group if there is an error.
		 */
		if (status == IOMMU_PAGE_RESP_SUCCESS)
			status = domain->iopf_handler(&iopf->fault,
						      domain->fault_data);

		if (!(iopf->fault.prm.flags &
		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
			kfree(iopf);
	}

	iopf_complete_group(group->dev, &group->last_fault, status);
	kfree(group);
}

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by the page fault
 * handler of the domain that the faulting PASID is attached to.
 *
 * This module doesn't handle PCI PASID Stop Markers; IOMMU drivers must
 * discard them before reporting faults. A PASID Stop Marker (LRW = 0b100)
 * doesn't expect a response. It may be generated when disabling a PASID
 * (issuing a PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses,
 * and instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the
 * stop request completes, outstanding faults will have been dealt with by the
 * time the PASID is freed.
 *
 * Any valid page fault will be eventually routed to an iommu domain and the
 * page fault handler installed there will get called. The users of this
 * handling framework should guarantee that the iommu domain can only be freed
 * after the device has stopped generating page faults (or the iommu hardware
 * has been set to block the page faults) and the pending page faults have
 * been flushed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
	int ret;
	struct iopf_group *group;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	struct device *dev = cookie;
	struct dev_iommu *param = dev->iommu;

	lockdep_assert_held(&param->lock);

	if (fault->type != IOMMU_FAULT_PAGE_REQ)
		/* Not a recoverable page fault */
		return -EOPNOTSUPP;

	/*
	 * As long as we're holding param->lock, the queue can't be unlinked
	 * from the device and therefore cannot disappear.
	 */
	iopf_param = param->iopf_param;
	if (!iopf_param)
		return -ENODEV;

	if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
		iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
		if (!iopf)
			return -ENOMEM;

		iopf->fault = *fault;

		/* Non-last request of a group. Postpone until the last one */
		list_add(&iopf->list, &iopf_param->partial);

		return 0;
	}

	group = kzalloc(sizeof(*group), GFP_KERNEL);
	if (!group) {
		/*
		 * The caller will send a response to the hardware. But we do
		 * need to clean up before leaving, otherwise partial faults
		 * will be stuck.
		 */
		ret = -ENOMEM;
		goto cleanup_partial;
	}

	group->dev = dev;
	group->last_fault.fault = *fault;
	INIT_LIST_HEAD(&group->faults);
	list_add(&group->last_fault.list, &group->faults);
	INIT_WORK(&group->work, iopf_handler);

	/* See if we have partial faults for this group */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid)
			/* Insert *before* the last fault */
			list_move(&iopf->list, &group->faults);
	}

	queue_work(iopf_param->queue->wq, &group->work);
	return 0;

cleanup_partial:
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
		if (iopf->fault.prm.grpid == fault->prm.grpid) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);
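
/*
 * A minimal usage sketch, assuming the driver has already allocated a queue
 * and added @dev to it:
 *
 *	iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
 *
 * The low-level IOMMU driver then forwards each Page Request it dequeues
 * from hardware through iommu_report_device_fault(), which ends up invoking
 * the handler registered above.
 */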

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
	int ret = 0;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param)
		flush_workqueue(iopf_param->queue->wq);
	else
		ret = -ENODEV;
	mutex_unlock(&param->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
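
/*
 * Expected call order on the driver side when stopping a PASID, as a sketch;
 * my_driver_flush_prq() is a hypothetical low-level helper:
 *
 *	my_driver_flush_prq(master);	// drain the hardware PRI queue first
 *	iopf_queue_flush_dev(dev);	// then drain the IOPF workqueue
 */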

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last page faults of some groups may
 * have been lost, so the IOMMU driver calls this to discard all partial
 * faults. The driver shouldn't be adding new faults to this queue
 * concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;

	if (!queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	list_for_each_entry(iopf_param, &queue->devices, queue_list) {
		list_for_each_entry_safe(iopf, next, &iopf_param->partial,
					 list) {
			list_del(&iopf->list);
			kfree(iopf);
		}
	}
	mutex_unlock(&queue->lock);
	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EBUSY;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param)
		return -ENODEV;

	iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
	if (!iopf_param)
		return -ENOMEM;

	INIT_LIST_HEAD(&iopf_param->partial);
	iopf_param->queue = queue;
	iopf_param->dev = dev;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	if (!param->iopf_param) {
		list_add(&iopf_param->queue_list, &queue->devices);
		param->iopf_param = iopf_param;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);

	if (ret)
		kfree(iopf_param);

	return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Caller makes sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
	int ret = -EINVAL;
	struct iopf_fault *iopf, *next;
	struct iopf_device_param *iopf_param;
	struct dev_iommu *param = dev->iommu;

	if (!param || !queue)
		return -EINVAL;

	mutex_lock(&queue->lock);
	mutex_lock(&param->lock);
	iopf_param = param->iopf_param;
	if (iopf_param && iopf_param->queue == queue) {
		list_del(&iopf_param->queue_list);
		param->iopf_param = NULL;
		ret = 0;
	}
	mutex_unlock(&param->lock);
	mutex_unlock(&queue->lock);
	if (ret)
		return ret;

	/* Just in case some faults are still stuck */
	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
		kfree(iopf);

	kfree(iopf_param);

	return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
	struct iopf_queue *queue;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;

	/*
	 * The WQ is unordered because the low-level handler enqueues faults by
	 * group. PRI requests within a group have to be ordered, but once
	 * that's dealt with, the high-level function can handle groups out of
	 * order.
	 */
	queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
	if (!queue->wq) {
		kfree(queue);
		return NULL;
	}

	INIT_LIST_HEAD(&queue->devices);
	mutex_init(&queue->lock);

	return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);
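
/*
 * Queue lifecycle, as a sketch (error handling omitted):
 *
 *	queue = iopf_queue_alloc(dev_name(iommu_dev));
 *	iopf_queue_add_device(queue, dev);	// when enabling IOPF for @dev
 *	...
 *	iopf_queue_remove_device(queue, dev);
 *	iopf_queue_free(queue);
 */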

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
	struct iopf_device_param *iopf_param, *next;

	if (!queue)
		return;

	list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
		iopf_queue_remove_device(queue, iopf_param->dev);

	destroy_workqueue(queue->wq);
	kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);