// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2015 Intel Corporation.
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
#include "../dmaengine.h"
#include "registers.h"
#include "hw.h"
#include "dma.h"

#define MAX_SCF	256

/* provide a lookup table for setting the source address in the base or
 * extended descriptor of an xor or pq descriptor
 */
static const u8 xor_idx_to_desc = 0xe0;
static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
static const u8 pq_idx_to_desc = 0xf8;
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
				       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
					0, 1, 2, 3, 4, 5, 6 };

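/*
 * How the tables above are read (explanatory note, derived from the
 * constants themselves): bit idx of xor_idx_to_desc (0xe0) selects the
 * base (0) or extended (1) descriptor for xor source idx, and
 * xor_idx_to_field[idx] gives the u64 field slot within that descriptor.
 * For example, xor source 5: (0xe0 >> 5) & 1 == 1, so it lands in field 0
 * of the extended descriptor.  pq_idx_to_desc (0xf8) works the same way
 * for 8-source pq; the 16-source variants index an array instead of a
 * bitmask because three descriptors are involved.
 */
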
static void xor_set_src(struct ioat_raw_descriptor *descs[2],
			dma_addr_t addr, u32 offset, int idx)
{
	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];

	raw->field[xor_idx_to_field[idx]] = addr + offset;
}

static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
{
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	return raw->field[pq_idx_to_field[idx]];
}

static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
{
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	return raw->field[pq16_idx_to_field[idx]];
}

static void pq_set_src(struct ioat_raw_descriptor *descs[2],
		       dma_addr_t addr, u32 offset, u8 coef, int idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];

	raw->field[pq_idx_to_field[idx]] = addr + offset;
	pq->coef[idx] = coef;
}

static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
			 dma_addr_t addr, u32 offset, u8 coef, unsigned idx)
{
	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
	struct ioat_pq16a_descriptor *pq16 =
		(struct ioat_pq16a_descriptor *)desc[1];
	struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];

	raw->field[pq16_idx_to_field[idx]] = addr + offset;

	if (idx < 8)
		pq->coef[idx] = coef;
	else
		pq16->coef[idx - 8] = coef;
}

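/*
 * Allocate a super extended descriptor (SED) entry: a software wrapper
 * from ioat_sed_cache around a hardware descriptor block carved out of
 * one of the sed_hw_pool DMA pools (hw_pool selects which).  Allocation
 * is GFP_ATOMIC, which suggests the prep paths may be entered from
 * atomic context.
 */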
static struct ioat_sed_ent *
ioat3_alloc_sed(struct ioatdma_device *ioat_dma, unsigned int hw_pool)
{
	struct ioat_sed_ent *sed;
	gfp_t flags = __GFP_ZERO | GFP_ATOMIC;

	sed = kmem_cache_alloc(ioat_sed_cache, flags);
	if (!sed)
		return NULL;

	sed->hw_pool = hw_pool;
	sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool],
				 flags, &sed->dma);
	if (!sed->hw) {
		kmem_cache_free(ioat_sed_cache, sed);
		return NULL;
	}

	return sed;
}

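/*
 * A single memcpy request is split into as many hardware descriptors as
 * the channel's transfer cap requires: each descriptor moves at most
 * 1 << xfercap_log bytes.  For example (assuming xfercap_log == 20, i.e.
 * a 1 MB cap), a 2.5 MB copy is emitted as three descriptors of 1 MB,
 * 1 MB and 0.5 MB, with only the last one carrying the interrupt, fence
 * and completion-write control bits.
 */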
struct dma_async_tx_descriptor *
ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest,
			  dma_addr_t dma_src, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_dma_descriptor *hw;
	struct ioat_ring_ent *desc;
	dma_addr_t dst = dma_dest;
	dma_addr_t src = dma_src;
	size_t total_len = len;
	int num_descs, idx, i;

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		hw = desc->hw;

		hw->size = copy;
		hw->ctl = 0;
		hw->src_addr = src;
		hw->dst_addr = dst;

		len -= copy;
		dst += copy;
		src += copy;
		dump_desc_dbg(ioat_chan, desc);
	} while (++i < num_descs);

	desc->txd.flags = flags;
	desc->len = total_len;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	dump_desc_dbg(ioat_chan, desc);
	/* we leave the channel locked to ensure in order submission */

	return &desc->txd;
}

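/*
 * XOR with more than 5 sources needs a second, extended descriptor per
 * transfer chunk, so ring slots are then consumed in pairs (the
 * i += 1 + with_ext stride below).  The returned txd belongs to the
 * trailing null descriptor, which orders the raid engine's completion
 * writes and carries the interrupt.
 */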
static struct dma_async_tx_descriptor *
__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
		     dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
		     size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_xor_descriptor *xor;
	struct ioat_xor_ext_descriptor *xor_ex = NULL;
	struct ioat_dma_descriptor *hw;
	int num_descs, with_ext, idx, i;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;

	BUG_ON(src_cnt < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover more than 5
	 * sources
	 */
	if (src_cnt > 5) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs+1) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t,
					 len, 1 << ioat_chan->xfercap_log);
		int s;

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		xor = desc->xor;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; xor_set_src() knows not to write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + 1);
		xor_ex = ext->xor_ex;

		descs[0] = (struct ioat_raw_descriptor *) xor;
		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
		for (s = 0; s < src_cnt; s++)
			xor_set_src(descs, src[s], offset, s);
		xor->size = xfer_size;
		xor->dst_addr = dest + offset;
		xor->ctl = 0;
		xor->ctl_f.op = op;
		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);

		len -= xfer_size;
		offset += xfer_size;
		dump_desc_dbg(ioat_chan, desc);
	} while ((i += 1 + with_ext) < num_descs);

	/* last xor descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* completion descriptor carries interrupt bit */
	compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
	hw = compl_desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	dump_desc_dbg(ioat_chan, compl_desc);

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

struct dma_async_tx_descriptor *
ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
	      unsigned int src_cnt, size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
}

struct dma_async_tx_descriptor *
ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
		  unsigned int src_cnt, size_t len,
		  enum sum_check_flags *result, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success, so clear it here
	 */
	*result = 0;

	return __ioat_prep_xor_lock(chan, result, src[0], &src[1],
				    src_cnt - 1, len, flags);
}

static void
dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc,
		 struct ioat_ring_ent *ext)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op,
		pq->ctl_f.int_en, pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++)
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
	dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
}

static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan,
			       struct ioat_ring_ent *desc)
{
	struct device *dev = to_dev(ioat_chan);
	struct ioat_pq_descriptor *pq = desc->pq;
	struct ioat_raw_descriptor *descs[] = { (void *)pq,
						(void *)pq,
						(void *)pq };
	int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
	int i;

	if (desc->sed) {
		descs[1] = (void *)desc->sed->hw;
		descs[2] = (void *)desc->sed->hw + 64;
	}

	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
		" sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
		" src_cnt: %d)\n",
		desc_id(desc), (unsigned long long) desc->txd.phys,
		(unsigned long long) pq->next,
		desc->txd.flags, pq->size, pq->ctl,
		pq->ctl_f.op, pq->ctl_f.int_en,
		pq->ctl_f.compl_write,
		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
		pq->ctl_f.src_cnt);
	for (i = 0; i < src_cnt; i++) {
		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
			(unsigned long long) pq16_get_src(descs, i),
			pq->coef[i]);
	}
	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
}

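/*
 * For pq, the base descriptor holds 3 sources and the extended
 * descriptor 5 more (see pq_idx_to_desc above).  Continuation flags add
 * implied sources: the previous P and/or Q results are fed back in as
 * extra sources, which is why the extended-descriptor test below counts
 * src_cnt plus the continuation contribution rather than src_cnt alone.
 */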
static struct dma_async_tx_descriptor *
__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
		    const dma_addr_t *dst, const dma_addr_t *src,
		    unsigned int src_cnt, const unsigned char *scf,
		    size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *compl_desc;
	struct ioat_ring_ent *desc;
	struct ioat_ring_ent *ext;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	struct ioat_pq_ext_descriptor *pq_ex = NULL;
	struct ioat_dma_descriptor *hw;
	u32 offset = 0;
	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
	int i, s, idx, with_ext, num_descs;
	int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);
	/* the engine requires at least two sources (we provide
	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
	 */
	BUG_ON(src_cnt + dmaf_continue(flags) < 2);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);
	/* we need 2x the number of descriptors to cover more than 3
	 * sources (we need 1 extra source in the q-only continuation
	 * case and 3 extra sources in the p+q continuation case)
	 */
	if (src_cnt + dmaf_p_disabled_continue(flags) > 3 ||
	    (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) {
		with_ext = 1;
		num_descs *= 2;
	} else
		with_ext = 0;

	/* completion writes from the raid engine may pass completion
	 * writes from the legacy engine, so we need one extra null
	 * (legacy) descriptor to ensure all completion writes arrive in
	 * order.
	 */
	if (likely(num_descs) &&
	    ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0)
		idx = ioat_chan->head;
	else
		return NULL;
	i = 0;
	do {
		struct ioat_raw_descriptor *descs[2];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		/* save a branch by unconditionally retrieving the
		 * extended descriptor; pq_set_src() knows not to write
		 * to it in the single descriptor case
		 */
		ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext);
		pq_ex = ext->pq_ex;

		descs[0] = (struct ioat_raw_descriptor *) pq;
		descs[1] = (struct ioat_raw_descriptor *) pq_ex;

		for (s = 0; s < src_cnt; s++)
			pq_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq_set_src(descs, dst[0], offset, 0, s++);
			pq_set_src(descs, dst[1], offset, 1, s++);
			pq_set_src(descs, dst[1], offset, 0, s++);
		}
		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while ((i += 1 + with_ext) < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	dump_pq_desc_dbg(ioat_chan, desc, ext);

	if (!cb32) {
		pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		pq->ctl_f.compl_write = 1;
		compl_desc = desc;
	} else {
		/* completion descriptor carries interrupt bit */
		compl_desc = ioat_get_ring_ent(ioat_chan, idx + i);
		compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
		hw = compl_desc->hw;
		hw->ctl = 0;
		hw->ctl_f.null = 1;
		hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
		hw->ctl_f.compl_write = 1;
		hw->size = NULL_DESC_BUFFER_SIZE;
		dump_desc_dbg(ioat_chan, compl_desc);
	}

	/* we leave the channel locked to ensure in order submission */
	return &compl_desc->txd;
}

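/*
 * The 16-source path packs sources into the base descriptor plus two
 * 64-byte halves of an SED block (descs[1] and descs[2] below); the
 * pool index (src_cnt - 2) >> 3 passed to ioat3_alloc_sed() selects one
 * of the SED descriptor pools by source count.
 */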
static struct dma_async_tx_descriptor *
__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
		      const dma_addr_t *dst, const dma_addr_t *src,
		      unsigned int src_cnt, const unsigned char *scf,
		      size_t len, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
	struct ioat_ring_ent *desc;
	size_t total_len = len;
	struct ioat_pq_descriptor *pq;
	u32 offset = 0;
	u8 op;
	int i, s, idx, num_descs;

	/* this function is only called with 9-16 sources */
	op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;

	dev_dbg(to_dev(ioat_chan), "%s\n", __func__);

	num_descs = ioat_xferlen_to_descs(ioat_chan, len);

	/*
	 * 16 source pq is only available on cb3.3 and has no completion
	 * write hw bug.
	 */
	if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0)
		idx = ioat_chan->head;
	else
		return NULL;

	i = 0;

	do {
		struct ioat_raw_descriptor *descs[4];
		size_t xfer_size = min_t(size_t, len,
					 1 << ioat_chan->xfercap_log);

		desc = ioat_get_ring_ent(ioat_chan, idx + i);
		pq = desc->pq;

		descs[0] = (struct ioat_raw_descriptor *) pq;

		desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3);
		if (!desc->sed) {
			dev_err(to_dev(ioat_chan),
				"%s: no free sed entries\n", __func__);
			return NULL;
		}

		pq->sed_addr = desc->sed->dma;
		desc->sed->parent = desc;

		descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
		descs[2] = (void *)descs[1] + 64;

		for (s = 0; s < src_cnt; s++)
			pq16_set_src(descs, src[s], offset, scf[s], s);

		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
		if (dmaf_p_disabled_continue(flags))
			pq16_set_src(descs, dst[1], offset, 1, s++);
		else if (dmaf_continue(flags)) {
			pq16_set_src(descs, dst[0], offset, 0, s++);
			pq16_set_src(descs, dst[1], offset, 1, s++);
			pq16_set_src(descs, dst[1], offset, 0, s++);
		}

		pq->size = xfer_size;
		pq->p_addr = dst[0] + offset;
		pq->q_addr = dst[1] + offset;
		pq->ctl = 0;
		pq->ctl_f.op = op;
		pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
		/* we turn on descriptor write back error status */
		if (ioat_dma->cap & IOAT_CAP_DWBES)
			pq->ctl_f.wb_en = result ? 1 : 0;
		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);

		len -= xfer_size;
		offset += xfer_size;
	} while (++i < num_descs);

	/* last pq descriptor carries the unmap parameters and fence bit */
	desc->txd.flags = flags;
	desc->len = total_len;
	if (result)
		desc->result = result;
	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);

	/* with cb3.3 we should be able to do completion w/o a null desc */
	pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
	pq->ctl_f.compl_write = 1;

	dump_pq16_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}

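/*
 * Fold the implied continuation sources into the count used to pick the
 * 8- vs 16-source path.  For example, a 7-source P+Q continuation
 * (DMA_PREP_CONTINUE with P enabled) counts as 7 + 3 = 10 sources and
 * is therefore routed to __ioat_prep_pq16_lock().
 */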
static int src_cnt_flags(unsigned int src_cnt, unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return src_cnt + 1;
	else if (dmaf_continue(flags))
		return src_cnt + 3;
	else
		return src_cnt;
}

struct dma_async_tx_descriptor *
ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
	     unsigned int src_cnt, const unsigned char *scf, size_t len,
	     unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		dst[0] = dst[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		dst[1] = dst[0];

	/* handle the single source multiply case from the raid6
	 * recovery path
	 */
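	/*
	 * In that case the hardware still needs two sources, so the one
	 * real source is duplicated with a zero coefficient: Q becomes
	 * scf[0]*src xor 0*src, i.e. the desired single GF multiply.
	 */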
	if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) {
		dma_addr_t single_source[2];
		unsigned char single_source_coef[2];

		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
		single_source[0] = src[0];
		single_source[1] = src[0];
		single_source_coef[0] = scf[0];
		single_source_coef[1] = 0;

		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, single_source,
					      2, single_source_coef, len,
					      flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, single_source, 2,
					    single_source_coef, len, flags);

	} else {
		return src_cnt_flags(src_cnt, flags) > 8 ?
			__ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
					      scf, len, flags) :
			__ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt,
					    scf, len, flags);
	}
}

struct dma_async_tx_descriptor *
ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
		 unsigned int src_cnt, const unsigned char *scf, size_t len,
		 enum sum_check_flags *pqres, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	/* specify valid address for disabled result */
	if (flags & DMA_PREP_PQ_DISABLE_P)
		pq[0] = pq[1];
	if (flags & DMA_PREP_PQ_DISABLE_Q)
		pq[1] = pq[0];

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success, so clear it here
	 */
	*pqres = 0;

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
				    flags);
}

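/*
 * Plain xor can also be expressed as a pq operation with Q disabled:
 * P is the xor of the sources, while the coefficients feed only the Q
 * computation, so with Q disabled the scf array is zero-filled merely
 * to hand the hardware defined values.  The two pqxor entry points
 * below work this way.
 */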
struct dma_async_tx_descriptor *
ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
		unsigned int src_cnt, size_t len, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (src_cnt > MAX_SCF)
		return NULL;

	memset(scf, 0, src_cnt);
	pq[0] = dst;
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = dst; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
				      flags) :
		__ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
				    flags);
}

struct dma_async_tx_descriptor *
ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
		    unsigned int src_cnt, size_t len,
		    enum sum_check_flags *result, unsigned long flags)
{
	unsigned char scf[MAX_SCF];
	dma_addr_t pq[2];
	struct ioatdma_chan *ioat_chan = to_ioat_chan(chan);

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (src_cnt > MAX_SCF)
		return NULL;

	/* the cleanup routine only sets bits on validate failure; it
	 * does not clear bits on validate success, so clear it here
	 */
	*result = 0;

	memset(scf, 0, src_cnt);
	pq[0] = src[0];
	flags |= DMA_PREP_PQ_DISABLE_Q;
	pq[1] = pq[0]; /* specify valid address for disabled result */

	return src_cnt_flags(src_cnt, flags) > 8 ?
		__ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
				      scf, len, flags) :
		__ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
				    scf, len, flags);
}

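/*
 * A "null" descriptor performs no data transfer; here it serves purely
 * as a vehicle for an interrupt-generating completion write (ctl_f.null,
 * int_en and compl_write are all set, with the dummy
 * NULL_DESC_BUFFER_SIZE as its size).
 */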
struct dma_async_tx_descriptor *
ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
{
	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
	struct ioat_ring_ent *desc;
	struct ioat_dma_descriptor *hw;

	if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state))
		return NULL;

	if (ioat_check_space_lock(ioat_chan, 1) == 0)
		desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head);
	else
		return NULL;

	hw = desc->hw;
	hw->ctl = 0;
	hw->ctl_f.null = 1;
	hw->ctl_f.int_en = 1;
	hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
	hw->ctl_f.compl_write = 1;
	hw->size = NULL_DESC_BUFFER_SIZE;
	hw->src_addr = 0;
	hw->dst_addr = 0;

	desc->txd.flags = flags;
	desc->len = 1;

	dump_desc_dbg(ioat_chan, desc);

	/* we leave the channel locked to ensure in order submission */
	return &desc->txd;
}