/*
 * AMD Cryptographic Coprocessor (CCP) driver
 *
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/delay.h>
#include <linux/ccp.h>
#include <linux/scatterlist.h>
#include <crypto/scatterwalk.h>
#include <crypto/sha.h>

#include "ccp-dev.h"
enum ccp_memtype {
        CCP_MEMTYPE_SYSTEM = 0,
        CCP_MEMTYPE_KSB,
};

struct ccp_dma_info {
        dma_addr_t address;
        unsigned int offset;
        unsigned int length;
        enum dma_data_direction dir;
};

struct ccp_dm_workarea {
        struct device *dev;
        struct dma_pool *dma_pool;
        unsigned int length;
        u8 *address;
        struct ccp_dma_info dma;
};

struct ccp_sg_workarea {
        struct scatterlist *sg;
        unsigned int nents;
        struct scatterlist *dma_sg;
        struct device *dma_dev;
        unsigned int dma_count;
        enum dma_data_direction dma_dir;
        unsigned int sg_used;
        u64 bytes_left;
};

struct ccp_data {
        struct ccp_sg_workarea sg_wa;
        struct ccp_dm_workarea dm_wa;
};

struct ccp_mem {
        enum ccp_memtype type;
        union {
                struct ccp_dma_info dma;
                u32 ksb;
        } u;
};

struct ccp_aes_op {
        enum ccp_aes_type type;
        enum ccp_aes_mode mode;
        enum ccp_aes_action action;
};

struct ccp_xts_aes_op {
        enum ccp_aes_action action;
        enum ccp_xts_aes_unit_size unit_size;
};

struct ccp_sha_op {
        enum ccp_sha_type type;
        u64 msg_bits;
};

struct ccp_rsa_op {
        u32 mod_size;
        u32 input_len;
};

struct ccp_passthru_op {
        enum ccp_passthru_bitwise bit_mod;
        enum ccp_passthru_byteswap byte_swap;
};

struct ccp_ecc_op {
        enum ccp_ecc_function function;
};

struct ccp_op {
        struct ccp_cmd_queue *cmd_q;
        u32 jobid;
        u32 ioc;
        u32 soc;
        u32 ksb_key;
        u32 ksb_ctx;
        u32 init;
        u32 eom;
        struct ccp_mem src;
        struct ccp_mem dst;
        union {
                struct ccp_aes_op aes;
                struct ccp_xts_aes_op xts;
                struct ccp_sha_op sha;
                struct ccp_rsa_op rsa;
                struct ccp_passthru_op passthru;
                struct ccp_ecc_op ecc;
        } u;
};
/* SHA initial context values */
static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
        cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
        cpu_to_be32(SHA1_H4), 0, 0, 0,
};

static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
        cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
        cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
        cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};

static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
        cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
        cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
        cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
        cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};
static u32 ccp_addr_lo(struct ccp_dma_info *info)
{
        return lower_32_bits(info->address + info->offset);
}

static u32 ccp_addr_hi(struct ccp_dma_info *info)
{
        return upper_32_bits(info->address + info->offset) & 0x0000ffff;
}
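/*
 * Worked example (illustrative sketch, not driver code): splitting a
 * DMA address for the REQx registers. The values are hypothetical and
 * a 64-bit dma_addr_t is assumed; the CCP addresses at most 48 bits.
 */
static inline void ccp_addr_split_example(void)
{
        struct ccp_dma_info info = {
                .address = 0x1234567890ULL,     /* hypothetical address */
                .offset = 0x1a,
        };

        /* 0x1234567890 + 0x1a = 0x12345678aa */
        WARN_ON(ccp_addr_lo(&info) != 0x345678aa);      /* bits 31:0 */
        WARN_ON(ccp_addr_hi(&info) != 0x00000012);      /* bits 47:32 */
}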
static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
{
        struct ccp_cmd_queue *cmd_q = op->cmd_q;
        struct ccp_device *ccp = cmd_q->ccp;
        void __iomem *cr_addr;
        u32 cr0, cmd;
        unsigned int i;
        int ret = 0;

        /* We could read a status register to see how many free slots
         * are actually available, but reading that register resets it
         * and you could lose some error information.
         */
        cmd_q->free_slots--;

        cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
              | (op->jobid << REQ0_JOBID_SHIFT)
              | REQ0_WAIT_FOR_WRITE;

        if (op->soc)
                cr0 |= REQ0_STOP_ON_COMPLETE
                       | REQ0_INT_ON_COMPLETE;

        if (op->ioc || !cmd_q->free_slots)
                cr0 |= REQ0_INT_ON_COMPLETE;

        /* Start at CMD_REQ1 */
        cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;

        mutex_lock(&ccp->req_mutex);

        /* Write CMD_REQ1 through CMD_REQx first */
        for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
                iowrite32(*(cr + i), cr_addr);

        /* Tell the CCP to start */
        iowrite32(cr0, ccp->io_regs + CMD_REQ0);

        mutex_unlock(&ccp->req_mutex);

        if (cr0 & REQ0_INT_ON_COMPLETE) {
                /* Wait for the job to complete */
                ret = wait_event_interruptible(cmd_q->int_queue,
                                               cmd_q->int_rcvd);
                if (ret || cmd_q->cmd_error) {
                        /* On error delete all related jobs from the queue */
                        cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
                              | op->jobid;

                        iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);

                        if (!ret)
                                ret = -EIO;
                } else if (op->soc) {
                        /* Delete just head job from the queue on SoC */
                        cmd = DEL_Q_ACTIVE
                              | (cmd_q->id << DEL_Q_ID_SHIFT)
                              | op->jobid;

                        iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
                }

                cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);

                ret = 0;
        }

        return ret;
}
static int ccp_perform_aes(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
                | (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
                | (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
                | (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
                | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        if (op->u.aes.mode == CCP_AES_MODE_CFB)
                cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_xts_aes(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
                | (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
                | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
                | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_sha(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
                | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);

        cr[4] = lower_32_bits(op->u.sha.msg_bits);
        cr[5] = upper_32_bits(op->u.sha.msg_bits);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_rsa(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
                | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
                | (op->ksb_key << REQ1_KEY_KSB_SHIFT);
        cr[1] = op->u.rsa.input_len - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
                | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_passthru(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
                | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
                | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);

        if (op->src.type == CCP_MEMTYPE_SYSTEM)
                cr[1] = op->src.u.dma.length - 1;
        else
                cr[1] = op->dst.u.dma.length - 1;

        if (op->src.type == CCP_MEMTYPE_SYSTEM) {
                cr[2] = ccp_addr_lo(&op->src.u.dma);
                cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                        | ccp_addr_hi(&op->src.u.dma);

                if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
                        cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
        } else {
                cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
                cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
        }

        if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
                cr[4] = ccp_addr_lo(&op->dst.u.dma);
                cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                        | ccp_addr_hi(&op->dst.u.dma);
        } else {
                cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
                cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
        }

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static int ccp_perform_ecc(struct ccp_op *op)
{
        u32 cr[6];

        /* Fill out the register contents for REQ1 through REQ6 */
        cr[0] = REQ1_ECC_AFFINE_CONVERT
                | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
                | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT);
        cr[1] = op->src.u.dma.length - 1;
        cr[2] = ccp_addr_lo(&op->src.u.dma);
        cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->src.u.dma);
        cr[4] = ccp_addr_lo(&op->dst.u.dma);
        cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
                | ccp_addr_hi(&op->dst.u.dma);

        return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
}
static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
{
        u32 start;

        for (;;) {
                mutex_lock(&ccp->ksb_mutex);

                start = (u32)bitmap_find_next_zero_area(ccp->ksb,
                                                        ccp->ksb_count,
                                                        ccp->ksb_start,
                                                        count, 0);
                if (start <= ccp->ksb_count) {
                        bitmap_set(ccp->ksb, start, count);

                        mutex_unlock(&ccp->ksb_mutex);
                        break;
                }

                ccp->ksb_avail = 0;

                mutex_unlock(&ccp->ksb_mutex);

                /* Wait for KSB entries to become available */
                if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
                        return 0;
        }

        return KSB_START + start;
}
static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
                         unsigned int count)
{
        mutex_lock(&ccp->ksb_mutex);

        bitmap_clear(ccp->ksb, start - KSB_START, count);

        ccp->ksb_avail = 1;

        mutex_unlock(&ccp->ksb_mutex);

        wake_up_interruptible_all(&ccp->ksb_queue);
}
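/*
 * Minimal sketch (illustrative only) of the bitmap pattern used by
 * ccp_alloc_ksb()/ccp_free_ksb() above: find a run of free entries,
 * claim it, release it later. The 64-entry size is an assumption for
 * this sketch, not the driver's KSB size.
 */
static int ccp_ksb_bitmap_example(void)
{
        DECLARE_BITMAP(map, 64);
        unsigned long start;

        bitmap_zero(map, 64);

        start = bitmap_find_next_zero_area(map, 64, 0, 2, 0);
        if (start >= 64)
                return -ENOSPC;         /* no run of two free entries */

        bitmap_set(map, start, 2);      /* claim two contiguous entries */
        bitmap_clear(map, start, 2);    /* and release them again */

        return 0;
}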
static u32 ccp_gen_jobid(struct ccp_device *ccp)
{
        return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
}

static void ccp_sg_free(struct ccp_sg_workarea *wa)
{
        if (wa->dma_count)
                dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);

        wa->dma_count = 0;
}
static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
                                struct scatterlist *sg, u64 len,
                                enum dma_data_direction dma_dir)
{
        memset(wa, 0, sizeof(*wa));

        wa->sg = sg;
        if (!sg)
                return 0;

        wa->nents = sg_nents_for_len(sg, len);

        wa->bytes_left = len;

        if (dma_dir == DMA_NONE)
                return 0;

        wa->dma_sg = sg;
        wa->dma_dev = dev;
        wa->dma_dir = dma_dir;
        wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
        if (!wa->dma_count)
                return -ENOMEM;

        return 0;
}
static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
{
        unsigned int nbytes = min_t(u64, len, wa->bytes_left);

        if (!wa->sg)
                return;

        wa->sg_used += nbytes;
        wa->bytes_left -= nbytes;
        if (wa->sg_used == wa->sg->length) {
                wa->sg = sg_next(wa->sg);
                wa->sg_used = 0;
        }
}
static void ccp_dm_free(struct ccp_dm_workarea *wa)
{
        if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
                if (wa->dma.address)
                        dma_pool_free(wa->dma_pool, wa->address,
                                      wa->dma.address);
        } else {
                if (wa->dma.address)
                        dma_unmap_single(wa->dev, wa->dma.address, wa->length,
                                         wa->dma.dir);
                kfree(wa->address);
        }

        wa->address = NULL;
        wa->dma.address = 0;
}
static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
                                struct ccp_cmd_queue *cmd_q,
                                unsigned int len,
                                enum dma_data_direction dir)
{
        memset(wa, 0, sizeof(*wa));

        if (!len)
                return 0;

        wa->dev = cmd_q->ccp->dev;
        wa->length = len;

        if (len <= CCP_DMAPOOL_MAX_SIZE) {
                wa->dma_pool = cmd_q->dma_pool;

                wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
                                             &wa->dma.address);
                if (!wa->address)
                        return -ENOMEM;

                wa->dma.length = CCP_DMAPOOL_MAX_SIZE;

                memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
        } else {
                wa->address = kzalloc(len, GFP_KERNEL);
                if (!wa->address)
                        return -ENOMEM;

                wa->dma.address = dma_map_single(wa->dev, wa->address, len,
                                                 dir);
                if (!wa->dma.address)
                        return -ENOMEM;

                wa->dma.length = len;
        }
        wa->dma.dir = dir;

        return 0;
}
static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
                            struct scatterlist *sg, unsigned int sg_offset,
                            unsigned int len)
{
        WARN_ON(!wa->address);

        scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
                                 0);
}

static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
                            struct scatterlist *sg, unsigned int sg_offset,
                            unsigned int len)
{
        WARN_ON(!wa->address);

        scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
                                 1);
}
static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
                                   struct scatterlist *sg,
                                   unsigned int len, unsigned int se_len,
                                   bool sign_extend)
{
        unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
        u8 buffer[CCP_REVERSE_BUF_SIZE];

        if (WARN_ON(se_len > sizeof(buffer)))
                return -EINVAL;

        sg_offset = len;
        dm_offset = 0;
        nbytes = len;
        while (nbytes) {
                ksb_len = min_t(unsigned int, nbytes, se_len);
                sg_offset -= ksb_len;

                scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
                for (i = 0; i < ksb_len; i++)
                        wa->address[dm_offset + i] = buffer[ksb_len - i - 1];

                dm_offset += ksb_len;
                nbytes -= ksb_len;

                if ((ksb_len != se_len) && sign_extend) {
                        /* Must sign-extend to nearest sign-extend length */
                        if (wa->address[dm_offset - 1] & 0x80)
                                memset(wa->address + dm_offset, 0xff,
                                       se_len - ksb_len);
                }
        }

        return 0;
}
static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
                                    struct scatterlist *sg,
                                    unsigned int len)
{
        unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
        u8 buffer[CCP_REVERSE_BUF_SIZE];

        sg_offset = 0;
        dm_offset = len;
        nbytes = len;
        while (nbytes) {
                ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
                dm_offset -= ksb_len;

                for (i = 0; i < ksb_len; i++)
                        buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
                scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);

                sg_offset += ksb_len;
                nbytes -= ksb_len;
        }
}
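/*
 * Worked example (illustrative only): ccp_reverse_set_dm_area() turns
 * the big endian operand { 0x01, 0x02, 0x03, 0x04 } into the little
 * endian layout { 0x04, 0x03, 0x02, 0x01 } that the CCP expects; the
 * loop below is the same per-chunk byte reversal in isolation.
 */
static u8 ccp_reverse_copy_example(void)
{
        static const u8 be[4] = { 0x01, 0x02, 0x03, 0x04 };
        u8 le[4];
        unsigned int i;

        for (i = 0; i < sizeof(be); i++)
                le[i] = be[sizeof(be) - i - 1];

        return le[0];   /* 0x04 */
}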
static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
{
        ccp_dm_free(&data->dm_wa);
        ccp_sg_free(&data->sg_wa);
}

static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
                         struct scatterlist *sg, u64 sg_len,
                         unsigned int dm_len,
                         enum dma_data_direction dir)
{
        int ret;

        memset(data, 0, sizeof(*data));

        ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
                                   dir);
        if (ret)
                goto e_err;

        ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
        if (ret)
                goto e_err;

        return 0;

e_err:
        ccp_free_data(data, cmd_q);

        return ret;
}
static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
{
        struct ccp_sg_workarea *sg_wa = &data->sg_wa;
        struct ccp_dm_workarea *dm_wa = &data->dm_wa;
        unsigned int buf_count, nbytes;

        /* Clear the buffer if setting it */
        if (!from)
                memset(dm_wa->address, 0, dm_wa->length);

        if (!sg_wa->sg)
                return 0;

        /* Perform the copy operation
         * nbytes will always be <= UINT_MAX because dm_wa->length is
         * an unsigned int
         */
        nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
        scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
                                 nbytes, from);

        /* Update the structures and generate the count */
        buf_count = 0;
        while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
                nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
                             dm_wa->length - buf_count);
                nbytes = min_t(u64, sg_wa->bytes_left, nbytes);

                buf_count += nbytes;
                ccp_update_sg_workarea(sg_wa, nbytes);
        }

        return buf_count;
}

static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
        return ccp_queue_buf(data, 0);
}

static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
        return ccp_queue_buf(data, 1);
}
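/*
 * In other words: ccp_fill_queue_buf() copies from the scatterlist into
 * the staging buffer (scatterwalk "out" flag 0) ahead of a device read,
 * while ccp_empty_queue_buf() drains the staging buffer back out to the
 * scatterlist (flag 1) after a device write.
 */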
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
                             struct ccp_op *op, unsigned int block_size,
                             bool blocksize_op)
{
        unsigned int sg_src_len, sg_dst_len, op_len;

        /* The CCP can only DMA from/to one address each per operation. This
         * requires that we find the smallest DMA area between the source
         * and destination. The resulting len values will always be <= UINT_MAX
         * because the dma length is an unsigned int.
         */
        sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
        sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);

        if (dst) {
                sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
                sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
                op_len = min(sg_src_len, sg_dst_len);
        } else {
                op_len = sg_src_len;
        }

        /* The data operation length will be at least block_size in length
         * or the smaller of available sg room remaining for the source or
         * destination
         */
        op_len = max(op_len, block_size);

        /* Unless we have to buffer data, there's no reason to wait */
        op->soc = 0;

        if (sg_src_len < block_size) {
                /* Not enough data in the sg element, so it
                 * needs to be buffered into a blocksize chunk
                 */
                int cp_len = ccp_fill_queue_buf(src);

                op->soc = 1;
                op->src.u.dma.address = src->dm_wa.dma.address;
                op->src.u.dma.offset = 0;
                op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
        } else {
                /* Enough data in the sg element, but we need to
                 * adjust for any previously copied data
                 */
                op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
                op->src.u.dma.offset = src->sg_wa.sg_used;
                op->src.u.dma.length = op_len & ~(block_size - 1);

                ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
        }

        if (dst) {
                if (sg_dst_len < block_size) {
                        /* Not enough room in the sg element or we're on the
                         * last piece of data (when using padding), so the
                         * output needs to be buffered into a blocksize chunk
                         */
                        op->soc = 1;
                        op->dst.u.dma.address = dst->dm_wa.dma.address;
                        op->dst.u.dma.offset = 0;
                        op->dst.u.dma.length = op->src.u.dma.length;
                } else {
                        /* Enough room in the sg element, but we need to
                         * adjust for any previously used area
                         */
                        op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
                        op->dst.u.dma.offset = dst->sg_wa.sg_used;
                        op->dst.u.dma.length = op->src.u.dma.length;
                }
        }
}
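/*
 * Worked example (illustrative): with block_size = 16 (AES) and
 * op_len = 70, op_len & ~(block_size - 1) = 64, so one operation
 * submits 64 bytes directly from the scatterlist and the 6-byte tail
 * is left for a later pass through the staging buffer.
 */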
static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
                             struct ccp_op *op)
{
        op->init = 0;

        if (dst) {
                if (op->dst.u.dma.address == dst->dm_wa.dma.address)
                        ccp_empty_queue_buf(dst);
                else
                        ccp_update_sg_workarea(&dst->sg_wa,
                                               op->dst.u.dma.length);
        }
}
static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
                                struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
                                u32 byte_swap, bool from)
{
        struct ccp_op op;

        memset(&op, 0, sizeof(op));

        op.cmd_q = cmd_q;
        op.jobid = jobid;
        op.eom = 1;

        if (from) {
                op.soc = 1;
                op.src.type = CCP_MEMTYPE_KSB;
                op.src.u.ksb = ksb;
                op.dst.type = CCP_MEMTYPE_SYSTEM;
                op.dst.u.dma.address = wa->dma.address;
                op.dst.u.dma.length = wa->length;
        } else {
                op.src.type = CCP_MEMTYPE_SYSTEM;
                op.src.u.dma.address = wa->dma.address;
                op.src.u.dma.length = wa->length;
                op.dst.type = CCP_MEMTYPE_KSB;
                op.dst.u.ksb = ksb;
        }

        op.u.passthru.byte_swap = byte_swap;

        return ccp_perform_passthru(&op);
}

static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
                           struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
                           u32 byte_swap)
{
        return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
}

static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
                             struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
                             u32 byte_swap)
{
        return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
}
static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
                                struct ccp_cmd *cmd)
{
        struct ccp_aes_engine *aes = &cmd->u.aes;
        struct ccp_dm_workarea key, ctx;
        struct ccp_data src;
        struct ccp_op op;
        unsigned int dm_offset;
        int ret;

        if (!((aes->key_len == AES_KEYSIZE_128) ||
              (aes->key_len == AES_KEYSIZE_192) ||
              (aes->key_len == AES_KEYSIZE_256)))
                return -EINVAL;

        if (aes->src_len & (AES_BLOCK_SIZE - 1))
                return -EINVAL;

        if (aes->iv_len != AES_BLOCK_SIZE)
                return -EINVAL;

        if (!aes->key || !aes->iv || !aes->src)
                return -EINVAL;

        if (aes->cmac_final) {
                if (aes->cmac_key_len != AES_BLOCK_SIZE)
                        return -EINVAL;

                if (!aes->cmac_key)
                        return -EINVAL;
        }

        BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
        BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
        op.ksb_key = cmd_q->ksb_key;
        op.ksb_ctx = cmd_q->ksb_ctx;
        op.init = 1;
        op.u.aes.type = aes->type;
        op.u.aes.mode = aes->mode;
        op.u.aes.action = aes->action;

        /* All supported key sizes fit in a single (32-byte) KSB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
                                   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;

        dm_offset = CCP_KSB_BYTES - aes->key_len;
        ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
        ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }

        /* The AES context fits in a single (32-byte) KSB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
                                   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;

        dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
        ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
        ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
        }

        /* Send data to the CCP AES engine */
        ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
                            AES_BLOCK_SIZE, DMA_TO_DEVICE);
        if (ret)
                goto e_ctx;

        while (src.sg_wa.bytes_left) {
                ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
                if (aes->cmac_final && !src.sg_wa.bytes_left) {
                        op.eom = 1;

                        /* Push the K1/K2 key to the CCP now */
                        ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
                                                op.ksb_ctx,
                                                CCP_PASSTHRU_BYTESWAP_256BIT);
                        if (ret) {
                                cmd->engine_error = cmd_q->cmd_error;
                                goto e_src;
                        }

                        ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
                                        aes->cmac_key_len);
                        ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                                              CCP_PASSTHRU_BYTESWAP_256BIT);
                        if (ret) {
                                cmd->engine_error = cmd_q->cmd_error;
                                goto e_src;
                        }
                }

                ret = ccp_perform_aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_src;
                }

                ccp_process_data(&src, NULL, &op);
        }

        /* Retrieve the AES context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
        ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                                CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_src;
        }

        /* ...but we only need AES_BLOCK_SIZE bytes */
        dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
        ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

e_src:
        ccp_free_data(&src, cmd_q);

e_ctx:
        ccp_dm_free(&ctx);

e_key:
        ccp_dm_free(&key);

        return ret;
}
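/*
 * Caller-side sketch (illustrative; error handling and scatterlist
 * setup omitted): submitting an in-place AES-CBC encrypt. The enum
 * values come from the ccp_cmd interface in <linux/ccp.h>; for brevity
 * this sketch calls ccp_run_cmd() directly, whereas real callers go
 * through the command queue.
 */
int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); /* below */

static int ccp_aes_cbc_example(struct ccp_cmd_queue *cmd_q,
                               struct scatterlist *key_sg,
                               struct scatterlist *iv_sg,
                               struct scatterlist *data_sg,
                               unsigned int len)
{
        struct ccp_cmd cmd;

        memset(&cmd, 0, sizeof(cmd));
        cmd.engine = CCP_ENGINE_AES;
        cmd.u.aes.type = CCP_AES_TYPE_128;
        cmd.u.aes.mode = CCP_AES_MODE_CBC;
        cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
        cmd.u.aes.key = key_sg;
        cmd.u.aes.key_len = AES_KEYSIZE_128;
        cmd.u.aes.iv = iv_sg;
        cmd.u.aes.iv_len = AES_BLOCK_SIZE;
        cmd.u.aes.src = data_sg;
        cmd.u.aes.src_len = len;
        cmd.u.aes.dst = data_sg;        /* same sg: in-place operation */

        return ccp_run_cmd(cmd_q, &cmd);
}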
static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        struct ccp_aes_engine *aes = &cmd->u.aes;
        struct ccp_dm_workarea key, ctx;
        struct ccp_data src, dst;
        struct ccp_op op;
        unsigned int dm_offset;
        bool in_place = false;
        int ret;

        if (aes->mode == CCP_AES_MODE_CMAC)
                return ccp_run_aes_cmac_cmd(cmd_q, cmd);

        if (!((aes->key_len == AES_KEYSIZE_128) ||
              (aes->key_len == AES_KEYSIZE_192) ||
              (aes->key_len == AES_KEYSIZE_256)))
                return -EINVAL;

        if (((aes->mode == CCP_AES_MODE_ECB) ||
             (aes->mode == CCP_AES_MODE_CBC) ||
             (aes->mode == CCP_AES_MODE_CFB)) &&
            (aes->src_len & (AES_BLOCK_SIZE - 1)))
                return -EINVAL;

        if (!aes->key || !aes->src || !aes->dst)
                return -EINVAL;

        if (aes->mode != CCP_AES_MODE_ECB) {
                if (aes->iv_len != AES_BLOCK_SIZE)
                        return -EINVAL;

                if (!aes->iv)
                        return -EINVAL;
        }

        BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
        BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
        op.ksb_key = cmd_q->ksb_key;
        op.ksb_ctx = cmd_q->ksb_ctx;
        op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
        op.u.aes.type = aes->type;
        op.u.aes.mode = aes->mode;
        op.u.aes.action = aes->action;

        /* All supported key sizes fit in a single (32-byte) KSB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
                                   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;

        dm_offset = CCP_KSB_BYTES - aes->key_len;
        ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
        ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }

        /* The AES context fits in a single (32-byte) KSB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
                                   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;

        if (aes->mode != CCP_AES_MODE_ECB) {
                /* Load the AES context - convert to LE */
                dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
                ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
                ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                                      CCP_PASSTHRU_BYTESWAP_256BIT);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_ctx;
                }
        }

        /* Prepare the input and output data workareas. For in-place
         * operations we need to set the dma direction to BIDIRECTIONAL
         * and copy the src workarea to the dst workarea.
         */
        if (sg_virt(aes->src) == sg_virt(aes->dst))
                in_place = true;

        ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
                            AES_BLOCK_SIZE,
                            in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
        if (ret)
                goto e_ctx;

        if (in_place) {
                dst = src;
        } else {
                ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
                                    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
                if (ret)
                        goto e_src;
        }

        /* Send data to the CCP AES engine */
        while (src.sg_wa.bytes_left) {
                ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
                if (!src.sg_wa.bytes_left) {
                        op.eom = 1;

                        /* Since we don't retrieve the AES context in ECB
                         * mode we have to wait for the operation to complete
                         * on the last piece of data
                         */
                        if (aes->mode == CCP_AES_MODE_ECB)
                                op.soc = 1;
                }

                ret = ccp_perform_aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
                }

                ccp_process_data(&src, &dst, &op);
        }

        if (aes->mode != CCP_AES_MODE_ECB) {
                /* Retrieve the AES context - convert from LE to BE using
                 * 32-byte (256-bit) byteswapping
                 */
                ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                                        CCP_PASSTHRU_BYTESWAP_256BIT);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
                }

                /* ...but we only need AES_BLOCK_SIZE bytes */
                dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
                ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
        }

e_dst:
        if (!in_place)
                ccp_free_data(&dst, cmd_q);

e_src:
        ccp_free_data(&src, cmd_q);

e_ctx:
        ccp_dm_free(&ctx);

e_key:
        ccp_dm_free(&key);

        return ret;
}
static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
                               struct ccp_cmd *cmd)
{
        struct ccp_xts_aes_engine *xts = &cmd->u.xts;
        struct ccp_dm_workarea key, ctx;
        struct ccp_data src, dst;
        struct ccp_op op;
        unsigned int unit_size, dm_offset;
        bool in_place = false;
        int ret;

        switch (xts->unit_size) {
        case CCP_XTS_AES_UNIT_SIZE_16:
                unit_size = 16;
                break;
        case CCP_XTS_AES_UNIT_SIZE_512:
                unit_size = 512;
                break;
        case CCP_XTS_AES_UNIT_SIZE_1024:
                unit_size = 1024;
                break;
        case CCP_XTS_AES_UNIT_SIZE_2048:
                unit_size = 2048;
                break;
        case CCP_XTS_AES_UNIT_SIZE_4096:
                unit_size = 4096;
                break;
        default:
                return -EINVAL;
        }

        if (xts->key_len != AES_KEYSIZE_128)
                return -EINVAL;

        if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
                return -EINVAL;

        if (xts->iv_len != AES_BLOCK_SIZE)
                return -EINVAL;

        if (!xts->key || !xts->iv || !xts->src || !xts->dst)
                return -EINVAL;

        BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
        BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
        op.ksb_key = cmd_q->ksb_key;
        op.ksb_ctx = cmd_q->ksb_ctx;
        op.init = 1;
        op.u.xts.action = xts->action;
        op.u.xts.unit_size = xts->unit_size;

        /* All supported key sizes fit in a single (32-byte) KSB entry
         * and must be in little endian format. Use the 256-bit byte
         * swap passthru option to convert from big endian to little
         * endian.
         */
        ret = ccp_init_dm_workarea(&key, cmd_q,
                                   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;

        dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
        ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
        ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
        ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_key;
        }

        /* The AES context fits in a single (32-byte) KSB entry and
         * for XTS is already in little endian format so no byte swapping
         * is needed.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
                                   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                goto e_key;

        ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
        ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                              CCP_PASSTHRU_BYTESWAP_NOOP);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
        }

        /* Prepare the input and output data workareas. For in-place
         * operations we need to set the dma direction to BIDIRECTIONAL
         * and copy the src workarea to the dst workarea.
         */
        if (sg_virt(xts->src) == sg_virt(xts->dst))
                in_place = true;

        ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
                            unit_size,
                            in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
        if (ret)
                goto e_ctx;

        if (in_place) {
                dst = src;
        } else {
                ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
                                    unit_size, DMA_FROM_DEVICE);
                if (ret)
                        goto e_src;
        }

        /* Send data to the CCP AES engine */
        while (src.sg_wa.bytes_left) {
                ccp_prepare_data(&src, &dst, &op, unit_size, true);
                if (!src.sg_wa.bytes_left)
                        op.eom = 1;

                ret = ccp_perform_xts_aes(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
                }

                ccp_process_data(&src, &dst, &op);
        }

        /* Retrieve the AES context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
        ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                                CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
        }

        /* ...but we only need AES_BLOCK_SIZE bytes */
        dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
        ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);

e_dst:
        if (!in_place)
                ccp_free_data(&dst, cmd_q);

e_src:
        ccp_free_data(&src, cmd_q);

e_ctx:
        ccp_dm_free(&ctx);

e_key:
        ccp_dm_free(&key);

        return ret;
}
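/*
 * Worked example (illustrative): with xts->unit_size ==
 * CCP_XTS_AES_UNIT_SIZE_512 the switch at the top of the function
 * yields unit_size = 512, so ccp_prepare_data() carves each operation
 * into whole 512-byte XTS data units.
 */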
static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        struct ccp_sha_engine *sha = &cmd->u.sha;
        struct ccp_dm_workarea ctx;
        struct ccp_data src;
        struct ccp_op op;
        int ret;

        if (sha->ctx_len != CCP_SHA_CTXSIZE)
                return -EINVAL;

        if (!sha->ctx)
                return -EINVAL;

        if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
                return -EINVAL;

        if (!sha->src_len) {
                const u8 *sha_zero;

                /* Not final, just return */
                if (!sha->final)
                        return 0;

                /* CCP can't do a zero length sha operation so the
                 * caller must buffer the data.
                 */
                if (sha->msg_bits)
                        return -EINVAL;

                /* The CCP cannot perform zero-length sha operations so the
                 * caller is required to buffer data for the final operation.
                 * However, a sha operation for a message with a total length
                 * of zero is valid so known values are required to supply
                 * the digest.
                 */
                switch (sha->type) {
                case CCP_SHA_TYPE_1:
                        sha_zero = sha1_zero_message_hash;
                        break;
                case CCP_SHA_TYPE_224:
                        sha_zero = sha224_zero_message_hash;
                        break;
                case CCP_SHA_TYPE_256:
                        sha_zero = sha256_zero_message_hash;
                        break;
                default:
                        return -EINVAL;
                }

                scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
                                         sha->ctx_len, 1);

                return 0;
        }

        if (!sha->src)
                return -EINVAL;

        BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
        op.ksb_ctx = cmd_q->ksb_ctx;
        op.u.sha.type = sha->type;
        op.u.sha.msg_bits = sha->msg_bits;

        /* The SHA context fits in a single (32-byte) KSB entry and
         * must be in little endian format. Use the 256-bit byte swap
         * passthru option to convert from big endian to little endian.
         */
        ret = ccp_init_dm_workarea(&ctx, cmd_q,
                                   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
                                   DMA_BIDIRECTIONAL);
        if (ret)
                return ret;

        if (sha->first) {
                const __be32 *init;

                switch (sha->type) {
                case CCP_SHA_TYPE_1:
                        init = ccp_sha1_init;
                        break;
                case CCP_SHA_TYPE_224:
                        init = ccp_sha224_init;
                        break;
                case CCP_SHA_TYPE_256:
                        init = ccp_sha256_init;
                        break;
                default:
                        ret = -EINVAL;
                        goto e_ctx;
                }
                memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
        } else {
                ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
        }

        ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                              CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_ctx;
        }

        /* Send data to the CCP SHA engine */
        ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
                            CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
        if (ret)
                goto e_ctx;

        while (src.sg_wa.bytes_left) {
                ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
                if (sha->final && !src.sg_wa.bytes_left)
                        op.eom = 1;

                ret = ccp_perform_sha(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_data;
                }

                ccp_process_data(&src, NULL, &op);
        }

        /* Retrieve the SHA context - convert from LE to BE using
         * 32-byte (256-bit) byteswapping
         */
        ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
                                CCP_PASSTHRU_BYTESWAP_256BIT);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_data;
        }

        ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);

        if (sha->final && sha->opad) {
                /* HMAC operation, recursively perform final SHA */
                struct ccp_cmd hmac_cmd;
                struct scatterlist sg;
                u64 block_size, digest_size;
                u8 *hmac_buf;

                switch (sha->type) {
                case CCP_SHA_TYPE_1:
                        block_size = SHA1_BLOCK_SIZE;
                        digest_size = SHA1_DIGEST_SIZE;
                        break;
                case CCP_SHA_TYPE_224:
                        block_size = SHA224_BLOCK_SIZE;
                        digest_size = SHA224_DIGEST_SIZE;
                        break;
                case CCP_SHA_TYPE_256:
                        block_size = SHA256_BLOCK_SIZE;
                        digest_size = SHA256_DIGEST_SIZE;
                        break;
                default:
                        ret = -EINVAL;
                        goto e_data;
                }

                if (sha->opad_len != block_size) {
                        ret = -EINVAL;
                        goto e_data;
                }

                hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
                if (!hmac_buf) {
                        ret = -ENOMEM;
                        goto e_data;
                }
                sg_init_one(&sg, hmac_buf, block_size + digest_size);

                scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
                memcpy(hmac_buf + block_size, ctx.address, digest_size);

                memset(&hmac_cmd, 0, sizeof(hmac_cmd));
                hmac_cmd.engine = CCP_ENGINE_SHA;
                hmac_cmd.u.sha.type = sha->type;
                hmac_cmd.u.sha.ctx = sha->ctx;
                hmac_cmd.u.sha.ctx_len = sha->ctx_len;
                hmac_cmd.u.sha.src = &sg;
                hmac_cmd.u.sha.src_len = block_size + digest_size;
                hmac_cmd.u.sha.opad = NULL;
                hmac_cmd.u.sha.opad_len = 0;
                hmac_cmd.u.sha.first = 1;
                hmac_cmd.u.sha.final = 1;
                hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;

                ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
                if (ret)
                        cmd->engine_error = hmac_cmd.engine_error;

                kfree(hmac_buf);
        }

e_data:
        ccp_free_data(&src, cmd_q);

e_ctx:
        ccp_dm_free(&ctx);

        return ret;
}
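/*
 * Worked example (illustrative): for HMAC-SHA256 the recursive command
 * above hashes a 64-byte opad block followed by the 32-byte inner
 * digest, i.e. H(K ^ opad || H(K ^ ipad || message)), giving
 * msg_bits = (64 + 32) * 8 = 768; first = 1 restarts the engine from
 * the initial SHA state for that outer hash.
 */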
static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        struct ccp_rsa_engine *rsa = &cmd->u.rsa;
        struct ccp_dm_workarea exp, src;
        struct ccp_data dst;
        struct ccp_op op;
        unsigned int ksb_count, i_len, o_len;
        int ret;

        if (rsa->key_size > CCP_RSA_MAX_WIDTH)
                return -EINVAL;

        if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
                return -EINVAL;

        /* The RSA modulus must precede the message being acted upon, so
         * it must be copied to a DMA area where the message and the
         * modulus can be concatenated. Therefore the input buffer
         * length required is twice the output buffer length (which
         * must be a multiple of 256-bits).
         */
        o_len = ((rsa->key_size + 255) / 256) * 32;
        i_len = o_len * 2;

        ksb_count = o_len / CCP_KSB_BYTES;

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);
        op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
        if (!op.ksb_key)
                return -EIO;

        /* The RSA exponent may span multiple (32-byte) KSB entries and must
         * be in little endian format. Reverse copy each 32-byte chunk
         * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
         * and each byte within that chunk and do not perform any byte swap
         * operations on the passthru operation.
         */
        ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
        if (ret)
                goto e_ksb;

        ret = ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len,
                                      CCP_KSB_BYTES, false);
        if (ret)
                goto e_exp;
        ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
                              CCP_PASSTHRU_BYTESWAP_NOOP);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_exp;
        }

        /* Concatenate the modulus and the message. Both the modulus and
         * the operands must be in little endian format. Since the input
         * is in big endian format it must be converted.
         */
        ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
        if (ret)
                goto e_exp;

        ret = ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len,
                                      CCP_KSB_BYTES, false);
        if (ret)
                goto e_src;
        src.address += o_len;   /* Adjust the address for the copy operation */
        ret = ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len,
                                      CCP_KSB_BYTES, false);
        if (ret)
                goto e_src;
        src.address -= o_len;   /* Reset the address to original value */

        /* Prepare the output area for the operation */
        ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
                            o_len, DMA_FROM_DEVICE);
        if (ret)
                goto e_src;

        op.src.u.dma.address = src.dma.address;
        op.src.u.dma.offset = 0;
        op.src.u.dma.length = i_len;
        op.dst.u.dma.address = dst.dm_wa.dma.address;
        op.dst.u.dma.offset = 0;
        op.dst.u.dma.length = o_len;

        op.u.rsa.mod_size = rsa->key_size;
        op.u.rsa.input_len = i_len;

        ret = ccp_perform_rsa(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
        }

        ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);

e_dst:
        ccp_free_data(&dst, cmd_q);

e_src:
        ccp_dm_free(&src);

e_exp:
        ccp_dm_free(&exp);

e_ksb:
        ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);

        return ret;
}
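/*
 * Worked example (illustrative): for a 2048-bit key,
 * o_len = ((2048 + 255) / 256) * 32 = 256 bytes of output,
 * i_len = 2 * o_len = 512 bytes of input (modulus then message), and
 * ksb_count = 256 / CCP_KSB_BYTES = 8 KSB entries for the exponent,
 * assuming CCP_KSB_BYTES == 32 as the 32-byte entry comments indicate.
 */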
static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
                                struct ccp_cmd *cmd)
{
        struct ccp_passthru_engine *pt = &cmd->u.passthru;
        struct ccp_dm_workarea mask;
        struct ccp_data src, dst;
        struct ccp_op op;
        bool in_place = false;
        unsigned int i;
        int ret;

        if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
                return -EINVAL;

        if (!pt->src || !pt->dst)
                return -EINVAL;

        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
                if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
                        return -EINVAL;
                if (!pt->mask)
                        return -EINVAL;
        }

        BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);

        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
                /* Load the mask */
                op.ksb_key = cmd_q->ksb_key;

                ret = ccp_init_dm_workarea(&mask, cmd_q,
                                           CCP_PASSTHRU_KSB_COUNT *
                                           CCP_KSB_BYTES,
                                           DMA_TO_DEVICE);
                if (ret)
                        return ret;

                ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
                ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
                                      CCP_PASSTHRU_BYTESWAP_NOOP);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_mask;
                }
        }

        /* Prepare the input and output data workareas. For in-place
         * operations we need to set the dma direction to BIDIRECTIONAL
         * and copy the src workarea to the dst workarea.
         */
        if (sg_virt(pt->src) == sg_virt(pt->dst))
                in_place = true;

        ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
                            CCP_PASSTHRU_MASKSIZE,
                            in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
        if (ret)
                goto e_mask;

        if (in_place) {
                dst = src;
        } else {
                ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
                                    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
                if (ret)
                        goto e_src;
        }

        /* Send data to the CCP Passthru engine
         * Because the CCP engine works on a single source and destination
         * dma address at a time, each entry in the source scatterlist
         * (after the dma_map_sg call) must be less than or equal to the
         * (remaining) length in the destination scatterlist entry and the
         * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
         */
        dst.sg_wa.sg_used = 0;
        for (i = 1; i <= src.sg_wa.dma_count; i++) {
                if (!dst.sg_wa.sg ||
                    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
                        ret = -EINVAL;
                        goto e_dst;
                }

                if (i == src.sg_wa.dma_count) {
                        op.eom = 1;
                        op.soc = 1;
                }

                op.src.type = CCP_MEMTYPE_SYSTEM;
                op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
                op.src.u.dma.offset = 0;
                op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);

                op.dst.type = CCP_MEMTYPE_SYSTEM;
                op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
                op.dst.u.dma.offset = dst.sg_wa.sg_used;
                op.dst.u.dma.length = op.src.u.dma.length;

                ret = ccp_perform_passthru(&op);
                if (ret) {
                        cmd->engine_error = cmd_q->cmd_error;
                        goto e_dst;
                }

                dst.sg_wa.sg_used += src.sg_wa.sg->length;
                if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
                        dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
                        dst.sg_wa.sg_used = 0;
                }
                src.sg_wa.sg = sg_next(src.sg_wa.sg);
        }

e_dst:
        if (!in_place)
                ccp_free_data(&dst, cmd_q);

e_src:
        ccp_free_data(&src, cmd_q);

e_mask:
        if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
                ccp_dm_free(&mask);

        return ret;
}
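/*
 * Caller-side sketch (illustrative; error handling omitted): a plain
 * passthru copy with no bitwise masking or byte swapping, using the
 * ccp_passthru_engine fields referenced above and the ccp_run_cmd()
 * prototype declared earlier.
 */
static int ccp_passthru_copy_example(struct ccp_cmd_queue *cmd_q,
                                     struct scatterlist *src,
                                     struct scatterlist *dst,
                                     unsigned int len)
{
        struct ccp_cmd cmd;

        memset(&cmd, 0, sizeof(cmd));
        cmd.engine = CCP_ENGINE_PASSTHRU;
        cmd.u.passthru.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
        cmd.u.passthru.byte_swap = CCP_PASSTHRU_BYTESWAP_NOOP;
        cmd.u.passthru.src = src;
        cmd.u.passthru.src_len = len;
        cmd.u.passthru.dst = dst;
        cmd.u.passthru.final = 1;

        return ccp_run_cmd(cmd_q, &cmd);
}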
static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        struct ccp_ecc_engine *ecc = &cmd->u.ecc;
        struct ccp_dm_workarea src, dst;
        struct ccp_op op;
        u8 *save;
        int ret;

        if (!ecc->u.mm.operand_1 ||
            (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
                return -EINVAL;

        if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
                if (!ecc->u.mm.operand_2 ||
                    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
                        return -EINVAL;

        if (!ecc->u.mm.result ||
            (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
                return -EINVAL;

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);

        /* Concatenate the modulus and the operands. Both the modulus and
         * the operands must be in little endian format. Since the input
         * is in big endian format it must be converted and placed in a
         * fixed length buffer.
         */
        ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;

        /* Save the workarea address since it is updated in order to perform
         * the concatenation
         */
        save = src.address;

        /* Copy the ECC modulus */
        ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
                                      CCP_ECC_OPERAND_SIZE, false);
        if (ret)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;

        /* Copy the first operand */
        ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
                                      ecc->u.mm.operand_1_len,
                                      CCP_ECC_OPERAND_SIZE, false);
        if (ret)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;

        if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
                /* Copy the second operand */
                ret = ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
                                              ecc->u.mm.operand_2_len,
                                              CCP_ECC_OPERAND_SIZE, false);
                if (ret)
                        goto e_src;
                src.address += CCP_ECC_OPERAND_SIZE;
        }

        /* Restore the workarea address */
        src.address = save;

        /* Prepare the output area for the operation */
        ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
                                   DMA_FROM_DEVICE);
        if (ret)
                goto e_src;

        op.src.u.dma.address = src.dma.address;
        op.src.u.dma.offset = 0;
        op.src.u.dma.length = src.length;
        op.dst.u.dma.address = dst.dma.address;
        op.dst.u.dma.offset = 0;
        op.dst.u.dma.length = dst.length;

        op.u.ecc.function = cmd->u.ecc.function;

        ret = ccp_perform_ecc(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
        }

        ecc->ecc_result = le16_to_cpup(
                (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
        if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
                ret = -EIO;
                goto e_dst;
        }

        /* Save the ECC result */
        ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);

e_dst:
        ccp_dm_free(&dst);

e_src:
        ccp_dm_free(&src);

        return ret;
}
static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        struct ccp_ecc_engine *ecc = &cmd->u.ecc;
        struct ccp_dm_workarea src, dst;
        struct ccp_op op;
        u8 *save;
        int ret;

        if (!ecc->u.pm.point_1.x ||
            (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
            !ecc->u.pm.point_1.y ||
            (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
                return -EINVAL;

        if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
                if (!ecc->u.pm.point_2.x ||
                    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
                    !ecc->u.pm.point_2.y ||
                    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
                        return -EINVAL;
        }

        if (!ecc->u.pm.domain_a ||
            (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
                return -EINVAL;

        if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
                if (!ecc->u.pm.scalar ||
                    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
                        return -EINVAL;

        if (!ecc->u.pm.result.x ||
            (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
            !ecc->u.pm.result.y ||
            (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
                return -EINVAL;

        memset(&op, 0, sizeof(op));
        op.cmd_q = cmd_q;
        op.jobid = ccp_gen_jobid(cmd_q->ccp);

        /* Concatenate the modulus and the operands. Both the modulus and
         * the operands must be in little endian format. Since the input
         * is in big endian format it must be converted and placed in a
         * fixed length buffer.
         */
        ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
                                   DMA_TO_DEVICE);
        if (ret)
                return ret;

        /* Save the workarea address since it is updated in order to perform
         * the concatenation
         */
        save = src.address;

        /* Copy the ECC modulus */
        ret = ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
                                      CCP_ECC_OPERAND_SIZE, false);
        if (ret)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;

        /* Copy the first point X and Y coordinate */
        ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
                                      ecc->u.pm.point_1.x_len,
                                      CCP_ECC_OPERAND_SIZE, false);
        if (ret)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;
        ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
                                      ecc->u.pm.point_1.y_len,
                                      CCP_ECC_OPERAND_SIZE, false);
        if (ret)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;

        /* Set the first point Z coordinate to 1 */
        *src.address = 0x01;
        src.address += CCP_ECC_OPERAND_SIZE;

        if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
                /* Copy the second point X and Y coordinate */
                ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
                                              ecc->u.pm.point_2.x_len,
                                              CCP_ECC_OPERAND_SIZE, false);
                if (ret)
                        goto e_src;
                src.address += CCP_ECC_OPERAND_SIZE;
                ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
                                              ecc->u.pm.point_2.y_len,
                                              CCP_ECC_OPERAND_SIZE, false);
                if (ret)
                        goto e_src;
                src.address += CCP_ECC_OPERAND_SIZE;

                /* Set the second point Z coordinate to 1 */
                *src.address = 0x01;
                src.address += CCP_ECC_OPERAND_SIZE;
        }

        /* Copy the Domain "a" parameter */
        ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
                                      ecc->u.pm.domain_a_len,
                                      CCP_ECC_OPERAND_SIZE, false);
        if (ret)
                goto e_src;
        src.address += CCP_ECC_OPERAND_SIZE;

        if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
                /* Copy the scalar value */
                ret = ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
                                              ecc->u.pm.scalar_len,
                                              CCP_ECC_OPERAND_SIZE,
                                              true);
                if (ret)
                        goto e_src;
                src.address += CCP_ECC_OPERAND_SIZE;
        }

        /* Restore the workarea address */
        src.address = save;

        /* Prepare the output area for the operation */
        ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
                                   DMA_FROM_DEVICE);
        if (ret)
                goto e_src;

        op.src.u.dma.address = src.dma.address;
        op.src.u.dma.offset = 0;
        op.src.u.dma.length = src.length;
        op.dst.u.dma.address = dst.dma.address;
        op.dst.u.dma.offset = 0;
        op.dst.u.dma.length = dst.length;

        op.u.ecc.function = cmd->u.ecc.function;

        ret = ccp_perform_ecc(&op);
        if (ret) {
                cmd->engine_error = cmd_q->cmd_error;
                goto e_dst;
        }

        ecc->ecc_result = le16_to_cpup(
                (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
        if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
                ret = -EIO;
                goto e_dst;
        }

        /* Save the workarea address since it is updated as we walk through
         * to copy the point math result
         */
        save = dst.address;

        /* Save the ECC result X and Y coordinates */
        ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
                                CCP_ECC_MODULUS_BYTES);
        dst.address += CCP_ECC_OUTPUT_SIZE;
        ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
                                CCP_ECC_MODULUS_BYTES);
        dst.address += CCP_ECC_OUTPUT_SIZE;

        /* Restore the workarea address */
        dst.address = save;

e_dst:
        ccp_dm_free(&dst);

e_src:
        ccp_dm_free(&src);

        return ret;
}
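/*
 * Source buffer layout assembled above (illustrative summary): modulus,
 * point 1 X, Y, Z(=1), then for point-add point 2 X, Y, Z(=1), the
 * domain "a" parameter, and for point-multiply the scalar, each operand
 * reversed into little endian and padded to CCP_ECC_OPERAND_SIZE.
 */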
static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        struct ccp_ecc_engine *ecc = &cmd->u.ecc;

        ecc->ecc_result = 0;

        if (!ecc->mod ||
            (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
                return -EINVAL;

        switch (ecc->function) {
        case CCP_ECC_FUNCTION_MMUL_384BIT:
        case CCP_ECC_FUNCTION_MADD_384BIT:
        case CCP_ECC_FUNCTION_MINV_384BIT:
                return ccp_run_ecc_mm_cmd(cmd_q, cmd);

        case CCP_ECC_FUNCTION_PADD_384BIT:
        case CCP_ECC_FUNCTION_PMUL_384BIT:
        case CCP_ECC_FUNCTION_PDBL_384BIT:
                return ccp_run_ecc_pm_cmd(cmd_q, cmd);

        default:
                return -EINVAL;
        }
}
int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
        int ret;

        cmd->engine_error = 0;
        cmd_q->cmd_error = 0;
        cmd_q->int_rcvd = 0;
        cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));

        switch (cmd->engine) {
        case CCP_ENGINE_AES:
                ret = ccp_run_aes_cmd(cmd_q, cmd);
                break;
        case CCP_ENGINE_XTS_AES_128:
                ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
                break;
        case CCP_ENGINE_SHA:
                ret = ccp_run_sha_cmd(cmd_q, cmd);
                break;
        case CCP_ENGINE_RSA:
                ret = ccp_run_rsa_cmd(cmd_q, cmd);
                break;
        case CCP_ENGINE_PASSTHRU:
                ret = ccp_run_passthru_cmd(cmd_q, cmd);
                break;
        case CCP_ENGINE_ECC:
                ret = ccp_run_ecc_cmd(cmd_q, cmd);
                break;
        default:
                ret = -EINVAL;
        }

        return ret;
}