Commit | Line | Data |
---|---|---|
27a274db TL |
1 | /* |
2 | * Copyright (c) 2016 MediaTek Inc. | |
3 | * Author: Daniel Hsiao <daniel.hsiao@mediatek.com> | |
4 | * PoChun Lin <pochun.lin@mediatek.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | */ | |
16 | ||
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>

#include "../mtk_vcodec_drv.h"
#include "../mtk_vcodec_util.h"
#include "../mtk_vcodec_intr.h"
#include "../mtk_vcodec_enc.h"
#include "../mtk_vcodec_enc_pm.h"
#include "../venc_drv_base.h"
#include "../venc_ipi_msg.h"
#include "../venc_vpu_if.h"
#include "mtk_vpu.h"
30 | ||
/*
 * Encoder register offsets, relative to the hw_base of the instance. Both are
 * read through vp8_enc_read_reg() once a frame has been encoded.
 */
#define VENC_BITSTREAM_FRAME_SIZE	0x0098
#define VENC_BITSTREAM_HEADER_LEN	0x00e8

/* Size in bytes of the largest ac_tag (the vp8 frame tag) that is composed. */
#define MAX_AC_TAG_SIZE			10
36 | ||
/*
 * enum venc_vp8_vpu_work_buf - vp8 encoder buffer index
 *
 * Index into the work_bufs arrays of struct venc_vp8_vsi and
 * struct venc_vp8_inst. VENC_VP8_VPU_WORK_BUF_MAX is not a buffer; it is the
 * number of entries and is used as the array length.
 */
enum venc_vp8_vpu_work_buf {
	VENC_VP8_VPU_WORK_BUF_LUMA = 0,
	VENC_VP8_VPU_WORK_BUF_LUMA2,
	VENC_VP8_VPU_WORK_BUF_LUMA3,
	VENC_VP8_VPU_WORK_BUF_CHROMA,
	VENC_VP8_VPU_WORK_BUF_CHROMA2,
	VENC_VP8_VPU_WORK_BUF_CHROMA3,
	VENC_VP8_VPU_WORK_BUF_MV_INFO,
	/* holds the bitstream header that is merged into each output frame */
	VENC_VP8_VPU_WORK_BUF_BS_HEADER,
	VENC_VP8_VPU_WORK_BUF_PROB_BUF,
	VENC_VP8_VPU_WORK_BUF_RC_INFO,
	/* RC_CODEx: initial content is copied from VPU memory at allocation */
	VENC_VP8_VPU_WORK_BUF_RC_CODE,
	VENC_VP8_VPU_WORK_BUF_RC_CODE2,
	VENC_VP8_VPU_WORK_BUF_RC_CODE3,
	VENC_VP8_VPU_WORK_BUF_MAX,
};
56 | ||
57 | /* | |
58 | * struct venc_vp8_vpu_config - Structure for vp8 encoder configuration | |
19d6837a TL |
59 | * AP-W/R : AP is writer/reader on this item |
60 | * VPU-W/R: VPU is write/reader on this item | |
27a274db TL |
61 | * @input_fourcc: input fourcc |
62 | * @bitrate: target bitrate (in bps) | |
63 | * @pic_w: picture width. Picture size is visible stream resolution, in pixels, | |
64 | * to be used for display purposes; must be smaller or equal to buffer | |
65 | * size. | |
66 | * @pic_h: picture height | |
67 | * @buf_w: buffer width (with 16 alignment). Buffer size is stream resolution | |
68 | * in pixels aligned to hardware requirements. | |
69 | * @buf_h: buffer height (with 16 alignment) | |
70 | * @gop_size: group of picture size (key frame) | |
71 | * @framerate: frame rate in fps | |
72 | * @ts_mode: temporal scalability mode (0: disable, 1: enable) | |
73 | * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps. | |
74 | */ | |
75 | struct venc_vp8_vpu_config { | |
76 | u32 input_fourcc; | |
77 | u32 bitrate; | |
78 | u32 pic_w; | |
79 | u32 pic_h; | |
80 | u32 buf_w; | |
81 | u32 buf_h; | |
82 | u32 gop_size; | |
83 | u32 framerate; | |
84 | u32 ts_mode; | |
85 | }; | |
86 | ||
87 | /* | |
19d6837a TL |
88 | * struct venc_vp8_vpu_buf - Structure for buffer information |
89 | * AP-W/R : AP is writer/reader on this item | |
90 | * VPU-W/R: VPU is write/reader on this item | |
27a274db TL |
91 | * @iova: IO virtual address |
92 | * @vpua: VPU side memory addr which is used by RC_CODE | |
93 | * @size: buffer size (in bytes) | |
94 | */ | |
95 | struct venc_vp8_vpu_buf { | |
27a274db TL |
96 | u32 iova; |
97 | u32 vpua; | |
98 | u32 size; | |
99 | }; | |
100 | ||
101 | /* | |
102 | * struct venc_vp8_vsi - Structure for VPU driver control and info share | |
19d6837a TL |
103 | * AP-W/R : AP is writer/reader on this item |
104 | * VPU-W/R: VPU is write/reader on this item | |
27a274db TL |
105 | * This structure is allocated in VPU side and shared to AP side. |
106 | * @config: vp8 encoder configuration | |
107 | * @work_bufs: working buffer information in VPU side | |
108 | * The work_bufs here is for storing the 'size' info shared to AP side. | |
109 | * The similar item in struct venc_vp8_inst is for memory allocation | |
110 | * in AP side. The AP driver will copy the 'size' from here to the one in | |
111 | * struct mtk_vcodec_mem, then invoke mtk_vcodec_mem_alloc to allocate | |
112 | * the buffer. After that, bypass the 'dma_addr' to the 'iova' field here for | |
113 | * register setting in VPU side. | |
114 | */ | |
115 | struct venc_vp8_vsi { | |
116 | struct venc_vp8_vpu_config config; | |
117 | struct venc_vp8_vpu_buf work_bufs[VENC_VP8_VPU_WORK_BUF_MAX]; | |
118 | }; | |
119 | ||
120 | /* | |
121 | * struct venc_vp8_inst - vp8 encoder AP driver instance | |
122 | * @hw_base: vp8 encoder hardware register base | |
123 | * @work_bufs: working buffer | |
124 | * @work_buf_allocated: working buffer allocated flag | |
125 | * @frm_cnt: encoded frame count, it's used for I-frame judgement and | |
126 | * reset when force intra cmd received. | |
127 | * @ts_mode: temporal scalability mode (0: disable, 1: enable) | |
128 | * support three temporal layers - 0: 7.5fps 1: 7.5fps 2: 15fps. | |
129 | * @vpu_inst: VPU instance to exchange information between AP and VPU | |
130 | * @vsi: driver structure allocated by VPU side and shared to AP side for | |
131 | * control and info share | |
132 | * @ctx: context for v4l2 layer integration | |
133 | */ | |
134 | struct venc_vp8_inst { | |
135 | void __iomem *hw_base; | |
136 | struct mtk_vcodec_mem work_bufs[VENC_VP8_VPU_WORK_BUF_MAX]; | |
137 | bool work_buf_allocated; | |
138 | unsigned int frm_cnt; | |
139 | unsigned int ts_mode; | |
140 | struct venc_vpu_inst vpu_inst; | |
141 | struct venc_vp8_vsi *vsi; | |
142 | struct mtk_vcodec_ctx *ctx; | |
143 | }; | |
144 | ||
27a274db TL |
145 | static inline u32 vp8_enc_read_reg(struct venc_vp8_inst *inst, u32 addr) |
146 | { | |
147 | return readl(inst->hw_base + addr); | |
148 | } | |
149 | ||
150 | static void vp8_enc_free_work_buf(struct venc_vp8_inst *inst) | |
151 | { | |
152 | int i; | |
153 | ||
154 | mtk_vcodec_debug_enter(inst); | |
155 | ||
156 | /* Buffers need to be freed by AP. */ | |
157 | for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) { | |
158 | if ((inst->work_bufs[i].size == 0)) | |
159 | continue; | |
160 | mtk_vcodec_mem_free(inst->ctx, &inst->work_bufs[i]); | |
161 | } | |
162 | ||
163 | mtk_vcodec_debug_leave(inst); | |
164 | } | |
165 | ||
166 | static int vp8_enc_alloc_work_buf(struct venc_vp8_inst *inst) | |
167 | { | |
168 | int i; | |
169 | int ret = 0; | |
170 | struct venc_vp8_vpu_buf *wb = inst->vsi->work_bufs; | |
171 | ||
172 | mtk_vcodec_debug_enter(inst); | |
173 | ||
174 | for (i = 0; i < VENC_VP8_VPU_WORK_BUF_MAX; i++) { | |
175 | if ((wb[i].size == 0)) | |
176 | continue; | |
177 | /* | |
178 | * This 'wb' structure is set by VPU side and shared to AP for | |
179 | * buffer allocation and IO virtual addr mapping. For most of | |
180 | * the buffers, AP will allocate the buffer according to 'size' | |
181 | * field and store the IO virtual addr in 'iova' field. For the | |
182 | * RC_CODEx buffers, they are pre-allocated in the VPU side | |
183 | * because they are inside VPU SRAM, and save the VPU addr in | |
184 | * the 'vpua' field. The AP will translate the VPU addr to the | |
185 | * corresponding IO virtual addr and store in 'iova' field. | |
186 | */ | |
187 | inst->work_bufs[i].size = wb[i].size; | |
188 | ret = mtk_vcodec_mem_alloc(inst->ctx, &inst->work_bufs[i]); | |
189 | if (ret) { | |
190 | mtk_vcodec_err(inst, | |
191 | "cannot alloc work_bufs[%d]", i); | |
192 | goto err_alloc; | |
193 | } | |
194 | /* | |
195 | * This RC_CODEx is pre-allocated by VPU and saved in VPU addr. | |
196 | * So we need use memcpy to copy RC_CODEx from VPU addr into IO | |
197 | * virtual addr in 'iova' field for reg setting in VPU side. | |
198 | */ | |
199 | if (i == VENC_VP8_VPU_WORK_BUF_RC_CODE || | |
200 | i == VENC_VP8_VPU_WORK_BUF_RC_CODE2 || | |
201 | i == VENC_VP8_VPU_WORK_BUF_RC_CODE3) { | |
202 | void *tmp_va; | |
203 | ||
204 | tmp_va = vpu_mapping_dm_addr(inst->vpu_inst.dev, | |
205 | wb[i].vpua); | |
206 | memcpy(inst->work_bufs[i].va, tmp_va, wb[i].size); | |
207 | } | |
208 | wb[i].iova = inst->work_bufs[i].dma_addr; | |
209 | ||
210 | mtk_vcodec_debug(inst, | |
19e476c7 | 211 | "work_bufs[%d] va=0x%p,iova=%pad,size=%zu", |
27a274db | 212 | i, inst->work_bufs[i].va, |
19e476c7 | 213 | &inst->work_bufs[i].dma_addr, |
27a274db TL |
214 | inst->work_bufs[i].size); |
215 | } | |
216 | ||
217 | mtk_vcodec_debug_leave(inst); | |
218 | ||
219 | return ret; | |
220 | ||
221 | err_alloc: | |
222 | vp8_enc_free_work_buf(inst); | |
223 | ||
224 | return ret; | |
225 | } | |
226 | ||
227 | static unsigned int vp8_enc_wait_venc_done(struct venc_vp8_inst *inst) | |
228 | { | |
229 | unsigned int irq_status = 0; | |
230 | struct mtk_vcodec_ctx *ctx = (struct mtk_vcodec_ctx *)inst->ctx; | |
231 | ||
232 | if (!mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED, | |
233 | WAIT_INTR_TIMEOUT_MS)) { | |
234 | irq_status = ctx->irq_status; | |
235 | mtk_vcodec_debug(inst, "isr return %x", irq_status); | |
236 | } | |
237 | return irq_status; | |
238 | } | |
239 | ||
240 | /* | |
241 | * Compose ac_tag, bitstream header and bitstream payload into | |
242 | * one bitstream buffer. | |
243 | */ | |
244 | static int vp8_enc_compose_one_frame(struct venc_vp8_inst *inst, | |
245 | struct mtk_vcodec_mem *bs_buf, | |
246 | unsigned int *bs_size) | |
247 | { | |
248 | unsigned int not_key; | |
249 | u32 bs_frm_size; | |
250 | u32 bs_hdr_len; | |
251 | unsigned int ac_tag_size; | |
252 | u8 ac_tag[MAX_AC_TAG_SIZE]; | |
a874a170 | 253 | u32 tag; |
27a274db TL |
254 | |
255 | bs_frm_size = vp8_enc_read_reg(inst, VENC_BITSTREAM_FRAME_SIZE); | |
256 | bs_hdr_len = vp8_enc_read_reg(inst, VENC_BITSTREAM_HEADER_LEN); | |
257 | ||
258 | /* if a frame is key frame, not_key is 0 */ | |
259 | not_key = !inst->vpu_inst.is_key_frm; | |
a874a170 TL |
260 | tag = (bs_hdr_len << 5) | 0x10 | not_key; |
261 | ac_tag[0] = tag & 0xff; | |
262 | ac_tag[1] = (tag >> 8) & 0xff; | |
263 | ac_tag[2] = (tag >> 16) & 0xff; | |
264 | ||
27a274db TL |
265 | /* key frame */ |
266 | if (not_key == 0) { | |
267 | ac_tag_size = MAX_AC_TAG_SIZE; | |
268 | ac_tag[3] = 0x9d; | |
269 | ac_tag[4] = 0x01; | |
270 | ac_tag[5] = 0x2a; | |
271 | ac_tag[6] = inst->vsi->config.pic_w; | |
272 | ac_tag[7] = inst->vsi->config.pic_w >> 8; | |
273 | ac_tag[8] = inst->vsi->config.pic_h; | |
274 | ac_tag[9] = inst->vsi->config.pic_h >> 8; | |
275 | } else { | |
276 | ac_tag_size = 3; | |
277 | } | |
278 | ||
279 | if (bs_buf->size < bs_hdr_len + bs_frm_size + ac_tag_size) { | |
280 | mtk_vcodec_err(inst, "bitstream buf size is too small(%zu)", | |
281 | bs_buf->size); | |
282 | return -EINVAL; | |
283 | } | |
284 | ||
285 | /* | |
286 | * (1) The vp8 bitstream header and body are generated by the HW vp8 | |
287 | * encoder separately at the same time. We cannot know the bitstream | |
288 | * header length in advance. | |
289 | * (2) From the vp8 spec, there is no stuffing byte allowed between the | |
290 | * ac tag, bitstream header and bitstream body. | |
291 | */ | |
292 | memmove(bs_buf->va + bs_hdr_len + ac_tag_size, | |
293 | bs_buf->va, bs_frm_size); | |
294 | memcpy(bs_buf->va + ac_tag_size, | |
295 | inst->work_bufs[VENC_VP8_VPU_WORK_BUF_BS_HEADER].va, | |
296 | bs_hdr_len); | |
297 | memcpy(bs_buf->va, ac_tag, ac_tag_size); | |
298 | *bs_size = bs_frm_size + bs_hdr_len + ac_tag_size; | |
299 | ||
300 | return 0; | |
301 | } | |
302 | ||
303 | static int vp8_enc_encode_frame(struct venc_vp8_inst *inst, | |
304 | struct venc_frm_buf *frm_buf, | |
305 | struct mtk_vcodec_mem *bs_buf, | |
306 | unsigned int *bs_size) | |
307 | { | |
308 | int ret = 0; | |
309 | unsigned int irq_status; | |
310 | ||
311 | mtk_vcodec_debug(inst, "->frm_cnt=%d", inst->frm_cnt); | |
312 | ||
313 | ret = vpu_enc_encode(&inst->vpu_inst, 0, frm_buf, bs_buf, bs_size); | |
314 | if (ret) | |
315 | return ret; | |
316 | ||
317 | irq_status = vp8_enc_wait_venc_done(inst); | |
318 | if (irq_status != MTK_VENC_IRQ_STATUS_FRM) { | |
319 | mtk_vcodec_err(inst, "irq_status=%d failed", irq_status); | |
320 | return -EIO; | |
321 | } | |
322 | ||
323 | if (vp8_enc_compose_one_frame(inst, bs_buf, bs_size)) { | |
324 | mtk_vcodec_err(inst, "vp8_enc_compose_one_frame failed"); | |
325 | return -EINVAL; | |
326 | } | |
327 | ||
328 | inst->frm_cnt++; | |
329 | mtk_vcodec_debug(inst, "<-size=%d key_frm=%d", *bs_size, | |
330 | inst->vpu_inst.is_key_frm); | |
331 | ||
332 | return ret; | |
333 | } | |
334 | ||
335 | static int vp8_enc_init(struct mtk_vcodec_ctx *ctx, unsigned long *handle) | |
336 | { | |
337 | int ret = 0; | |
338 | struct venc_vp8_inst *inst; | |
339 | ||
340 | inst = kzalloc(sizeof(*inst), GFP_KERNEL); | |
341 | if (!inst) | |
342 | return -ENOMEM; | |
343 | ||
344 | inst->ctx = ctx; | |
345 | inst->vpu_inst.ctx = ctx; | |
346 | inst->vpu_inst.dev = ctx->dev->vpu_plat_dev; | |
347 | inst->vpu_inst.id = IPI_VENC_VP8; | |
348 | inst->hw_base = mtk_vcodec_get_reg_addr(inst->ctx, VENC_LT_SYS); | |
349 | ||
350 | mtk_vcodec_debug_enter(inst); | |
351 | ||
352 | ret = vpu_enc_init(&inst->vpu_inst); | |
353 | ||
354 | inst->vsi = (struct venc_vp8_vsi *)inst->vpu_inst.vsi; | |
355 | ||
356 | mtk_vcodec_debug_leave(inst); | |
357 | ||
358 | if (ret) | |
359 | kfree(inst); | |
360 | else | |
361 | (*handle) = (unsigned long)inst; | |
362 | ||
363 | return ret; | |
364 | } | |
365 | ||
366 | static int vp8_enc_encode(unsigned long handle, | |
367 | enum venc_start_opt opt, | |
368 | struct venc_frm_buf *frm_buf, | |
369 | struct mtk_vcodec_mem *bs_buf, | |
370 | struct venc_done_result *result) | |
371 | { | |
372 | int ret = 0; | |
373 | struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; | |
374 | struct mtk_vcodec_ctx *ctx = inst->ctx; | |
375 | ||
376 | mtk_vcodec_debug_enter(inst); | |
377 | ||
378 | enable_irq(ctx->dev->enc_lt_irq); | |
379 | ||
380 | switch (opt) { | |
381 | case VENC_START_OPT_ENCODE_FRAME: | |
382 | ret = vp8_enc_encode_frame(inst, frm_buf, bs_buf, | |
383 | &result->bs_size); | |
384 | if (ret) | |
385 | goto encode_err; | |
386 | result->is_key_frm = inst->vpu_inst.is_key_frm; | |
387 | break; | |
388 | ||
389 | default: | |
390 | mtk_vcodec_err(inst, "opt not support:%d", opt); | |
391 | ret = -EINVAL; | |
392 | break; | |
393 | } | |
394 | ||
395 | encode_err: | |
396 | ||
397 | disable_irq(ctx->dev->enc_lt_irq); | |
398 | mtk_vcodec_debug_leave(inst); | |
399 | ||
400 | return ret; | |
401 | } | |
402 | ||
403 | static int vp8_enc_set_param(unsigned long handle, | |
404 | enum venc_set_param_type type, | |
405 | struct venc_enc_param *enc_prm) | |
406 | { | |
407 | int ret = 0; | |
408 | struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; | |
409 | ||
410 | mtk_vcodec_debug(inst, "->type=%d", type); | |
411 | ||
412 | switch (type) { | |
413 | case VENC_SET_PARAM_ENC: | |
414 | inst->vsi->config.input_fourcc = enc_prm->input_yuv_fmt; | |
415 | inst->vsi->config.bitrate = enc_prm->bitrate; | |
416 | inst->vsi->config.pic_w = enc_prm->width; | |
417 | inst->vsi->config.pic_h = enc_prm->height; | |
418 | inst->vsi->config.buf_w = enc_prm->buf_width; | |
419 | inst->vsi->config.buf_h = enc_prm->buf_height; | |
420 | inst->vsi->config.gop_size = enc_prm->gop_size; | |
421 | inst->vsi->config.framerate = enc_prm->frm_rate; | |
422 | inst->vsi->config.ts_mode = inst->ts_mode; | |
423 | ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); | |
424 | if (ret) | |
425 | break; | |
426 | if (inst->work_buf_allocated) { | |
427 | vp8_enc_free_work_buf(inst); | |
428 | inst->work_buf_allocated = false; | |
429 | } | |
430 | ret = vp8_enc_alloc_work_buf(inst); | |
431 | if (ret) | |
432 | break; | |
433 | inst->work_buf_allocated = true; | |
434 | break; | |
435 | ||
436 | /* | |
437 | * VENC_SET_PARAM_TS_MODE must be called before VENC_SET_PARAM_ENC | |
438 | */ | |
439 | case VENC_SET_PARAM_TS_MODE: | |
440 | inst->ts_mode = 1; | |
441 | mtk_vcodec_debug(inst, "set ts_mode"); | |
442 | break; | |
443 | ||
444 | default: | |
445 | ret = vpu_enc_set_param(&inst->vpu_inst, type, enc_prm); | |
446 | break; | |
447 | } | |
448 | ||
449 | mtk_vcodec_debug_leave(inst); | |
450 | ||
451 | return ret; | |
452 | } | |
453 | ||
454 | static int vp8_enc_deinit(unsigned long handle) | |
455 | { | |
456 | int ret = 0; | |
457 | struct venc_vp8_inst *inst = (struct venc_vp8_inst *)handle; | |
458 | ||
459 | mtk_vcodec_debug_enter(inst); | |
460 | ||
461 | ret = vpu_enc_deinit(&inst->vpu_inst); | |
462 | ||
463 | if (inst->work_buf_allocated) | |
464 | vp8_enc_free_work_buf(inst); | |
465 | ||
466 | mtk_vcodec_debug_leave(inst); | |
467 | kfree(inst); | |
468 | ||
469 | return ret; | |
470 | } | |
471 | ||
472 | static struct venc_common_if venc_vp8_if = { | |
473 | vp8_enc_init, | |
474 | vp8_enc_encode, | |
475 | vp8_enc_set_param, | |
476 | vp8_enc_deinit, | |
477 | }; | |
478 | ||
479 | struct venc_common_if *get_vp8_enc_comm_if(void); | |
480 | ||
481 | struct venc_common_if *get_vp8_enc_comm_if(void) | |
482 | { | |
483 | return &venc_vp8_if; | |
484 | } |