Merge tag 'gvt-fixes-2022-08-22' of https://github.com/intel/gvt-linux into drm-intel...
[linux-block.git] / drivers / media / platform / mediatek / vcodec / vdec / vdec_vp9_req_lat_if.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2021 MediaTek Inc.
4  * Author: George Sun <george.sun@mediatek.com>
5  */
6
7 #include <linux/module.h>
8 #include <linux/slab.h>
9 #include <media/videobuf2-dma-contig.h>
10 #include <media/v4l2-vp9.h>
11
12 #include "../mtk_vcodec_util.h"
13 #include "../mtk_vcodec_dec.h"
14 #include "../mtk_vcodec_intr.h"
15 #include "../vdec_drv_base.h"
16 #include "../vdec_drv_if.h"
17 #include "../vdec_vpu_if.h"
18
19 /* reset_frame_context defined in VP9 spec */
/*
 * Values 0 and 1 both mean "do not reset any frame context", 2 resets only
 * the context selected by the frame header and 3 resets all contexts.
 */
20 #define VP9_RESET_FRAME_CONTEXT_NONE0 0
21 #define VP9_RESET_FRAME_CONTEXT_NONE1 1
22 #define VP9_RESET_FRAME_CONTEXT_SPEC 2
23 #define VP9_RESET_FRAME_CONTEXT_ALL 3
24
/* sizes assigned to the tile, prob and counts working buffers before allocation */
25 #define VP9_TILE_BUF_SIZE 4096
26 #define VP9_PROB_BUF_SIZE 2560
27 #define VP9_COUNTS_BUF_SIZE 16384
28
/*
 * Flag helpers: each one tests a V4L2 VP9 flag bit and evaluates to 0 or 1.
 * They are not self-contained: HDR_FLAG(), LF_FLAG() and SEG_FLAG() expand
 * against a variable named hdr, lf or seg that must exist in the calling
 * scope.
 */
29 #define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x))
30 #define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x))
31 #define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x))
/* evaluates to 3 for band 0 and to 6 for every other band */
32 #define VP9_BAND_6(band) ((band) == 0 ? 3 : 6)
33
34 /*
35  * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
 *
 * One complete set of probability tables. The shared default table is
 * created by duplicating sizeof(struct vdec_vp9_slice_frame_ctx) bytes from
 * memory mapped from the firmware, so member order, sizes and the explicit
 * padding members define the layout and must not be changed on their own.
 * NOTE(review): the firmware-side definition is not visible here - confirm
 * against it before touching this struct.
36  */
37 struct vdec_vp9_slice_frame_ctx {
38         struct {
39                 u8 probs[6][3];
40                 u8 padding[2];
41         } coef_probs[4][2][2][6];
42
43         u8 y_mode_prob[4][16];
44         u8 switch_interp_prob[4][16];
45         u8 seg[32];  /* ignore */
46         u8 comp_inter_prob[16];
47         u8 comp_ref_prob[16];
48         u8 single_ref_prob[5][2];
49         u8 single_ref_prob_padding[6];
50
        /* motion vector probabilities; the [2] arrays hold one entry per mv component */
51         u8 joint[3];
52         u8 joint_padding[13];
53         struct {
54                 u8 sign;
55                 u8 classes[10];
56                 u8 padding[5];
57         } sign_classes[2];
58         struct {
59                 u8 class0[1];
60                 u8 bits[10];
61                 u8 padding[5];
62         } class0_bits[2];
63         struct {
64                 u8 class0_fp[2][3];
65                 u8 fp[3];
66                 u8 class0_hp;
67                 u8 hp;
68                 u8 padding[5];
69         } class0_fp_hp[2];
70
71         u8 uv_mode_prob[10][16];
72         u8 uv_mode_prob_padding[2][16];
73
74         u8 partition_prob[16][4];
75
76         u8 inter_mode_probs[7][4];
77         u8 skip_probs[4];
78
79         u8 tx_p8x8[2][4];
80         u8 tx_p16x16[2][4];
81         u8 tx_p32x32[2][4];
82         u8 intra_inter_prob[8];
83 };
84
85 /*
86  * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
 *
 * Symbol counters laid out with explicit padding and reserved members.
 * NOTE(review): presumably this mirrors the layout written by the firmware
 * into the counts buffer - confirm before changing any member.
87  */
88 struct vdec_vp9_slice_frame_counts {
        /*
         * The structured view uses 4 * 2 * 2 entries of 36 u32 each; the
         * eob_branch_space overlay fixes the size of this region at
         * 256 * 4 u32 regardless.
         */
89         union {
90                 struct {
91                         u32 band_0[3];
92                         u32 padding0[1];
93                         u32 band_1_5[5][6];
94                         u32 padding1[2];
95                 } eob_branch[4][2][2];
96                 u32 eob_branch_space[256 * 4];
97         };
98
99         struct {
100                 u32 band_0[3][4];
101                 u32 band_1_5[5][6][4];
102         } coef_probs[4][2][2];
103
104         u32 intra_inter[4][2];
105         u32 comp_inter[5][2];
106         u32 comp_inter_padding[2];
107         u32 comp_ref[5][2];
108         u32 comp_ref_padding[2];
109         u32 single_ref[5][2][2];
110         u32 inter_mode[7][4];
111         u32 y_mode[4][12];
112         u32 uv_mode[10][10];
113         u32 partition[16][4];
114         u32 switchable_interp[4][4];
115
116         u32 tx_p8x8[2][2];
117         u32 tx_p16x16[2][4];
118         u32 tx_p32x32[2][4];
119
120         u32 skip[3][4];
121
122         u32 joint[4];
123
        /* motion vector counters, one entry per mv component */
124         struct {
125                 u32 sign[2];
126                 u32 class0[2];
127                 u32 classes[12];
128                 u32 bits[10][2];
129                 u32 padding[4];
130                 u32 class0_fp[2][4];
131                 u32 fp[4];
132                 u32 class0_hp[2];
133                 u32 hp[2];
134         } mvcomp[2];
135
136         u32 reserved[126][4];
137 };
138
139 /**
140  * struct vdec_vp9_slice_counts_map - vp9 counts tables to map
141  *                                    v4l2_vp9_frame_symbol_counts
142  * @skip:       skip counts.
143  * @y_mode:     Y prediction mode counts.
144  * @filter:     interpolation filter counts.
145  * @mv_joint:   motion vector joint counts.
146  * @sign:       motion vector sign counts.
147  * @classes:    motion vector class counts.
148  * @class0:     motion vector class0 bit counts.
149  * @bits:       motion vector bits counts.
150  * @class0_fp:  motion vector class0 fractional bit counts.
151  * @fp: motion vector fractional bit counts.
152  * @class0_hp:  motion vector class0 high precision fractional bit counts.
153  * @hp: motion vector high precision fractional bit counts.
 *
 * NOTE(review): @mv_joint is documented above but the struct has no such
 * member; either the entry is stale or the member is missing - confirm.
 * The array dimensions here are smaller than the matching arrays in
 * struct vdec_vp9_slice_frame_counts (e.g. skip[3][2] versus skip[3][4]).
154  */
155 struct vdec_vp9_slice_counts_map {
156         u32 skip[3][2];
157         u32 y_mode[4][10];
158         u32 filter[4][3];
159         u32 sign[2][2];
160         u32 classes[2][11];
161         u32 class0[2][2];
162         u32 bits[2][10][2];
163         u32 class0_fp[2][2][4];
164         u32 fp[2][4];
165         u32 class0_hp[2][2];
166         u32 hp[2][2];
167 };
168
169 /*
170  * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax
171  *                                             used for decoding
 *
 * Filled from the V4L2 VP9 frame control and from per-instance state by the
 * vdec_vp9_slice_setup_*() helpers. The last_* members carry values saved
 * from the previous picture. Explicit padding members are part of the
 * layout.
172  */
173 struct vdec_vp9_slice_uncompressed_header {
174         u8 profile;
175         u8 last_frame_type;
        /* set to 0 when the KEY_FRAME flag is present, 1 otherwise */
176         u8 frame_type;
177
178         u8 last_show_frame;
179         u8 show_frame;
180         u8 error_resilient_mode;
181
182         u8 bit_depth;
183         u8 padding0[1];
184         u16 last_frame_width;
185         u16 last_frame_height;
186         u16 frame_width;
187         u16 frame_height;
188
189         u8 intra_only;
        /* one of the VP9_RESET_FRAME_CONTEXT_* values */
190         u8 reset_frame_context;
        /* index 0 is always written as 0; 1..3 come from the header bit mask */
191         u8 ref_frame_sign_bias[4];
192         u8 allow_high_precision_mv;
193         u8 interpolation_filter;
194
195         u8 refresh_frame_context;
196         u8 frame_parallel_decoding_mode;
197         u8 frame_context_idx;
198
199         /* loop_filter_params */
200         u8 loop_filter_level;
201         u8 loop_filter_sharpness;
202         u8 loop_filter_delta_enabled;
203         s8 loop_filter_ref_deltas[4];
204         s8 loop_filter_mode_deltas[2];
205
206         /* quantization_params */
207         u8 base_q_idx;
208         s8 delta_q_y_dc;
209         s8 delta_q_uv_dc;
210         s8 delta_q_uv_ac;
211
212         /* segmentation_params */
213         u8 segmentation_enabled;
214         u8 segmentation_update_map;
215         u8 segmentation_tree_probs[7];
216         u8 padding1[1];
        /* sic: member name is spelled "udpate"; other code refers to it by this name */
217         u8 segmentation_temporal_udpate;
218         u8 segmentation_pred_prob[3];
219         u8 segmentation_update_data;
220         u8 segmentation_abs_or_delta_update;
221         u8 feature_enabled[8];
222         s16 feature_value[8][4];
223
224         /* tile_info */
225         u8 tile_cols_log2;
226         u8 tile_rows_log2;
227         u8 padding2[2];
228
229         u16 uncompressed_header_size;
        /* receives the compressed header size from the frame control */
230         u16 header_size_in_bytes;
231
232         /* LAT OUT, CORE IN */
        /* copied back from the remote vsi by vdec_vp9_slice_vsi_from_remote() */
233         u32 dequant[8][4];
234 };
235
236 /*
237  * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax
238  *                                           used for decoding.
 *
 * The driver does not fill this in itself in the code visible here; it is
 * copied from the remote vsi by vdec_vp9_slice_vsi_from_remote() when that
 * function is called with skip == 0.
239  */
240 struct vdec_vp9_slice_compressed_header {
241         u8 tx_mode;
242         u8 ref_mode;
243         u8 comp_fixed_ref;
244         u8 comp_var_ref[2];
245         u8 padding[3];
246 };
247
248 /*
249  * struct vdec_vp9_slice_tiles - vp9 tile syntax
 *
 * Holds at most 4 tile rows and 64 tile columns.
 * vdec_vp9_slice_setup_tile() fills mi_rows[]/mi_cols[] with each tile's
 * span, computed as (mode-info units in the tile + 7) >> 3, and counts in
 * actual_rows the tile rows whose mi_rows[] entry is non-zero.
 * NOTE(review): size[][] is not written in the code visible here;
 * presumably it holds per-tile data sizes - confirm.
250  */
251 struct vdec_vp9_slice_tiles {
252         u32 size[4][64];
253         u32 mi_rows[4];
254         u32 mi_cols[64];
255         u8 actual_rows;
256         u8 padding[7];
257 };
258
259 /*
260  * struct vdec_vp9_slice_reference - vp9 reference frame information
261  */
262 struct vdec_vp9_slice_reference {
263         u16 frame_width;
264         u16 frame_height;
265         u8 bit_depth;
266         u8 subsampling_x;
267         u8 subsampling_y;
268         u8 padding;
269 };
270
271 /*
272  * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
273  */
274 struct vdec_vp9_slice_frame {
275         struct vdec_vp9_slice_uncompressed_header uh;
276         struct vdec_vp9_slice_compressed_header ch;
277         struct vdec_vp9_slice_tiles tiles;
278         struct vdec_vp9_slice_reference ref[3];
279 };
280
281 /*
282  * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
283  */
284 struct vdec_vp9_slice_init_vsi {
285         unsigned int architecture;
286         unsigned int reserved;
287         u64 core_vsi;
288         /* default frame context's position in MicroP */
289         u64 default_frame_ctx;
290 };
291
292 /*
293  * struct vdec_vp9_slice_mem - memory address and size
294  */
295 struct vdec_vp9_slice_mem {
296         union {
297                 u64 buf;
298                 dma_addr_t dma_addr;
299         };
300         union {
301                 size_t size;
302                 dma_addr_t dma_addr_end;
303                 u64 padding;
304         };
305 };
306
307 /*
308  * struct vdec_vp9_slice_bs - input buffer for decoding
309  */
310 struct vdec_vp9_slice_bs {
311         struct vdec_vp9_slice_mem buf;
312         struct vdec_vp9_slice_mem frame;
313 };
314
315 /*
316  * struct vdec_vp9_slice_fb - frame buffer for decoding
317  */
318 struct vdec_vp9_slice_fb {
319         struct vdec_vp9_slice_mem y;
320         struct vdec_vp9_slice_mem c;
321 };
322
323 /*
324  * struct vdec_vp9_slice_state - decoding state
325  */
326 struct vdec_vp9_slice_state {
327         int err;
328         unsigned int full;
329         unsigned int timeout;
330         unsigned int perf;
331
332         unsigned int crc[12];
333 };
334
335 /**
336  * struct vdec_vp9_slice_vsi - exchange decoding information
337  *                             between Main CPU and MicroP
338  *
339  * @bs: input buffer
340  * @fb: output buffer
341  * @ref:        3 reference buffers
342  * @mv: mv working buffer
343  * @seg:        segmentation working buffer
344  * @tile:       tile buffer
345  * @prob:       prob table buffer, used to set/update prob table
346  * @counts:     counts table buffer, used to update prob table
347  * @ube:        general buffer
348  * @trans:      trans buffer position in general buffer
349  * @err_map:    error buffer
350  * @row_info:   row info buffer
351  * @frame:      decoding syntax
352  * @state:      decoding state
353  */
354 struct vdec_vp9_slice_vsi {
355         /* used in LAT stage */
356         struct vdec_vp9_slice_bs bs;
357         /* used in Core stage */
358         struct vdec_vp9_slice_fb fb;
359         struct vdec_vp9_slice_fb ref[3];
360
361         struct vdec_vp9_slice_mem mv[2];
362         struct vdec_vp9_slice_mem seg[2];
363         struct vdec_vp9_slice_mem tile;
364         struct vdec_vp9_slice_mem prob;
365         struct vdec_vp9_slice_mem counts;
366
367         /* LAT stage's output, Core stage's input */
368         struct vdec_vp9_slice_mem ube;
369         struct vdec_vp9_slice_mem trans;
370         struct vdec_vp9_slice_mem err_map;
371         struct vdec_vp9_slice_mem row_info;
372
373         /* decoding parameters */
374         struct vdec_vp9_slice_frame frame;
375
376         struct vdec_vp9_slice_state state;
377 };
378
379 /**
380  * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
381  *                             pass it from lat to core
382  *
383  * @vsi:        local vsi. copy to/from remote vsi before/after decoding
384  * @ref_idx:    reference buffer index
385  * @seq:        picture sequence
386  * @state:      decoding state
387  */
388 struct vdec_vp9_slice_pfc {
389         struct vdec_vp9_slice_vsi vsi;
390
391         u64 ref_idx[3];
392
393         int seq;
394
395         /* LAT/Core CRC */
396         struct vdec_vp9_slice_state state[2];
397 };
398
399 /*
400  * enum vdec_vp9_slice_resolution_level
401  */
402 enum vdec_vp9_slice_resolution_level {
403         VP9_RES_NONE,
404         VP9_RES_FHD,
405         VP9_RES_4K,
406         VP9_RES_8K,
407 };
408
409 /*
410  * struct vdec_vp9_slice_ref - picture's width & height should kept
411  *                             for later decoding as reference picture
412  */
413 struct vdec_vp9_slice_ref {
414         unsigned int width;
415         unsigned int height;
416 };
417
418 /**
419  * struct vdec_vp9_slice_instance - represent one vp9 instance
420  *
421  * @ctx:                pointer to codec's context
422  * @vpu:                VPU instance
423  * @seq:                global picture sequence
424  * @level:              level of current resolution
425  * @width:              width of last picture
426  * @height:             height of last picture
427  * @frame_type: frame_type of last picture
428  * @irq:                irq to Main CPU or MicroP
429  * @show_frame: show_frame of last picture
430  * @dpb:                picture information (width/height) for reference
431  * @mv:         mv working buffer
432  * @seg:                segmentation working buffer
433  * @tile:               tile buffer
434  * @prob:               prob table buffer, used to set/update prob table
435  * @counts:             counts table buffer, used to update prob table
436  * @frame_ctx:          4 frame context according to VP9 Spec
437  * @frame_ctx_helper:   4 frame context according to newest kernel spec
438  * @dirty:              state of each frame context
439  * @init_vsi:           vsi used for initialized VP9 instance
440  * @vsi:                vsi used for decoding/flush ...
441  * @core_vsi:           vsi used for Core stage
442  *
443  * @sc_pfc:             per frame context single core
444  * @counts_map: used map to counts_helper
445  * @counts_helper:      counts table according to newest kernel spec
446  */
447 struct vdec_vp9_slice_instance {
448         struct mtk_vcodec_ctx *ctx;
449         struct vdec_vpu_inst vpu;
450
451         int seq;
452
453         enum vdec_vp9_slice_resolution_level level;
454
455         /* for resolution change and get_pic_info */
456         unsigned int width;
457         unsigned int height;
458
459         /* for last_frame_type */
460         unsigned int frame_type;
461         unsigned int irq;
462
463         unsigned int show_frame;
464
465         /* maintain vp9 reference frame state */
466         struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];
467
468         /*
469          * normal working buffers
470          * mv[0]/seg[0]/tile/prob/counts is used for LAT
471          * mv[1]/seg[1] is used for CORE
472          */
473         struct mtk_vcodec_mem mv[2];
474         struct mtk_vcodec_mem seg[2];
475         struct mtk_vcodec_mem tile;
476         struct mtk_vcodec_mem prob;
477         struct mtk_vcodec_mem counts;
478
479         /* 4 prob tables */
480         struct vdec_vp9_slice_frame_ctx frame_ctx[4];
481         /*4 helper tables */
482         struct v4l2_vp9_frame_context frame_ctx_helper;
483         unsigned char dirty[4];
484
485         /* MicroP vsi */
486         union {
487                 struct vdec_vp9_slice_init_vsi *init_vsi;
488                 struct vdec_vp9_slice_vsi *vsi;
489         };
490         struct vdec_vp9_slice_vsi *core_vsi;
491
492         struct vdec_vp9_slice_pfc sc_pfc;
493         struct vdec_vp9_slice_counts_map counts_map;
494         struct v4l2_vp9_frame_symbol_counts counts_helper;
495 };
496
497 /*
498  * all VP9 instances could share this default frame context.
 *
 * The pointer is set once, under vdec_vp9_slice_frame_ctx_lock, by
 * vdec_vp9_slice_init_default_frame_ctx(), which stores a kmemdup() copy of
 * the table mapped from the firmware. Nothing in the code visible here
 * frees it.
499  */
500 static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx;
/* serializes the one-time initialisation of vdec_vp9_slice_default_frame_ctx */
501 static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock);
502
/* forward declaration; the definition is not within the code visible here */
503 static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf);
504
505 static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance)
506 {
507         struct vdec_vp9_slice_frame_ctx *remote_frame_ctx;
508         struct vdec_vp9_slice_frame_ctx *frame_ctx;
509         struct mtk_vcodec_ctx *ctx;
510         struct vdec_vp9_slice_init_vsi *vsi;
511         int ret = 0;
512
513         ctx = instance->ctx;
514         vsi = instance->vpu.vsi;
515         if (!ctx || !vsi)
516                 return -EINVAL;
517
518         remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
519                                                      (u32)vsi->default_frame_ctx);
520         if (!remote_frame_ctx) {
521                 mtk_vcodec_err(instance, "failed to map default frame ctx\n");
522                 return -EINVAL;
523         }
524
525         mutex_lock(&vdec_vp9_slice_frame_ctx_lock);
526         if (vdec_vp9_slice_default_frame_ctx)
527                 goto out;
528
529         frame_ctx = kmemdup(remote_frame_ctx, sizeof(*frame_ctx), GFP_KERNEL);
530         if (!frame_ctx) {
531                 ret = -ENOMEM;
532                 goto out;
533         }
534
535         vdec_vp9_slice_default_frame_ctx = frame_ctx;
536
537 out:
538         mutex_unlock(&vdec_vp9_slice_frame_ctx_lock);
539
540         return ret;
541 }
542
543 static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance,
544                                                struct vdec_vp9_slice_vsi *vsi)
545 {
546         struct mtk_vcodec_ctx *ctx = instance->ctx;
547         enum vdec_vp9_slice_resolution_level level;
548         /* super blocks */
549         unsigned int max_sb_w;
550         unsigned int max_sb_h;
551         unsigned int max_w;
552         unsigned int max_h;
553         unsigned int w;
554         unsigned int h;
555         size_t size;
556         int ret;
557         int i;
558
559         w = vsi->frame.uh.frame_width;
560         h = vsi->frame.uh.frame_height;
561
562         if (w > VCODEC_DEC_4K_CODED_WIDTH ||
563             h > VCODEC_DEC_4K_CODED_HEIGHT) {
564                 return -EINVAL;
565         } else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
566                 /* 4K */
567                 level = VP9_RES_4K;
568                 max_w = VCODEC_DEC_4K_CODED_WIDTH;
569                 max_h = VCODEC_DEC_4K_CODED_HEIGHT;
570         } else {
571                 /* FHD */
572                 level = VP9_RES_FHD;
573                 max_w = MTK_VDEC_MAX_W;
574                 max_h = MTK_VDEC_MAX_H;
575         }
576
577         if (level == instance->level)
578                 return 0;
579
580         mtk_vcodec_debug(instance, "resolution level changed, from %u to %u, %ux%u",
581                          instance->level, level, w, h);
582
583         max_sb_w = DIV_ROUND_UP(max_w, 64);
584         max_sb_h = DIV_ROUND_UP(max_h, 64);
585         ret = -ENOMEM;
586
587         /*
588          * Lat-flush must wait core idle, otherwise core will
589          * use released buffers
590          */
591
592         size = (max_sb_w * max_sb_h + 2) * 576;
593         for (i = 0; i < 2; i++) {
594                 if (instance->mv[i].va)
595                         mtk_vcodec_mem_free(ctx, &instance->mv[i]);
596                 instance->mv[i].size = size;
597                 if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i]))
598                         goto err;
599         }
600
601         size = (max_sb_w * max_sb_h * 32) + 256;
602         for (i = 0; i < 2; i++) {
603                 if (instance->seg[i].va)
604                         mtk_vcodec_mem_free(ctx, &instance->seg[i]);
605                 instance->seg[i].size = size;
606                 if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i]))
607                         goto err;
608         }
609
610         if (!instance->tile.va) {
611                 instance->tile.size = VP9_TILE_BUF_SIZE;
612                 if (mtk_vcodec_mem_alloc(ctx, &instance->tile))
613                         goto err;
614         }
615
616         if (!instance->prob.va) {
617                 instance->prob.size = VP9_PROB_BUF_SIZE;
618                 if (mtk_vcodec_mem_alloc(ctx, &instance->prob))
619                         goto err;
620         }
621
622         if (!instance->counts.va) {
623                 instance->counts.size = VP9_COUNTS_BUF_SIZE;
624                 if (mtk_vcodec_mem_alloc(ctx, &instance->counts))
625                         goto err;
626         }
627
628         instance->level = level;
629         return 0;
630
631 err:
632         instance->level = VP9_RES_NONE;
633         return ret;
634 }
635
636 static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance)
637 {
638         struct mtk_vcodec_ctx *ctx = instance->ctx;
639         int i;
640
641         for (i = 0; i < ARRAY_SIZE(instance->mv); i++) {
642                 if (instance->mv[i].va)
643                         mtk_vcodec_mem_free(ctx, &instance->mv[i]);
644         }
645         for (i = 0; i < ARRAY_SIZE(instance->seg); i++) {
646                 if (instance->seg[i].va)
647                         mtk_vcodec_mem_free(ctx, &instance->seg[i]);
648         }
649         if (instance->tile.va)
650                 mtk_vcodec_mem_free(ctx, &instance->tile);
651         if (instance->prob.va)
652                 mtk_vcodec_mem_free(ctx, &instance->prob);
653         if (instance->counts.va)
654                 mtk_vcodec_mem_free(ctx, &instance->counts);
655
656         instance->level = VP9_RES_NONE;
657 }
658
659 static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi,
660                                            struct vdec_vp9_slice_vsi *remote_vsi,
661                                            int skip)
662 {
663         struct vdec_vp9_slice_frame *rf;
664         struct vdec_vp9_slice_frame *f;
665
666         /*
667          * compressed header
668          * dequant
669          * buffer position
670          * decode state
671          */
672         if (!skip) {
673                 rf = &remote_vsi->frame;
674                 f = &vsi->frame;
675                 memcpy(&f->ch, &rf->ch, sizeof(f->ch));
676                 memcpy(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant));
677                 memcpy(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
678         }
679
680         memcpy(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
681 }
682
683 static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
684                                          struct vdec_vp9_slice_vsi *remote_vsi)
685 {
686         memcpy(remote_vsi, vsi, sizeof(*vsi));
687 }
688
/*
 * Start position, in mode-info units, of tile number @idx along one
 * dimension.
 *
 * @idx:       tile index; passing the tile count yields the end position
 * @mi_num:    size of the dimension in mode-info units
 * @tile_log2: log2 of the number of tiles along the dimension
 *
 * The dimension is split on super-block boundaries (8 mode-info units each)
 * and the result is clamped to @mi_num.
 */
static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
{
	int sb_total = (mi_num + 7) >> 3;
	int mi_start = ((idx * sb_total) >> tile_log2) << 3;

	if (mi_start > mi_num)
		return mi_num;

	return mi_start;
}
696
697 static
698 int vdec_vp9_slice_setup_single_from_src_to_dst(struct vdec_vp9_slice_instance *instance)
699 {
700         struct vb2_v4l2_buffer *src;
701         struct vb2_v4l2_buffer *dst;
702
703         src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
704         if (!src)
705                 return -EINVAL;
706
707         dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
708         if (!dst)
709                 return -EINVAL;
710
711         v4l2_m2m_buf_copy_metadata(src, dst, true);
712
713         return 0;
714 }
715
716 static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance,
717                                                  struct vdec_lat_buf *lat_buf)
718 {
719         struct vb2_v4l2_buffer *src;
720         struct vb2_v4l2_buffer *dst;
721
722         src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
723         if (!src)
724                 return -EINVAL;
725
726         lat_buf->src_buf_req = src->vb2_buf.req_obj.req;
727
728         dst = &lat_buf->ts_info;
729         v4l2_m2m_buf_copy_metadata(src, dst, true);
730         return 0;
731 }
732
733 static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance,
734                                      struct vdec_vp9_slice_uncompressed_header *uh,
735                                      struct v4l2_ctrl_vp9_frame *hdr)
736 {
737         int i;
738
739         uh->profile = hdr->profile;
740         uh->last_frame_type = instance->frame_type;
741         uh->frame_type = !HDR_FLAG(KEY_FRAME);
742         uh->last_show_frame = instance->show_frame;
743         uh->show_frame = HDR_FLAG(SHOW_FRAME);
744         uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
745         uh->bit_depth = hdr->bit_depth;
746         uh->last_frame_width = instance->width;
747         uh->last_frame_height = instance->height;
748         uh->frame_width = hdr->frame_width_minus_1 + 1;
749         uh->frame_height = hdr->frame_height_minus_1 + 1;
750         uh->intra_only = HDR_FLAG(INTRA_ONLY);
751         /* map v4l2 enum to values defined in VP9 spec for firmware */
752         switch (hdr->reset_frame_context) {
753         case V4L2_VP9_RESET_FRAME_CTX_NONE:
754                 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
755                 break;
756         case V4L2_VP9_RESET_FRAME_CTX_SPEC:
757                 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC;
758                 break;
759         case V4L2_VP9_RESET_FRAME_CTX_ALL:
760                 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL;
761                 break;
762         default:
763                 uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
764                 break;
765         }
766         /*
767          * ref_frame_sign_bias specifies the intended direction
768          * of the motion vector in time for each reference frame.
769          * - INTRA_FRAME = 0,
770          * - LAST_FRAME = 1,
771          * - GOLDEN_FRAME = 2,
772          * - ALTREF_FRAME = 3,
773          * ref_frame_sign_bias[INTRA_FRAME] is always 0
774          * and VDA only passes another 3 directions
775          */
776         uh->ref_frame_sign_bias[0] = 0;
777         for (i = 0; i < 3; i++)
778                 uh->ref_frame_sign_bias[i + 1] =
779                         !!(hdr->ref_frame_sign_bias & (1 << i));
780         uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV);
781         uh->interpolation_filter = hdr->interpolation_filter;
782         uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX);
783         uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE);
784         uh->frame_context_idx = hdr->frame_context_idx;
785
786         /* tile info */
787         uh->tile_cols_log2 = hdr->tile_cols_log2;
788         uh->tile_rows_log2 = hdr->tile_rows_log2;
789
790         uh->uncompressed_header_size = hdr->uncompressed_header_size;
791         uh->header_size_in_bytes = hdr->compressed_header_size;
792 }
793
794 static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance,
795                                            struct vdec_vp9_slice_uncompressed_header *uh,
796                                            struct v4l2_ctrl_vp9_frame *hdr)
797 {
798         int error_resilient_mode;
799         int reset_frame_context;
800         int key_frame;
801         int intra_only;
802         int i;
803
804         key_frame = HDR_FLAG(KEY_FRAME);
805         intra_only = HDR_FLAG(INTRA_ONLY);
806         error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
807         reset_frame_context = uh->reset_frame_context;
808
809         /*
810          * according to "6.2 Uncompressed header syntax" in
811          * "VP9 Bitstream & Decoding Process Specification",
812          * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode)
813          */
814         if (key_frame || intra_only || error_resilient_mode) {
815                 /*
816                  * @reset_frame_context specifies
817                  * whether the frame context should be
818                  * reset to default values:
819                  * 0 or 1 means do not reset any frame context
820                  * 2 resets just the context specified in the frame header
821                  * 3 resets all contexts
822                  */
823                 if (key_frame || error_resilient_mode ||
824                     reset_frame_context == 3) {
825                         /* use default table */
826                         for (i = 0; i < 4; i++)
827                                 instance->dirty[i] = 0;
828                 } else if (reset_frame_context == 2) {
829                         instance->dirty[uh->frame_context_idx] = 0;
830                 }
831                 uh->frame_context_idx = 0;
832         }
833 }
834
835 static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh,
836                                              struct v4l2_vp9_loop_filter *lf)
837 {
838         int i;
839
840         uh->loop_filter_level = lf->level;
841         uh->loop_filter_sharpness = lf->sharpness;
842         uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED);
843         for (i = 0; i < 4; i++)
844                 uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i];
845         for (i = 0; i < 2; i++)
846                 uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i];
847 }
848
849 static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
850                                               struct v4l2_vp9_quantization *quant)
851 {
852         uh->base_q_idx = quant->base_q_idx;
853         uh->delta_q_y_dc = quant->delta_q_y_dc;
854         uh->delta_q_uv_dc = quant->delta_q_uv_dc;
855         uh->delta_q_uv_ac = quant->delta_q_uv_ac;
856 }
857
858 static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh,
859                                               struct v4l2_vp9_segmentation *seg)
860 {
861         int i;
862         int j;
863
864         uh->segmentation_enabled = SEG_FLAG(ENABLED);
865         uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP);
866         for (i = 0; i < 7; i++)
867                 uh->segmentation_tree_probs[i] = seg->tree_probs[i];
868         uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE);
869         for (i = 0; i < 3; i++)
870                 uh->segmentation_pred_prob[i] = seg->pred_probs[i];
871         uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA);
872         uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE);
873         for (i = 0; i < 8; i++) {
874                 uh->feature_enabled[i] = seg->feature_enabled[i];
875                 for (j = 0; j < 4; j++)
876                         uh->feature_value[i][j] = seg->feature_data[i][j];
877         }
878 }
879
880 static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi,
881                                      struct v4l2_ctrl_vp9_frame *hdr)
882 {
883         unsigned int rows_log2;
884         unsigned int cols_log2;
885         unsigned int rows;
886         unsigned int cols;
887         unsigned int mi_rows;
888         unsigned int mi_cols;
889         struct vdec_vp9_slice_tiles *tiles;
890         int offset;
891         int start;
892         int end;
893         int i;
894
895         rows_log2 = hdr->tile_rows_log2;
896         cols_log2 = hdr->tile_cols_log2;
897         rows = 1 << rows_log2;
898         cols = 1 << cols_log2;
899         tiles = &vsi->frame.tiles;
900         tiles->actual_rows = 0;
901
902         if (rows > 4 || cols > 64)
903                 return -EINVAL;
904
905         /* setup mi rows/cols information */
906         mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3;
907         mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3;
908
909         for (i = 0; i < rows; i++) {
910                 start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2);
911                 end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2);
912                 offset = end - start;
913                 tiles->mi_rows[i] = (offset + 7) >> 3;
914                 if (tiles->mi_rows[i])
915                         tiles->actual_rows++;
916         }
917
918         for (i = 0; i < cols; i++) {
919                 start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2);
920                 end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2);
921                 offset = end - start;
922                 tiles->mi_cols[i] = (offset + 7) >> 3;
923         }
924
925         return 0;
926 }
927
928 static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
929 {
930         memset(&vsi->state, 0, sizeof(vsi->state));
931 }
932
933 static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc,
934                                          struct v4l2_ctrl_vp9_frame *hdr)
935 {
936         pfc->ref_idx[0] = hdr->last_frame_ts;
937         pfc->ref_idx[1] = hdr->golden_frame_ts;
938         pfc->ref_idx[2] = hdr->alt_frame_ts;
939 }
940
941 static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance,
942                                     struct vdec_vp9_slice_pfc *pfc)
943 {
944         struct v4l2_ctrl_vp9_frame *hdr;
945         struct vdec_vp9_slice_uncompressed_header *uh;
946         struct v4l2_ctrl *hdr_ctrl;
947         struct vdec_vp9_slice_vsi *vsi;
948         int ret;
949
950         /* frame header */
951         hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
952         if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
953                 return -EINVAL;
954
955         hdr = hdr_ctrl->p_cur.p;
956         vsi = &pfc->vsi;
957         uh = &vsi->frame.uh;
958
959         /* setup vsi information */
960         vdec_vp9_slice_setup_hdr(instance, uh, hdr);
961         vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
962         vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
963         vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
964         vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
965         ret = vdec_vp9_slice_setup_tile(vsi, hdr);
966         if (ret)
967                 return ret;
968         vdec_vp9_slice_setup_state(vsi);
969
970         /* core stage needs buffer index to get ref y/c ... */
971         vdec_vp9_slice_setup_ref_idx(pfc, hdr);
972
973         pfc->seq = instance->seq;
974         instance->seq++;
975
976         return 0;
977 }
978
979 static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
980                                            struct vdec_vp9_slice_vsi *vsi,
981                                            struct mtk_vcodec_mem *bs,
982                                            struct vdec_lat_buf *lat_buf)
983 {
984         int i;
985
986         vsi->bs.buf.dma_addr = bs->dma_addr;
987         vsi->bs.buf.size = bs->size;
988         vsi->bs.frame.dma_addr = bs->dma_addr;
989         vsi->bs.frame.size = bs->size;
990
991         for (i = 0; i < 2; i++) {
992                 vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
993                 vsi->mv[i].size = instance->mv[i].size;
994         }
995         for (i = 0; i < 2; i++) {
996                 vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
997                 vsi->seg[i].size = instance->seg[i].size;
998         }
999         vsi->tile.dma_addr = instance->tile.dma_addr;
1000         vsi->tile.size = instance->tile.size;
1001         vsi->prob.dma_addr = instance->prob.dma_addr;
1002         vsi->prob.size = instance->prob.size;
1003         vsi->counts.dma_addr = instance->counts.dma_addr;
1004         vsi->counts.size = instance->counts.size;
1005
1006         vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
1007         vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
1008         vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
1009         /* used to store trans end */
1010         vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
1011         vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
1012         vsi->err_map.size = lat_buf->wdma_err_addr.size;
1013
1014         vsi->row_info.buf = 0;
1015         vsi->row_info.size = 0;
1016
1017         return 0;
1018 }
1019
1020 static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
1021                                             struct vdec_vp9_slice_vsi *vsi)
1022 {
1023         struct vdec_vp9_slice_frame_ctx *frame_ctx;
1024         struct vdec_vp9_slice_uncompressed_header *uh;
1025
1026         uh = &vsi->frame.uh;
1027
1028         mtk_vcodec_debug(instance, "ctx dirty %u idx %d\n",
1029                          instance->dirty[uh->frame_context_idx],
1030                          uh->frame_context_idx);
1031
1032         if (instance->dirty[uh->frame_context_idx])
1033                 frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1034         else
1035                 frame_ctx = vdec_vp9_slice_default_frame_ctx;
1036         memcpy(instance->prob.va, frame_ctx, sizeof(*frame_ctx));
1037
1038         return 0;
1039 }
1040
1041 static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
1042                                             struct vdec_vp9_slice_vsi *vsi,
1043                                             struct mtk_vcodec_mem *buf)
1044 {
1045         struct vdec_vp9_slice_uncompressed_header *uh;
1046
1047         /* reset segment buffer */
1048         uh = &vsi->frame.uh;
1049         if (uh->frame_type == 0 ||
1050             uh->intra_only ||
1051             uh->error_resilient_mode ||
1052             uh->frame_width != instance->width ||
1053             uh->frame_height != instance->height) {
1054                 mtk_vcodec_debug(instance, "reset seg\n");
1055                 memset(buf->va, 0, buf->size);
1056         }
1057 }
1058
1059 /*
1060  * parse tiles according to `6.4 Decode tiles syntax`
1061  * in "vp9-bitstream-specification"
1062  *
1063  * frame contains uncompress header, compressed header and several tiles.
1064  * this function parses tiles' position and size, stores them to tile buffer
1065  * for decoding.
1066  */
1067 static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
1068                                             struct vdec_vp9_slice_vsi *vsi,
1069                                             struct mtk_vcodec_mem *bs)
1070 {
1071         struct vdec_vp9_slice_uncompressed_header *uh;
1072         unsigned int rows_log2;
1073         unsigned int cols_log2;
1074         unsigned int rows;
1075         unsigned int cols;
1076         unsigned int mi_row;
1077         unsigned int mi_col;
1078         unsigned int offset;
1079         unsigned int pa;
1080         unsigned int size;
1081         struct vdec_vp9_slice_tiles *tiles;
1082         unsigned char *pos;
1083         unsigned char *end;
1084         unsigned char *va;
1085         unsigned int *tb;
1086         int i;
1087         int j;
1088
1089         uh = &vsi->frame.uh;
1090         rows_log2 = uh->tile_rows_log2;
1091         cols_log2 = uh->tile_cols_log2;
1092         rows = 1 << rows_log2;
1093         cols = 1 << cols_log2;
1094
1095         if (rows > 4 || cols > 64) {
1096                 mtk_vcodec_err(instance, "tile_rows %u tile_cols %u\n",
1097                                rows, cols);
1098                 return -EINVAL;
1099         }
1100
1101         offset = uh->uncompressed_header_size +
1102                 uh->header_size_in_bytes;
1103         if (bs->size <= offset) {
1104                 mtk_vcodec_err(instance, "bs size %zu tile offset %u\n",
1105                                bs->size, offset);
1106                 return -EINVAL;
1107         }
1108
1109         tiles = &vsi->frame.tiles;
1110         /* setup tile buffer */
1111
1112         va = (unsigned char *)bs->va;
1113         pos = va + offset;
1114         end = va + bs->size;
1115         /* truncated */
1116         pa = (unsigned int)bs->dma_addr + offset;
1117         tb = instance->tile.va;
1118         for (i = 0; i < rows; i++) {
1119                 for (j = 0; j < cols; j++) {
1120                         if (i == rows - 1 &&
1121                             j == cols - 1) {
1122                                 size = (unsigned int)(end - pos);
1123                         } else {
1124                                 if (end - pos < 4)
1125                                         return -EINVAL;
1126
1127                                 size = (pos[0] << 24) | (pos[1] << 16) |
1128                                         (pos[2] << 8) | pos[3];
1129                                 pos += 4;
1130                                 pa += 4;
1131                                 offset += 4;
1132                                 if (end - pos < size)
1133                                         return -EINVAL;
1134                         }
1135                         tiles->size[i][j] = size;
1136                         if (tiles->mi_rows[i]) {
1137                                 *tb++ = (size << 3) + ((offset << 3) & 0x7f);
1138                                 *tb++ = pa & ~0xf;
1139                                 *tb++ = (pa << 3) & 0x7f;
1140                                 mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
1141                                 mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
1142                                 *tb++ = (mi_row << 6) + mi_col;
1143                         }
1144                         pos += size;
1145                         pa += size;
1146                         offset += size;
1147                 }
1148         }
1149
1150         return 0;
1151 }
1152
1153 static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance,
1154                                     struct mtk_vcodec_mem *bs,
1155                                     struct vdec_lat_buf *lat_buf,
1156                                     struct vdec_vp9_slice_pfc *pfc)
1157 {
1158         struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1159         int ret;
1160
1161         ret = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf);
1162         if (ret)
1163                 goto err;
1164
1165         ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1166         if (ret)
1167                 goto err;
1168
1169         ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1170         if (ret)
1171                 goto err;
1172
1173         ret = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
1174         if (ret)
1175                 goto err;
1176
1177         vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1178
1179         /* setup prob/tile buffers for LAT */
1180
1181         ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1182         if (ret)
1183                 goto err;
1184
1185         ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1186         if (ret)
1187                 goto err;
1188
1189         return 0;
1190
1191 err:
1192         return ret;
1193 }
1194
1195 static
1196 void vdec_vp9_slice_map_counts_eob_coef(unsigned int i, unsigned int j, unsigned int k,
1197                                         struct vdec_vp9_slice_frame_counts *counts,
1198                                         struct v4l2_vp9_frame_symbol_counts *counts_helper)
1199 {
1200         u32 l = 0, m;
1201
1202         /*
1203          * helper eo -> mtk eo
1204          * helpre e1 -> mtk c3
1205          * helper c0 -> c0
1206          * helper c1 -> c1
1207          * helper c2 -> c2
1208          */
1209         for (m = 0; m < 3; m++) {
1210                 counts_helper->coeff[i][j][k][l][m] =
1211                         (u32 (*)[3]) & counts->coef_probs[i][j][k].band_0[m];
1212                 counts_helper->eob[i][j][k][l][m][0] =
1213                         &counts->eob_branch[i][j][k].band_0[m];
1214                 counts_helper->eob[i][j][k][l][m][1] =
1215                         &counts->coef_probs[i][j][k].band_0[m][3];
1216         }
1217
1218         for (l = 1; l < 6; l++) {
1219                 for (m = 0; m < 6; m++) {
1220                         counts_helper->coeff[i][j][k][l][m] =
1221                                 (u32 (*)[3]) & counts->coef_probs[i][j][k].band_1_5[l - 1][m];
1222                         counts_helper->eob[i][j][k][l][m][0] =
1223                                 &counts->eob_branch[i][j][k].band_1_5[l - 1][m];
1224                         counts_helper->eob[i][j][k][l][m][1] =
1225                                 &counts->coef_probs[i][j][k].band_1_5[l - 1][m][3];
1226                 }
1227         }
1228 }
1229
1230 static void vdec_vp9_slice_counts_map_helper(struct vdec_vp9_slice_counts_map *counts_map,
1231                                              struct vdec_vp9_slice_frame_counts *counts,
1232                                              struct v4l2_vp9_frame_symbol_counts *counts_helper)
1233 {
1234         int i, j, k;
1235
1236         counts_helper->partition = &counts->partition;
1237         counts_helper->intra_inter = &counts->intra_inter;
1238         counts_helper->tx32p = &counts->tx_p32x32;
1239         counts_helper->tx16p = &counts->tx_p16x16;
1240         counts_helper->tx8p = &counts->tx_p8x8;
1241         counts_helper->uv_mode = &counts->uv_mode;
1242
1243         counts_helper->comp = &counts->comp_inter;
1244         counts_helper->comp_ref = &counts->comp_ref;
1245         counts_helper->single_ref = &counts->single_ref;
1246         counts_helper->mv_mode = &counts->inter_mode;
1247         counts_helper->mv_joint = &counts->joint;
1248
1249         for (i = 0; i < ARRAY_SIZE(counts_map->skip); i++)
1250                 memcpy(counts_map->skip[i], counts->skip[i],
1251                        sizeof(counts_map->skip[0]));
1252         counts_helper->skip = &counts_map->skip;
1253
1254         for (i = 0; i < ARRAY_SIZE(counts_map->y_mode); i++)
1255                 memcpy(counts_map->y_mode[i], counts->y_mode[i],
1256                        sizeof(counts_map->y_mode[0]));
1257         counts_helper->y_mode = &counts_map->y_mode;
1258
1259         for (i = 0; i < ARRAY_SIZE(counts_map->filter); i++)
1260                 memcpy(counts_map->filter[i], counts->switchable_interp[i],
1261                        sizeof(counts_map->filter[0]));
1262         counts_helper->filter = &counts_map->filter;
1263
1264         for (i = 0; i < ARRAY_SIZE(counts_map->sign); i++)
1265                 memcpy(counts_map->sign[i], counts->mvcomp[i].sign,
1266                        sizeof(counts_map->sign[0]));
1267         counts_helper->sign = &counts_map->sign;
1268
1269         for (i = 0; i < ARRAY_SIZE(counts_map->classes); i++)
1270                 memcpy(counts_map->classes[i], counts->mvcomp[i].classes,
1271                        sizeof(counts_map->classes[0]));
1272         counts_helper->classes = &counts_map->classes;
1273
1274         for (i = 0; i < ARRAY_SIZE(counts_map->class0); i++)
1275                 memcpy(counts_map->class0[i], counts->mvcomp[i].class0,
1276                        sizeof(counts_map->class0[0]));
1277         counts_helper->class0 = &counts_map->class0;
1278
1279         for (i = 0; i < ARRAY_SIZE(counts_map->bits); i++)
1280                 for (j = 0; j < ARRAY_SIZE(counts_map->bits[0]); j++)
1281                         memcpy(counts_map->bits[i][j], counts->mvcomp[i].bits[j],
1282                                sizeof(counts_map->bits[0][0]));
1283         counts_helper->bits = &counts_map->bits;
1284
1285         for (i = 0; i < ARRAY_SIZE(counts_map->class0_fp); i++)
1286                 for (j = 0; j < ARRAY_SIZE(counts_map->class0_fp[0]); j++)
1287                         memcpy(counts_map->class0_fp[i][j], counts->mvcomp[i].class0_fp[j],
1288                                sizeof(counts_map->class0_fp[0][0]));
1289         counts_helper->class0_fp = &counts_map->class0_fp;
1290
1291         for (i = 0; i < ARRAY_SIZE(counts_map->fp); i++)
1292                 memcpy(counts_map->fp[i], counts->mvcomp[i].fp,
1293                        sizeof(counts_map->fp[0]));
1294         counts_helper->fp = &counts_map->fp;
1295
1296         for (i = 0; i < ARRAY_SIZE(counts_map->class0_hp); i++)
1297                 memcpy(counts_map->class0_hp[i], counts->mvcomp[i].class0_hp,
1298                        sizeof(counts_map->class0_hp[0]));
1299         counts_helper->class0_hp = &counts_map->class0_hp;
1300
1301         for (i = 0; i < ARRAY_SIZE(counts_map->hp); i++)
1302                 memcpy(counts_map->hp[i], counts->mvcomp[i].hp, sizeof(counts_map->hp[0]));
1303
1304         counts_helper->hp = &counts_map->hp;
1305
1306         for (i = 0; i < 4; i++)
1307                 for (j = 0; j < 2; j++)
1308                         for (k = 0; k < 2; k++)
1309                                 vdec_vp9_slice_map_counts_eob_coef(i, j, k, counts, counts_helper);
1310 }
1311
1312 static void vdec_vp9_slice_map_to_coef(unsigned int i, unsigned int j, unsigned int k,
1313                                        struct vdec_vp9_slice_frame_ctx *frame_ctx,
1314                                        struct v4l2_vp9_frame_context *frame_ctx_helper)
1315 {
1316         u32 l, m;
1317
1318         for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1319                 for (m = 0; m < VP9_BAND_6(l); m++) {
1320                         memcpy(frame_ctx_helper->coef[i][j][k][l][m],
1321                                frame_ctx->coef_probs[i][j][k][l].probs[m],
1322                                sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1323                 }
1324         }
1325 }
1326
1327 static void vdec_vp9_slice_map_from_coef(unsigned int i, unsigned int j, unsigned int k,
1328                                          struct vdec_vp9_slice_frame_ctx *frame_ctx,
1329                                          struct v4l2_vp9_frame_context *frame_ctx_helper)
1330 {
1331         u32 l, m;
1332
1333         for (l = 0; l < ARRAY_SIZE(frame_ctx_helper->coef[0][0][0]); l++) {
1334                 for (m = 0; m < VP9_BAND_6(l); m++) {
1335                         memcpy(frame_ctx->coef_probs[i][j][k][l].probs[m],
1336                                frame_ctx_helper->coef[i][j][k][l][m],
1337                                sizeof(frame_ctx_helper->coef[i][j][k][l][0]));
1338                 }
1339         }
1340 }
1341
1342 static
1343 void vdec_vp9_slice_framectx_map_helper(bool frame_is_intra,
1344                                         struct vdec_vp9_slice_frame_ctx *pre_frame_ctx,
1345                                         struct vdec_vp9_slice_frame_ctx *frame_ctx,
1346                                         struct v4l2_vp9_frame_context *frame_ctx_helper)
1347 {
1348         struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1349         u32 i, j, k;
1350
1351         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1352                 for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1353                         for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1354                                 vdec_vp9_slice_map_to_coef(i, j, k, pre_frame_ctx,
1355                                                            frame_ctx_helper);
1356
1357         /*
1358          * use previous prob when frame is not intra or
1359          * we should use the prob updated by the compressed header parse
1360          */
1361         if (!frame_is_intra)
1362                 frame_ctx = pre_frame_ctx;
1363
1364         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1365                 memcpy(frame_ctx_helper->tx8[i], frame_ctx->tx_p8x8[i],
1366                        sizeof(frame_ctx_helper->tx8[0]));
1367
1368         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1369                 memcpy(frame_ctx_helper->tx16[i], frame_ctx->tx_p16x16[i],
1370                        sizeof(frame_ctx_helper->tx16[0]));
1371
1372         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1373                 memcpy(frame_ctx_helper->tx32[i], frame_ctx->tx_p32x32[i],
1374                        sizeof(frame_ctx_helper->tx32[0]));
1375
1376         memcpy(frame_ctx_helper->skip, frame_ctx->skip_probs, sizeof(frame_ctx_helper->skip));
1377
1378         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1379                 memcpy(frame_ctx_helper->inter_mode[i], frame_ctx->inter_mode_probs[i],
1380                        sizeof(frame_ctx_helper->inter_mode[0]));
1381
1382         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1383                 memcpy(frame_ctx_helper->interp_filter[i], frame_ctx->switch_interp_prob[i],
1384                        sizeof(frame_ctx_helper->interp_filter[0]));
1385
1386         memcpy(frame_ctx_helper->is_inter, frame_ctx->intra_inter_prob,
1387                sizeof(frame_ctx_helper->is_inter));
1388
1389         memcpy(frame_ctx_helper->comp_mode, frame_ctx->comp_inter_prob,
1390                sizeof(frame_ctx_helper->comp_mode));
1391
1392         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1393                 memcpy(frame_ctx_helper->single_ref[i], frame_ctx->single_ref_prob[i],
1394                        sizeof(frame_ctx_helper->single_ref[0]));
1395
1396         memcpy(frame_ctx_helper->comp_ref, frame_ctx->comp_ref_prob,
1397                sizeof(frame_ctx_helper->comp_ref));
1398
1399         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1400                 memcpy(frame_ctx_helper->y_mode[i], frame_ctx->y_mode_prob[i],
1401                        sizeof(frame_ctx_helper->y_mode[0]));
1402
1403         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1404                 memcpy(frame_ctx_helper->uv_mode[i], frame_ctx->uv_mode_prob[i],
1405                        sizeof(frame_ctx_helper->uv_mode[0]));
1406
1407         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1408                 memcpy(frame_ctx_helper->partition[i], frame_ctx->partition_prob[i],
1409                        sizeof(frame_ctx_helper->partition[0]));
1410
1411         memcpy(mv->joint, frame_ctx->joint, sizeof(mv->joint));
1412
1413         for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1414                 mv->sign[i] = frame_ctx->sign_classes[i].sign;
1415
1416         for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1417                 memcpy(mv->classes[i], frame_ctx->sign_classes[i].classes,
1418                        sizeof(mv->classes[i]));
1419
1420         for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1421                 mv->class0_bit[i] = frame_ctx->class0_bits[i].class0[0];
1422
1423         for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1424                 memcpy(mv->bits[i], frame_ctx->class0_bits[i].bits, sizeof(mv->bits[0]));
1425
1426         for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1427                 for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1428                         memcpy(mv->class0_fr[i][j], frame_ctx->class0_fp_hp[i].class0_fp[j],
1429                                sizeof(mv->class0_fr[0][0]));
1430
1431         for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1432                 memcpy(mv->fr[i], frame_ctx->class0_fp_hp[i].fp, sizeof(mv->fr[0]));
1433
1434         for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1435                 mv->class0_hp[i] = frame_ctx->class0_fp_hp[i].class0_hp;
1436
1437         for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1438                 mv->hp[i] = frame_ctx->class0_fp_hp[i].hp;
1439 }
1440
1441 static void vdec_vp9_slice_helper_map_framectx(struct v4l2_vp9_frame_context *frame_ctx_helper,
1442                                                struct vdec_vp9_slice_frame_ctx *frame_ctx)
1443 {
1444         struct v4l2_vp9_frame_mv_context *mv = &frame_ctx_helper->mv;
1445         u32 i, j, k;
1446
1447         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx8); i++)
1448                 memcpy(frame_ctx->tx_p8x8[i], frame_ctx_helper->tx8[i],
1449                        sizeof(frame_ctx_helper->tx8[0]));
1450
1451         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx16); i++)
1452                 memcpy(frame_ctx->tx_p16x16[i], frame_ctx_helper->tx16[i],
1453                        sizeof(frame_ctx_helper->tx16[0]));
1454
1455         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->tx32); i++)
1456                 memcpy(frame_ctx->tx_p32x32[i], frame_ctx_helper->tx32[i],
1457                        sizeof(frame_ctx_helper->tx32[0]));
1458
1459         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->coef); i++)
1460                 for (j = 0; j < ARRAY_SIZE(frame_ctx_helper->coef[0]); j++)
1461                         for (k = 0; k < ARRAY_SIZE(frame_ctx_helper->coef[0][0]); k++)
1462                                 vdec_vp9_slice_map_from_coef(i, j, k, frame_ctx,
1463                                                              frame_ctx_helper);
1464
1465         memcpy(frame_ctx->skip_probs, frame_ctx_helper->skip, sizeof(frame_ctx_helper->skip));
1466
1467         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->inter_mode); i++)
1468                 memcpy(frame_ctx->inter_mode_probs[i], frame_ctx_helper->inter_mode[i],
1469                        sizeof(frame_ctx_helper->inter_mode[0]));
1470
1471         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->interp_filter); i++)
1472                 memcpy(frame_ctx->switch_interp_prob[i], frame_ctx_helper->interp_filter[i],
1473                        sizeof(frame_ctx_helper->interp_filter[0]));
1474
1475         memcpy(frame_ctx->intra_inter_prob, frame_ctx_helper->is_inter,
1476                sizeof(frame_ctx_helper->is_inter));
1477
1478         memcpy(frame_ctx->comp_inter_prob, frame_ctx_helper->comp_mode,
1479                sizeof(frame_ctx_helper->comp_mode));
1480
1481         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->single_ref); i++)
1482                 memcpy(frame_ctx->single_ref_prob[i], frame_ctx_helper->single_ref[i],
1483                        sizeof(frame_ctx_helper->single_ref[0]));
1484
1485         memcpy(frame_ctx->comp_ref_prob, frame_ctx_helper->comp_ref,
1486                sizeof(frame_ctx_helper->comp_ref));
1487
1488         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->y_mode); i++)
1489                 memcpy(frame_ctx->y_mode_prob[i], frame_ctx_helper->y_mode[i],
1490                        sizeof(frame_ctx_helper->y_mode[0]));
1491
1492         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->uv_mode); i++)
1493                 memcpy(frame_ctx->uv_mode_prob[i], frame_ctx_helper->uv_mode[i],
1494                        sizeof(frame_ctx_helper->uv_mode[0]));
1495
1496         for (i = 0; i < ARRAY_SIZE(frame_ctx_helper->partition); i++)
1497                 memcpy(frame_ctx->partition_prob[i], frame_ctx_helper->partition[i],
1498                        sizeof(frame_ctx_helper->partition[0]));
1499
1500         memcpy(frame_ctx->joint, mv->joint, sizeof(mv->joint));
1501
1502         for (i = 0; i < ARRAY_SIZE(mv->sign); i++)
1503                 frame_ctx->sign_classes[i].sign = mv->sign[i];
1504
1505         for (i = 0; i < ARRAY_SIZE(mv->classes); i++)
1506                 memcpy(frame_ctx->sign_classes[i].classes, mv->classes[i],
1507                        sizeof(mv->classes[i]));
1508
1509         for (i = 0; i < ARRAY_SIZE(mv->class0_bit); i++)
1510                 frame_ctx->class0_bits[i].class0[0] = mv->class0_bit[i];
1511
1512         for (i = 0; i < ARRAY_SIZE(mv->bits); i++)
1513                 memcpy(frame_ctx->class0_bits[i].bits, mv->bits[i], sizeof(mv->bits[0]));
1514
1515         for (i = 0; i < ARRAY_SIZE(mv->class0_fr); i++)
1516                 for (j = 0; j < ARRAY_SIZE(mv->class0_fr[0]); j++)
1517                         memcpy(frame_ctx->class0_fp_hp[i].class0_fp[j], mv->class0_fr[i][j],
1518                                sizeof(mv->class0_fr[0][0]));
1519
1520         for (i = 0; i < ARRAY_SIZE(mv->fr); i++)
1521                 memcpy(frame_ctx->class0_fp_hp[i].fp, mv->fr[i], sizeof(mv->fr[0]));
1522
1523         for (i = 0; i < ARRAY_SIZE(mv->class0_hp); i++)
1524                 frame_ctx->class0_fp_hp[i].class0_hp = mv->class0_hp[i];
1525
1526         for (i = 0; i < ARRAY_SIZE(mv->hp); i++)
1527                 frame_ctx->class0_fp_hp[i].hp = mv->hp[i];
1528 }
1529
1530 static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
1531                                       struct vdec_vp9_slice_vsi *vsi)
1532 {
1533         struct vdec_vp9_slice_frame_ctx *pre_frame_ctx;
1534         struct v4l2_vp9_frame_context *pre_frame_ctx_helper;
1535         struct vdec_vp9_slice_frame_ctx *frame_ctx;
1536         struct vdec_vp9_slice_frame_counts *counts;
1537         struct v4l2_vp9_frame_symbol_counts *counts_helper;
1538         struct vdec_vp9_slice_uncompressed_header *uh;
1539         bool frame_is_intra;
1540         bool use_128;
1541
1542         uh = &vsi->frame.uh;
1543         pre_frame_ctx = &instance->frame_ctx[uh->frame_context_idx];
1544         pre_frame_ctx_helper = &instance->frame_ctx_helper;
1545         frame_ctx = (struct vdec_vp9_slice_frame_ctx *)instance->prob.va;
1546         counts = (struct vdec_vp9_slice_frame_counts *)instance->counts.va;
1547         counts_helper = &instance->counts_helper;
1548
1549         if (!uh->refresh_frame_context)
1550                 return 0;
1551
1552         if (!uh->frame_parallel_decoding_mode) {
1553                 vdec_vp9_slice_counts_map_helper(&instance->counts_map, counts, counts_helper);
1554
1555                 frame_is_intra = !vsi->frame.uh.frame_type || vsi->frame.uh.intra_only;
1556                 /* check default prob */
1557                 if (!instance->dirty[uh->frame_context_idx])
1558                         vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1559                                                            vdec_vp9_slice_default_frame_ctx,
1560                                                            frame_ctx,
1561                                                            pre_frame_ctx_helper);
1562                 else
1563                         vdec_vp9_slice_framectx_map_helper(frame_is_intra,
1564                                                            pre_frame_ctx,
1565                                                            frame_ctx,
1566                                                            pre_frame_ctx_helper);
1567
1568                 use_128 = !frame_is_intra && !vsi->frame.uh.last_frame_type;
1569                 v4l2_vp9_adapt_coef_probs(pre_frame_ctx_helper,
1570                                           counts_helper,
1571                                           use_128,
1572                                           frame_is_intra);
1573                 if (!frame_is_intra)
1574                         v4l2_vp9_adapt_noncoef_probs(pre_frame_ctx_helper,
1575                                                      counts_helper,
1576                                                      V4L2_VP9_REFERENCE_MODE_SINGLE_REFERENCE,
1577                                                      vsi->frame.uh.interpolation_filter,
1578                                                      vsi->frame.ch.tx_mode,
1579                                                      vsi->frame.uh.allow_high_precision_mv ?
1580                                                      V4L2_VP9_FRAME_FLAG_ALLOW_HIGH_PREC_MV : 0);
1581                 vdec_vp9_slice_helper_map_framectx(pre_frame_ctx_helper, pre_frame_ctx);
1582         } else {
1583                 memcpy(pre_frame_ctx, frame_ctx, sizeof(*frame_ctx));
1584         }
1585
1586         instance->dirty[uh->frame_context_idx] = 1;
1587
1588         return 0;
1589 }
1590
1591 static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance,
1592                                         struct vdec_vp9_slice_pfc *pfc)
1593 {
1594         struct vdec_vp9_slice_vsi *vsi;
1595
1596         vsi = &pfc->vsi;
1597         memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1598
1599         mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1600                          pfc->seq,
1601                          vsi->state.crc[0], vsi->state.crc[1],
1602                          vsi->state.crc[2], vsi->state.crc[3]);
1603         mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n",
1604                          pfc->seq,
1605                          vsi->state.crc[4], vsi->state.crc[5],
1606                          vsi->state.crc[6], vsi->state.crc[7]);
1607
1608         vdec_vp9_slice_update_prob(instance, vsi);
1609
1610         instance->width = vsi->frame.uh.frame_width;
1611         instance->height = vsi->frame.uh.frame_height;
1612         instance->frame_type = vsi->frame.uh.frame_type;
1613         instance->show_frame = vsi->frame.uh.show_frame;
1614
1615         return 0;
1616 }
1617
1618 static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
1619                                      struct vdec_lat_buf *lat_buf,
1620                                      struct vdec_vp9_slice_pfc *pfc)
1621 {
1622         struct vdec_vp9_slice_vsi *vsi;
1623
1624         vsi = &pfc->vsi;
1625         memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
1626
1627         mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
1628                          pfc->seq, vsi->state.crc[0],
1629                          (unsigned long)vsi->trans.dma_addr,
1630                          (unsigned long)vsi->trans.dma_addr_end);
1631
1632         /* buffer full, need to re-decode */
1633         if (vsi->state.full) {
1634                 /* buffer not enough */
1635                 if (vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
1636                         vsi->ube.size)
1637                         return -ENOMEM;
1638                 return -EAGAIN;
1639         }
1640
1641         vdec_vp9_slice_update_prob(instance, vsi);
1642
1643         instance->width = vsi->frame.uh.frame_width;
1644         instance->height = vsi->frame.uh.frame_height;
1645         instance->frame_type = vsi->frame.uh.frame_type;
1646         instance->show_frame = vsi->frame.uh.show_frame;
1647
1648         return 0;
1649 }
1650
1651 static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
1652                                                 struct vdec_lat_buf *lat_buf)
1653 {
1654         struct vb2_v4l2_buffer *dst;
1655
1656         dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
1657         if (!dst)
1658                 return -EINVAL;
1659
1660         v4l2_m2m_buf_copy_metadata(&lat_buf->ts_info, dst, true);
1661         return 0;
1662 }
1663
1664 static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
1665                                             struct vdec_vp9_slice_pfc *pfc,
1666                                             struct vdec_vp9_slice_vsi *vsi,
1667                                             struct vdec_fb *fb,
1668                                             struct vdec_lat_buf *lat_buf)
1669 {
1670         struct vb2_buffer *vb;
1671         struct vb2_queue *vq;
1672         struct vdec_vp9_slice_reference *ref;
1673         int plane;
1674         int size;
1675         int w;
1676         int h;
1677         int i;
1678
1679         plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
1680         w = vsi->frame.uh.frame_width;
1681         h = vsi->frame.uh.frame_height;
1682         size = ALIGN(w, 64) * ALIGN(h, 64);
1683
1684         /* frame buffer */
1685         vsi->fb.y.dma_addr = fb->base_y.dma_addr;
1686         if (plane == 1)
1687                 vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
1688         else
1689                 vsi->fb.c.dma_addr = fb->base_c.dma_addr;
1690
1691         /* reference buffers */
1692         vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx,
1693                              V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
1694         if (!vq)
1695                 return -EINVAL;
1696
1697         /* get current output buffer */
1698         vb = &v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx)->vb2_buf;
1699         if (!vb)
1700                 return -EINVAL;
1701
1702         /* update internal buffer's width/height */
1703         for (i = 0; i < vq->num_buffers; i++) {
1704                 if (vb == vq->bufs[i]) {
1705                         instance->dpb[i].width = w;
1706                         instance->dpb[i].height = h;
1707                         break;
1708                 }
1709         }
1710
1711         /*
1712          * get buffer's width/height from instance
1713          * get buffer address from vb2buf
1714          */
1715         for (i = 0; i < 3; i++) {
1716                 ref = &vsi->frame.ref[i];
1717                 vb = vb2_find_buffer(vq, pfc->ref_idx[i]);
1718                 if (!vb) {
1719                         ref->frame_width = w;
1720                         ref->frame_height = h;
1721                         memset(&vsi->ref[i], 0, sizeof(vsi->ref[i]));
1722                 } else {
1723                         int idx = vb->index;
1724
1725                         ref->frame_width = instance->dpb[idx].width;
1726                         ref->frame_height = instance->dpb[idx].height;
1727                         vsi->ref[i].y.dma_addr =
1728                                 vb2_dma_contig_plane_dma_addr(vb, 0);
1729                         if (plane == 1)
1730                                 vsi->ref[i].c.dma_addr =
1731                                         vsi->ref[i].y.dma_addr + size;
1732                         else
1733                                 vsi->ref[i].c.dma_addr =
1734                                         vb2_dma_contig_plane_dma_addr(vb, 1);
1735                 }
1736         }
1737
1738         return 0;
1739 }
1740
1741 static void vdec_vp9_slice_setup_single_buffer(struct vdec_vp9_slice_instance *instance,
1742                                                struct vdec_vp9_slice_pfc *pfc,
1743                                                struct vdec_vp9_slice_vsi *vsi,
1744                                                struct mtk_vcodec_mem *bs,
1745                                                struct vdec_fb *fb)
1746 {
1747         int i;
1748
1749         vsi->bs.buf.dma_addr = bs->dma_addr;
1750         vsi->bs.buf.size = bs->size;
1751         vsi->bs.frame.dma_addr = bs->dma_addr;
1752         vsi->bs.frame.size = bs->size;
1753
1754         for (i = 0; i < 2; i++) {
1755                 vsi->mv[i].dma_addr = instance->mv[i].dma_addr;
1756                 vsi->mv[i].size = instance->mv[i].size;
1757         }
1758         for (i = 0; i < 2; i++) {
1759                 vsi->seg[i].dma_addr = instance->seg[i].dma_addr;
1760                 vsi->seg[i].size = instance->seg[i].size;
1761         }
1762         vsi->tile.dma_addr = instance->tile.dma_addr;
1763         vsi->tile.size = instance->tile.size;
1764         vsi->prob.dma_addr = instance->prob.dma_addr;
1765         vsi->prob.size = instance->prob.size;
1766         vsi->counts.dma_addr = instance->counts.dma_addr;
1767         vsi->counts.size = instance->counts.size;
1768
1769         vsi->row_info.buf = 0;
1770         vsi->row_info.size = 0;
1771
1772         vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, NULL);
1773 }
1774
1775 static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance,
1776                                      struct vdec_fb *fb,
1777                                      struct vdec_lat_buf *lat_buf,
1778                                      struct vdec_vp9_slice_pfc *pfc)
1779 {
1780         struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1781         int ret;
1782
1783         vdec_vp9_slice_setup_state(vsi);
1784
1785         ret = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf);
1786         if (ret)
1787                 goto err;
1788
1789         ret = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
1790         if (ret)
1791                 goto err;
1792
1793         vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]);
1794
1795         return 0;
1796
1797 err:
1798         return ret;
1799 }
1800
1801 static int vdec_vp9_slice_setup_single(struct vdec_vp9_slice_instance *instance,
1802                                        struct mtk_vcodec_mem *bs,
1803                                        struct vdec_fb *fb,
1804                                        struct vdec_vp9_slice_pfc *pfc)
1805 {
1806         struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
1807         int ret;
1808
1809         ret = vdec_vp9_slice_setup_single_from_src_to_dst(instance);
1810         if (ret)
1811                 goto err;
1812
1813         ret = vdec_vp9_slice_setup_pfc(instance, pfc);
1814         if (ret)
1815                 goto err;
1816
1817         ret = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
1818         if (ret)
1819                 goto err;
1820
1821         vdec_vp9_slice_setup_single_buffer(instance, pfc, vsi, bs, fb);
1822         vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
1823
1824         ret = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
1825         if (ret)
1826                 goto err;
1827
1828         ret = vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
1829         if (ret)
1830                 goto err;
1831
1832         return 0;
1833
1834 err:
1835         return ret;
1836 }
1837
1838 static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance,
1839                                       struct vdec_lat_buf *lat_buf,
1840                                       struct vdec_vp9_slice_pfc *pfc)
1841 {
1842         struct vdec_vp9_slice_vsi *vsi;
1843
1844         vsi = &pfc->vsi;
1845         memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state));
1846
1847         mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n",
1848                          pfc->seq,
1849                          vsi->state.crc[0], vsi->state.crc[1],
1850                          vsi->state.crc[2], vsi->state.crc[3]);
1851         mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n",
1852                          pfc->seq,
1853                          vsi->state.crc[4], vsi->state.crc[5],
1854                          vsi->state.crc[6], vsi->state.crc[7]);
1855
1856         return 0;
1857 }
1858
1859 static int vdec_vp9_slice_init(struct mtk_vcodec_ctx *ctx)
1860 {
1861         struct vdec_vp9_slice_instance *instance;
1862         struct vdec_vp9_slice_init_vsi *vsi;
1863         int ret;
1864
1865         instance = kzalloc(sizeof(*instance), GFP_KERNEL);
1866         if (!instance)
1867                 return -ENOMEM;
1868
1869         instance->ctx = ctx;
1870         instance->vpu.id = SCP_IPI_VDEC_LAT;
1871         instance->vpu.core_id = SCP_IPI_VDEC_CORE;
1872         instance->vpu.ctx = ctx;
1873         instance->vpu.codec_type = ctx->current_codec;
1874
1875         ret = vpu_dec_init(&instance->vpu);
1876         if (ret) {
1877                 mtk_vcodec_err(instance, "failed to init vpu dec, ret %d\n", ret);
1878                 goto error_vpu_init;
1879         }
1880
1881         /* init vsi and global flags */
1882
1883         vsi = instance->vpu.vsi;
1884         if (!vsi) {
1885                 mtk_vcodec_err(instance, "failed to get VP9 vsi\n");
1886                 ret = -EINVAL;
1887                 goto error_vsi;
1888         }
1889         instance->init_vsi = vsi;
1890         instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
1891                                                        (u32)vsi->core_vsi);
1892         if (!instance->core_vsi) {
1893                 mtk_vcodec_err(instance, "failed to get VP9 core vsi\n");
1894                 ret = -EINVAL;
1895                 goto error_vsi;
1896         }
1897
1898         instance->irq = 1;
1899
1900         ret = vdec_vp9_slice_init_default_frame_ctx(instance);
1901         if (ret)
1902                 goto error_default_frame_ctx;
1903
1904         ctx->drv_handle = instance;
1905
1906         return 0;
1907
1908 error_default_frame_ctx:
1909 error_vsi:
1910         vpu_dec_deinit(&instance->vpu);
1911 error_vpu_init:
1912         kfree(instance);
1913         return ret;
1914 }
1915
1916 static void vdec_vp9_slice_deinit(void *h_vdec)
1917 {
1918         struct vdec_vp9_slice_instance *instance = h_vdec;
1919
1920         if (!instance)
1921                 return;
1922
1923         vpu_dec_deinit(&instance->vpu);
1924         vdec_vp9_slice_free_working_buffer(instance);
1925         vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
1926         kfree(instance);
1927 }
1928
1929 static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
1930                                 struct vdec_fb *fb, bool *res_chg)
1931 {
1932         struct vdec_vp9_slice_instance *instance = h_vdec;
1933
1934         mtk_vcodec_debug(instance, "flush ...\n");
1935         if (instance->ctx->dev->vdec_pdata->hw_arch != MTK_VDEC_PURE_SINGLE_CORE)
1936                 vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
1937         return vpu_dec_reset(&instance->vpu);
1938 }
1939
1940 static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance)
1941 {
1942         struct mtk_vcodec_ctx *ctx = instance->ctx;
1943         unsigned int data[3];
1944
1945         mtk_vcodec_debug(instance, "w %u h %u\n",
1946                          ctx->picinfo.pic_w, ctx->picinfo.pic_h);
1947
1948         data[0] = ctx->picinfo.pic_w;
1949         data[1] = ctx->picinfo.pic_h;
1950         data[2] = ctx->capture_fourcc;
1951         vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
1952
1953         ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
1954         ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
1955         ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
1956         ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
1957 }
1958
/*
 * vdec_vp9_slice_get_dpb_size() - report the DPB size through @dpb_sz.
 *
 * The value is a constant and does not depend on @instance.
 */
static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
					unsigned int *dpb_sz)
{
	/* fixed value, refer to the VP9 specification */
	*dpb_sz = 9;
}
1965
1966 static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
1967 {
1968         struct vdec_vp9_slice_instance *instance = h_vdec;
1969
1970         switch (type) {
1971         case GET_PARAM_PIC_INFO:
1972                 vdec_vp9_slice_get_pic_info(instance);
1973                 break;
1974         case GET_PARAM_DPB_SIZE:
1975                 vdec_vp9_slice_get_dpb_size(instance, out);
1976                 break;
1977         case GET_PARAM_CROP_INFO:
1978                 mtk_vcodec_debug(instance, "No need to get vp9 crop information.");
1979                 break;
1980         default:
1981                 mtk_vcodec_err(instance, "invalid get parameter type=%d\n",
1982                                type);
1983                 return -EINVAL;
1984         }
1985
1986         return 0;
1987 }
1988
1989 static int vdec_vp9_slice_single_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
1990                                         struct vdec_fb *fb, bool *res_chg)
1991 {
1992         struct vdec_vp9_slice_instance *instance = h_vdec;
1993         struct vdec_vp9_slice_pfc *pfc = &instance->sc_pfc;
1994         struct vdec_vp9_slice_vsi *vsi;
1995         struct mtk_vcodec_ctx *ctx;
1996         int ret;
1997
1998         if (!instance || !instance->ctx)
1999                 return -EINVAL;
2000         ctx = instance->ctx;
2001
2002         /* bs NULL means flush decoder */
2003         if (!bs)
2004                 return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
2005
2006         fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2007         if (!fb)
2008                 return -EBUSY;
2009
2010         vsi = &pfc->vsi;
2011
2012         ret = vdec_vp9_slice_setup_single(instance, bs, fb, pfc);
2013         if (ret) {
2014                 mtk_vcodec_err(instance, "Failed to setup VP9 single ret %d\n", ret);
2015                 return ret;
2016         }
2017         vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
2018
2019         ret = vpu_dec_start(&instance->vpu, NULL, 0);
2020         if (ret) {
2021                 mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret);
2022                 return ret;
2023         }
2024
2025         ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2026                                            WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2027         /* update remote vsi if decode timeout */
2028         if (ret) {
2029                 mtk_vcodec_err(instance, "VP9 decode timeout %d\n", ret);
2030                 WRITE_ONCE(instance->vsi->state.timeout, 1);
2031         }
2032
2033         vpu_dec_end(&instance->vpu);
2034
2035         vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
2036         ret = vdec_vp9_slice_update_single(instance, pfc);
2037         if (ret) {
2038                 mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
2039                 return ret;
2040         }
2041
2042         instance->ctx->decoded_frame_cnt++;
2043         return 0;
2044 }
2045
2046 static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2047                                      struct vdec_fb *fb, bool *res_chg)
2048 {
2049         struct vdec_vp9_slice_instance *instance = h_vdec;
2050         struct vdec_lat_buf *lat_buf;
2051         struct vdec_vp9_slice_pfc *pfc;
2052         struct vdec_vp9_slice_vsi *vsi;
2053         struct mtk_vcodec_ctx *ctx;
2054         int ret;
2055
2056         if (!instance || !instance->ctx)
2057                 return -EINVAL;
2058         ctx = instance->ctx;
2059
2060         /* init msgQ for the first time */
2061         if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
2062                                 vdec_vp9_slice_core_decode,
2063                                 sizeof(*pfc)))
2064                 return -ENOMEM;
2065
2066         /* bs NULL means flush decoder */
2067         if (!bs)
2068                 return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
2069
2070         lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx);
2071         if (!lat_buf) {
2072                 mtk_vcodec_err(instance, "Failed to get VP9 lat buf\n");
2073                 return -EBUSY;
2074         }
2075         pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data;
2076         if (!pfc)
2077                 return -EINVAL;
2078         vsi = &pfc->vsi;
2079
2080         ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc);
2081         if (ret) {
2082                 mtk_vcodec_err(instance, "Failed to setup VP9 lat ret %d\n", ret);
2083                 return ret;
2084         }
2085         vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
2086
2087         ret = vpu_dec_start(&instance->vpu, NULL, 0);
2088         if (ret) {
2089                 mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret);
2090                 return ret;
2091         }
2092
2093         if (instance->irq) {
2094                 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2095                                                    WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
2096                 /* update remote vsi if decode timeout */
2097                 if (ret) {
2098                         mtk_vcodec_err(instance, "VP9 decode timeout %d pic %d\n", ret, pfc->seq);
2099                         WRITE_ONCE(instance->vsi->state.timeout, 1);
2100                 }
2101                 vpu_dec_end(&instance->vpu);
2102         }
2103
2104         vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
2105         ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
2106
2107         /* LAT trans full, no more UBE or decode timeout */
2108         if (ret) {
2109                 mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
2110                 return ret;
2111         }
2112
2113         mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
2114                          (unsigned long)pfc->vsi.trans.dma_addr,
2115                          (unsigned long)pfc->vsi.trans.dma_addr_end);
2116
2117         vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
2118                                        vsi->trans.dma_addr_end +
2119                                        ctx->msg_queue.wdma_addr.dma_addr);
2120         vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
2121
2122         return 0;
2123 }
2124
2125 static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
2126                                  struct vdec_fb *fb, bool *res_chg)
2127 {
2128         struct vdec_vp9_slice_instance *instance = h_vdec;
2129         int ret;
2130
2131         if (instance->ctx->dev->vdec_pdata->hw_arch == MTK_VDEC_PURE_SINGLE_CORE)
2132                 ret = vdec_vp9_slice_single_decode(h_vdec, bs, fb, res_chg);
2133         else
2134                 ret = vdec_vp9_slice_lat_decode(h_vdec, bs, fb, res_chg);
2135
2136         return ret;
2137 }
2138
2139 static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
2140 {
2141         struct vdec_vp9_slice_instance *instance;
2142         struct vdec_vp9_slice_pfc *pfc;
2143         struct mtk_vcodec_ctx *ctx = NULL;
2144         struct vdec_fb *fb = NULL;
2145         int ret = -EINVAL;
2146
2147         if (!lat_buf)
2148                 goto err;
2149
2150         pfc = lat_buf->private_data;
2151         ctx = lat_buf->ctx;
2152         if (!pfc || !ctx)
2153                 goto err;
2154
2155         instance = ctx->drv_handle;
2156         if (!instance)
2157                 goto err;
2158
2159         fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
2160         if (!fb) {
2161                 ret = -EBUSY;
2162                 goto err;
2163         }
2164
2165         ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
2166         if (ret) {
2167                 mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n");
2168                 goto err;
2169         }
2170         vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
2171
2172         ret = vpu_dec_core(&instance->vpu);
2173         if (ret) {
2174                 mtk_vcodec_err(instance, "vpu_dec_core\n");
2175                 goto err;
2176         }
2177
2178         if (instance->irq) {
2179                 ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
2180                                                    WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
2181                 /* update remote vsi if decode timeout */
2182                 if (ret) {
2183                         mtk_vcodec_err(instance, "VP9 core timeout pic %d\n", pfc->seq);
2184                         WRITE_ONCE(instance->core_vsi->state.timeout, 1);
2185                 }
2186                 vpu_dec_core_end(&instance->vpu);
2187         }
2188
2189         vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1);
2190         ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
2191         if (ret) {
2192                 mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n");
2193                 goto err;
2194         }
2195
2196         pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
2197         mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
2198                          (unsigned long)pfc->vsi.trans.dma_addr_end);
2199         vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2200         ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
2201
2202         return 0;
2203
2204 err:
2205         if (ctx && pfc) {
2206                 /* always update read pointer */
2207                 vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
2208
2209                 if (fb)
2210                         ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
2211         }
2212         return ret;
2213 }
2214
2215 const struct vdec_common_if vdec_vp9_slice_lat_if = {
2216         .init           = vdec_vp9_slice_init,
2217         .decode         = vdec_vp9_slice_decode,
2218         .get_param      = vdec_vp9_slice_get_param,
2219         .deinit         = vdec_vp9_slice_deinit,
2220 };