media: vicodec: rename and use proper fwht prefix for codec
author Hans Verkuil <hans.verkuil@cisco.com>
Tue, 21 Aug 2018 06:53:34 +0000 (02:53 -0400)
committer Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Fri, 31 Aug 2018 12:27:43 +0000 (08:27 -0400)
The codec source is generic and not vicodec specific. It can be used
by other drivers or userspace as well. So rename the source and header
to something more generic (codec-fwht.c/h) and prefix the defines, types
and functions with fwht_.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Documentation/media/uapi/v4l/pixfmt-compressed.rst
drivers/media/platform/vicodec/Makefile
drivers/media/platform/vicodec/codec-fwht.c [new file with mode: 0644]
drivers/media/platform/vicodec/codec-fwht.h [new file with mode: 0644]
drivers/media/platform/vicodec/vicodec-codec.c [deleted file]
drivers/media/platform/vicodec/vicodec-codec.h [deleted file]
drivers/media/platform/vicodec/vicodec-core.c

index d382e7a5c38e03f08e785623cc259f424bc9d9d0..d04b18adac3303caf1b0a899a6d2c001ddcd8522 100644 (file)
@@ -101,4 +101,4 @@ Compressed Formats
       - 'FWHT'
       - Video elementary stream using a codec based on the Fast Walsh Hadamard
         Transform. This codec is implemented by the vicodec ('Virtual Codec')
-       driver. See the vicodec-codec.h header for more details.
+       driver. See the codec-fwht.h header for more details.
index 197229428953a861aa95484f39f31ee037061114..a27242ff14ad63e0009e5f5c395759ecc82fa908 100644 (file)
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-vicodec-objs := vicodec-core.o vicodec-codec.o
+vicodec-objs := vicodec-core.o codec-fwht.o
 
 obj-$(CONFIG_VIDEO_VICODEC) += vicodec.o
diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c
new file mode 100644 (file)
index 0000000..f91f90f
--- /dev/null
@@ -0,0 +1,849 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2016 Tom aan de Wiel
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
+ *
+ * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
+ * R.D. Brown, 1977
+ */
+
+#include <linux/string.h>
+#include "codec-fwht.h"
+
+/*
+ * Note: bit 0 of the header must always be 0. Otherwise it cannot
+ * be guaranteed that the magic 8 byte sequence (see below) can
+ * never occur in the rlc output.
+ */
+#define PFRAME_BIT BIT(15)
+#define DUPS_MASK 0x1ffe
+
+#define PBLOCK 0
+#define IBLOCK 1
+
+#define ALL_ZEROS 15
+
+static const uint8_t zigzag[64] = {
+       0,
+       1,  8,
+       2,  9, 16,
+       3, 10, 17, 24,
+       4, 11, 18, 25, 32,
+       5, 12, 19, 26, 33, 40,
+       6, 13, 20, 27, 34, 41, 48,
+       7, 14, 21, 28, 35, 42, 49, 56,
+       15, 22, 29, 36, 43, 50, 57,
+       23, 30, 37, 44, 51, 58,
+       31, 38, 45, 52, 59,
+       39, 46, 53, 60,
+       47, 54, 61,
+       55, 62,
+       63,
+};
+
+
+static int rlc(const s16 *in, __be16 *output, int blocktype)
+{
+       s16 block[8 * 8];
+       s16 *wp = block;
+       int i = 0;
+       int x, y;
+       int ret = 0;
+
+       /* read in block from framebuffer */
+       int lastzero_run = 0;
+       int to_encode;
+
+       for (y = 0; y < 8; y++) {
+               for (x = 0; x < 8; x++) {
+                       *wp = in[x + y * 8];
+                       wp++;
+               }
+       }
+
+       /* keep track of amount of trailing zeros */
+       for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
+               lastzero_run++;
+
+       *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
+       ret++;
+
+       to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
+
+       i = 0;
+       while (i < to_encode) {
+               int cnt = 0;
+               int tmp;
+
+               /* count leading zeros */
+               while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
+                       cnt++;
+                       i++;
+                       if (i == to_encode) {
+                               cnt--;
+                               break;
+                       }
+               }
+               /* 4 bits for run, 12 for coefficient (quantization by 4) */
+               *output++ = htons((cnt | tmp << 4));
+               i++;
+               ret++;
+       }
+       if (lastzero_run > 14) {
+               *output = htons(ALL_ZEROS | 0);
+               ret++;
+       }
+
+       return ret;
+}
+
+/*
+ * This function will worst-case increase rlc_in by 65*2 bytes:
+ * one s16 value for the header and 8 * 8 coefficients of type s16.
+ */
+static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
+{
+       /* header */
+       const __be16 *input = *rlc_in;
+       s16 ret = ntohs(*input++);
+       int dec_count = 0;
+       s16 block[8 * 8 + 16];
+       s16 *wp = block;
+       int i;
+
+       /*
+        * Now de-compress: each 16-bit input word expands to up to 15
+        * coefficients (a run of up to 14 zeroes plus one value), or
+        * zero-fills the rest of the 64 coefficients if its run is 15.
+        *
+        * So block has to hold 8 * 8 + 16 entries, the '+ 16' is to
+        * allow for overflow if the incoming data was malformed.
+        */
+       while (dec_count < 8 * 8) {
+               s16 in = ntohs(*input++);
+               int length = in & 0xf;
+               int coeff = in >> 4;
+
+               /* fill remainder with zeros */
+               if (length == 15) {
+                       for (i = 0; i < 64 - dec_count; i++)
+                               *wp++ = 0;
+                       break;
+               }
+
+               for (i = 0; i < length; i++)
+                       *wp++ = 0;
+               *wp++ = coeff;
+               dec_count += length + 1;
+       }
+
+       wp = block;
+
+       for (i = 0; i < 64; i++) {
+               int pos = zigzag[i];
+               int y = pos / 8;
+               int x = pos % 8;
+
+               dwht_out[x + y * 8] = *wp++;
+       }
+       *rlc_in = input;
+       return ret;
+}
+
+static const int quant_table[] = {
+       2, 2, 2, 2, 2, 2,  2,  2,
+       2, 2, 2, 2, 2, 2,  2,  2,
+       2, 2, 2, 2, 2, 2,  2,  3,
+       2, 2, 2, 2, 2, 2,  3,  6,
+       2, 2, 2, 2, 2, 3,  6,  6,
+       2, 2, 2, 2, 3, 6,  6,  6,
+       2, 2, 2, 3, 6, 6,  6,  6,
+       2, 2, 3, 6, 6, 6,  6,  8,
+};
+
+static const int quant_table_p[] = {
+       3, 3, 3, 3, 3, 3,  3,  3,
+       3, 3, 3, 3, 3, 3,  3,  3,
+       3, 3, 3, 3, 3, 3,  3,  3,
+       3, 3, 3, 3, 3, 3,  3,  6,
+       3, 3, 3, 3, 3, 3,  6,  6,
+       3, 3, 3, 3, 3, 6,  6,  9,
+       3, 3, 3, 3, 6, 6,  9,  9,
+       3, 3, 3, 6, 6, 9,  9,  10,
+};
+
+static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
+{
+       const int *quant = quant_table;
+       int i, j;
+
+       for (j = 0; j < 8; j++) {
+               for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
+                       *coeff >>= *quant;
+                       if (*coeff >= -qp && *coeff <= qp)
+                               *coeff = *de_coeff = 0;
+                       else
+                               *de_coeff = *coeff << *quant;
+               }
+       }
+}
+
+static void dequantize_intra(s16 *coeff)
+{
+       const int *quant = quant_table;
+       int i, j;
+
+       for (j = 0; j < 8; j++)
+               for (i = 0; i < 8; i++, quant++, coeff++)
+                       *coeff <<= *quant;
+}
+
+static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
+{
+       const int *quant = quant_table_p;
+       int i, j;
+
+       for (j = 0; j < 8; j++) {
+               for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
+                       *coeff >>= *quant;
+                       if (*coeff >= -qp && *coeff <= qp)
+                               *coeff = *de_coeff = 0;
+                       else
+                               *de_coeff = *coeff << *quant;
+               }
+       }
+}
+
+static void dequantize_inter(s16 *coeff)
+{
+       const int *quant = quant_table_p;
+       int i, j;
+
+       for (j = 0; j < 8; j++)
+               for (i = 0; i < 8; i++, quant++, coeff++)
+                       *coeff <<= *quant;
+}
+
+static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
+                unsigned int input_step, bool intra)
+{
+       /* we'll need more than 8 bits for the transformed coefficients */
+       s32 workspace1[8], workspace2[8];
+       const u8 *tmp = block;
+       s16 *out = output_block;
+       int add = intra ? 256 : 0;
+       unsigned int i;
+
+       /* stage 1 */
+       stride *= input_step;
+
+       for (i = 0; i < 8; i++, tmp += stride, out += 8) {
+               switch (input_step) {
+               case 1:
+                       workspace1[0]  = tmp[0] + tmp[1] - add;
+                       workspace1[1]  = tmp[0] - tmp[1];
+
+                       workspace1[2]  = tmp[2] + tmp[3] - add;
+                       workspace1[3]  = tmp[2] - tmp[3];
+
+                       workspace1[4]  = tmp[4] + tmp[5] - add;
+                       workspace1[5]  = tmp[4] - tmp[5];
+
+                       workspace1[6]  = tmp[6] + tmp[7] - add;
+                       workspace1[7]  = tmp[6] - tmp[7];
+                       break;
+               case 2:
+                       workspace1[0]  = tmp[0] + tmp[2] - add;
+                       workspace1[1]  = tmp[0] - tmp[2];
+
+                       workspace1[2]  = tmp[4] + tmp[6] - add;
+                       workspace1[3]  = tmp[4] - tmp[6];
+
+                       workspace1[4]  = tmp[8] + tmp[10] - add;
+                       workspace1[5]  = tmp[8] - tmp[10];
+
+                       workspace1[6]  = tmp[12] + tmp[14] - add;
+                       workspace1[7]  = tmp[12] - tmp[14];
+                       break;
+               case 3:
+                       workspace1[0]  = tmp[0] + tmp[3] - add;
+                       workspace1[1]  = tmp[0] - tmp[3];
+
+                       workspace1[2]  = tmp[6] + tmp[9] - add;
+                       workspace1[3]  = tmp[6] - tmp[9];
+
+                       workspace1[4]  = tmp[12] + tmp[15] - add;
+                       workspace1[5]  = tmp[12] - tmp[15];
+
+                       workspace1[6]  = tmp[18] + tmp[21] - add;
+                       workspace1[7]  = tmp[18] - tmp[21];
+                       break;
+               default:
+                       workspace1[0]  = tmp[0] + tmp[4] - add;
+                       workspace1[1]  = tmp[0] - tmp[4];
+
+                       workspace1[2]  = tmp[8] + tmp[12] - add;
+                       workspace1[3]  = tmp[8] - tmp[12];
+
+                       workspace1[4]  = tmp[16] + tmp[20] - add;
+                       workspace1[5]  = tmp[16] - tmp[20];
+
+                       workspace1[6]  = tmp[24] + tmp[28] - add;
+                       workspace1[7]  = tmp[24] - tmp[28];
+                       break;
+               }
+
+               /* stage 2 */
+               workspace2[0] = workspace1[0] + workspace1[2];
+               workspace2[1] = workspace1[0] - workspace1[2];
+               workspace2[2] = workspace1[1] - workspace1[3];
+               workspace2[3] = workspace1[1] + workspace1[3];
+
+               workspace2[4] = workspace1[4] + workspace1[6];
+               workspace2[5] = workspace1[4] - workspace1[6];
+               workspace2[6] = workspace1[5] - workspace1[7];
+               workspace2[7] = workspace1[5] + workspace1[7];
+
+               /* stage 3 */
+               out[0] = workspace2[0] + workspace2[4];
+               out[1] = workspace2[0] - workspace2[4];
+               out[2] = workspace2[1] - workspace2[5];
+               out[3] = workspace2[1] + workspace2[5];
+               out[4] = workspace2[2] + workspace2[6];
+               out[5] = workspace2[2] - workspace2[6];
+               out[6] = workspace2[3] - workspace2[7];
+               out[7] = workspace2[3] + workspace2[7];
+       }
+
+       out = output_block;
+
+       for (i = 0; i < 8; i++, out++) {
+               /* stage 1 */
+               workspace1[0]  = out[0] + out[1 * 8];
+               workspace1[1]  = out[0] - out[1 * 8];
+
+               workspace1[2]  = out[2 * 8] + out[3 * 8];
+               workspace1[3]  = out[2 * 8] - out[3 * 8];
+
+               workspace1[4]  = out[4 * 8] + out[5 * 8];
+               workspace1[5]  = out[4 * 8] - out[5 * 8];
+
+               workspace1[6]  = out[6 * 8] + out[7 * 8];
+               workspace1[7]  = out[6 * 8] - out[7 * 8];
+
+               /* stage 2 */
+               workspace2[0] = workspace1[0] + workspace1[2];
+               workspace2[1] = workspace1[0] - workspace1[2];
+               workspace2[2] = workspace1[1] - workspace1[3];
+               workspace2[3] = workspace1[1] + workspace1[3];
+
+               workspace2[4] = workspace1[4] + workspace1[6];
+               workspace2[5] = workspace1[4] - workspace1[6];
+               workspace2[6] = workspace1[5] - workspace1[7];
+               workspace2[7] = workspace1[5] + workspace1[7];
+               /* stage 3 */
+               out[0 * 8] = workspace2[0] + workspace2[4];
+               out[1 * 8] = workspace2[0] - workspace2[4];
+               out[2 * 8] = workspace2[1] - workspace2[5];
+               out[3 * 8] = workspace2[1] + workspace2[5];
+               out[4 * 8] = workspace2[2] + workspace2[6];
+               out[5 * 8] = workspace2[2] - workspace2[6];
+               out[6 * 8] = workspace2[3] - workspace2[7];
+               out[7 * 8] = workspace2[3] + workspace2[7];
+       }
+}
+
+/*
+ * Not the nicest way of doing it, but P-blocks get twice the range of
+ * that of the I-blocks. Therefore we need a type bigger than 8 bits.
+ * Furthermore values can be negative... This is just a version that
+ * works with 16 bit signed data
+ */
+static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
+{
+       /* we'll need more than 8 bits for the transformed coefficients */
+       s32 workspace1[8], workspace2[8];
+       const s16 *tmp = block;
+       s16 *out = output_block;
+       int i;
+
+       for (i = 0; i < 8; i++, tmp += stride, out += 8) {
+               /* stage 1 */
+               workspace1[0]  = tmp[0] + tmp[1];
+               workspace1[1]  = tmp[0] - tmp[1];
+
+               workspace1[2]  = tmp[2] + tmp[3];
+               workspace1[3]  = tmp[2] - tmp[3];
+
+               workspace1[4]  = tmp[4] + tmp[5];
+               workspace1[5]  = tmp[4] - tmp[5];
+
+               workspace1[6]  = tmp[6] + tmp[7];
+               workspace1[7]  = tmp[6] - tmp[7];
+
+               /* stage 2 */
+               workspace2[0] = workspace1[0] + workspace1[2];
+               workspace2[1] = workspace1[0] - workspace1[2];
+               workspace2[2] = workspace1[1] - workspace1[3];
+               workspace2[3] = workspace1[1] + workspace1[3];
+
+               workspace2[4] = workspace1[4] + workspace1[6];
+               workspace2[5] = workspace1[4] - workspace1[6];
+               workspace2[6] = workspace1[5] - workspace1[7];
+               workspace2[7] = workspace1[5] + workspace1[7];
+
+               /* stage 3 */
+               out[0] = workspace2[0] + workspace2[4];
+               out[1] = workspace2[0] - workspace2[4];
+               out[2] = workspace2[1] - workspace2[5];
+               out[3] = workspace2[1] + workspace2[5];
+               out[4] = workspace2[2] + workspace2[6];
+               out[5] = workspace2[2] - workspace2[6];
+               out[6] = workspace2[3] - workspace2[7];
+               out[7] = workspace2[3] + workspace2[7];
+       }
+
+       out = output_block;
+
+       for (i = 0; i < 8; i++, out++) {
+               /* stage 1 */
+               workspace1[0]  = out[0] + out[1*8];
+               workspace1[1]  = out[0] - out[1*8];
+
+               workspace1[2]  = out[2*8] + out[3*8];
+               workspace1[3]  = out[2*8] - out[3*8];
+
+               workspace1[4]  = out[4*8] + out[5*8];
+               workspace1[5]  = out[4*8] - out[5*8];
+
+               workspace1[6]  = out[6*8] + out[7*8];
+               workspace1[7]  = out[6*8] - out[7*8];
+
+               /* stage 2 */
+               workspace2[0] = workspace1[0] + workspace1[2];
+               workspace2[1] = workspace1[0] - workspace1[2];
+               workspace2[2] = workspace1[1] - workspace1[3];
+               workspace2[3] = workspace1[1] + workspace1[3];
+
+               workspace2[4] = workspace1[4] + workspace1[6];
+               workspace2[5] = workspace1[4] - workspace1[6];
+               workspace2[6] = workspace1[5] - workspace1[7];
+               workspace2[7] = workspace1[5] + workspace1[7];
+
+               /* stage 3 */
+               out[0*8] = workspace2[0] + workspace2[4];
+               out[1*8] = workspace2[0] - workspace2[4];
+               out[2*8] = workspace2[1] - workspace2[5];
+               out[3*8] = workspace2[1] + workspace2[5];
+               out[4*8] = workspace2[2] + workspace2[6];
+               out[5*8] = workspace2[2] - workspace2[6];
+               out[6*8] = workspace2[3] - workspace2[7];
+               out[7*8] = workspace2[3] + workspace2[7];
+       }
+}
+
+static void ifwht(const s16 *block, s16 *output_block, int intra)
+{
+       /*
+        * we'll need more than 8 bits for the transformed coefficients
+        * use native unit of cpu
+        */
+       int workspace1[8], workspace2[8];
+       int inter = intra ? 0 : 1;
+       const s16 *tmp = block;
+       s16 *out = output_block;
+       int i;
+
+       for (i = 0; i < 8; i++, tmp += 8, out += 8) {
+               /* stage 1 */
+               workspace1[0]  = tmp[0] + tmp[1];
+               workspace1[1]  = tmp[0] - tmp[1];
+
+               workspace1[2]  = tmp[2] + tmp[3];
+               workspace1[3]  = tmp[2] - tmp[3];
+
+               workspace1[4]  = tmp[4] + tmp[5];
+               workspace1[5]  = tmp[4] - tmp[5];
+
+               workspace1[6]  = tmp[6] + tmp[7];
+               workspace1[7]  = tmp[6] - tmp[7];
+
+               /* stage 2 */
+               workspace2[0] = workspace1[0] + workspace1[2];
+               workspace2[1] = workspace1[0] - workspace1[2];
+               workspace2[2] = workspace1[1] - workspace1[3];
+               workspace2[3] = workspace1[1] + workspace1[3];
+
+               workspace2[4] = workspace1[4] + workspace1[6];
+               workspace2[5] = workspace1[4] - workspace1[6];
+               workspace2[6] = workspace1[5] - workspace1[7];
+               workspace2[7] = workspace1[5] + workspace1[7];
+
+               /* stage 3 */
+               out[0] = workspace2[0] + workspace2[4];
+               out[1] = workspace2[0] - workspace2[4];
+               out[2] = workspace2[1] - workspace2[5];
+               out[3] = workspace2[1] + workspace2[5];
+               out[4] = workspace2[2] + workspace2[6];
+               out[5] = workspace2[2] - workspace2[6];
+               out[6] = workspace2[3] - workspace2[7];
+               out[7] = workspace2[3] + workspace2[7];
+       }
+
+       out = output_block;
+
+       for (i = 0; i < 8; i++, out++) {
+               /* stage 1 */
+               workspace1[0]  = out[0] + out[1 * 8];
+               workspace1[1]  = out[0] - out[1 * 8];
+
+               workspace1[2]  = out[2 * 8] + out[3 * 8];
+               workspace1[3]  = out[2 * 8] - out[3 * 8];
+
+               workspace1[4]  = out[4 * 8] + out[5 * 8];
+               workspace1[5]  = out[4 * 8] - out[5 * 8];
+
+               workspace1[6]  = out[6 * 8] + out[7 * 8];
+               workspace1[7]  = out[6 * 8] - out[7 * 8];
+
+               /* stage 2 */
+               workspace2[0] = workspace1[0] + workspace1[2];
+               workspace2[1] = workspace1[0] - workspace1[2];
+               workspace2[2] = workspace1[1] - workspace1[3];
+               workspace2[3] = workspace1[1] + workspace1[3];
+
+               workspace2[4] = workspace1[4] + workspace1[6];
+               workspace2[5] = workspace1[4] - workspace1[6];
+               workspace2[6] = workspace1[5] - workspace1[7];
+               workspace2[7] = workspace1[5] + workspace1[7];
+
+               /* stage 3 */
+               if (inter) {
+                       int d;
+
+                       out[0 * 8] = workspace2[0] + workspace2[4];
+                       out[1 * 8] = workspace2[0] - workspace2[4];
+                       out[2 * 8] = workspace2[1] - workspace2[5];
+                       out[3 * 8] = workspace2[1] + workspace2[5];
+                       out[4 * 8] = workspace2[2] + workspace2[6];
+                       out[5 * 8] = workspace2[2] - workspace2[6];
+                       out[6 * 8] = workspace2[3] - workspace2[7];
+                       out[7 * 8] = workspace2[3] + workspace2[7];
+
+                       for (d = 0; d < 8; d++)
+                               out[8 * d] >>= 6;
+               } else {
+                       int d;
+
+                       out[0 * 8] = workspace2[0] + workspace2[4];
+                       out[1 * 8] = workspace2[0] - workspace2[4];
+                       out[2 * 8] = workspace2[1] - workspace2[5];
+                       out[3 * 8] = workspace2[1] + workspace2[5];
+                       out[4 * 8] = workspace2[2] + workspace2[6];
+                       out[5 * 8] = workspace2[2] - workspace2[6];
+                       out[6 * 8] = workspace2[3] - workspace2[7];
+                       out[7 * 8] = workspace2[3] + workspace2[7];
+
+                       for (d = 0; d < 8; d++) {
+                               out[8 * d] >>= 6;
+                               out[8 * d] += 128;
+                       }
+               }
+       }
+}
+
+static void fill_encoder_block(const u8 *input, s16 *dst,
+                              unsigned int stride, unsigned int input_step)
+{
+       int i, j;
+
+       for (i = 0; i < 8; i++) {
+               for (j = 0; j < 8; j++, input += input_step)
+                       *dst++ = *input;
+               input += (stride - 8) * input_step;
+       }
+}
+
+static int var_intra(const s16 *input)
+{
+       int32_t mean = 0;
+       int32_t ret = 0;
+       const s16 *tmp = input;
+       int i;
+
+       for (i = 0; i < 8 * 8; i++, tmp++)
+               mean += *tmp;
+       mean /= 64;
+       tmp = input;
+       for (i = 0; i < 8 * 8; i++, tmp++)
+               ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
+       return ret;
+}
+
+static int var_inter(const s16 *old, const s16 *new)
+{
+       int32_t ret = 0;
+       int i;
+
+       for (i = 0; i < 8 * 8; i++, old++, new++)
+               ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
+       return ret;
+}
+
+static int decide_blocktype(const u8 *cur, const u8 *reference,
+                           s16 *deltablock, unsigned int stride,
+                           unsigned int input_step)
+{
+       s16 tmp[64];
+       s16 old[64];
+       s16 *work = tmp;
+       unsigned int k, l;
+       int vari;
+       int vard;
+
+       fill_encoder_block(cur, tmp, stride, input_step);
+       fill_encoder_block(reference, old, 8, 1);
+       vari = var_intra(tmp);
+
+       for (k = 0; k < 8; k++) {
+               for (l = 0; l < 8; l++) {
+                       *deltablock = *work - *reference;
+                       deltablock++;
+                       work++;
+                       reference++;
+               }
+       }
+       deltablock -= 64;
+       vard = var_inter(old, tmp);
+       return vari <= vard ? IBLOCK : PBLOCK;
+}
+
+static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
+{
+       int i, j;
+
+       for (i = 0; i < 8; i++) {
+               for (j = 0; j < 8; j++)
+                       *dst++ = *input++;
+               dst += stride - 8;
+       }
+}
+
+static void add_deltas(s16 *deltas, const u8 *ref, int stride)
+{
+       int k, l;
+
+       for (k = 0; k < 8; k++) {
+               for (l = 0; l < 8; l++) {
+                       *deltas += *ref++;
+                       /*
+                        * Due to quantizing, it might be possible that the
+                        * decoded coefficients are slightly out of range
+                        */
+                       if (*deltas < 0)
+                               *deltas = 0;
+                       else if (*deltas > 255)
+                               *deltas = 255;
+                       deltas++;
+               }
+               ref += stride - 8;
+       }
+}
+
+static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
+                       struct fwht_cframe *cf, u32 height, u32 width,
+                       unsigned int input_step,
+                       bool is_intra, bool next_is_intra)
+{
+       u8 *input_start = input;
+       __be16 *rlco_start = *rlco;
+       s16 deltablock[64];
+       __be16 pframe_bit = htons(PFRAME_BIT);
+       u32 encoding = 0;
+       unsigned int last_size = 0;
+       unsigned int i, j;
+
+       for (j = 0; j < height / 8; j++) {
+               for (i = 0; i < width / 8; i++) {
+                       /* intra code, first frame is always intra coded. */
+                       int blocktype = IBLOCK;
+                       unsigned int size;
+
+                       if (!is_intra)
+                               blocktype = decide_blocktype(input, refp,
+                                       deltablock, width, input_step);
+                       if (blocktype == IBLOCK) {
+                               fwht(input, cf->coeffs, width, input_step, 1);
+                               quantize_intra(cf->coeffs, cf->de_coeffs,
+                                              cf->i_frame_qp);
+                       } else {
+                               /* inter code */
+                               encoding |= FWHT_FRAME_PCODED;
+                               fwht16(deltablock, cf->coeffs, 8, 0);
+                               quantize_inter(cf->coeffs, cf->de_coeffs,
+                                              cf->p_frame_qp);
+                       }
+                       if (!next_is_intra) {
+                               ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
+
+                               if (blocktype == PBLOCK)
+                                       add_deltas(cf->de_fwht, refp, 8);
+                               fill_decoder_block(refp, cf->de_fwht, 8);
+                       }
+
+                       input += 8 * input_step;
+                       refp += 8 * 8;
+
+                       size = rlc(cf->coeffs, *rlco, blocktype);
+                       if (last_size == size &&
+                           !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
+                               __be16 *last_rlco = *rlco - size;
+                               s16 hdr = ntohs(*last_rlco);
+
+                               if (!((*last_rlco ^ **rlco) & pframe_bit) &&
+                                   (hdr & DUPS_MASK) < DUPS_MASK)
+                                       *last_rlco = htons(hdr + 2);
+                               else
+                                       *rlco += size;
+                       } else {
+                               *rlco += size;
+                       }
+                       if (*rlco >= rlco_max) {
+                               encoding |= FWHT_FRAME_UNENCODED;
+                               goto exit_loop;
+                       }
+                       last_size = size;
+               }
+               input += width * 7 * input_step;
+       }
+
+exit_loop:
+       if (encoding & FWHT_FRAME_UNENCODED) {
+               u8 *out = (u8 *)rlco_start;
+
+               input = input_start;
+               /*
+                * The compressed stream should never contain the magic
+                * header, so when we copy the YUV data we replace 0xff
+                * by 0xfe. Since YUV is limited range such values
+                * shouldn't appear anyway.
+                */
+               for (i = 0; i < height * width; i++, input += input_step)
+                       *out++ = (*input == 0xff) ? 0xfe : *input;
+               *rlco = (__be16 *)out;
+               encoding &= ~FWHT_FRAME_PCODED;
+       }
+       return encoding;
+}
+
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+                     struct fwht_raw_frame *ref_frm,
+                     struct fwht_cframe *cf,
+                     bool is_intra, bool next_is_intra)
+{
+       unsigned int size = frm->height * frm->width;
+       __be16 *rlco = cf->rlc_data;
+       __be16 *rlco_max;
+       u32 encoding;
+       u32 chroma_h = frm->height / frm->height_div;
+       u32 chroma_w = frm->width / frm->width_div;
+       unsigned int chroma_size = chroma_h * chroma_w;
+
+       rlco_max = rlco + size / 2 - 256;
+       encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
+                               frm->height, frm->width,
+                               frm->luma_step, is_intra, next_is_intra);
+       if (encoding & FWHT_FRAME_UNENCODED)
+               encoding |= FWHT_LUMA_UNENCODED;
+       encoding &= ~FWHT_FRAME_UNENCODED;
+       rlco_max = rlco + chroma_size / 2 - 256;
+       encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
+                                chroma_h, chroma_w,
+                                frm->chroma_step, is_intra, next_is_intra);
+       if (encoding & FWHT_FRAME_UNENCODED)
+               encoding |= FWHT_CB_UNENCODED;
+       encoding &= ~FWHT_FRAME_UNENCODED;
+       rlco_max = rlco + chroma_size / 2 - 256;
+       encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
+                                chroma_h, chroma_w,
+                                frm->chroma_step, is_intra, next_is_intra);
+       if (encoding & FWHT_FRAME_UNENCODED)
+               encoding |= FWHT_CR_UNENCODED;
+       encoding &= ~FWHT_FRAME_UNENCODED;
+       cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
+       return encoding;
+}
+
+static void decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref,
+                        u32 height, u32 width, bool uncompressed)
+{
+       unsigned int copies = 0;
+       s16 copy[8 * 8];
+       s16 stat;
+       unsigned int i, j;
+
+       if (uncompressed) {
+               memcpy(ref, *rlco, width * height);
+               *rlco += width * height / 2;
+               return;
+       }
+
+       /*
+        * When decoding each macroblock the rlco pointer will be increased
+        * by 65 * 2 bytes worst-case.
+        * To avoid overflow the buffer has to be 65/64th of the actual raw
+        * image size, just in case someone feeds it malicious data.
+        */
+       for (j = 0; j < height / 8; j++) {
+               for (i = 0; i < width / 8; i++) {
+                       u8 *refp = ref + j * 8 * width + i * 8;
+
+                       if (copies) {
+                               memcpy(cf->de_fwht, copy, sizeof(copy));
+                               if (stat & PFRAME_BIT)
+                                       add_deltas(cf->de_fwht, refp, width);
+                               fill_decoder_block(refp, cf->de_fwht, width);
+                               copies--;
+                               continue;
+                       }
+
+                       stat = derlc(rlco, cf->coeffs);
+
+                       if (stat & PFRAME_BIT)
+                               dequantize_inter(cf->coeffs);
+                       else
+                               dequantize_intra(cf->coeffs);
+
+                       ifwht(cf->coeffs, cf->de_fwht,
+                             (stat & PFRAME_BIT) ? 0 : 1);
+
+                       copies = (stat & DUPS_MASK) >> 1;
+                       if (copies)
+                               memcpy(copy, cf->de_fwht, sizeof(copy));
+                       if (stat & PFRAME_BIT)
+                               add_deltas(cf->de_fwht, refp, width);
+                       fill_decoder_block(refp, cf->de_fwht, width);
+               }
+       }
+}
+
+void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+                      u32 hdr_flags)
+{
+       const __be16 *rlco = cf->rlc_data;
+       u32 h = cf->height / 2;
+       u32 w = cf->width / 2;
+
+       if (hdr_flags & FWHT_FL_CHROMA_FULL_HEIGHT)
+               h *= 2;
+       if (hdr_flags & FWHT_FL_CHROMA_FULL_WIDTH)
+               w *= 2;
+       decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
+                    hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED);
+       decode_plane(cf, &rlco, ref->cb, h, w,
+                    hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED);
+       decode_plane(cf, &rlco, ref->cr, h, w,
+                    hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED);
+}
diff --git a/drivers/media/platform/vicodec/codec-fwht.h b/drivers/media/platform/vicodec/codec-fwht.h
new file mode 100644 (file)
index 0000000..1f9e473
--- /dev/null
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2016 Tom aan de Wiel
+ * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ */
+
+#ifndef CODEC_FWHT_H
+#define CODEC_FWHT_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+
+/*
+ * The compressed format consists of a fwht_cframe_hdr struct followed by the
+ * compressed frame data. The header contains the size of that data.
+ * Each Y, Cb and Cr plane is compressed separately. If the compressed
+ * size of a plane becomes larger than the uncompressed size, then
+ * that plane is stored uncompressed and the corresponding bit is set
+ * in the flags field of the header.
+ *
+ * Each compressed plane consists of macroblocks and each macroblock
+ * is run-length-encoded. Each macroblock starts with a 16 bit value.
+ * Bit 15 indicates if this is a P-coded macroblock (1) or not (0).
+ * P-coded macroblocks contain a delta against the previous frame.
+ *
+ * Bits 1-12 contain a number. If non-zero, then this same macroblock
+ * repeats that number of times. This results in a high degree of
+ * compression for generated images like colorbars.
+ *
+ * Following this macroblock header the MB coefficients are run-length
+ * encoded: the top 12 bits contain the coefficient, the bottom 4 bits
+ * tell how many zero coefficients precede it. The value 0xf indicates
+ * that the remainder of the macroblock should be filled with zeroes.
+ *
+ * All 16 and 32 bit values are stored in big-endian (network) order.
+ *
+ * Each fwht_cframe_hdr starts with an 8 byte magic header that is
+ * guaranteed not to occur in the compressed frame data. This header
+ * can be used to sync to the next frame.
+ *
+ * This codec uses the Fast Walsh Hadamard Transform. Tom aan de Wiel
+ * developed this as part of a university project, specifically for use
+ * with this driver. His project report can be found here:
+ *
+ * https://hverkuil.home.xs4all.nl/fwht.pdf
+ */
+
+/*
+ * This is a sequence of 8 bytes, each with the low 4 bits set to 0xf.
+ *
+ * This sequence cannot occur in the encoded data
+ *
+ * Note that these two magic values are symmetrical so no endian issues here.
+ */
+#define FWHT_MAGIC1 0x4f4f4f4f
+#define FWHT_MAGIC2 0xffffffff
+
+#define FWHT_VERSION 1
+
+/* Set if this is an interlaced format */
+#define FWHT_FL_IS_INTERLACED          BIT(0)
+/* Set if this is a bottom-first (NTSC) interlaced format */
+#define FWHT_FL_IS_BOTTOM_FIRST                BIT(1)
+/* Set if each 'frame' contains just one field */
+#define FWHT_FL_IS_ALTERNATE           BIT(2)
+/*
+ * If FWHT_FL_IS_ALTERNATE was set, then this is set if this
+ * 'frame' is the bottom field, else it is the top field.
+ */
+#define FWHT_FL_IS_BOTTOM_FIELD                BIT(3)
+/* Set if the corresponding plane is stored uncompressed */
+#define FWHT_FL_LUMA_IS_UNCOMPRESSED   BIT(4)
+#define FWHT_FL_CB_IS_UNCOMPRESSED     BIT(5)
+#define FWHT_FL_CR_IS_UNCOMPRESSED     BIT(6)
+#define FWHT_FL_CHROMA_FULL_HEIGHT     BIT(7)
+#define FWHT_FL_CHROMA_FULL_WIDTH      BIT(8)
+
+struct fwht_cframe_hdr {
+       u32 magic1;
+       u32 magic2;
+       __be32 version;
+       __be32 width, height;
+       __be32 flags;
+       __be32 colorspace;
+       __be32 xfer_func;
+       __be32 ycbcr_enc;
+       __be32 quantization;
+       __be32 size;
+};
+
+struct fwht_cframe {
+       unsigned int width, height;
+       u16 i_frame_qp;
+       u16 p_frame_qp;
+       __be16 *rlc_data;
+       s16 coeffs[8 * 8];
+       s16 de_coeffs[8 * 8];
+       s16 de_fwht[8 * 8];
+       u32 size;
+};
+
+struct fwht_raw_frame {
+       unsigned int width, height;
+       unsigned int width_div;
+       unsigned int height_div;
+       unsigned int luma_step;
+       unsigned int chroma_step;
+       u8 *luma, *cb, *cr;
+};
+
+#define FWHT_FRAME_PCODED      BIT(0)
+#define FWHT_FRAME_UNENCODED   BIT(1)
+#define FWHT_LUMA_UNENCODED    BIT(2)
+#define FWHT_CB_UNENCODED      BIT(3)
+#define FWHT_CR_UNENCODED      BIT(4)
+
+u32 fwht_encode_frame(struct fwht_raw_frame *frm,
+                     struct fwht_raw_frame *ref_frm,
+                     struct fwht_cframe *cf,
+                     bool is_intra, bool next_is_intra);
+void fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref,
+                      u32 hdr_flags);
+
+#endif
diff --git a/drivers/media/platform/vicodec/vicodec-codec.c b/drivers/media/platform/vicodec/vicodec-codec.c
deleted file mode 100644 (file)
index 3547129..0000000
+++ /dev/null
@@ -1,835 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright 2016 Tom aan de Wiel
- * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- *
- * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
- *
- * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
- * R.D. Brown, 1977
- */
-
-#include <linux/string.h>
-#include "vicodec-codec.h"
-
-#define ALL_ZEROS 15
-
-static const uint8_t zigzag[64] = {
-       0,
-       1,  8,
-       2,  9, 16,
-       3, 10, 17, 24,
-       4, 11, 18, 25, 32,
-       5, 12, 19, 26, 33, 40,
-       6, 13, 20, 27, 34, 41, 48,
-       7, 14, 21, 28, 35, 42, 49, 56,
-       15, 22, 29, 36, 43, 50, 57,
-       23, 30, 37, 44, 51, 58,
-       31, 38, 45, 52, 59,
-       39, 46, 53, 60,
-       47, 54, 61,
-       55, 62,
-       63,
-};
-
-
-static int rlc(const s16 *in, __be16 *output, int blocktype)
-{
-       s16 block[8 * 8];
-       s16 *wp = block;
-       int i = 0;
-       int x, y;
-       int ret = 0;
-
-       /* read in block from framebuffer */
-       int lastzero_run = 0;
-       int to_encode;
-
-       for (y = 0; y < 8; y++) {
-               for (x = 0; x < 8; x++) {
-                       *wp = in[x + y * 8];
-                       wp++;
-               }
-       }
-
-       /* keep track of amount of trailing zeros */
-       for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
-               lastzero_run++;
-
-       *output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
-       ret++;
-
-       to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);
-
-       i = 0;
-       while (i < to_encode) {
-               int cnt = 0;
-               int tmp;
-
-               /* count leading zeros */
-               while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
-                       cnt++;
-                       i++;
-                       if (i == to_encode) {
-                               cnt--;
-                               break;
-                       }
-               }
-               /* 4 bits for run, 12 for coefficient (quantization by 4) */
-               *output++ = htons((cnt | tmp << 4));
-               i++;
-               ret++;
-       }
-       if (lastzero_run > 14) {
-               *output = htons(ALL_ZEROS | 0);
-               ret++;
-       }
-
-       return ret;
-}
-
-/*
- * This function will worst-case increase rlc_in by 65*2 bytes:
- * one s16 value for the header and 8 * 8 coefficients of type s16.
- */
-static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
-{
-       /* header */
-       const __be16 *input = *rlc_in;
-       s16 ret = ntohs(*input++);
-       int dec_count = 0;
-       s16 block[8 * 8 + 16];
-       s16 *wp = block;
-       int i;
-
-       /*
-        * Now de-compress, it expands one byte to up to 15 bytes
-        * (or fills the remainder of the 64 bytes with zeroes if it
-        * is the last byte to expand).
-        *
-        * So block has to be 8 * 8 + 16 bytes, the '+ 16' is to
-        * allow for overflow if the incoming data was malformed.
-        */
-       while (dec_count < 8 * 8) {
-               s16 in = ntohs(*input++);
-               int length = in & 0xf;
-               int coeff = in >> 4;
-
-               /* fill remainder with zeros */
-               if (length == 15) {
-                       for (i = 0; i < 64 - dec_count; i++)
-                               *wp++ = 0;
-                       break;
-               }
-
-               for (i = 0; i < length; i++)
-                       *wp++ = 0;
-               *wp++ = coeff;
-               dec_count += length + 1;
-       }
-
-       wp = block;
-
-       for (i = 0; i < 64; i++) {
-               int pos = zigzag[i];
-               int y = pos / 8;
-               int x = pos % 8;
-
-               dwht_out[x + y * 8] = *wp++;
-       }
-       *rlc_in = input;
-       return ret;
-}
-
-static const int quant_table[] = {
-       2, 2, 2, 2, 2, 2,  2,  2,
-       2, 2, 2, 2, 2, 2,  2,  2,
-       2, 2, 2, 2, 2, 2,  2,  3,
-       2, 2, 2, 2, 2, 2,  3,  6,
-       2, 2, 2, 2, 2, 3,  6,  6,
-       2, 2, 2, 2, 3, 6,  6,  6,
-       2, 2, 2, 3, 6, 6,  6,  6,
-       2, 2, 3, 6, 6, 6,  6,  8,
-};
-
-static const int quant_table_p[] = {
-       3, 3, 3, 3, 3, 3,  3,  3,
-       3, 3, 3, 3, 3, 3,  3,  3,
-       3, 3, 3, 3, 3, 3,  3,  3,
-       3, 3, 3, 3, 3, 3,  3,  6,
-       3, 3, 3, 3, 3, 3,  6,  6,
-       3, 3, 3, 3, 3, 6,  6,  9,
-       3, 3, 3, 3, 6, 6,  9,  9,
-       3, 3, 3, 6, 6, 9,  9,  10,
-};
-
-static void quantize_intra(s16 *coeff, s16 *de_coeff, u16 qp)
-{
-       const int *quant = quant_table;
-       int i, j;
-
-       for (j = 0; j < 8; j++) {
-               for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
-                       *coeff >>= *quant;
-                       if (*coeff >= -qp && *coeff <= qp)
-                               *coeff = *de_coeff = 0;
-                       else
-                               *de_coeff = *coeff << *quant;
-               }
-       }
-}
-
-static void dequantize_intra(s16 *coeff)
-{
-       const int *quant = quant_table;
-       int i, j;
-
-       for (j = 0; j < 8; j++)
-               for (i = 0; i < 8; i++, quant++, coeff++)
-                       *coeff <<= *quant;
-}
-
-static void quantize_inter(s16 *coeff, s16 *de_coeff, u16 qp)
-{
-       const int *quant = quant_table_p;
-       int i, j;
-
-       for (j = 0; j < 8; j++) {
-               for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
-                       *coeff >>= *quant;
-                       if (*coeff >= -qp && *coeff <= qp)
-                               *coeff = *de_coeff = 0;
-                       else
-                               *de_coeff = *coeff << *quant;
-               }
-       }
-}
-
-static void dequantize_inter(s16 *coeff)
-{
-       const int *quant = quant_table_p;
-       int i, j;
-
-       for (j = 0; j < 8; j++)
-               for (i = 0; i < 8; i++, quant++, coeff++)
-                       *coeff <<= *quant;
-}
-
-static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
-                unsigned int input_step, bool intra)
-{
-       /* we'll need more than 8 bits for the transformed coefficients */
-       s32 workspace1[8], workspace2[8];
-       const u8 *tmp = block;
-       s16 *out = output_block;
-       int add = intra ? 256 : 0;
-       unsigned int i;
-
-       /* stage 1 */
-       stride *= input_step;
-
-       for (i = 0; i < 8; i++, tmp += stride, out += 8) {
-               switch (input_step) {
-               case 1:
-                       workspace1[0]  = tmp[0] + tmp[1] - add;
-                       workspace1[1]  = tmp[0] - tmp[1];
-
-                       workspace1[2]  = tmp[2] + tmp[3] - add;
-                       workspace1[3]  = tmp[2] - tmp[3];
-
-                       workspace1[4]  = tmp[4] + tmp[5] - add;
-                       workspace1[5]  = tmp[4] - tmp[5];
-
-                       workspace1[6]  = tmp[6] + tmp[7] - add;
-                       workspace1[7]  = tmp[6] - tmp[7];
-                       break;
-               case 2:
-                       workspace1[0]  = tmp[0] + tmp[2] - add;
-                       workspace1[1]  = tmp[0] - tmp[2];
-
-                       workspace1[2]  = tmp[4] + tmp[6] - add;
-                       workspace1[3]  = tmp[4] - tmp[6];
-
-                       workspace1[4]  = tmp[8] + tmp[10] - add;
-                       workspace1[5]  = tmp[8] - tmp[10];
-
-                       workspace1[6]  = tmp[12] + tmp[14] - add;
-                       workspace1[7]  = tmp[12] - tmp[14];
-                       break;
-               case 3:
-                       workspace1[0]  = tmp[0] + tmp[3] - add;
-                       workspace1[1]  = tmp[0] - tmp[3];
-
-                       workspace1[2]  = tmp[6] + tmp[9] - add;
-                       workspace1[3]  = tmp[6] - tmp[9];
-
-                       workspace1[4]  = tmp[12] + tmp[15] - add;
-                       workspace1[5]  = tmp[12] - tmp[15];
-
-                       workspace1[6]  = tmp[18] + tmp[21] - add;
-                       workspace1[7]  = tmp[18] - tmp[21];
-                       break;
-               default:
-                       workspace1[0]  = tmp[0] + tmp[4] - add;
-                       workspace1[1]  = tmp[0] - tmp[4];
-
-                       workspace1[2]  = tmp[8] + tmp[12] - add;
-                       workspace1[3]  = tmp[8] - tmp[12];
-
-                       workspace1[4]  = tmp[16] + tmp[20] - add;
-                       workspace1[5]  = tmp[16] - tmp[20];
-
-                       workspace1[6]  = tmp[24] + tmp[28] - add;
-                       workspace1[7]  = tmp[24] - tmp[28];
-                       break;
-               }
-
-               /* stage 2 */
-               workspace2[0] = workspace1[0] + workspace1[2];
-               workspace2[1] = workspace1[0] - workspace1[2];
-               workspace2[2] = workspace1[1] - workspace1[3];
-               workspace2[3] = workspace1[1] + workspace1[3];
-
-               workspace2[4] = workspace1[4] + workspace1[6];
-               workspace2[5] = workspace1[4] - workspace1[6];
-               workspace2[6] = workspace1[5] - workspace1[7];
-               workspace2[7] = workspace1[5] + workspace1[7];
-
-               /* stage 3 */
-               out[0] = workspace2[0] + workspace2[4];
-               out[1] = workspace2[0] - workspace2[4];
-               out[2] = workspace2[1] - workspace2[5];
-               out[3] = workspace2[1] + workspace2[5];
-               out[4] = workspace2[2] + workspace2[6];
-               out[5] = workspace2[2] - workspace2[6];
-               out[6] = workspace2[3] - workspace2[7];
-               out[7] = workspace2[3] + workspace2[7];
-       }
-
-       out = output_block;
-
-       for (i = 0; i < 8; i++, out++) {
-               /* stage 1 */
-               workspace1[0]  = out[0] + out[1 * 8];
-               workspace1[1]  = out[0] - out[1 * 8];
-
-               workspace1[2]  = out[2 * 8] + out[3 * 8];
-               workspace1[3]  = out[2 * 8] - out[3 * 8];
-
-               workspace1[4]  = out[4 * 8] + out[5 * 8];
-               workspace1[5]  = out[4 * 8] - out[5 * 8];
-
-               workspace1[6]  = out[6 * 8] + out[7 * 8];
-               workspace1[7]  = out[6 * 8] - out[7 * 8];
-
-               /* stage 2 */
-               workspace2[0] = workspace1[0] + workspace1[2];
-               workspace2[1] = workspace1[0] - workspace1[2];
-               workspace2[2] = workspace1[1] - workspace1[3];
-               workspace2[3] = workspace1[1] + workspace1[3];
-
-               workspace2[4] = workspace1[4] + workspace1[6];
-               workspace2[5] = workspace1[4] - workspace1[6];
-               workspace2[6] = workspace1[5] - workspace1[7];
-               workspace2[7] = workspace1[5] + workspace1[7];
-               /* stage 3 */
-               out[0 * 8] = workspace2[0] + workspace2[4];
-               out[1 * 8] = workspace2[0] - workspace2[4];
-               out[2 * 8] = workspace2[1] - workspace2[5];
-               out[3 * 8] = workspace2[1] + workspace2[5];
-               out[4 * 8] = workspace2[2] + workspace2[6];
-               out[5 * 8] = workspace2[2] - workspace2[6];
-               out[6 * 8] = workspace2[3] - workspace2[7];
-               out[7 * 8] = workspace2[3] + workspace2[7];
-       }
-}
-
-/*
- * Not the nicest way of doing it, but P-blocks get twice the range of
- * that of the I-blocks. Therefore we need a type bigger than 8 bits.
- * Furthermore values can be negative... This is just a version that
- * works with 16 signed data
- */
-static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
-{
-       /* we'll need more than 8 bits for the transformed coefficients */
-       s32 workspace1[8], workspace2[8];
-       const s16 *tmp = block;
-       s16 *out = output_block;
-       int i;
-
-       for (i = 0; i < 8; i++, tmp += stride, out += 8) {
-               /* stage 1 */
-               workspace1[0]  = tmp[0] + tmp[1];
-               workspace1[1]  = tmp[0] - tmp[1];
-
-               workspace1[2]  = tmp[2] + tmp[3];
-               workspace1[3]  = tmp[2] - tmp[3];
-
-               workspace1[4]  = tmp[4] + tmp[5];
-               workspace1[5]  = tmp[4] - tmp[5];
-
-               workspace1[6]  = tmp[6] + tmp[7];
-               workspace1[7]  = tmp[6] - tmp[7];
-
-               /* stage 2 */
-               workspace2[0] = workspace1[0] + workspace1[2];
-               workspace2[1] = workspace1[0] - workspace1[2];
-               workspace2[2] = workspace1[1] - workspace1[3];
-               workspace2[3] = workspace1[1] + workspace1[3];
-
-               workspace2[4] = workspace1[4] + workspace1[6];
-               workspace2[5] = workspace1[4] - workspace1[6];
-               workspace2[6] = workspace1[5] - workspace1[7];
-               workspace2[7] = workspace1[5] + workspace1[7];
-
-               /* stage 3 */
-               out[0] = workspace2[0] + workspace2[4];
-               out[1] = workspace2[0] - workspace2[4];
-               out[2] = workspace2[1] - workspace2[5];
-               out[3] = workspace2[1] + workspace2[5];
-               out[4] = workspace2[2] + workspace2[6];
-               out[5] = workspace2[2] - workspace2[6];
-               out[6] = workspace2[3] - workspace2[7];
-               out[7] = workspace2[3] + workspace2[7];
-       }
-
-       out = output_block;
-
-       for (i = 0; i < 8; i++, out++) {
-               /* stage 1 */
-               workspace1[0]  = out[0] + out[1*8];
-               workspace1[1]  = out[0] - out[1*8];
-
-               workspace1[2]  = out[2*8] + out[3*8];
-               workspace1[3]  = out[2*8] - out[3*8];
-
-               workspace1[4]  = out[4*8] + out[5*8];
-               workspace1[5]  = out[4*8] - out[5*8];
-
-               workspace1[6]  = out[6*8] + out[7*8];
-               workspace1[7]  = out[6*8] - out[7*8];
-
-               /* stage 2 */
-               workspace2[0] = workspace1[0] + workspace1[2];
-               workspace2[1] = workspace1[0] - workspace1[2];
-               workspace2[2] = workspace1[1] - workspace1[3];
-               workspace2[3] = workspace1[1] + workspace1[3];
-
-               workspace2[4] = workspace1[4] + workspace1[6];
-               workspace2[5] = workspace1[4] - workspace1[6];
-               workspace2[6] = workspace1[5] - workspace1[7];
-               workspace2[7] = workspace1[5] + workspace1[7];
-
-               /* stage 3 */
-               out[0*8] = workspace2[0] + workspace2[4];
-               out[1*8] = workspace2[0] - workspace2[4];
-               out[2*8] = workspace2[1] - workspace2[5];
-               out[3*8] = workspace2[1] + workspace2[5];
-               out[4*8] = workspace2[2] + workspace2[6];
-               out[5*8] = workspace2[2] - workspace2[6];
-               out[6*8] = workspace2[3] - workspace2[7];
-               out[7*8] = workspace2[3] + workspace2[7];
-       }
-}
-
-static void ifwht(const s16 *block, s16 *output_block, int intra)
-{
-       /*
-        * we'll need more than 8 bits for the transformed coefficients
-        * use native unit of cpu
-        */
-       int workspace1[8], workspace2[8];
-       int inter = intra ? 0 : 1;
-       const s16 *tmp = block;
-       s16 *out = output_block;
-       int i;
-
-       for (i = 0; i < 8; i++, tmp += 8, out += 8) {
-               /* stage 1 */
-               workspace1[0]  = tmp[0] + tmp[1];
-               workspace1[1]  = tmp[0] - tmp[1];
-
-               workspace1[2]  = tmp[2] + tmp[3];
-               workspace1[3]  = tmp[2] - tmp[3];
-
-               workspace1[4]  = tmp[4] + tmp[5];
-               workspace1[5]  = tmp[4] - tmp[5];
-
-               workspace1[6]  = tmp[6] + tmp[7];
-               workspace1[7]  = tmp[6] - tmp[7];
-
-               /* stage 2 */
-               workspace2[0] = workspace1[0] + workspace1[2];
-               workspace2[1] = workspace1[0] - workspace1[2];
-               workspace2[2] = workspace1[1] - workspace1[3];
-               workspace2[3] = workspace1[1] + workspace1[3];
-
-               workspace2[4] = workspace1[4] + workspace1[6];
-               workspace2[5] = workspace1[4] - workspace1[6];
-               workspace2[6] = workspace1[5] - workspace1[7];
-               workspace2[7] = workspace1[5] + workspace1[7];
-
-               /* stage 3 */
-               out[0] = workspace2[0] + workspace2[4];
-               out[1] = workspace2[0] - workspace2[4];
-               out[2] = workspace2[1] - workspace2[5];
-               out[3] = workspace2[1] + workspace2[5];
-               out[4] = workspace2[2] + workspace2[6];
-               out[5] = workspace2[2] - workspace2[6];
-               out[6] = workspace2[3] - workspace2[7];
-               out[7] = workspace2[3] + workspace2[7];
-       }
-
-       out = output_block;
-
-       for (i = 0; i < 8; i++, out++) {
-               /* stage 1 */
-               workspace1[0]  = out[0] + out[1 * 8];
-               workspace1[1]  = out[0] - out[1 * 8];
-
-               workspace1[2]  = out[2 * 8] + out[3 * 8];
-               workspace1[3]  = out[2 * 8] - out[3 * 8];
-
-               workspace1[4]  = out[4 * 8] + out[5 * 8];
-               workspace1[5]  = out[4 * 8] - out[5 * 8];
-
-               workspace1[6]  = out[6 * 8] + out[7 * 8];
-               workspace1[7]  = out[6 * 8] - out[7 * 8];
-
-               /* stage 2 */
-               workspace2[0] = workspace1[0] + workspace1[2];
-               workspace2[1] = workspace1[0] - workspace1[2];
-               workspace2[2] = workspace1[1] - workspace1[3];
-               workspace2[3] = workspace1[1] + workspace1[3];
-
-               workspace2[4] = workspace1[4] + workspace1[6];
-               workspace2[5] = workspace1[4] - workspace1[6];
-               workspace2[6] = workspace1[5] - workspace1[7];
-               workspace2[7] = workspace1[5] + workspace1[7];
-
-               /* stage 3 */
-               if (inter) {
-                       int d;
-
-                       out[0 * 8] = workspace2[0] + workspace2[4];
-                       out[1 * 8] = workspace2[0] - workspace2[4];
-                       out[2 * 8] = workspace2[1] - workspace2[5];
-                       out[3 * 8] = workspace2[1] + workspace2[5];
-                       out[4 * 8] = workspace2[2] + workspace2[6];
-                       out[5 * 8] = workspace2[2] - workspace2[6];
-                       out[6 * 8] = workspace2[3] - workspace2[7];
-                       out[7 * 8] = workspace2[3] + workspace2[7];
-
-                       for (d = 0; d < 8; d++)
-                               out[8 * d] >>= 6;
-               } else {
-                       int d;
-
-                       out[0 * 8] = workspace2[0] + workspace2[4];
-                       out[1 * 8] = workspace2[0] - workspace2[4];
-                       out[2 * 8] = workspace2[1] - workspace2[5];
-                       out[3 * 8] = workspace2[1] + workspace2[5];
-                       out[4 * 8] = workspace2[2] + workspace2[6];
-                       out[5 * 8] = workspace2[2] - workspace2[6];
-                       out[6 * 8] = workspace2[3] - workspace2[7];
-                       out[7 * 8] = workspace2[3] + workspace2[7];
-
-                       for (d = 0; d < 8; d++) {
-                               out[8 * d] >>= 6;
-                               out[8 * d] += 128;
-                       }
-               }
-       }
-}
-
-static void fill_encoder_block(const u8 *input, s16 *dst,
-                              unsigned int stride, unsigned int input_step)
-{
-       int i, j;
-
-       for (i = 0; i < 8; i++) {
-               for (j = 0; j < 8; j++, input += input_step)
-                       *dst++ = *input;
-               input += (stride - 8) * input_step;
-       }
-}
-
-static int var_intra(const s16 *input)
-{
-       int32_t mean = 0;
-       int32_t ret = 0;
-       const s16 *tmp = input;
-       int i;
-
-       for (i = 0; i < 8 * 8; i++, tmp++)
-               mean += *tmp;
-       mean /= 64;
-       tmp = input;
-       for (i = 0; i < 8 * 8; i++, tmp++)
-               ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
-       return ret;
-}
-
-static int var_inter(const s16 *old, const s16 *new)
-{
-       int32_t ret = 0;
-       int i;
-
-       for (i = 0; i < 8 * 8; i++, old++, new++)
-               ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
-       return ret;
-}
-
-static int decide_blocktype(const u8 *cur, const u8 *reference,
-                           s16 *deltablock, unsigned int stride,
-                           unsigned int input_step)
-{
-       s16 tmp[64];
-       s16 old[64];
-       s16 *work = tmp;
-       unsigned int k, l;
-       int vari;
-       int vard;
-
-       fill_encoder_block(cur, tmp, stride, input_step);
-       fill_encoder_block(reference, old, 8, 1);
-       vari = var_intra(tmp);
-
-       for (k = 0; k < 8; k++) {
-               for (l = 0; l < 8; l++) {
-                       *deltablock = *work - *reference;
-                       deltablock++;
-                       work++;
-                       reference++;
-               }
-       }
-       deltablock -= 64;
-       vard = var_inter(old, tmp);
-       return vari <= vard ? IBLOCK : PBLOCK;
-}
-
-static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
-{
-       int i, j;
-
-       for (i = 0; i < 8; i++) {
-               for (j = 0; j < 8; j++)
-                       *dst++ = *input++;
-               dst += stride - 8;
-       }
-}
-
-static void add_deltas(s16 *deltas, const u8 *ref, int stride)
-{
-       int k, l;
-
-       for (k = 0; k < 8; k++) {
-               for (l = 0; l < 8; l++) {
-                       *deltas += *ref++;
-                       /*
-                        * Due to quantizing, it might possible that the
-                        * decoded coefficients are slightly out of range
-                        */
-                       if (*deltas < 0)
-                               *deltas = 0;
-                       else if (*deltas > 255)
-                               *deltas = 255;
-                       deltas++;
-               }
-               ref += stride - 8;
-       }
-}
-
-static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
-                       struct cframe *cf, u32 height, u32 width,
-                       unsigned int input_step,
-                       bool is_intra, bool next_is_intra)
-{
-       u8 *input_start = input;
-       __be16 *rlco_start = *rlco;
-       s16 deltablock[64];
-       __be16 pframe_bit = htons(PFRAME_BIT);
-       u32 encoding = 0;
-       unsigned int last_size = 0;
-       unsigned int i, j;
-
-       for (j = 0; j < height / 8; j++) {
-               for (i = 0; i < width / 8; i++) {
-                       /* intra code, first frame is always intra coded. */
-                       int blocktype = IBLOCK;
-                       unsigned int size;
-
-                       if (!is_intra)
-                               blocktype = decide_blocktype(input, refp,
-                                       deltablock, width, input_step);
-                       if (blocktype == IBLOCK) {
-                               fwht(input, cf->coeffs, width, input_step, 1);
-                               quantize_intra(cf->coeffs, cf->de_coeffs,
-                                              cf->i_frame_qp);
-                       } else {
-                               /* inter code */
-                               encoding |= FRAME_PCODED;
-                               fwht16(deltablock, cf->coeffs, 8, 0);
-                               quantize_inter(cf->coeffs, cf->de_coeffs,
-                                              cf->p_frame_qp);
-                       }
-                       if (!next_is_intra) {
-                               ifwht(cf->de_coeffs, cf->de_fwht, blocktype);
-
-                               if (blocktype == PBLOCK)
-                                       add_deltas(cf->de_fwht, refp, 8);
-                               fill_decoder_block(refp, cf->de_fwht, 8);
-                       }
-
-                       input += 8 * input_step;
-                       refp += 8 * 8;
-
-                       size = rlc(cf->coeffs, *rlco, blocktype);
-                       if (last_size == size &&
-                           !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
-                               __be16 *last_rlco = *rlco - size;
-                               s16 hdr = ntohs(*last_rlco);
-
-                               if (!((*last_rlco ^ **rlco) & pframe_bit) &&
-                                   (hdr & DUPS_MASK) < DUPS_MASK)
-                                       *last_rlco = htons(hdr + 2);
-                               else
-                                       *rlco += size;
-                       } else {
-                               *rlco += size;
-                       }
-                       if (*rlco >= rlco_max) {
-                               encoding |= FRAME_UNENCODED;
-                               goto exit_loop;
-                       }
-                       last_size = size;
-               }
-               input += width * 7 * input_step;
-       }
-
-exit_loop:
-       if (encoding & FRAME_UNENCODED) {
-               u8 *out = (u8 *)rlco_start;
-
-               input = input_start;
-               /*
-                * The compressed stream should never contain the magic
-                * header, so when we copy the YUV data we replace 0xff
-                * by 0xfe. Since YUV is limited range such values
-                * shouldn't appear anyway.
-                */
-               for (i = 0; i < height * width; i++, input += input_step)
-                       *out++ = (*input == 0xff) ? 0xfe : *input;
-               *rlco = (__be16 *)out;
-               encoding &= ~FRAME_PCODED;
-       }
-       return encoding;
-}
-
-u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
-                struct cframe *cf, bool is_intra, bool next_is_intra)
-{
-       unsigned int size = frm->height * frm->width;
-       __be16 *rlco = cf->rlc_data;
-       __be16 *rlco_max;
-       u32 encoding;
-       u32 chroma_h = frm->height / frm->height_div;
-       u32 chroma_w = frm->width / frm->width_div;
-       unsigned int chroma_size = chroma_h * chroma_w;
-
-       rlco_max = rlco + size / 2 - 256;
-       encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
-                               frm->height, frm->width,
-                               frm->luma_step, is_intra, next_is_intra);
-       if (encoding & FRAME_UNENCODED)
-               encoding |= LUMA_UNENCODED;
-       encoding &= ~FRAME_UNENCODED;
-       rlco_max = rlco + chroma_size / 2 - 256;
-       encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
-                                chroma_h, chroma_w,
-                                frm->chroma_step, is_intra, next_is_intra);
-       if (encoding & FRAME_UNENCODED)
-               encoding |= CB_UNENCODED;
-       encoding &= ~FRAME_UNENCODED;
-       rlco_max = rlco + chroma_size / 2 - 256;
-       encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
-                                chroma_h, chroma_w,
-                                frm->chroma_step, is_intra, next_is_intra);
-       if (encoding & FRAME_UNENCODED)
-               encoding |= CR_UNENCODED;
-       encoding &= ~FRAME_UNENCODED;
-       cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
-       return encoding;
-}
-
-static void decode_plane(struct cframe *cf, const __be16 **rlco, u8 *ref,
-                        u32 height, u32 width, bool uncompressed)
-{
-       unsigned int copies = 0;
-       s16 copy[8 * 8];
-       s16 stat;
-       unsigned int i, j;
-
-       if (uncompressed) {
-               memcpy(ref, *rlco, width * height);
-               *rlco += width * height / 2;
-               return;
-       }
-
-       /*
-        * When decoding each macroblock the rlco pointer will be increased
-        * by 65 * 2 bytes worst-case.
-        * To avoid overflow the buffer has to be 65/64th of the actual raw
-        * image size, just in case someone feeds it malicious data.
-        */
-       for (j = 0; j < height / 8; j++) {
-               for (i = 0; i < width / 8; i++) {
-                       u8 *refp = ref + j * 8 * width + i * 8;
-
-                       if (copies) {
-                               memcpy(cf->de_fwht, copy, sizeof(copy));
-                               if (stat & PFRAME_BIT)
-                                       add_deltas(cf->de_fwht, refp, width);
-                               fill_decoder_block(refp, cf->de_fwht, width);
-                               copies--;
-                               continue;
-                       }
-
-                       stat = derlc(rlco, cf->coeffs);
-
-                       if (stat & PFRAME_BIT)
-                               dequantize_inter(cf->coeffs);
-                       else
-                               dequantize_intra(cf->coeffs);
-
-                       ifwht(cf->coeffs, cf->de_fwht,
-                             (stat & PFRAME_BIT) ? 0 : 1);
-
-                       copies = (stat & DUPS_MASK) >> 1;
-                       if (copies)
-                               memcpy(copy, cf->de_fwht, sizeof(copy));
-                       if (stat & PFRAME_BIT)
-                               add_deltas(cf->de_fwht, refp, width);
-                       fill_decoder_block(refp, cf->de_fwht, width);
-               }
-       }
-}
-
-void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags)
-{
-       const __be16 *rlco = cf->rlc_data;
-       u32 h = cf->height / 2;
-       u32 w = cf->width / 2;
-
-       if (hdr_flags & VICODEC_FL_CHROMA_FULL_HEIGHT)
-               h *= 2;
-       if (hdr_flags & VICODEC_FL_CHROMA_FULL_WIDTH)
-               w *= 2;
-       decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
-                    hdr_flags & VICODEC_FL_LUMA_IS_UNCOMPRESSED);
-       decode_plane(cf, &rlco, ref->cb, h, w,
-                    hdr_flags & VICODEC_FL_CB_IS_UNCOMPRESSED);
-       decode_plane(cf, &rlco, ref->cr, h, w,
-                    hdr_flags & VICODEC_FL_CR_IS_UNCOMPRESSED);
-}
diff --git a/drivers/media/platform/vicodec/vicodec-codec.h b/drivers/media/platform/vicodec/vicodec-codec.h
deleted file mode 100644 (file)
index ff69d92..0000000
+++ /dev/null
@@ -1,136 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Copyright 2016 Tom aan de Wiel
- * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
- */
-
-#ifndef VICODEC_RLC_H
-#define VICODEC_RLC_H
-
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <asm/byteorder.h>
-
-/*
- * The compressed format consists of a cframe_hdr struct followed by the
- * compressed frame data. The header contains the size of that data.
- * Each Y, Cb and Cr plane is compressed separately. If the compressed
- * size of each plane becomes larger than the uncompressed size, then
- * that plane is stored uncompressed and the corresponding bit is set
- * in the flags field of the header.
- *
- * Each compressed plane consists of macroblocks and each macroblock
- * is run-length-encoded. Each macroblock starts with a 16 bit value.
- * Bit 15 indicates if this is a P-coded macroblock (1) or not (0).
- * P-coded macroblocks contain a delta against the previous frame.
- *
- * Bits 1-12 contain a number. If non-zero, then this same macroblock
- * repeats that number of times. This results in a high degree of
- * compression for generated images like colorbars.
- *
- * Following this macroblock header the MB coefficients are run-length
- * encoded: the top 12 bits contain the coefficient, the bottom 4 bits
- * tell how many times this coefficient occurs. The value 0xf indicates
- * that the remainder of the macroblock should be filled with zeroes.
- *
- * All 16 and 32 bit values are stored in big-endian (network) order.
- *
- * Each cframe_hdr starts with an 8 byte magic header that is
- * guaranteed not to occur in the compressed frame data. This header
- * can be used to sync to the next frame.
- *
- * This codec uses the Fast Walsh Hadamard Transform. Tom aan de Wiel
- * developed this as part of a university project, specifically for use
- * with this driver. His project report can be found here:
- *
- * https://hverkuil.home.xs4all.nl/fwht.pdf
- */
-
-/*
- * Note: bit 0 of the header must always be 0. Otherwise it cannot
- * be guaranteed that the magic 8 byte sequence (see below) can
- * never occur in the rlc output.
- */
-#define PFRAME_BIT (1 << 15)
-#define DUPS_MASK 0x1ffe
-
-/*
- * This is a sequence of 8 bytes with the low 4 bits set to 0xf.
- *
- * This sequence cannot occur in the encoded data
- */
-#define VICODEC_MAGIC1 0x4f4f4f4f
-#define VICODEC_MAGIC2 0xffffffff
-
-#define VICODEC_VERSION 1
-
-#define VICODEC_MAX_WIDTH 3840
-#define VICODEC_MAX_HEIGHT 2160
-#define VICODEC_MIN_WIDTH 640
-#define VICODEC_MIN_HEIGHT 480
-
-#define PBLOCK 0
-#define IBLOCK 1
-
-/* Set if this is an interlaced format */
-#define VICODEC_FL_IS_INTERLACED       BIT(0)
-/* Set if this is a bottom-first (NTSC) interlaced format */
-#define VICODEC_FL_IS_BOTTOM_FIRST     BIT(1)
-/* Set if each 'frame' contains just one field */
-#define VICODEC_FL_IS_ALTERNATE                BIT(2)
-/*
- * If VICODEC_FL_IS_ALTERNATE was set, then this is set if this
- * 'frame' is the bottom field, else it is the top field.
- */
-#define VICODEC_FL_IS_BOTTOM_FIELD     BIT(3)
-/* Set if this frame is uncompressed */
-#define VICODEC_FL_LUMA_IS_UNCOMPRESSED        BIT(4)
-#define VICODEC_FL_CB_IS_UNCOMPRESSED  BIT(5)
-#define VICODEC_FL_CR_IS_UNCOMPRESSED  BIT(6)
-#define VICODEC_FL_CHROMA_FULL_HEIGHT  BIT(7)
-#define VICODEC_FL_CHROMA_FULL_WIDTH   BIT(8)
-
-struct cframe_hdr {
-       u32 magic1;
-       u32 magic2;
-       __be32 version;
-       __be32 width, height;
-       __be32 flags;
-       __be32 colorspace;
-       __be32 xfer_func;
-       __be32 ycbcr_enc;
-       __be32 quantization;
-       __be32 size;
-};
-
-struct cframe {
-       unsigned int width, height;
-       u16 i_frame_qp;
-       u16 p_frame_qp;
-       __be16 *rlc_data;
-       s16 coeffs[8 * 8];
-       s16 de_coeffs[8 * 8];
-       s16 de_fwht[8 * 8];
-       u32 size;
-};
-
-struct raw_frame {
-       unsigned int width, height;
-       unsigned int width_div;
-       unsigned int height_div;
-       unsigned int luma_step;
-       unsigned int chroma_step;
-       u8 *luma, *cb, *cr;
-};
-
-#define FRAME_PCODED   BIT(0)
-#define FRAME_UNENCODED        BIT(1)
-#define LUMA_UNENCODED BIT(2)
-#define CB_UNENCODED   BIT(3)
-#define CR_UNENCODED   BIT(4)
-
-u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
-                struct cframe *cf, bool is_intra, bool next_is_intra);
-void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags);
-
-#endif
index caff521d94c6a8d64ec89fe53531636a704c786e..4f2c35533e08a2a389adee6114d5bc440d1b1a52 100644 (file)
@@ -23,7 +23,7 @@
 #include <media/v4l2-event.h>
 #include <media/videobuf2-vmalloc.h>
 
-#include "vicodec-codec.h"
+#include "codec-fwht.h"
 
 MODULE_DESCRIPTION("Virtual codec device");
 MODULE_AUTHOR("Hans Verkuil <hans.verkuil@cisco.com>");
@@ -152,7 +152,7 @@ struct vicodec_ctx {
 
        /* Source and destination queue data */
        struct vicodec_q_data   q_data[2];
-       struct raw_frame        ref_frame;
+       struct fwht_raw_frame   ref_frame;
        u8                      *compressed_frame;
        u32                     cur_buf_offset;
        u32                     comp_max_size;
@@ -191,9 +191,9 @@ static void encode(struct vicodec_ctx *ctx,
 {
        unsigned int size = q_data->width * q_data->height;
        const struct pixfmt_info *info = q_data->info;
-       struct cframe_hdr *p_hdr;
-       struct cframe cf;
-       struct raw_frame rf;
+       struct fwht_cframe_hdr *p_hdr;
+       struct fwht_cframe cf;
+       struct fwht_raw_frame rf;
        u32 encoding;
 
        rf.width = q_data->width;
@@ -279,29 +279,29 @@ static void encode(struct vicodec_ctx *ctx,
        cf.p_frame_qp = ctx->p_frame_qp;
        cf.rlc_data = (__be16 *)(p_out + sizeof(*p_hdr));
 
-       encoding = encode_frame(&rf, &ctx->ref_frame, &cf, !ctx->gop_cnt,
-                               ctx->gop_cnt == ctx->gop_size - 1);
-       if (!(encoding & FRAME_PCODED))
+       encoding = fwht_encode_frame(&rf, &ctx->ref_frame, &cf, !ctx->gop_cnt,
+                                    ctx->gop_cnt == ctx->gop_size - 1);
+       if (!(encoding & FWHT_FRAME_PCODED))
                ctx->gop_cnt = 0;
        if (++ctx->gop_cnt >= ctx->gop_size)
                ctx->gop_cnt = 0;
 
-       p_hdr = (struct cframe_hdr *)p_out;
-       p_hdr->magic1 = VICODEC_MAGIC1;
-       p_hdr->magic2 = VICODEC_MAGIC2;
-       p_hdr->version = htonl(VICODEC_VERSION);
+       p_hdr = (struct fwht_cframe_hdr *)p_out;
+       p_hdr->magic1 = FWHT_MAGIC1;
+       p_hdr->magic2 = FWHT_MAGIC2;
+       p_hdr->version = htonl(FWHT_VERSION);
        p_hdr->width = htonl(cf.width);
        p_hdr->height = htonl(cf.height);
-       if (encoding & LUMA_UNENCODED)
-               flags |= VICODEC_FL_LUMA_IS_UNCOMPRESSED;
-       if (encoding & CB_UNENCODED)
-               flags |= VICODEC_FL_CB_IS_UNCOMPRESSED;
-       if (encoding & CR_UNENCODED)
-               flags |= VICODEC_FL_CR_IS_UNCOMPRESSED;
+       if (encoding & FWHT_LUMA_UNENCODED)
+               flags |= FWHT_FL_LUMA_IS_UNCOMPRESSED;
+       if (encoding & FWHT_CB_UNENCODED)
+               flags |= FWHT_FL_CB_IS_UNCOMPRESSED;
+       if (encoding & FWHT_CR_UNENCODED)
+               flags |= FWHT_FL_CR_IS_UNCOMPRESSED;
        if (rf.height_div == 1)
-               flags |= VICODEC_FL_CHROMA_FULL_HEIGHT;
+               flags |= FWHT_FL_CHROMA_FULL_HEIGHT;
        if (rf.width_div == 1)
-               flags |= VICODEC_FL_CHROMA_FULL_WIDTH;
+               flags |= FWHT_FL_CHROMA_FULL_WIDTH;
        p_hdr->flags = htonl(flags);
        p_hdr->colorspace = htonl(ctx->colorspace);
        p_hdr->xfer_func = htonl(ctx->xfer_func);
@@ -320,11 +320,11 @@ static int decode(struct vicodec_ctx *ctx,
        unsigned int chroma_size = size;
        unsigned int i;
        u32 flags;
-       struct cframe_hdr *p_hdr;
-       struct cframe cf;
+       struct fwht_cframe_hdr *p_hdr;
+       struct fwht_cframe cf;
        u8 *p;
 
-       p_hdr = (struct cframe_hdr *)p_in;
+       p_hdr = (struct fwht_cframe_hdr *)p_in;
        cf.width = ntohl(p_hdr->width);
        cf.height = ntohl(p_hdr->height);
        flags = ntohl(p_hdr->flags);
@@ -334,13 +334,13 @@ static int decode(struct vicodec_ctx *ctx,
        ctx->quantization = ntohl(p_hdr->quantization);
        cf.rlc_data = (__be16 *)(p_in + sizeof(*p_hdr));
 
-       if (p_hdr->magic1 != VICODEC_MAGIC1 ||
-           p_hdr->magic2 != VICODEC_MAGIC2 ||
-           ntohl(p_hdr->version) != VICODEC_VERSION ||
-           cf.width < VICODEC_MIN_WIDTH ||
-           cf.width > VICODEC_MAX_WIDTH ||
-           cf.height < VICODEC_MIN_HEIGHT ||
-           cf.height > VICODEC_MAX_HEIGHT ||
+       if (p_hdr->magic1 != FWHT_MAGIC1 ||
+           p_hdr->magic2 != FWHT_MAGIC2 ||
+           ntohl(p_hdr->version) != FWHT_VERSION ||
+           cf.width < MIN_WIDTH ||
+           cf.width > MAX_WIDTH ||
+           cf.height < MIN_HEIGHT ||
+           cf.height > MAX_HEIGHT ||
            (cf.width & 7) || (cf.height & 7))
                return -EINVAL;
 
@@ -348,12 +348,12 @@ static int decode(struct vicodec_ctx *ctx,
        if (cf.width != q_data->width || cf.height != q_data->height)
                return -EINVAL;
 
-       if (!(flags & VICODEC_FL_CHROMA_FULL_WIDTH))
+       if (!(flags & FWHT_FL_CHROMA_FULL_WIDTH))
                chroma_size /= 2;
-       if (!(flags & VICODEC_FL_CHROMA_FULL_HEIGHT))
+       if (!(flags & FWHT_FL_CHROMA_FULL_HEIGHT))
                chroma_size /= 2;
 
-       decode_frame(&cf, &ctx->ref_frame, flags);
+       fwht_decode_frame(&cf, &ctx->ref_frame, flags);
 
        switch (q_data->info->id) {
        case V4L2_PIX_FMT_YUV420:
@@ -484,7 +484,7 @@ static int device_process(struct vicodec_ctx *ctx,
        }
 
        if (ctx->is_enc) {
-               struct cframe_hdr *p_hdr = (struct cframe_hdr *)p_out;
+               struct fwht_cframe_hdr *p_hdr = (struct fwht_cframe_hdr *)p_out;
 
                encode(ctx, q_out, p_in, p_out, 0);
                vb2_set_plane_payload(&out_vb->vb2_buf, 0,
@@ -635,9 +635,10 @@ restart:
                }
                ctx->comp_size = sizeof(magic);
        }
-       if (ctx->comp_size < sizeof(struct cframe_hdr)) {
-               struct cframe_hdr *p_hdr = (struct cframe_hdr *)ctx->compressed_frame;
-               u32 copy = sizeof(struct cframe_hdr) - ctx->comp_size;
+       if (ctx->comp_size < sizeof(struct fwht_cframe_hdr)) {
+               struct fwht_cframe_hdr *p_hdr =
+                       (struct fwht_cframe_hdr *)ctx->compressed_frame;
+               u32 copy = sizeof(struct fwht_cframe_hdr) - ctx->comp_size;
 
                if (copy > p_out + sz - p)
                        copy = p_out + sz - p;
@@ -645,7 +646,7 @@ restart:
                       p, copy);
                p += copy;
                ctx->comp_size += copy;
-               if (ctx->comp_size < sizeof(struct cframe_hdr)) {
+               if (ctx->comp_size < sizeof(struct fwht_cframe_hdr)) {
                        job_remove_out_buf(ctx, state);
                        goto restart;
                }
@@ -670,8 +671,8 @@ restart:
        ctx->cur_buf_offset = p - p_out;
        ctx->comp_has_frame = true;
        ctx->comp_has_next_frame = false;
-       if (sz - ctx->cur_buf_offset >= sizeof(struct cframe_hdr)) {
-               struct cframe_hdr *p_hdr = (struct cframe_hdr *)p;
+       if (sz - ctx->cur_buf_offset >= sizeof(struct fwht_cframe_hdr)) {
+               struct fwht_cframe_hdr *p_hdr = (struct fwht_cframe_hdr *)p;
                u32 frame_size = ntohl(p_hdr->size);
                u32 remaining = sz - ctx->cur_buf_offset - sizeof(*p_hdr);
 
@@ -845,7 +846,7 @@ static int vidioc_try_fmt(struct vicodec_ctx *ctx, struct v4l2_format *f)
                pix->sizeimage = pix->width * pix->height *
                        info->sizeimage_mult / info->sizeimage_div;
                if (pix->pixelformat == V4L2_PIX_FMT_FWHT)
-                       pix->sizeimage += sizeof(struct cframe_hdr);
+                       pix->sizeimage += sizeof(struct fwht_cframe_hdr);
                break;
        case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
        case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
@@ -863,7 +864,7 @@ static int vidioc_try_fmt(struct vicodec_ctx *ctx, struct v4l2_format *f)
                plane->sizeimage = pix_mp->width * pix_mp->height *
                        info->sizeimage_mult / info->sizeimage_div;
                if (pix_mp->pixelformat == V4L2_PIX_FMT_FWHT)
-                       plane->sizeimage += sizeof(struct cframe_hdr);
+                       plane->sizeimage += sizeof(struct fwht_cframe_hdr);
                memset(pix_mp->reserved, 0, sizeof(pix_mp->reserved));
                memset(plane->reserved, 0, sizeof(plane->reserved));
                break;
@@ -1308,7 +1309,7 @@ static int vicodec_start_streaming(struct vb2_queue *q,
        ctx->ref_frame.width = ctx->ref_frame.height = 0;
        ctx->ref_frame.luma = kvmalloc(size + 2 * size / chroma_div, GFP_KERNEL);
        ctx->comp_max_size = size + 2 * size / chroma_div +
-                            sizeof(struct cframe_hdr);
+                            sizeof(struct fwht_cframe_hdr);
        ctx->compressed_frame = kvmalloc(ctx->comp_max_size, GFP_KERNEL);
        if (!ctx->ref_frame.luma || !ctx->compressed_frame) {
                kvfree(ctx->ref_frame.luma);
@@ -1493,7 +1494,7 @@ static int vicodec_open(struct file *file)
        ctx->q_data[V4L2_M2M_DST].sizeimage = size;
        ctx->colorspace = V4L2_COLORSPACE_REC709;
 
-       size += sizeof(struct cframe_hdr);
+       size += sizeof(struct fwht_cframe_hdr);
        if (ctx->is_enc) {
                ctx->q_data[V4L2_M2M_DST].sizeimage = size;
                ctx->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->enc_dev, ctx,