/* Copyright (C) 1994, MPEG Software Simulation Group. All Rights Reserved. */

/*
 * Disclaimer of Warranty
 *
 * These software programs are available to the user without any license fee or
 * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
 * any and all warranties, whether express, implied, or statuary, including any
 * implied warranties or merchantability or of fitness for a particular
 * purpose.  In no event shall the copyright-holder be liable for any
 * incidental, punitive, or consequential damages of any kind whatsoever
 * arising from the use of these programs.
 *
 * This disclaimer of warranty extends to the user of these programs and user's
 * customers, employees, agents, transferees, successors, and assigns.
 *
 * The MPEG Software Simulation Group does not represent or warrant that the
 * programs furnished hereunder are free of infringement of any third-party
 * patents.
 *
 * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
 * are subject to royalty fees to patent holders.  Many of these patents are
 * general enough such that they are unavoidable regardless of implementation
 * design.
 *
 */


 /* these routines are closely modeled after those from
  * mpeg_play 2.0 by the Berkeley Plateau Research Group
  */

#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

#include "config.h"
#include "global.h"

/* private prototypes
 *
 * _ANSI_ARGS_ is not defined in this file; presumably config.h/global.h
 * supplies it to drop the parameter lists for pre-ANSI compilers - verify.
 *
 * NOTE(review): only ditherframe, dithertop and ditherbot have definitions
 * visible in this file; the *420 and *RGB routines and init_ditherRGB are
 * declared here but no definition is visible.
 */
static void ditherframe _ANSI_ARGS_((unsigned char *src[]));
static void dithertop _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbot _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void dithertop420 _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbot420 _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherframeRGB _ANSI_ARGS_((unsigned char *src[]));
static void dithertopRGB _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void ditherbotRGB _ANSI_ARGS_((unsigned char *src[], unsigned char *dst));
static void init_ditherRGB _ANSI_ARGS_((void));

/* local data */

/* Output buffers allocated by init_dither(): dithered_image receives the
 * frame (or the first displayed field) and is shown by dither();
 * dithered_image2 receives the other field and is shown by
 * display_second_field().
 */
static unsigned char *dithered_image, *dithered_image2;

/* Quantisation tables filled by init_dither().
 * ytab is indexed by luma + threshold (threshold 0..15) and holds a level
 * of 1..14 in bits 4-7.  utab and vtab are indexed by (chroma>>1) +
 * threshold and hold a level of 0..3 in bits 2-3 (utab) and bits 0-1 (vtab).
 */
static unsigned char ytab[256+16];
static unsigned char utab[128+16];
static unsigned char vtab[128+16];
/* Maps the OR of the three table entries to the value stored in the output
 * buffer; init_dither() fills it as the identity mapping.
 */
static unsigned char pixel[256];

/* Conversion routines selected by init_dither(): one for whole frames and
 * one per field (called with the plane pointers and an output buffer).
 */
typedef void (*DITHER_FRAME_FUNC)(unsigned char **);
typedef void (*DITHER_FIELD_FUNC)(unsigned char **,unsigned char *);
static DITHER_FRAME_FUNC dither_frame;
static DITHER_FIELD_FUNC dither_even, dither_odd;

/* color space conversion coefficients
 *
 * entries are {crv,cbu,cgu,cgv}
 *
 * crv=(255/224)*65536*(1-cr)/0.5
 * cbu=(255/224)*65536*(1-cb)/0.5
 * cgu=(255/224)*65536*(cb/cg)*(1-cb)/0.5
 * cgv=(255/224)*65536*(cr/cg)*(1-cr)/0.5
 *
 * where Y=cr*R+cg*G+cb*B (cr+cg+cb=1)
 */
/*
 * Conversion matrix (disabled, kept for reference only):
 *
 * int convmat[8][4] = {
 *   {117504, 138453, 13954, 34903},    no sequence_display_extension
 *   {117504, 138453, 13954, 34903},    ITU-R Rec. 709 (1990)
 *   {104597, 132201, 25675, 53279},    unspecified
 *   {104597, 132201, 25675, 53279},    reserved
 *   {104448, 132798, 24759, 53109},    FCC
 *   {104597, 132201, 25675, 53279},    ITU-R Rec. 624-4 System B, G
 *   {104597, 132201, 25675, 53279},    SMPTE 170M
 *   {117579, 136230, 16907, 35559}     SMPTE 240M (1987)
 * };
 */

/* 4x4 ordered dither
 *
 * threshold pattern:
 *   0  8  2 10
 *  12  4 14  6
 *   3 11  1  9
 *  15  7 13  5
 */

void dither(src)
unsigned char *src[];
{
  if (prog_seq)
     dither_frame(src);
  else
  {
    if ((pict_struct==FRAME_PICTURE && topfirst) || pict_struct==BOTTOM_FIELD)
    {
      /* top field first */
      dither_even(src,dithered_image);
      dither_odd (src,dithered_image2);
    }
    else
    {
      /* bottom field first */
      dither_odd (src,dithered_image);
      dither_even(src,dithered_image2);
    }
  }
  display_image(dithered_image);
}

/*
 * ditherframe - ordered-dither a complete frame into dithered_image
 *
 * src[0] is read as the luma plane, src[1] and src[2] as the two chroma
 * planes.  The picture is walked in groups of four lines and four pixels;
 * each pixel adds the 4x4 threshold for its position to the luma and to the
 * halved chroma samples before looking them up in ytab/utab/vtab.
 *
 * Chroma is fetched for every pixel when chroma_format is CHROMA444 and for
 * every second pixel otherwise.  For CHROMA420 the chroma pointers are
 * stepped back by chrom_width after the first and third line of a group so
 * that the following line reads the same chroma samples again.
 *
 * With _WIN32 defined the output is written bottom-up: it starts on the last
 * line of the buffer and moves one line back after each line written.
 */
static void ditherframe(src)
unsigned char *src[];
{
  /* dither thresholds: one row per line of a group, one entry per pixel */
  static const int thresh[4][4] = {
    { 0,  8,  2, 10},
    {12,  4, 14,  6},
    { 3, 11,  1,  9},
    {15,  7, 13,  5}
  };
  const int *thr;
  unsigned char *ysrc,*usrc,*vsrc,*out;
  int grp,ln,col,k;
  int lum,cu,cv;

  ysrc = src[0];
  usrc = src[1];
  vsrc = src[2];

#ifdef _WIN32
  out = dithered_image+(coded_picture_height-1)*coded_picture_width;
#else
  out = dithered_image;
#endif

  for (grp=0; grp<coded_picture_height; grp+=4)
  {
    for (ln=0; ln<4; ln++)
    {
      thr = thresh[ln];

      for (col=0; col<coded_picture_width; col+=4)
      {
        for (k=0; k<4; k++)
        {
          lum = *ysrc++;

          /* pixels 0 and 2 always fetch chroma, 1 and 3 only for 4:4:4 */
          if (!(k&1) || chroma_format==CHROMA444)
          {
            cu = *usrc++ >> 1;
            cv = *vsrc++ >> 1;
          }

          *out++ = pixel[ytab[lum+thr[k]]|utab[cu+thr[k]]|vtab[cv+thr[k]]];
        }
      }

      /* 4:2:0: lines 0 and 2 of a group share chroma with the next line */
      if (!(ln&1) && chroma_format==CHROMA420)
      {
        usrc -= chrom_width;
        vsrc -= chrom_width;
      }

#ifdef _WIN32
      /* back over the line just written and on to the one before it */
      out -= 2*coded_picture_width;
#endif
    }
  }
}

/*
 * dithertop - ordered-dither a full-height picture built from the lines
 * src line 0, 2, 4, ... (the field that starts on the first line)
 *
 * src: plane pointers (luma, then the two chroma planes)
 * dst: output buffer of coded_picture_width*coded_picture_height bytes
 *
 * Each pass of the inner loop produces two output lines: one taken directly
 * from the current source line and one interpolated as the average of that
 * line and the source line two rows further down.  For the very last pair
 * the second luma pointer is stepped back so that the line is averaged with
 * itself.
 *
 * Chroma is fetched for every pixel when chroma_format is CHROMA444 and for
 * every second pixel otherwise; both output lines of a pass use the same
 * chroma samples.  After the first pass of a group the chroma pointers go
 * back by chrom_width for CHROMA420 and forward by chrom_width otherwise;
 * after the second pass they always go forward by chrom_width.
 *
 * With _WIN32 defined the output is written bottom-up.
 */
static void dithertop(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  /* dither thresholds: one row per output line of a group */
  static const int thresh[4][4] = {
    { 0,  8,  2, 10},
    {12,  4, 14,  6},
    { 3, 11,  1,  9},
    {15,  7, 13,  5}
  };
  const int *thr_a,*thr_b;
  unsigned char *ycur,*ynext,*usrc,*vsrc,*dst_b;
  int grp,pass,col,k;
  int lum,lum2,cu,cv;

  ycur  = src[0];
  ynext = src[0] + (coded_picture_width<<1);
  usrc  = src[1];
  vsrc  = src[2];

#ifdef _WIN32
  dst += (coded_picture_height-1)*coded_picture_width;
  dst_b = dst - coded_picture_width;
#else
  dst_b = dst + coded_picture_width;
#endif

  for (grp=0; grp<coded_picture_height; grp+=4)
  {
    /* pass 0: lines grp+0, grp+1 -- pass 1: lines grp+2, grp+3 */
    for (pass=0; pass<2; pass++)
    {
      thr_a = thresh[2*pass];     /* copied line */
      thr_b = thresh[2*pass+1];   /* interpolated line */

      for (col=0; col<coded_picture_width; col+=4)
      {
        for (k=0; k<4; k++)
        {
          lum  = *ycur++;
          lum2 = *ynext++;

          /* pixels 0 and 2 always fetch chroma, 1 and 3 only for 4:4:4 */
          if (!(k&1) || chroma_format==CHROMA444)
          {
            cu = *usrc++ >> 1;
            cv = *vsrc++ >> 1;
          }

          *dst++   = pixel[ytab[lum+thr_a[k]]|utab[cu+thr_a[k]]|vtab[cv+thr_a[k]]];
          *dst_b++ = pixel[ytab[((lum+lum2)>>1)+thr_b[k]]
                           |utab[cu+thr_b[k]]|vtab[cv+thr_b[k]]];
        }
      }

      /* skip the line belonging to the other field */
      ycur += coded_picture_width;

      /* before the last pair there is no further line to average with */
      if (pass==0 && grp==(coded_picture_height-4))
        ynext -= coded_picture_width;
      else
        ynext += coded_picture_width;

#ifdef _WIN32
      dst   = dst_b - 2*coded_picture_width;
      dst_b = dst   - coded_picture_width;
#else
      dst   += coded_picture_width;
      dst_b += coded_picture_width;
#endif

      if (pass==0 && chroma_format==CHROMA420)
      {
        usrc -= chrom_width;
        vsrc -= chrom_width;
      }
      else
      {
        usrc += chrom_width;
        vsrc += chrom_width;
      }
    }
  }
}

/*
 * ditherbot - ordered-dither a full-height picture built from the lines
 * src line 1, 3, 5, ... (the field that starts on the second line)
 *
 * src: plane pointers (luma, then the two chroma planes)
 * dst: output buffer of coded_picture_width*coded_picture_height bytes
 *
 * Each pass of the inner loop produces two output lines: an interpolated
 * line (average of the field line above and the field line below) followed
 * by a direct copy of the field line below.  Both luma pointers start on
 * source line 1, so the very first output line is that line averaged with
 * itself; after the first pass the "above" pointer is stepped back once so
 * that it trails the "below" pointer by one field line from then on.
 *
 * Chroma starts one chroma line down (chrom_width) and is fetched for every
 * pixel when chroma_format is CHROMA444 and for every second pixel
 * otherwise; both output lines of a pass use the same chroma samples.  After
 * the first pass of a group the chroma pointers go back by chrom_width for
 * CHROMA420 and forward by chrom_width otherwise; after the second pass they
 * always go forward by chrom_width.
 *
 * With _WIN32 defined the output is written bottom-up.
 *
 * Fix: the second pixel of every 4-pixel group on the first interpolated
 * line used the raw "above" sample (y+8) instead of the average; all
 * interpolated pixels now use ((above+below)>>1) + threshold.
 */
static void ditherbot(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  /* dither thresholds: one row per output line of a group */
  static const int thresh[4][4] = {
    { 0,  8,  2, 10},
    {12,  4, 14,  6},
    { 3, 11,  1,  9},
    {15,  7, 13,  5}
  };
  const int *thr_a,*thr_b;
  unsigned char *yabove,*ybelow,*usrc,*vsrc,*dst_b;
  int grp,pass,col,k;
  int lum,lum2,cu,cv;

  yabove = src[0] + coded_picture_width;
  ybelow = yabove;
  usrc = src[1] + chrom_width;
  vsrc = src[2] + chrom_width;

#ifdef _WIN32
  dst += (coded_picture_height-1)*coded_picture_width;
  dst_b = dst - coded_picture_width;
#else
  dst_b = dst + coded_picture_width;
#endif

  for (grp=0; grp<coded_picture_height; grp+=4)
  {
    /* pass 0: lines grp+0, grp+1 -- pass 1: lines grp+2, grp+3 */
    for (pass=0; pass<2; pass++)
    {
      thr_a = thresh[2*pass];     /* interpolated line */
      thr_b = thresh[2*pass+1];   /* copied line */

      for (col=0; col<coded_picture_width; col+=4)
      {
        for (k=0; k<4; k++)
        {
          lum  = *yabove++;
          lum2 = *ybelow++;

          /* pixels 0 and 2 always fetch chroma, 1 and 3 only for 4:4:4 */
          if (!(k&1) || chroma_format==CHROMA444)
          {
            cu = *usrc++ >> 1;
            cv = *vsrc++ >> 1;
          }

          *dst++   = pixel[ytab[((lum+lum2)>>1)+thr_a[k]]
                           |utab[cu+thr_a[k]]|vtab[cv+thr_a[k]]];
          *dst_b++ = pixel[ytab[lum2+thr_b[k]]|utab[cu+thr_b[k]]|vtab[cv+thr_b[k]]];
        }
      }

      /* after the very first pass "above" has to stay on source line 1 */
      if (pass==0 && grp==0)
        yabove -= coded_picture_width;
      else
        yabove += coded_picture_width;

      /* skip the line belonging to the other field */
      ybelow += coded_picture_width;

#ifdef _WIN32
      dst   = dst_b - 2*coded_picture_width;
      dst_b = dst   - coded_picture_width;
#else
      dst   += coded_picture_width;
      dst_b += coded_picture_width;
#endif

      if (pass==0 && chroma_format==CHROMA420)
      {
        usrc -= chrom_width;
        vsrc -= chrom_width;
      }
      else
      {
        usrc += chrom_width;
        vsrc += chrom_width;
      }
    }
  }
}

/*
 * init_dither - select the conversion routines, allocate the two output
 * buffers and build the quantisation tables
 *
 * bpp: bits per output pixel.  Only 8 is handled; any other value is
 *      reported through error().
 *
 * Both buffers hold (bpp/8)*coded_picture_width*coded_picture_height bytes;
 * an allocation failure is reported through error().
 * NOTE(review): error() is presumably fatal - if it returns, execution
 * simply continues here; confirm against its definition.
 *
 * Tables (same contents as before, but built without right-shifting
 * negative values, whose result is implementation-defined in C):
 *   ytab[n] = clamp((n-8)>>4, 1, 14) << 4     n = luma + threshold
 *   utab[n] = clamp((n-40)>>4, 0, 3) << 2     n = (chroma>>1) + threshold
 *   vtab[n] = clamp((n-40)>>4, 0, 3)
 *   pixel[n] = n
 */
void init_dither(int bpp)
{
  int i, v;
  size_t frame_bytes;

  if ( bpp==8 )
  {
     dither_frame=ditherframe;
     dither_even =dithertop;
     dither_odd  =ditherbot;
  }
  else
     error("unsuported dither type");

  bpp/=8;

  /* multiply in size_t so the byte count cannot overflow a signed int */
  frame_bytes = (size_t)bpp*(size_t)coded_picture_width*
                (size_t)coded_picture_height;

  if(!(dithered_image = (unsigned char *)malloc(frame_bytes)))
    error("malloc failed");

  if(!(dithered_image2 = (unsigned char *)malloc(frame_bytes)))
    error("malloc failed");

  for (i=0; i<256+16; i++)
  {
    if (i<8+16)
      v = 1;              /* (i-8)>>4 would be below 1 (or negative) */
    else
    {
      v = (i-8)>>4;
      if (v>14)
        v = 14;
    }
    ytab[i] = (unsigned char)(v<<4);
  }

  for (i=0; i<128+16; i++)
  {
    if (i<40)
      v = 0;              /* (i-40)>>4 would be negative */
    else
    {
      v = (i-40)>>4;
      if (v>3)
        v = 3;
    }
    utab[i] = (unsigned char)(v<<2);
    vtab[i] = (unsigned char)v;
  }

  for (i=0; i<256; i++)
     pixel[i]=(unsigned char)i;
}

/*
 * display_second_field - portable display function for the second field
 *
 * Passes dithered_image2 to display_image().  dither() fills that buffer
 * with the field that comes second whenever prog_seq is zero; when prog_seq
 * is set dither() does not write to it.
 */
void display_second_field()
{
  display_image(dithered_image2);
}

#if 0
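//
// Disabled alternative implementation (excluded from the build by #if 0).
// This code does not work and is kept for reference only.
// One visible problem: it indexes ytab with values up to ((255+15)<<4)|15,
// far beyond the 256+16 entries ytab is declared with above.
//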

/* Combined chroma table for the disabled implementation, filled by the
 * disabled init_dither() below.  It is indexed by (u<<8)|v plus an offset;
 * because 256*(u+k)+(v+k) == 256*u+v+257*k, an offset of 257*k adds k to
 * both chroma samples.  The low nibble of an entry holds the two 2-bit
 * chroma levels for (u,v), the high nibble those for (u+16,v+16).
 */
static unsigned char uvtab[256*269+270];

/*
 * ditherframe (disabled variant) - dither a whole frame into dithered_image
 *
 * Differences from the active version visible in the code: eight pixels are
 * handled per inner iteration with one chroma pair per two pixels, there is
 * no CHROMA444 branch, the result of the ytab lookup is stored directly
 * (pixel[] is not used), and ytab is indexed by ((luma+threshold)<<4)|nibble,
 * which needs 16 entries per luma value.
 */
static void ditherframe(src)
unsigned char *src[];
{
  int i,j;
  unsigned int uv;
  unsigned char *py,*pu,*pv,*dst;

  py = src[0];
  pu = src[1];
  pv = src[2];

#ifdef _WIN32
  /* bottom-up output: start on the last line */
  dst = dithered_image+(coded_picture_height-1)*coded_picture_width;
#else
  dst = dithered_image;
#endif

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0 */
    for (i=0; i<coded_picture_width; i+=8)
    {
      uv = uvtab[(*pu++<<8)|*pv++];
      *dst++ = ytab[((*py++)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +8)<<4)|(uv>>4)];
      uv = uvtab[((*pu++<<8)|*pv++)+1028];
      *dst++ = ytab[((*py++ +2)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +10)<<4)|(uv>>4)];
      uv = uvtab[(*pu++<<8)|*pv++];
      *dst++ = ytab[((*py++)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +8)<<4)|(uv>>4)];
      uv = uvtab[((*pu++<<8)|*pv++)+1028];
      *dst++ = ytab[((*py++ +2)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +10)<<4)|(uv>>4)];
    }

    /* 4:2:0: the next line reads the same chroma samples again */
    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }
#ifdef _WIN32
    dst -= 2*coded_picture_width;
#endif

    /* line j + 1 */
    for (i=0; i<coded_picture_width; i+=8)
    {
      uv = uvtab[((*pu++<<8)|*pv++)+2056];
      *dst++ = ytab[((*py++ +12)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +4)<<4)|(uv&15)];
      uv = uvtab[((*pu++<<8)|*pv++)+3084];
      *dst++ = ytab[((*py++ +14)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +6)<<4)|(uv&15)];
      uv = uvtab[((*pu++<<8)|*pv++)+2056];
      *dst++ = ytab[((*py++ +12)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +4)<<4)|(uv&15)];
      uv = uvtab[((*pu++<<8)|*pv++)+3084];
      *dst++ = ytab[((*py++ +14)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +6)<<4)|(uv&15)];
    }
#ifdef _WIN32
    dst -= 2*coded_picture_width;
#endif

    /* line j + 2 */
    for (i=0; i<coded_picture_width; i+=8)
    {
      uv = uvtab[((*pu++<<8)|*pv++)+1542];
      *dst++ = ytab[((*py++ +3)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +11)<<4)|(uv>>4)];
      uv = uvtab[((*pu++<<8)|*pv++)+514];
      *dst++ = ytab[((*py++ +1)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +9)<<4)|(uv>>4)];
      uv = uvtab[((*pu++<<8)|*pv++)+1542];
      *dst++ = ytab[((*py++ +3)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +11)<<4)|(uv>>4)];
      uv = uvtab[((*pu++<<8)|*pv++)+514];
      *dst++ = ytab[((*py++ +1)<<4)|(uv&15)];
      *dst++ = ytab[((*py++ +9)<<4)|(uv>>4)];
    }

    /* 4:2:0: the next line reads the same chroma samples again */
    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }
#ifdef _WIN32
    dst -= 2*coded_picture_width;
#endif

    /* line j + 3 */
    for (i=0; i<coded_picture_width; i+=8)
    {
      uv = uvtab[((*pu++<<8)|*pv++)+3598];
      *dst++ = ytab[((*py++ +15)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +7)<<4)|(uv&15)];
      uv = uvtab[((*pu++<<8)|*pv++)+2570];
      *dst++ = ytab[((*py++ +13)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +5)<<4)|(uv&15)];
      uv = uvtab[((*pu++<<8)|*pv++)+3598];
      *dst++ = ytab[((*py++ +15)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +7)<<4)|(uv&15)];
      uv = uvtab[((*pu++<<8)|*pv++)+2570];
      *dst++ = ytab[((*py++ +13)<<4)|(uv>>4)];
      *dst++ = ytab[((*py++ +5)<<4)|(uv&15)];
    }
#ifdef _WIN32
    dst -= 2*coded_picture_width;
#endif
  }
}

/*
 * dithertop (disabled variant) - dither the field starting on source line 0
 * into a full-height picture at dst
 *
 * Line handling matches the active dithertop: each pass writes one line
 * copied from py and one line averaged from py and py2 (two source lines
 * further down).  Chroma goes through uvtab (one lookup per two pixels and
 * output line, no CHROMA444 branch) and the final value comes straight from
 * ytab indexed by ((luma+threshold)<<4)|nibble.
 *
 * NOTE(review): the uvtab offset 3072 used below is not a multiple of 257,
 * unlike every other offset in this disabled code; the disabled ditherframe
 * uses 3084 at the corresponding position.
 */
static void dithertop(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  unsigned int y,uv1,uv2;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  py = src[0];
  py2 = src[0] + (coded_picture_width<<1);
  pu = src[1];
  pv = src[2];
#ifdef _WIN32
  /* bottom-up output: start on the last line */
  dst += (coded_picture_height-1)*coded_picture_width;
  dst2 = dst - coded_picture_width;
#else
  dst2 = dst + coded_picture_width;
#endif

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2];
      uv2 = uvtab[uv2+2056];
      *dst++  = ytab[((y)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+12)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+8)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+4)<<4)|(uv2&15)];

      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1028];
      uv2 = uvtab[uv2+3072];
      *dst++  = ytab[((y+2)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+14)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+10)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+6)<<4)|(uv2&15)];
    }

    py += coded_picture_width;

    /* before the last pair of lines py2 steps back instead of forward */
    if (j!=(coded_picture_height-4))
      py2 += coded_picture_width;
    else
      py2 -= coded_picture_width;

#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif

    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }
    else
    {
      pu += chrom_width;
      pv += chrom_width;
    }

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1542];
      uv2 = uvtab[uv2+3598];
      *dst++  = ytab[((y+3)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+15)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+11)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+7)<<4)|(uv2&15)];

      y = *py++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+514];
      uv2 = uvtab[uv2+2570];
      *dst++  = ytab[((y+1)<<4)|(uv1&15)];
      *dst2++ = ytab[((((y + *py2++)>>1)+13)<<4)|(uv2>>4)];

      y = *py++;
      *dst++  = ytab[((y+9)<<4)|(uv1>>4)];
      *dst2++ = ytab[((((y + *py2++)>>1)+5)<<4)|(uv2&15)];
    }

    py += coded_picture_width;
    py2 += coded_picture_width;
#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif
    pu += chrom_width;
    pv += chrom_width;
  }
}

/*
 * ditherbot (disabled variant) - dither the field starting on source line 1
 * into a full-height picture at dst
 *
 * Each pass writes one line averaged from py and py2 followed by one line
 * copied from py2; every averaged pixel, including the second one of each
 * group, uses ((*py + y2)>>1) + threshold.  Chroma goes through uvtab (one
 * lookup per two pixels and output line, no CHROMA444 branch) and the final
 * value comes straight from ytab indexed by ((luma+threshold)<<4)|nibble.
 *
 * NOTE(review): the uvtab offset 3072 used below is not a multiple of 257,
 * unlike every other offset in this disabled code; the disabled ditherframe
 * uses 3084 at the corresponding position.
 */
static void ditherbot(src,dst)
unsigned char *src[];
unsigned char *dst;
{
  int i,j;
  unsigned int y2,uv1,uv2;
  unsigned char *py,*py2,*pu,*pv,*dst2;

  /* both luma pointers start on source line 1, chroma one line down */
  py = src[0] + coded_picture_width;
  py2 = py;
  pu = src[1] + chrom_width;
  pv = src[2] + chrom_width;
#ifdef _WIN32
  /* bottom-up output: start on the last line */
  dst += (coded_picture_height-1)*coded_picture_width;
  dst2 = dst - coded_picture_width;
#else
  dst2 = dst + coded_picture_width;
#endif

  for (j=0; j<coded_picture_height; j+=4)
  {
    /* line j + 0, j + 1 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2];
      uv2 = uvtab[uv2+2056];
      *dst++  = ytab[((((*py++ + y2)>>1))<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+12)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+8)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+4)<<4)|(uv2&15)];

      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1028];
      uv2 = uvtab[uv2+3072];
      *dst++  = ytab[((((*py++ + y2)>>1)+2)<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+14)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+10)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+6)<<4)|(uv2&15)];
    }

    /* after the very first pass py goes back to source line 1 */
    if (j==0)
      py -= coded_picture_width;
    else
      py += coded_picture_width;

    py2 += coded_picture_width;
#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif

    if (chroma_format==CHROMA420)
    {
      pu -= chrom_width;
      pv -= chrom_width;
    }
    else
    {
      pu += chrom_width;
      pv += chrom_width;
    }

    /* line j + 2, j + 3 */
    for (i=0; i<coded_picture_width; i+=4)
    {
      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+1542];
      uv2 = uvtab[uv2+3598];
      *dst++  = ytab[((((*py++ + y2)>>1)+3)<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+15)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+11)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+7)<<4)|(uv2&15)];

      y2 = *py2++;
      uv2 = (*pu++<<8)|*pv++;
      uv1 = uvtab[uv2+514];
      uv2 = uvtab[uv2+2570];
      *dst++  = ytab[((((*py++ + y2)>>1)+1)<<4)|(uv1&15)];
      *dst2++ = ytab[((y2+13)<<4)|(uv2>>4)];

      y2 = *py2++;
      *dst++  = ytab[((((*py++ + y2)>>1)+9)<<4)|(uv1>>4)];
      *dst2++ = ytab[((y2+5)<<4)|(uv2&15)];
    }

    py += coded_picture_width;
    py2 += coded_picture_width;
#ifdef _WIN32
    dst  = dst2 - 2*coded_picture_width;
    dst2 = dst  - coded_picture_width;
#else
    dst += coded_picture_width;
    dst2+= coded_picture_width;
#endif
    pu += chrom_width;
    pv += chrom_width;
  }
}
/*
 * init_dither (disabled variant) - select the conversion routines, allocate
 * both output buffers and build the tables for the uvtab-based routines
 *
 * bpp: bits per output pixel; only 8 is handled, any other value is
 *      reported through error().
 *
 * NOTE(review): the ytab loop below writes ytab[16*i+j] for i up to 256+15,
 * i.e. up to index 4351, while ytab is declared with 256+16 entries at the
 * top of the file.
 */
void init_dither(int bpp)
{
  int i, j, v;
  unsigned char ctab[256+32];   /* chroma sample (+threshold) -> level 0..3 */

  if ( bpp==8 )
  {
     dither_frame=ditherframe;
     dither_even =dithertop;
     dither_odd  =ditherbot;
  }
  else
     error("unsuported dither type");

  bpp/=8;
  if(!(dithered_image = (unsigned char *)malloc(bpp*coded_picture_width*
                                                coded_picture_height)))
    error("malloc failed");

  if(!(dithered_image2 = (unsigned char *)malloc(bpp*coded_picture_width*
                                                 coded_picture_height)))
    error("malloc failed");

  for (i=0; i<256; i++)
     pixel[i]=i;

  /* 16 consecutive ytab entries per luma value: luma level (2..14) in the
   * high nibble, the chroma nibble j passed through in the low nibble
   */
  for (i=0; i<256+16; i++)
  {
    v = (i-8)>>4;
    if (v<2)
      v = 2;
    else if (v>14)
      v = 14;
    for (j=0; j<16; j++)
      ytab[16*i+j] = pixel[(v<<4)+j];
  }

  for (i=0; i<256+32; i++)
  {
    v = (i+48-128)>>5;
    if (v<0)
      v = 0;
    else if (v>3)
      v = 3;
    ctab[i] = v;
  }

  /* low nibble: levels for (i,j); high nibble: levels for (i+16,j+16) */
  for (i=0; i<255+15; i++)
    for (j=0; j<255+15; j++)
      uvtab[256*i+j]=(ctab[i+16]<<6)|(ctab[j+16]<<4)|(ctab[i]<<2)|ctab[j];

}
#endif
